Skip to content
This repository was archived by the owner on Sep 12, 2019. It is now read-only.

WIP: Zenodo importer. #21

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ gem 'jquery-rails'

# Build JSON APIs with ease. Read more: https://github.com/rails/jbuilder
gem 'pry'

gem 'pry-rails'
# Use ActiveModel has_secure_password
# gem 'bcrypt-ruby', '~> 3.1.2'

Expand Down
3 changes: 3 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ GEM
coderay (~> 1.1.0)
method_source (~> 0.8.1)
slop (~> 3.4)
pry-rails (0.3.4)
pry (>= 0.9.10)
rack (1.6.4)
rack-cors (0.3.1)
rack-test (0.6.3)
Expand Down Expand Up @@ -279,6 +281,7 @@ DEPENDENCIES
open_uri_redirections
pg
pry
pry-rails
rack-cors
rails (= 4.2.3)
rails-api
Expand Down
6 changes: 4 additions & 2 deletions app/models/tool.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class Tool < ActiveRecord::Base
after_save :invalidate_cache
has_and_belongs_to_many :users
has_many :citations
has_many :tool_versions
validates_uniqueness_of :url
validates_presence_of :url
validates_presence_of :name
Expand Down Expand Up @@ -54,8 +55,9 @@ def check_health
when :github
contents = JSON.parse RestClient.get "https://api.github.com/repos/#{repo_name}/contents",
{:params =>
{:client_id => ENV['ST_GITHUB_CLIENT_ID'],
'client_secret' => ENV['ST_GITHUB_CLIENT_SECRET']
{
:client_id => ENV["GITHUB_CLIENT_ID"],

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the new Ruby 1.9 hash syntax.

:client_secret => ENV["GITHUB_CLIENT_SECRET"]

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the new Ruby 1.9 hash syntax.

}
}
path_key = 'name'
Expand Down
3 changes: 3 additions & 0 deletions app/models/tool_version.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# ActiveRecord model declaring a belongs_to association to :tool.
class ToolVersion < ActiveRecord::Base
  # Each version row points at its owning record through the :tool association.
  belongs_to :tool
end
10 changes: 10 additions & 0 deletions db/migrate/20150705173334_create_tool_versions.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Migration that creates the tool_versions table with an integer tool_id
# column, a string url column and non-null timestamp columns.
class CreateToolVersions < ActiveRecord::Migration
  def change
    create_table :tool_versions do |table|
      table.integer :tool_id
      table.string :url

      # null: false makes the timestamp columns NOT NULL.
      table.timestamps null: false
    end
  end
end
9 changes: 8 additions & 1 deletion db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema.define(version: 20150107134549) do
ActiveRecord::Schema.define(version: 20150705173334) do

# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
Expand Down Expand Up @@ -49,6 +49,13 @@

add_index "tags", ["name"], name: "index_tags_on_name", unique: true, using: :btree

create_table "tool_versions", force: :cascade do |t|
t.integer "tool_id"
t.string "url"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end

create_table "tools", force: :cascade do |t|
t.string "url"
t.string "doi"
Expand Down
18 changes: 15 additions & 3 deletions lib/importer/repository.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
module Importer
class Repository
attr_reader :repo

def self.process(text, source)
repos = text.scan(/(?:https?\:\/\/)(#{source})\/
([^),.\/]+)\/([^,\s)(\/]+)\/?([^ )]*)/x)
Expand All @@ -10,9 +12,19 @@ def self.process(text, source)
end

def initialize(repo)
@source = repo[0]
@username = repo[1].gsub(/\p{Z}/, "")
@repository_name = repo[2].gsub(/\.$/, "").gsub(/[\p{Z}​​]/, "")
@repo = repo
end

# Second element of the repo tuple with every Unicode separator character
# (\p{Z}) removed.
def username
  owner = repo[1]
  owner.gsub(/\p{Z}/, "")
end

# First element of the repo tuple.
def source
  repo.first
end

# Third element of the repo tuple, cleaned up in two passes: first a "." that
# sits at the end of a line is removed, then every character matched by the
# character class below is removed.
# NOTE(review): besides \p{Z} the character class contains invisible
# characters (they look like zero-width spaces) — confirm the exact code points
# and consider spelling them as explicit \u escapes.
def repository_name
repo[2].gsub(/\.$/, "").gsub(/[\p{Z}​​]/, "")
end

def process
Expand Down
76 changes: 76 additions & 0 deletions lib/importer/zenodo.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
module Importer
  # Harvests the "software" set of Zenodo's OAI-PMH feed page by page and, for
  # every record that declares itself a supplement to a GitHub URL, makes sure a
  # Tool (repository URL) and a ToolVersion (exact URL) exist.
  #
  # NOTE(review): USER_AGENT is not defined in this class; it is presumably a
  # constant of the enclosing Importer namespace — confirm.
  class Zenodo
    # Namespace binding used for CSS queries against DataCite kernel-3 metadata.
    XMLNS = { xmlns: "http://datacite.org/schema/kernel-3" }.freeze

    # ListRecords endpoint; #api_url appends the paging parameters.
    BASE_URL = "https://zenodo.org/oai2d?verb=ListRecords".freeze

    # Captures the owner and repository path segments of a GitHub URL.
    GITHUB_REPO = %r{github\.com/([^/\s]+)/([^/\s]+)}

    # Seconds to pause between two page requests.
    REQUEST_DELAY = 0.6

    # Long running: convenience entry point, equivalent to +new.import+.
    def self.import
      new.import
    end

    # Fetches and processes pages until the feed is exhausted.
    #
    # The loop stops when a page carries no records or when the page carries
    # no (or an empty) resumption token. Without the second condition
    # #api_url would fall back to the initial request after the last page and
    # the harvest would start over indefinitely.
    #
    # @return [nil]
    def import
      loop do
        @response = fetch_page(api_url)
        @results = @response.css("record")
        process_results
        break if @results.empty? || resumption_token.to_s.strip.empty?

        sleep REQUEST_DELAY
      end
    end

    # Text of the resumptionToken element of the most recently fetched page.
    #
    # @return [String, nil] nil before the first page has been fetched
    def resumption_token
      @response.css("resumptionToken").text if @response
    end

    # Runs #process_result over every record of the current page.
    #
    # @return [Array] the processed records
    def process_results
      @results.map do |result|
        process_result(result)
      end
    end

    # Imports every GitHub URL the record is a supplement to.
    #
    # @param result [#css] one record node of the current page
    # @return [#css] the record that was passed in
    def process_result(result)
      relation_css = "xmlns|relatedIdentifier[relationType='IsSupplementTo']"

      result.css(relation_css, XMLNS).each do |related|
        target = related.text
        process_github_url(target) if target =~ /github\.com/
        # TODO: records supplementing something other than GitHub are skipped
        # for now. The draft intended to call
        #   process_other("http://dx.doi.org/<doi>")
        # with the record's own DOI (xmlns|identifier[identifierType='DOI']).
      end

      # TODO: citations (xmlns|relatedIdentifier[relationType='IsCitedBy'])
      # are not imported yet.

      result
    end

    # Finds or creates the Tool for the repository and the ToolVersion for the
    # exact URL.
    #
    # NOTE(review): Tool validates the presence of a name; a Tool created here
    # from a URL alone may fail validation unless a name is derived elsewhere —
    # confirm.
    #
    # @param url [String] a URL pointing somewhere inside a GitHub repository
    # @return [Object, nil] the ToolVersion, or nil when the URL contains no
    #   owner/repository pair (previously this raised NoMethodError)
    def process_github_url(url)
      parts = GITHUB_REPO.match(url)
      return unless parts

      main_url = "https://github.com/#{parts[1]}/#{parts[2]}"
      tool = Tool.where(url: main_url).first_or_create
      ToolVersion.where(url: url, tool: tool).first_or_create
    end

    # Finds or creates a Tool for a non-GitHub URL.
    #
    # @param url [String]
    # @return [Object] the Tool
    def process_other(url)
      Tool.where(url: url).first_or_create
    end

    # URL of the next page: the initial ListRecords request, or the
    # continuation request when the previous page returned a resumption token.
    #
    # @return [String]
    def api_url
      token = resumption_token.to_s.strip
      if token.empty?
        "#{BASE_URL}&metadataPrefix=oai_datacite3&set=software"
      else
        # The token is opaque server data, so it is encoded before it is
        # placed in the query string.
        "#{BASE_URL}&resumptionToken=#{URI.encode_www_form_component(token)}"
      end
    end

    private

    # Downloads one page and parses it as XML. The block form closes the
    # underlying handle as soon as the body has been read.
    #
    # @param url [String]
    # @return [Nokogiri::XML::Document]
    def fetch_page(url)
      body = URI.parse(url).open("User-Agent" => USER_AGENT, &:read)
      Nokogiri::XML(body)
    end
  end
end
5 changes: 5 additions & 0 deletions spec/models/tool_version_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
require "rails_helper"

RSpec.describe ToolVersion, type: :model do
  # Replaces the generated pending placeholder: inspects the association
  # metadata declared on the model, so no records have to be created.
  it "belongs to a tool" do
    association = described_class.reflect_on_association(:tool)

    expect(association).not_to be_nil
    expect(association.macro).to eq(:belongs_to)
  end
end