Skip to content
This repository was archived by the owner on Jan 5, 2019. It is now read-only.

Commit 5e41a8f

Browse files
committed
Merge branch 'tag-1.4'
2 parents 69d6368 + ad4a1ef commit 5e41a8f

File tree

4 files changed

+70
-19
lines changed

4 files changed

+70
-19
lines changed

Gemfile

+3-1
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ gem "pdf-reader"
6969
# to generate sitemap for google scholar et al
7070
gem 'sitemap', github: 'ualbertalib/rails-sitemap'
7171

72+
# to fetch noids from Fedora for the reindex job
73+
gem 'rest-client'
74+
7275
gem 'noid', '~> 0.8'
7376

7477
group :test do
@@ -96,5 +99,4 @@ group :development, :test do
9699
gem "better_errors"
97100
gem "binding_of_caller"
98101
gem 'ruby-prof'
99-
gem 'rest-client'
100102
end

README.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,8 @@ Audit Fix
138138
A set of rake tasks is also added for index jobs:
139139
* ```rake hydranorth:solr:index[id]``` Index a single object with ID
140140
* ```rake hydranorth:solr:index_pairtree[input]``` Index with a pairtree structure
141-
* ```rake hydranorth:solr:update_generic_file_index``` Index all Generic File objects
141+
* ```rake hydranorth:solr:batch_index[directory|file]``` Index every object whose noid is listed in the given file(s), one noid per line; the input is usually a Solr CSV export that contains only noids.
142+
* ```rake hydranorth:solr:reindex_all``` Complete reindex of the repository
142143

143144
A shell script will update namespace uris
144145
* ```/bin/fix/fix.rb``` updates all the namespace URIs. Before using it, replace @server with the Fedora server location.

lib/tasks/hydranorth-solr.rake

+65-12
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,29 @@ require 'rdf/turtle'
66
namespace :hydranorth do
77
namespace :solr do
88

9+
desc "Index objects by batch files"
10+
task :batch_index, [:batch_dir] => :environment do |t, args|
11+
batch_dir = args[:batch_dir]
12+
raise "Please provide a directory where the batch files are located" if batch_dir.nil?
13+
RakeLogger.info "Run through all the files in #{batch_dir}"
14+
Dir.glob(batch_dir+"*").each do |f|
15+
RakeLogger.info "Currently working on file #{f}"
16+
start = Time.now
17+
File.open(f, 'r+').each_line do |l|
18+
noid = l.strip
19+
RakeLogger.info "Currently working on #{noid}"
20+
begin
21+
ActiveFedora::Base.find(noid).update_index
22+
rescue Exception => e
23+
RakeLogger.error "ERROR: #{noid} with #{e.message}"
24+
end
25+
end
26+
finish = Time.now
27+
used_time = finish - start
28+
RakeLogger.info "This file #{f} used #{used_time}"
29+
end
30+
end
31+
932
desc "Index a single object in solr"
1033
task :index, [:id] => :environment do |t, args|
1134
id = args[:id]
@@ -17,37 +40,64 @@ namespace :hydranorth do
1740
RakeLogger.info "reindexed #{id} used #{used_time}"
1841
end
1942

20-
desc "update the index on all GenericFiles"
21-
task update_generic_file_index: :environment do
22-
GenericFile.all.each(&:update_index)
23-
end
24-
2543
desc "Index with a pairtree"
2644
task "index_pairtree", [:input] => :environment do |t, args|
2745
input = args[:input]
2846
RakeLogger.info "***********START index_pairtree***************"
2947
read_config
48+
RakeLogger.info "reindex #{input}"
49+
index_pairtree(input)
50+
RakeLogger.info "***********FINISH index_pairtree**************"
51+
end
52+
53+
desc "Complete Reindex"
54+
task "reindex_all" => :environment do |t, args|
55+
RakeLogger.info "***********START reindex *********************"
56+
read_config
3057
start = Time.now
31-
objects = find_objects(input)
32-
objects.each do |o|
33-
index_single(o)
34-
end
58+
count = index_all_objects
3559
finish = Time.now
36-
used_time = finish - start
37-
RakeLogger.info "Indexed #{objects.size} objects, used #{used_time}"
60+
used_time = finish-start
61+
RakeLogger.info "A Complete Reindex of #{count} objects, used #{used_time}"
3862
RakeLogger.info "***********FINISH index_pairtree**************"
3963
end
4064

4165
def read_config
4266
rails_env = Rails.env
43-
67+
4468
config = YAML.load_file("config/fedora.yml")
4569
@user = config[rails_env]['user']
4670
@password = config[rails_env]['password']
4771
@rest = config[rails_env]['url']
4872
@base_path = config[rails_env]['base_path']
4973
end
5074

75+
def index_all_objects
76+
count = 0
77+
[(0..9),('a'..'z')].map {|i| i.to_a}.flatten.each do |a|
78+
[(0..9),('a'..'z')].map {|i| i.to_a}.flatten.each do |b|
79+
pairtree = a.to_s + b.to_s
80+
number_reindexed = index_pairtree(pairtree)
81+
count = count + number_reindexed
82+
end
83+
end
84+
return count
85+
end
86+
87+
def index_pairtree(pairtree)
88+
RakeLogger.info "Start to reindex all objects starting with #{pairtree}"
89+
start = Time.now
90+
objects = find_objects(pairtree)
91+
RakeLogger.info "Reindex #{objects.size} objects"
92+
objects.each do |o|
93+
index_single(o)
94+
end
95+
finish = Time.now
96+
used_time = finish-start
97+
RakeLogger.info "Indexed #{objects.size} objects that starts with #{pairtree}, used #{used_time} seconds"
98+
return objects.size
99+
end
100+
51101
def find_objects(input)
52102
require 'rest-client'
53103
objects=[]
@@ -84,6 +134,7 @@ namespace :hydranorth do
84134
end
85135

86136
def index_single(id)
137+
RakeLogger.info "start reindexing #{id}"
87138
start = Time.now
88139
ActiveFedora::Base.find(id).update_index
89140
finish = Time.now
@@ -92,3 +143,5 @@ namespace :hydranorth do
92143
end
93144
end
94145
end
146+
147+

lib/tasks/hydranorth.rake

-5
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,6 @@ namespace :hydranorth do
7979
end
8080

8181

82-
desc "Re-solrize all objects"
83-
task resolrize: :environment do
84-
Sufia.queue.push(ResolrizeJob.new)
85-
end
86-
8782
namespace :export do
8883
desc "Dump metadata as RDF/XML for e.g. Summon integration"
8984
task rdfxml: :environment do

0 commit comments

Comments
 (0)