@@ -6,6 +6,29 @@ require 'rdf/turtle'
6
6
namespace :hydranorth do
7
7
namespace :solr do
8
8
9
# Reindexes every object listed in the batch files found in a directory.
#
# Each regular file directly inside +batch_dir+ is read line by line. Every
# non-blank line is stripped and handed to ActiveFedora::Base.find, and the
# returned object's index is updated. A failure on one identifier is logged
# and the task moves on to the next line.
#
# Usage: rake hydranorth:solr:batch_index[/path/to/batch_dir]
#
# Raises RuntimeError when no directory argument is given.
desc "Index objects by batch files"
task :batch_index, [:batch_dir] => :environment do |_t, args|
  batch_dir = args[:batch_dir]
  raise "Please provide a directory where the batch files are located" if batch_dir.nil?

  RakeLogger.info "Run through all the files in #{batch_dir} "

  # File.join keeps the glob inside the directory whether or not the argument
  # ends with a slash; sub-directories are skipped because they cannot be read
  # as batch files.
  batch_files = Dir.glob(File.join(batch_dir, "*")).select { |path| File.file?(path) }

  batch_files.each do |f|
    RakeLogger.info "Currently working on file #{f} "
    start = Time.now

    # File.foreach opens the file read-only and closes it when the block ends.
    File.foreach(f) do |l|
      noid = l.strip
      next if noid.empty? # ignore blank lines instead of looking up an empty id

      RakeLogger.info "Currently working on #{noid} "
      begin
        ActiveFedora::Base.find(noid).update_index
      rescue StandardError => e
        # StandardError (not Exception) so Ctrl-C and exit still stop the task.
        RakeLogger.error "ERROR: #{noid} with #{e.message} "
      end
    end

    finish = Time.now
    used_time = finish - start
    RakeLogger.info "This file #{f} used #{used_time} "
  end
end
31
+
9
32
desc "Index a single object in solr"
10
33
task :index , [ :id ] => :environment do |t , args |
11
34
id = args [ :id ]
@@ -17,37 +40,64 @@ namespace :hydranorth do
17
40
RakeLogger . info "reindexed #{ id } used #{ used_time } "
18
41
end
19
42
20
- desc "update the index on all GenericFiles"
21
- task update_generic_file_index : :environment do
22
- GenericFile . all . each ( &:update_index )
23
- end
24
-
25
43
# Rake entry point: loads the Fedora settings via read_config, then passes the
# :input task argument to the index_pairtree helper, logging before and after.
desc "Index with a pairtree"
task :index_pairtree, [:input] => :environment do |_task, arguments|
  prefix = arguments[:input]

  RakeLogger.info "***********START index_pairtree***************"
  read_config
  RakeLogger.info "reindex #{prefix} "
  index_pairtree(prefix)
  RakeLogger.info "***********FINISH index_pairtree**************"
end
52
+
53
# Rake entry point for a complete reindex: loads the Fedora settings via
# read_config, runs index_all_objects, and logs the number of objects it
# reported together with the elapsed time.
desc "Complete Reindex"
task "reindex_all" => :environment do |_t, _args|
  RakeLogger.info "***********START reindex *********************"
  read_config
  start = Time.now
  count = index_all_objects
  finish = Time.now
  used_time = finish - start
  RakeLogger.info "A Complete Reindex of #{count} objects, used #{used_time} "
  # The closing banner names this task; it previously said "index_pairtree",
  # which made the log look as if a different task had finished.
  RakeLogger.info "***********FINISH reindex ********************"
end
40
64
41
65
# Reads config/fedora.yml (relative to the current working directory) and
# copies the entry for the current Rails environment into instance variables:
# @user, @password, @rest (taken from the 'url' key) and @base_path.
def read_config
  environment = Rails.env
  settings = YAML.load_file("config/fedora.yml")[environment]

  @user, @password, @rest, @base_path =
    %w[user password url base_path].map { |key| settings[key] }
end
50
74
75
# Walks every two-character prefix built from the digits 0-9 and the letters
# a-z ("00", "01", ... "zz"), calling index_pairtree on each one in that order.
#
# Returns the sum of the values returned by index_pairtree.
def index_all_objects
  symbols = ('0'..'9').to_a + ('a'..'z').to_a

  symbols.product(symbols).inject(0) do |total, (first, second)|
    total + index_pairtree(first + second)
  end
end
86
+
87
# Reindexes every object that find_objects returns for the given prefix by
# passing each one to index_single, logging progress and the elapsed time.
#
# pairtree - prefix handed unchanged to find_objects.
#
# Returns the number of objects find_objects returned.
def index_pairtree(pairtree)
  RakeLogger.info "Start to reindex all objects starting with #{pairtree} "
  began_at = Time.now

  matches = find_objects(pairtree)
  RakeLogger.info "Reindex #{matches.size} objects"
  matches.each { |identifier| index_single(identifier) }

  elapsed = Time.now - began_at
  RakeLogger.info "Indexed #{matches.size} objects that starts with #{pairtree} , used #{elapsed} seconds"
  matches.size
end
100
+
51
101
def find_objects ( input )
52
102
require 'rest-client'
53
103
objects = [ ]
@@ -84,6 +134,7 @@ namespace :hydranorth do
84
134
end
85
135
86
136
def index_single ( id )
137
+ RakeLogger . info "start reindexing #{ id } "
87
138
start = Time . now
88
139
ActiveFedora ::Base . find ( id ) . update_index
89
140
finish = Time . now
@@ -92,3 +143,5 @@ namespace :hydranorth do
92
143
end
93
144
end
94
145
end
146
+
147
+
0 commit comments