@@ -51,18 +51,23 @@ def log_path(self, log_datetime, endpoint):
         log_date = log_datetime.isoformat()[:10]
         return os.path.join(self.log_dir, log_date, endpoint + ".json")
 
-    def save_log(self, path, log):
-        self.save(path, canonicaljson.encode_canonical_json(log))
+    def save_log(self, path, log, refill_todays_logs=False):
+        self.save(
+            path,
+            canonicaljson.encode_canonical_json(log),
+            refill_todays_logs=refill_todays_logs,
+        )
 
     def save_content(self, content):
         resource = hashlib.sha256(content).hexdigest()
         path = os.path.join(self.resource_dir, resource)
         self.save(path, content)
         return resource
 
-    def save(self, path, data):
+    def save(self, path, data, refill_todays_logs=False):
         os.makedirs(os.path.dirname(path), exist_ok=True)
-        if not os.path.exists(path):
+        # if refill_todays_logs is True, existing files at log_path are overwritten
+        if not os.path.exists(path) or refill_todays_logs:
             logging.info(path)
             with open(path, "wb") as f:
                 f.write(data)
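The effect of the new flag on save is to switch from write-once to overwrite semantics. A minimal runnable sketch of that guard in isolation (the free function below is hypothetical, standing in for the method on the collector class above):

    import os
    import tempfile

    # hypothetical free-function version of the guarded save, for illustration only
    def save(path, data, refill_todays_logs=False):
        os.makedirs(os.path.dirname(path), exist_ok=True)
        # write-once by default; overwrite when refilling today's logs
        if not os.path.exists(path) or refill_todays_logs:
            with open(path, "wb") as f:
                f.write(data)

    with tempfile.TemporaryDirectory() as tmp:
        path = os.path.join(tmp, "2024-01-01", "endpoint.json")
        save(path, b"first")
        save(path, b"second")                           # no-op: the file already exists
        save(path, b"second", refill_todays_logs=True)  # refill forces the overwrite
        assert open(path, "rb").read() == b"second"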
@@ -126,6 +131,7 @@ def fetch(
         log_datetime=datetime.utcnow(),
         end_date="",
         plugin="",
+        refill_todays_logs=False,
     ):
         if end_date and datetime.strptime(end_date, "%Y-%m-%d") < log_datetime:
             return FetchStatus.EXPIRED
@@ -139,11 +145,12 @@ def fetch(
             )
             return FetchStatus.HASH_FAILURE
 
-        # fetch each source at most once per-day
+        # fetch each source at most once per day, with an option to re-collect the latest day's sources
         log_path = self.log_path(log_datetime, endpoint)
-        if os.path.isfile(log_path):
-            logging.debug(f"{log_path} exists")
-            return FetchStatus.ALREADY_FETCHED
+        if not refill_todays_logs:
+            if os.path.isfile(log_path):
+                logging.debug(f"{log_path} exists")
+                return FetchStatus.ALREADY_FETCHED
 
         log = {
             "endpoint-url": url,
@@ -167,8 +174,7 @@ def fetch(
         log["elapsed"] = str(round(timer() - start, 3))
 
         status = self.save_resource(content, log_path, log)
-
-        self.save_log(log_path, log)
+        self.save_log(log_path, log, refill_todays_logs=refill_todays_logs)
         return status
 
     def save_resource(self, content, url, log):
@@ -182,7 +188,7 @@ def save_resource(self, content, url, log):
 
         return FetchStatus.FAILED
 
-    def collect(self, endpoint_path):
+    def collect(self, endpoint_path, refill_todays_logs=False):
         for row in csv.DictReader(open(endpoint_path, newline="")):
             endpoint = row["endpoint"]
             url = row["endpoint-url"]
@@ -197,4 +203,5 @@ def collect(self, endpoint_path):
                 endpoint=endpoint,
                 end_date=row.get("end-date", ""),
                 plugin=plugin,
+                refill_todays_logs=refill_todays_logs,
             )
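End to end, the flag threads collect → fetch → save_log → save. A hedged usage sketch; the Collector name and its constructor arguments are assumptions, only the method signatures come from this diff:

    # Collector and its constructor arguments are assumed for illustration
    collector = Collector(log_dir="collection/log", resource_dir="collection/resource")

    # default behaviour: each endpoint is fetched at most once per day
    collector.collect("collection/endpoint.csv")

    # re-collect today's endpoints, overwriting today's log files
    collector.collect("collection/endpoint.csv", refill_todays_logs=True)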