Skip to content

Commit 9fad971

Browse files
adds get_processing_option to makerules.py
1 parent 9679397 commit 9fad971

File tree

6 files changed

+181
-40
lines changed

6 files changed

+181
-40
lines changed

digital_land/cli.py

+46-7
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,47 @@ def collection_list_resources_cmd(collection_dir):
111111
short_help="generate makerules for processing a collection",
112112
)
113113
@collection_dir
114-
def collection_pipeline_makerules_cmd(collection_dir):
115-
return collection_pipeline_makerules(collection_dir)
114+
@click.option(
115+
"--specification-dir",
116+
type=click.Path(),
117+
default="specification",
118+
help="directory containing the specification",
119+
)
120+
@click.option(
121+
"--pipeline-dir",
122+
type=click.Path(),
123+
default="pipeline",
124+
help="directory containing the pipeline",
125+
)
126+
@click.option(
127+
"--resource-dir",
128+
type=click.Path(),
129+
default="collection/resource",
130+
help="directory containing resources",
131+
)
132+
@click.option("--incremental-loading-override", type=click.BOOL, default=False)
133+
@click.option(
134+
"--state-path",
135+
type=click.Path(),
136+
default=None,
137+
help="path of the output state file",
138+
)
139+
def collection_pipeline_makerules_cmd(
140+
collection_dir,
141+
specification_dir,
142+
pipeline_dir,
143+
resource_dir,
144+
incremental_loading_override,
145+
state_path,
146+
):
147+
return collection_pipeline_makerules(
148+
collection_dir,
149+
specification_dir,
150+
pipeline_dir,
151+
resource_dir,
152+
incremental_loading_override,
153+
state_path,
154+
)
116155

117156

118157
@cli.command("collection-save-csv", short_help="save collection as CSV package")
@@ -661,15 +700,15 @@ def save_state_cmd(
661700
collection_dir,
662701
pipeline_dir,
663702
resource_dir,
664-
incremental_override,
703+
incremental_loading_override,
665704
output_path,
666705
):
667706
save_state(
668707
specification_dir,
669708
collection_dir,
670709
pipeline_dir,
671710
resource_dir,
672-
incremental_override,
711+
incremental_loading_override,
673712
output_path,
674713
)
675714

@@ -714,14 +753,14 @@ def check_state_cmd(
714753
collection_dir,
715754
pipeline_dir,
716755
resource_dir,
717-
incremental_override,
756+
incremental_loading_override,
718757
state_path,
719758
):
720759
# If the state isn't the same, use a non-zero return code so scripts can
721760
# detect this, and print a message. If it is the same, exit silenty wirh a
722761
# 0 retun code.
723762

724-
if incremental_override:
763+
if incremental_loading_override:
725764
print("State comparison skipped as incremental override enabled")
726765
sys.exit(1)
727766

@@ -730,7 +769,7 @@ def check_state_cmd(
730769
collection_dir,
731770
pipeline_dir,
732771
resource_dir,
733-
incremental_override,
772+
incremental_loading_override,
734773
state_path,
735774
)
736775
if diffs:

digital_land/collection.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -412,8 +412,22 @@ def resource_organisations(self, resource):
412412
def resource_path(self, resource):
413413
return resource_path(resource, self.dir)
414414

415-
def pipeline_makerules(self):
416-
pipeline_makerules(self)
415+
def pipeline_makerules(
416+
self,
417+
specification_dir,
418+
pipeline_dir,
419+
resource_dir,
420+
incremental_loading_override,
421+
state_path,
422+
):
423+
pipeline_makerules(
424+
self,
425+
specification_dir,
426+
pipeline_dir,
427+
resource_dir,
428+
incremental_loading_override,
429+
state_path,
430+
)
417431

418432
def dataset_resource_map(self):
419433
"a map of resources needed by each dataset in a collection"

digital_land/commands.py

+21-8
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,23 @@ def collection_list_resources(collection_dir):
103103
print(resource_path(resource, directory=collection_dir))
104104

105105

106-
def collection_pipeline_makerules(collection_dir):
106+
def collection_pipeline_makerules(
107+
collection_dir,
108+
specification_dir,
109+
pipeline_dir,
110+
resource_dir,
111+
incremental_loading_override,
112+
state_path,
113+
):
107114
collection = Collection(name=None, directory=collection_dir)
108115
collection.load()
109-
collection.pipeline_makerules()
116+
collection.pipeline_makerules(
117+
specification_dir,
118+
pipeline_dir,
119+
resource_dir,
120+
incremental_loading_override,
121+
state_path,
122+
)
110123

111124

112125
def collection_save_csv(collection_dir, refill_todays_logs=False):
@@ -1476,15 +1489,15 @@ def save_state(
14761489
collection_dir,
14771490
pipeline_dir,
14781491
resource_dir,
1479-
incremental_override,
1492+
incremental_loading_override,
14801493
output_path,
14811494
):
14821495
state = State.build(
14831496
specification_dir=specification_dir,
14841497
collection_dir=collection_dir,
14851498
pipeline_dir=pipeline_dir,
14861499
resource_dir=resource_dir,
1487-
incremental_override=incremental_override,
1500+
incremental_loading_override=incremental_loading_override,
14881501
)
14891502
state.save(
14901503
output_path=output_path,
@@ -1496,7 +1509,7 @@ def compare_state(
14961509
collection_dir,
14971510
pipeline_dir,
14981511
resource_dir,
1499-
incremental_override,
1512+
incremental_loading_override,
15001513
state_path,
15011514
):
15021515
"""Compares the current state against the one in state_path.
@@ -1506,14 +1519,14 @@ def compare_state(
15061519
collection_dir=collection_dir,
15071520
pipeline_dir=pipeline_dir,
15081521
resource_dir=resource_dir,
1509-
incremental_override=incremental_override,
1522+
incremental_loading_override=incremental_loading_override,
15101523
)
15111524
# in here current incremental override must be false
15121525

15131526
compare = State.load(state_path)
15141527
# we don't want to include whether the previous state was an incremental override in comparison
1515-
current.pop("incremental_override", None)
1516-
compare.pop("incremental_override", None)
1528+
current.pop("incremental_loading_override", None)
1529+
compare.pop("incremental_loading_override", None)
15171530

15181531
if current == compare:
15191532
return None

digital_land/makerules.py

+88-13
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,16 @@
66
#
77

88

9+
from digital_land.commands import compare_state
10+
from enum import Enum
11+
12+
13+
class ProcessingOption(Enum):
14+
PROCESS_ALL = "all"
15+
PROCESS_PARTIAL = "partial"
16+
PROCESS_NONE = "none"
17+
18+
919
def transformed_path(resource, dataset):
1020
return "$(TRANSFORMED_DIR)" + dataset + "/" + resource + ".csv"
1121

@@ -14,8 +24,62 @@ def dataset_path(dataset):
1424
return "$(DATASET_DIR)" + dataset + ".csv"
1525

1626

17-
def pipeline_makerules(collection):
27+
def get_processing_option(
28+
collection,
29+
specification_dir,
30+
pipeline_dir,
31+
resource_dir,
32+
incremental_loading_override,
33+
state_path,
34+
):
35+
# If there's no previous state, process everything
36+
if not state_path:
37+
return ProcessingOption.PROCESS_ALL
38+
39+
# Compare current state with the previous state
40+
diffs = compare_state(
41+
specification_dir,
42+
collection.dir,
43+
pipeline_dir,
44+
resource_dir,
45+
incremental_loading_override,
46+
state_path,
47+
)
48+
49+
# If incremental loading is overridden or critical configs changed, process everything
50+
critical_changes = {"code", "pipeline", "collection", "specification"}
51+
if incremental_loading_override or critical_changes & set(diffs):
52+
return ProcessingOption.PROCESS_ALL
53+
54+
# New resources downloaded
55+
if "resource" in diffs:
56+
return ProcessingOption.PROCESS_ALL # To be changed to partial in the future
57+
58+
if not diffs:
59+
return ProcessingOption.PROCESS_NONE
60+
61+
# If there are diffs we don't recognise then play safe and reprocess everything
62+
return ProcessingOption.PROCESS_ALL
63+
64+
65+
def pipeline_makerules(
66+
collection,
67+
specification_dir,
68+
pipeline_dir,
69+
resource_dir,
70+
incremental_loading_override,
71+
state_path,
72+
):
1873
dataset_resource = collection.dataset_resource_map()
74+
process = get_processing_option(
75+
collection,
76+
specification_dir,
77+
pipeline_dir,
78+
resource_dir,
79+
incremental_loading_override,
80+
state_path,
81+
)
82+
1983
redirect = {}
2084
for entry in collection.old_resource.entries:
2185
redirect[entry["old-resource"]] = entry["resource"]
@@ -35,6 +99,15 @@ def pipeline_makerules(collection):
3599
print("\\\n %s" % (transformed_path(resource, dataset)), end="")
36100
print()
37101

102+
if process == ProcessingOption.PROCESS_NONE:
103+
print("\n$(%s)::" % dataset_var)
104+
print("\techo skipping dataset")
105+
print("\ntransformed::")
106+
print("\techo skipping transformed")
107+
print("\ndataset::")
108+
print("\techo skipping dataset")
109+
continue # Skip the rest of the loop for this dataset
110+
38111
for resource in sorted(dataset_resource[dataset]):
39112
old_resource = resource
40113
resource = redirect.get(resource, resource)
@@ -55,20 +128,22 @@ def pipeline_makerules(collection):
55128
resource_path,
56129
)
57130
)
58-
call_pipeline = (
59-
"\t$(call run-pipeline,"
60-
+ f" --endpoints '{endpoints}'"
61-
+ f" --organisations '{organisations}'"
62-
+ f" --entry-date '{entry_date}'"
63-
)
64-
# we will include the resource arguement if the old resource
65-
# is different so it's processed as the old_resource
66-
if resource != old_resource:
67-
call_pipeline = call_pipeline + f" --resource '{old_resource}'"
68131

69-
call_pipeline = call_pipeline + " )"
132+
if process == ProcessingOption.PROCESS_ALL:
133+
call_pipeline = (
134+
"\t$(call run-pipeline,"
135+
+ " --endpoints '%s'" % endpoints
136+
+ " --organisations '%s'" % organisations
137+
+ " --entry-date '%s'" % entry_date
138+
)
139+
# we will include the resource arguement if the old resource
140+
# is different so it's processed as the old_resource
141+
if resource != old_resource:
142+
call_pipeline = call_pipeline + f" --resource '{old_resource}'"
143+
144+
call_pipeline = call_pipeline + " )"
70145

71-
print(call_pipeline)
146+
print(call_pipeline)
72147

73148
print("\n$(%s): $(%s)" % (dataset_var, dataset_files_var))
74149
print("\t$(build-dataset)")

digital_land/state.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def build(
1717
collection_dir,
1818
pipeline_dir,
1919
resource_dir,
20-
incremental_override,
20+
incremental_loading_override,
2121
):
2222
"""Build a state object from the current configuration and code"""
2323
return State(
@@ -29,7 +29,7 @@ def build(
2929
),
3030
"resource": State.get_dir_hash(resource_dir),
3131
"pipeline": State.get_dir_hash(pipeline_dir),
32-
"incremental_override": incremental_override,
32+
"incremental_loading_override": incremental_loading_override,
3333
}
3434
)
3535

0 commit comments

Comments
 (0)