Skip to content

Commit 41a52fc

Browse files
Merge pull request #50 from OCHA-DAP/main
HDXDSYS-1301 Simplify operational presence HAPI pipeline to read from global HDX dataset
2 parents 95f7c02 + c74df0f commit 41a52fc

File tree

50 files changed

+56319
-26106
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

50 files changed

+56319
-26106
lines changed

.github/workflows/publish.yaml

+6-6
Original file line number | Diff line number | Diff line change
@@ -17,17 +17,17 @@ jobs:
1717
submodules: true
1818

1919
- name: Set up Docker Buildx
20-
uses: docker/setup-buildx-action@v2
20+
uses: docker/setup-buildx-action@v3
2121

2222
- name: Configure AWS credentials
23-
uses: aws-actions/configure-aws-credentials@v1
23+
uses: aws-actions/configure-aws-credentials@v4
2424
with:
2525
aws-access-key-id: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }}
2626
aws-secret-access-key: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }}
2727
aws-region: us-east-1
2828

2929
- name: Login to Public ECR
30-
uses: docker/login-action@v2
30+
uses: docker/login-action@v3
3131
with:
3232
registry: public.ecr.aws
3333
username: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }}
@@ -37,11 +37,11 @@ jobs:
3737

3838
- name: Create prod requirements
3939
run: |
40-
pip install --upgrade pip-tools
41-
pip-compile pyproject.toml --resolver=backtracking --upgrade -q -c requirements.txt -o prod-requirements.txt
40+
pip install --upgrade uv
41+
uv pip compile pyproject.toml --resolver=backtracking --upgrade -q -c requirements.txt -o prod-requirements.txt
4242
4343
- name: Build, tag, and push image to Amazon ECR
44-
uses: docker/build-push-action@v4
44+
uses: docker/build-push-action@v6
4545
with:
4646
context: .
4747
push: true

CHANGELOG.md

+14
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,20 @@ All notable changes to this project will be documented in this file.
44

55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
66

7+
## [0.10.33] = 2025-01-22
8+
9+
### Fixed
10+
11+
- uv instead of pip-compile for publish GH Actions job
12+
13+
## [0.10.32] = 2025-01-22
14+
15+
### Changed
16+
17+
- 3W from global dataset
18+
- Remove negative and rounded checks from HNO as they are now in the scraper
19+
- Common logic for 3W and HNO
20+
721
## [0.10.31] = 2025-01-13
822

923
### Changed

Dockerfile

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
FROM public.ecr.aws/unocha/hdx-scraper-baseimage:stable
1+
FROM public.ecr.aws/unocha/python:3.12-stable
22

33
WORKDIR /srv
44

pyproject.toml

+4-4
Original file line number | Diff line number | Diff line change
@@ -35,11 +35,11 @@ requires-python = ">=3.8"
3535

3636
dependencies = [
3737
"hapi-schema>= 0.9.6",
38-
"hdx-python-api>= 6.3.6",
39-
"hdx-python-country>= 3.8.6",
38+
"hdx-python-api>= 6.3.7",
39+
"hdx-python-country>= 3.8.7",
4040
"hdx-python-database[postgresql]>= 1.3.4",
41-
"hdx-python-scraper>= 2.5.1",
42-
"hdx-python-utilities>= 3.8.0",
41+
"hdx-python-scraper>= 2.5.5",
42+
"hdx-python-utilities>= 3.8.2",
4343
"libhxl",
4444
"sqlalchemy"
4545
]

requirements.txt

+15-14
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ attrs==24.3.0
88
# jsonlines
99
# jsonschema
1010
# referencing
11-
cachetools==5.5.0
11+
cachetools==5.5.1
1212
# via google-auth
1313
certifi==2024.12.14
1414
# via requests
@@ -40,7 +40,7 @@ email-validator==2.2.0
4040
# via hdx-python-api
4141
et-xmlfile==2.0.0
4242
# via openpyxl
43-
filelock==3.16.1
43+
filelock==3.17.0
4444
# via virtualenv
4545
frictionless==5.18.0
4646
# via hdx-python-utilities
@@ -56,28 +56,28 @@ gspread==6.1.4
5656
# via hdx-python-scraper
5757
hapi-schema==0.9.6
5858
# via hapi-pipelines (pyproject.toml)
59-
hdx-python-api==6.3.6
59+
hdx-python-api==6.3.7
6060
# via
6161
# hapi-pipelines (pyproject.toml)
6262
# hdx-python-scraper
63-
hdx-python-country==3.8.6
63+
hdx-python-country==3.8.7
6464
# via
6565
# hapi-pipelines (pyproject.toml)
6666
# hdx-python-api
6767
# hdx-python-scraper
6868
hdx-python-database==1.3.4
6969
# via hapi-pipelines (pyproject.toml)
70-
hdx-python-scraper==2.5.1
70+
hdx-python-scraper==2.5.5
7171
# via hapi-pipelines (pyproject.toml)
72-
hdx-python-utilities==3.8.0
72+
hdx-python-utilities==3.8.2
7373
# via
7474
# hapi-pipelines (pyproject.toml)
7575
# hdx-python-api
7676
# hdx-python-country
7777
# hdx-python-scraper
7878
humanize==4.11.0
7979
# via frictionless
80-
identify==2.6.5
80+
identify==2.6.6
8181
# via pre-commit
8282
idna==3.10
8383
# via
@@ -120,7 +120,7 @@ markupsafe==3.0.2
120120
# via jinja2
121121
mdurl==0.1.2
122122
# via markdown-it-py
123-
more-itertools==10.5.0
123+
more-itertools==10.6.0
124124
# via inflect
125125
nodeenv==1.9.1
126126
# via pre-commit
@@ -144,11 +144,11 @@ ply==3.11
144144
# libhxl
145145
pockets==0.9.1
146146
# via sphinxcontrib-napoleon
147-
pre-commit==4.0.1
147+
pre-commit==4.1.0
148148
# via hapi-pipelines (pyproject.toml)
149-
psycopg==3.2.3
149+
psycopg==3.2.4
150150
# via hdx-python-database
151-
psycopg-binary==3.2.3
151+
psycopg-binary==3.2.4
152152
# via psycopg
153153
pyasn1==0.6.1
154154
# via
@@ -196,7 +196,7 @@ quantulum3==0.9.2
196196
# via hdx-python-api
197197
ratelimit==2.2.1
198198
# via hdx-python-utilities
199-
referencing==0.35.1
199+
referencing==0.36.1
200200
# via
201201
# jsonschema
202202
# jsonschema-specifications
@@ -251,7 +251,7 @@ sqlalchemy==2.0.37
251251
# hdx-python-database
252252
stringcase==1.2.0
253253
# via frictionless
254-
structlog==24.4.0
254+
structlog==25.1.0
255255
# via libhxl
256256
tableschema-to-template==0.0.13
257257
# via hdx-python-utilities
@@ -271,6 +271,7 @@ typing-extensions==4.12.2
271271
# psycopg
272272
# pydantic
273273
# pydantic-core
274+
# referencing
274275
# sqlalchemy
275276
# typeguard
276277
# typer
@@ -284,7 +285,7 @@ urllib3==2.3.0
284285
# requests
285286
validators==0.34.0
286287
# via frictionless
287-
virtualenv==20.28.1
288+
virtualenv==20.29.1
288289
# via pre-commit
289290
wheel==0.45.1
290291
# via libhxl

src/hapi/pipelines/app/__main__.py

+1-15
Original file line number | Diff line number | Diff line change
@@ -79,13 +79,6 @@ def parse_args():
7979
action="store_true",
8080
help="Use saved data",
8181
)
82-
parser.add_argument(
83-
"-dbg",
84-
"--debug",
85-
default=False,
86-
action="store_true",
87-
help="Debug",
88-
)
8982
parser.add_argument(
9083
"-ehx",
9184
"--err-to-hdx",
@@ -104,7 +97,6 @@ def main(
10497
basic_auths: Optional[Dict[str, str]] = None,
10598
save: bool = False,
10699
use_saved: bool = False,
107-
debug: bool = False,
108100
err_to_hdx: bool = False,
109101
**ignore,
110102
) -> None:
@@ -121,7 +113,6 @@ def main(
121113
basic_auths (Optional[Dict[str, str]]): Basic authorisations
122114
save (bool): Whether to save state for testing. Defaults to False.
123115
use_saved (bool): Whether to use saved state for testing. Defaults to False.
124-
debug (bool): Whether to output debug info. Defaults to False.
125116
err_to_hdx (bool): Whether to write any errors to HDX metadata. Defaults to False.
126117
127118
Returns:
@@ -142,7 +133,7 @@ def main(
142133
params["prepare_fn"] = prepare_hapi_views
143134
logger.info(f"> Database parameters: {params}")
144135
configuration = Configuration.read()
145-
with HDXErrorHandler(should_exit_on_error=False) as error_handler:
136+
with HDXErrorHandler(write_to_hdx=err_to_hdx) as error_handler:
146137
with temp_dir() as temp_folder:
147138
with Database(**params) as database:
148139
session = database.get_session()
@@ -169,9 +160,6 @@ def main(
169160
)
170161
pipelines.run()
171162
pipelines.output()
172-
pipelines.output_errors(err_to_hdx)
173-
if debug:
174-
pipelines.debug("debug")
175163
logger.info("HAPI pipelines completed!")
176164

177165

@@ -216,7 +204,6 @@ def main(
216204
"food_security.yaml",
217205
"idps.yaml",
218206
"national_risk.yaml",
219-
"operational_presence.yaml",
220207
"refugees_and_returnees.yaml",
221208
"wfp.yaml",
222209
]
@@ -235,6 +222,5 @@ def main(
235222
basic_auths=basic_auths,
236223
save=args.save,
237224
use_saved=args.use_saved,
238-
debug=args.debug,
239225
err_to_hdx=ehx,
240226
)

src/hapi/pipelines/app/pipelines.py

+14-37
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,11 @@ def __init__(
6666
url=AdminLevel.formats_url, retriever=reader
6767
).cache()
6868
self.admins = Admins(
69-
configuration, session, self.locations, libhxl_dataset
69+
configuration,
70+
session,
71+
self.locations,
72+
libhxl_dataset,
73+
error_handler,
7074
)
7175
admin1_config = configuration["admin1"]
7276
self.adminone = AdminLevel(admin_config=admin1_config, admin_level=1)
@@ -88,21 +92,13 @@ def __init__(
8892
logger.info("Admin two name replacements:")
8993
self.admintwo.output_admin_name_replacements()
9094

91-
self.org = Org(
92-
session=session,
93-
datasetinfo=configuration["org"],
94-
)
9595
self.org_type = OrgType(
9696
session=session,
97-
datasetinfo=configuration["org_type"],
98-
org_type_map=configuration["org_type_map"],
9997
)
10098
self.sector = Sector(
10199
session=session,
102-
datasetinfo=configuration["sector"],
103-
sector_map=configuration["sector_map"],
104100
)
105-
self.currency = Currency(configuration=configuration, session=session)
101+
self.currency = Currency(session=session, configuration=configuration)
106102

107103
Sources.set_default_source_date_format("%Y-%m-%d")
108104
self.runner = Runner(
@@ -113,6 +109,7 @@ def __init__(
113109
)
114110
self.configurable_scrapers = {}
115111
self.create_configurable_scrapers()
112+
116113
self.metadata = Metadata(
117114
runner=self.runner, session=session, today=today
118115
)
@@ -135,7 +132,7 @@ def setup_configurable_scrapers(
135132
if countryiso3s:
136133
configuration = {}
137134
# This assumes format prefix_iso_.... eg.
138-
# population_gtm, operational_presence_afg_total
135+
# population_gtm
139136
iso3_index = len(prefix) + 1
140137
for key, value in self.configuration[f"{prefix}{suffix}"].items():
141138
if len(key) < iso3_index + 3:
@@ -172,13 +169,6 @@ def _create_configurable_scrapers(
172169
current_scrapers + scraper_names
173170
)
174171

175-
_create_configurable_scrapers(
176-
"operational_presence", "admintwo", adminlevel=self.admintwo
177-
)
178-
_create_configurable_scrapers(
179-
"operational_presence", "adminone", adminlevel=self.adminone
180-
)
181-
_create_configurable_scrapers("operational_presence", "national")
182172
_create_configurable_scrapers("national_risk", "national")
183173
_create_configurable_scrapers("refugees_and_returnees", "national")
184174
_create_configurable_scrapers("idps", "national")
@@ -212,21 +202,17 @@ def output_operational_presence(self):
212202
not self.themes_to_run
213203
or "operational_presence" in self.themes_to_run
214204
):
215-
results = self.runner.get_hapi_results(
216-
self.configurable_scrapers["operational_presence"]
205+
org = Org(
206+
session=self.session,
207+
metadata=self.metadata,
208+
configuration=self.configuration,
217209
)
210+
org.populate()
218211
operational_presence = OperationalPresence(
219212
session=self.session,
220213
metadata=self.metadata,
221214
admins=self.admins,
222-
adminone=self.adminone,
223-
admintwo=self.admintwo,
224-
org=self.org,
225-
org_type=self.org_type,
226-
sector=self.sector,
227-
results=results,
228-
config=self.configuration,
229-
error_handler=self.error_handler,
215+
configuration=self.configuration,
230216
)
231217
operational_presence.populate()
232218

@@ -253,9 +239,7 @@ def output_humanitarian_needs(self):
253239
session=self.session,
254240
metadata=self.metadata,
255241
admins=self.admins,
256-
sector=self.sector,
257242
configuration=self.configuration,
258-
error_handler=self.error_handler,
259243
)
260244
humanitarian_needs.populate()
261245

@@ -374,7 +358,6 @@ def output(self):
374358
self.locations.populate()
375359
self.admins.populate()
376360
self.metadata.populate()
377-
self.org.populate()
378361
self.org_type.populate()
379362
self.sector.populate()
380363
self.currency.populate()
@@ -389,9 +372,3 @@ def output(self):
389372
self.output_poverty_rate()
390373
self.output_conflict_event()
391374
self.output_food_prices()
392-
393-
def debug(self, folder: str) -> None:
394-
self.org.output_org_map(folder)
395-
396-
def output_errors(self, err_to_hdx: bool) -> None:
397-
self.error_handler.output_errors(err_to_hdx)

0 commit comments

Comments
 (0)