Skip to content

Commit fdfe2b7

Browse files
Merge pull request #8 from OCHA-DAP/main
New 3W data and remove phonetic matching
2 parents 68956a7 + ef4386d commit fdfe2b7

10 files changed

+113
-80
lines changed

CHANGELOG.md

+25 -1
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,31 @@ All notable changes to this project will be documented in this file.
44

55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
66

7-
### [0.9.20] - 2024-06-29
7+
## [0.9.23] - 2024-07-05
8+
9+
### Changed
10+
11+
- Use normalise function from HDX Python Utilities
12+
- Update mappings for changes in HDX Python Country
13+
14+
## [0.9.22] - 2024-07-05
15+
16+
### Fixed
17+
18+
- Use latest operational presence data for NER and SDN
19+
20+
## [0.9.21] - 2024-07-05
21+
22+
### Changed
23+
24+
- Added parameter to make phonetic matching optional for org types and sectors
25+
- Added clean_text function to utilities
26+
27+
### Fixed
28+
29+
- Removed outdated error messages from operational presence pipeline
30+
31+
## [0.9.20] - 2024-06-29
832

933
### Fixed
1034

pyproject.toml

+4 -4
Original file line number | Diff line number | Diff line change
@@ -34,12 +34,12 @@ classifiers = [
3434
requires-python = ">=3.8"
3535

3636
dependencies = [
37-
"hapi-schema>=0.8.9",
37+
"hapi-schema>=0.8.12",
3838
"hdx-python-api>= 6.2.9",
39-
"hdx-python-country>= 3.7.4",
39+
"hdx-python-country>= 3.7.5",
4040
"hdx-python-database[postgresql]>= 1.3.1",
41-
"hdx-python-scraper>= 2.3.7",
42-
"hdx-python-utilities>= 3.6.9",
41+
"hdx-python-scraper>= 2.3.8",
42+
"hdx-python-utilities>= 3.7.2",
4343
"libhxl",
4444
"sqlalchemy"
4545
]

requirements.txt

+12 -12
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ attrs==23.2.0
1010
# referencing
1111
cachetools==5.3.3
1212
# via google-auth
13-
certifi==2024.6.2
13+
certifi==2024.7.4
1414
# via requests
1515
cffi==1.16.0
1616
# via cryptography
@@ -48,7 +48,7 @@ filelock==3.15.4
4848
# via virtualenv
4949
frictionless==5.17.0
5050
# via hdx-python-utilities
51-
google-auth==2.30.0
51+
google-auth==2.31.0
5252
# via
5353
# google-auth-oauthlib
5454
# gspread
@@ -58,13 +58,13 @@ greenlet==3.0.3
5858
# via sqlalchemy
5959
gspread==6.1.2
6060
# via hdx-python-scraper
61-
hapi-schema==0.8.12
61+
hapi-schema==0.8.13
6262
# via hapi-pipelines (pyproject.toml)
6363
hdx-python-api==6.3.1
6464
# via
6565
# hapi-pipelines (pyproject.toml)
6666
# hdx-python-scraper
67-
hdx-python-country==3.7.4
67+
hdx-python-country==3.7.5
6868
# via
6969
# hapi-pipelines (pyproject.toml)
7070
# hdx-python-api
@@ -73,7 +73,7 @@ hdx-python-database==1.3.1
7373
# via hapi-pipelines (pyproject.toml)
7474
hdx-python-scraper==2.3.8
7575
# via hapi-pipelines (pyproject.toml)
76-
hdx-python-utilities==3.7.0
76+
hdx-python-utilities==3.7.2
7777
# via
7878
# hapi-pipelines (pyproject.toml)
7979
# hdx-python-api
@@ -89,7 +89,7 @@ idna==3.7
8989
# requests
9090
ijson==3.3.0
9191
# via hdx-python-utilities
92-
inflect==7.3.0
92+
inflect==7.3.1
9393
# via quantulum3
9494
iniconfig==2.0.0
9595
# via pytest
@@ -152,9 +152,9 @@ pockets==0.9.1
152152
# via sphinxcontrib-napoleon
153153
pre-commit==3.7.1
154154
# via hapi-pipelines (pyproject.toml)
155-
psycopg==3.1.19
155+
psycopg==3.2.1
156156
# via hdx-python-database
157-
psycopg-binary==3.1.19
157+
psycopg-binary==3.2.1
158158
# via psycopg
159159
pyasn1==0.6.0
160160
# via
@@ -166,9 +166,9 @@ pyasn1-modules==0.4.0
166166
# via google-auth
167167
pycparser==2.22
168168
# via cffi
169-
pydantic==2.7.4
169+
pydantic==2.8.2
170170
# via frictionless
171-
pydantic-core==2.18.4
171+
pydantic-core==2.20.1
172172
# via pydantic
173173
pygments==2.18.0
174174
# via rich
@@ -242,7 +242,7 @@ ruamel-yaml==0.18.6
242242
# via hdx-python-utilities
243243
ruamel-yaml-clib==0.2.8
244244
# via ruamel-yaml
245-
setuptools==70.1.1
245+
setuptools==70.2.0
246246
# via ckanapi
247247
shellingham==1.5.4
248248
# via typer
@@ -295,7 +295,7 @@ urllib3==2.2.2
295295
# via
296296
# libhxl
297297
# requests
298-
validators==0.28.3
298+
validators==0.30.0
299299
# via frictionless
300300
virtualenv==20.26.3
301301
# via pre-commit

src/hapi/pipelines/configs/core.yaml

+29 -22
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,7 @@ admin2:
5555
"AF08|Onaba": "AF0805"
5656
"AF14|Khost": "AF1401"
5757
"AF23|Chaghcharan": "AF2301"
58+
"CM008|Ndé": "CM008007"
5859
"CO08|Distrito Especial, Industrial Y Portuario De Barranquilla": "CO08001"
5960
"ET01|C. TIGRAY": "ET0102"
6061
"ET01|NW. TIGRAY": "ET0101"
@@ -187,16 +188,17 @@ admin2:
187188
"UA23|Vilnianskyi": "UA2306"
188189
"UA23|Yakymivskyi": "UA2308"
189190
"YE14|Radman Al Awad": "YE1412"
191+
"YE21|Ain": "YE2106"
190192
"YE24|Craiter": "YE2407"
191193
"YE26|Medghal": "YE2603"
192194

193195
admin_name_replacements:
194196
"COD| city": ""
195-
"ETH|c.": "central"
196-
"ETH|e.": "east"
197-
"ETH|n.": "north"
198-
"ETH|s.": "south"
199-
"ETH|w.": "west"
197+
"ETH|c ": "central "
198+
"ETH|e ": "east "
199+
"ETH|n ": "north "
200+
"ETH|s ": "south "
201+
"ETH|w ": "west "
200202

201203
orphan_admin2s:
202204
SS0001: "SSD"
@@ -232,6 +234,7 @@ org_type_map:
232234
ingo: "437"
233235
institution etatique: "435"
234236
international institution: "438"
237+
international ngos: "437"
235238
local ngo: "504"
236239
mouv cr: "445"
237240
mouvement croix rouge: "445"
@@ -250,6 +253,7 @@ org_type_map:
250253
ong nationale: "441"
251254
organisation des nations unies: "447"
252255
organismo internacional: "438"
256+
others: "443"
253257
otro: "443"
254258
programme de developpement: "503"
255259
red cross: "445"
@@ -270,31 +274,30 @@ sector_map:
270274
abri bna: "SHL"
271275
abris: "SHL"
272276
abris ame: "SHL"
273-
abris/ame: "SHL"
274-
abris/bna: "SHL"
275-
abris/bna/cccm: "SHL"
276-
abris / nfi: "SHL"
277+
abris bna: "SHL"
278+
abris bna cccm: "SHL"
277279
abris d'urgence et nfi: "SHL"
280+
abris nfi: "SHL"
278281
action contre les mines: "PRO-MIN"
279282
aee: "SHL"
280283
agriculture: "FSC"
281284
agua saneamiento e higiene: "WSH"
282285
all: "Intersectoral"
283-
ame: "SHL"
284286
alojamiento de emergencia: "SHL"
285-
alojamiento de emergencia (shelter): "SHL"
287+
alojamiento de emergencia shelter: "SHL"
286288
alojamientos y asentamientos: "SHL"
289+
ame: "SHL"
287290
ash: "WSH"
288291
assainissement: "WSH"
289-
camp coordination camp management: "CCM"
290292
camp coordination and camp management: "CCM"
293+
camp coordination camp management: "CCM"
291294
cash: "Cash"
292295
cccm: "CCM"
293296
ccs: "CCM"
294297
cluster coordination: "CCM"
295-
coord & services support: "CCM"
298+
coord services support: "CCM"
299+
coordinacion informacion: "CCM"
296300
coordination: "CCM"
297-
coordinacion / informacion: "CCM"
298301
coordination et gestion des camps: "CCM"
299302
eah: "WSH"
300303
eau: "WSH"
@@ -303,8 +306,8 @@ sector_map:
303306
eau hygiene assainissement: "WSH"
304307
eau hygiene et assainissement: "WSH"
305308
educacion: "EDU"
306-
education: "EDU"
307309
educacion en emergencias: "EDU"
310+
education: "EDU"
308311
eha: "WSH"
309312
emergency shelter and non food items: "SHL"
310313
epah: "WSH"
@@ -318,6 +321,7 @@ sector_map:
318321
food security livelihood: "FSC"
319322
fsl: "FSC"
320323
gestion des sites d'accueil temporaires: "SHL"
324+
gestion des sites d accueil temporaires: "SHL"
321325
gbv: "PRO-GBV"
322326
hlp: "PRO-HLP"
323327
humanitaire: "Hum"
@@ -333,13 +337,14 @@ sector_map:
333337
multisectoriel: "Multi"
334338
nutricion: "NUT"
335339
nutrition: "NUT"
336-
operatioanl presence water sanitation & hygiene: "WSH"
340+
operatioanl presence water sanitation hygiene: "WSH"
337341
operational presence education in emergencies: "EDU"
338-
operational presence emergency shelter & non food items: "SHL"
339-
operational presence food security & agriculture: "FSC"
342+
operational presence emergency shelter non food items: "SHL"
343+
operational presence food security agriculture: "FSC"
340344
operational presence health: "HEA"
341345
operational presence nutrition: "NUT"
342346
operational presence protection: "PRO"
347+
pro cpm: "PRO-CPN"
343348
pronna: "PRO-CPN"
344349
propg: "PRO"
345350
proteccion infantil: "PRO-CPN"
@@ -350,6 +355,7 @@ sector_map:
350355
protection logement terre et propriete: "PRO-HLP"
351356
protection ltb: "PRO-HLP"
352357
protection lutte anti mines: "PRO-MIN"
358+
protection pe: "PRO-CPN"
353359
protection protection de l'enfant: "PRO-CPN"
354360
protection violences basees sur le genre: "PRO-GBV"
355361
protection vgb: "PRO-GBV"
@@ -363,10 +369,10 @@ sector_map:
363369
rcf health and nutrtion: "HEA"
364370
rcf protection: "PRO"
365371
recuperacion temprana: "ERY"
366-
relevement rapide: "ERY"
367372
relevement precoce: "ERY"
373+
relevement rapide: "ERY"
368374
refugee response: "CCM"
369-
refugees & migrants multi sector: "CCM"
375+
refugees migrants multi sector: "CCM"
370376
reponse aux refugies: "CCM"
371377
sa: "FSC"
372378
sal: "HEA"
@@ -379,18 +385,19 @@ sector_map:
379385
services humanitaires communs: "Hum"
380386
sexual and reproductive health: "HEA"
381387
shelter: "SHL"
388+
shelter nfi: "SHL"
382389
shelter nfis: "SHL"
383-
shelter/nfi: "SHL"
384390
shelter and nfi: "SHL"
385391
shelter and nfis: "SHL"
386392
shelter and non food items: "SHL"
387393
site management: "CCM"
388394
snfi: "SHL"
389395
telecommunications: "TEL"
390-
telecomunicaciones de emergencia: "TEL"
391396
telecommunications d'urgence: "TEL"
397+
telecomunicaciones de emergencia: "TEL"
392398
vbg: "PRO-GBV"
393399
violences basees sur le genre: "PRO-GBV"
400+
violence basee sur le genre: "PRO-GBV"
394401
violencia basada en genero: "PRO-GBV"
395402
wash: "WSH"
396403
water sanitation and hygiene: "WSH"

src/hapi/pipelines/configs/operational_presence.yaml

+12 -17
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,4 @@
11
#Operational presence config file
2-
operational_presence_error_messages:
3-
burkina-faso-presence-operationnelle: "Data file cannot be read"
4-
drc_presence_operationnelle: "Data file cannot be read"
52

63
operational_presence_default:
74
scrapers_with_defaults:
@@ -461,15 +458,15 @@ operational_presence_national:
461458

462459
operational_presence_ner:
463460
dataset: "niger-operational-presence"
464-
resource: "NER_Sep_2023"
461+
resource: "3w_OP_Nationale_Juin 2024 au 08062024.xlsx"
465462
format: "xlsx"
466-
sheet: "3W_Data_T2"
463+
sheet: "3w_OP_Nationale_compilation"
467464
headers:
468-
- 4
469-
- 5
465+
- 1
466+
- 2
470467
source_date:
471-
start: "01/07/2023"
472-
end: "30/09/2023"
468+
start: "01/06/2024"
469+
end: "08/06/2024"
473470
admin_single: "NER"
474471
input:
475472
- "NOM ORGANISATION
@@ -478,21 +475,19 @@ operational_presence_national:
478475
(Choisir dans la liste déroulante)
479476
#org+acronym"
480477
- "TYPE ORGANISATION
481-
(cette colonne est renseignée automatiquement selon la selection dans le colonne B)"
478+
(cette colonne est renseignée automatiquement selon la selection dans le colonne B"
482479
- "CLUSTER
483-
(Choisir dans la liste déroulante)
484-
#sector"
480+
(Choisir dans la liste déroulante) Pour les projets humanitaires uniquement"
485481
list:
486482
- "NOM ORGANISATION
487483
(cette colonne est renseignée automatiquement selon la selection dans le colonne A)"
488484
- "ACCRONYME
489485
(Choisir dans la liste déroulante)
490486
#org+acronym"
491487
- "TYPE ORGANISATION
492-
(cette colonne est renseignée automatiquement selon la selection dans le colonne B)"
488+
(cette colonne est renseignée automatiquement selon la selection dans le colonne B"
493489
- "CLUSTER
494-
(Choisir dans la liste déroulante)
495-
#sector"
490+
(Choisir dans la liste déroulante) Pour les projets humanitaires uniquement"
496491

497492
operational_presence_pse:
498493
dataset: "state-of-palestine-who-does-what-and-where-3ws"
@@ -517,13 +512,13 @@ operational_presence_national:
517512

518513
operational_presence_sdn:
519514
dataset: "sudan-operational-presence"
520-
resource: "2024-consolidated-3w-data-jan-to-31-march.xlsx"
515+
resource: "2024-consolidated-3w-data-jan-to-31-may.xlsx"
521516
format: "xlsx"
522517
sheet: "3W_master_data"
523518
headers: 1
524519
source_date:
525520
start: "01/01/2024"
526-
end: "31/03/2024"
521+
end: "31/05/2024"
527522
admin_single: "SDN"
528523
input:
529524
- "Organization"

0 commit comments

Comments
 (0)