Skip to content

Commit

Permalink
First draft
Browse files Browse the repository at this point in the history
  • Loading branch information
b-j-mills committed Mar 7, 2025
1 parent 318fb20 commit 1ea436a
Show file tree
Hide file tree
Showing 6 changed files with 33,701 additions and 5 deletions.
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ name = "hdx-scraper-wfp-rainfall"
requires-python = ">=3.12"
dependencies = [
"hdx-python-api",
"hdx-python-scraper",
"hdx-python-utilities",
"kalendar",
]

dynamic = ["version"]
Expand Down
54 changes: 54 additions & 0 deletions requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ attrs==25.1.0
# jsonlines
# jsonschema
# referencing
cachetools==5.5.2
# via
# -c requirements.txt
# google-auth
certifi==2025.1.31
# via
# -c requirements.txt
Expand Down Expand Up @@ -62,20 +66,40 @@ frictionless==5.18.0
# via
# -c requirements.txt
# hdx-python-utilities
google-auth==2.38.0
# via
# -c requirements.txt
# google-auth-oauthlib
# gspread
google-auth-oauthlib==1.2.1
# via
# -c requirements.txt
# gspread
gspread==6.2.0
# via
# -c requirements.txt
# hdx-python-scraper
hdx-python-api==6.3.8
# via
# -c requirements.txt
# hdx-scraper-wfp-rainfall (pyproject.toml)
# hdx-python-scraper
hdx-python-country==3.8.8
# via
# -c requirements.txt
# hdx-python-api
# hdx-python-scraper
hdx-python-scraper==2.6.3
# via
# -c requirements.txt
# hdx-scraper-wfp-rainfall (pyproject.toml)
hdx-python-utilities==3.8.4
# via
# -c requirements.txt
# hdx-scraper-wfp-rainfall (pyproject.toml)
# hdx-python-api
# hdx-python-country
# hdx-python-scraper
humanize==4.12.1
# via
# -c requirements.txt
Expand Down Expand Up @@ -120,6 +144,10 @@ jsonschema-specifications==2024.10.1
# via
# -c requirements.txt
# jsonschema
kalendar==0.1.1
# via
# -c requirements.txt
# hdx-scraper-wfp-rainfall (pyproject.toml)
libhxl==5.2.2
# via
# -c requirements.txt
Expand Down Expand Up @@ -157,6 +185,10 @@ num2words==0.5.14
# via
# -c requirements.txt
# quantulum3
oauthlib==3.2.2
# via
# -c requirements.txt
# requests-oauthlib
openpyxl==3.1.5
# via
# -c requirements.txt
Expand All @@ -178,6 +210,15 @@ pockets==0.9.1
# via
# -c requirements.txt
# sphinxcontrib-napoleon
pyasn1==0.6.1
# via
# -c requirements.txt
# pyasn1-modules
# rsa
pyasn1-modules==0.4.1
# via
# -c requirements.txt
# google-auth
pydantic==2.10.6
# via
# -c requirements.txt
Expand Down Expand Up @@ -233,6 +274,10 @@ referencing==0.36.2
# -c requirements.txt
# jsonschema
# jsonschema-specifications
regex==2024.11.6
# via
# -c requirements.txt
# hdx-python-scraper
requests==2.32.3
# via
# -c requirements.txt
Expand All @@ -241,10 +286,15 @@ requests==2.32.3
# hdx-python-api
# libhxl
# requests-file
# requests-oauthlib
requests-file==2.1.0
# via
# -c requirements.txt
# hdx-python-utilities
requests-oauthlib==2.0.0
# via
# -c requirements.txt
# google-auth-oauthlib
rfc3986==2.0.0
# via
# -c requirements.txt
Expand All @@ -258,6 +308,10 @@ rpds-py==0.23.1
# -c requirements.txt
# jsonschema
# referencing
rsa==4.9
# via
# -c requirements.txt
# google-auth
ruamel-yaml==0.18.10
# via
# -c requirements.txt
Expand Down
38 changes: 36 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ attrs==25.1.0
# jsonlines
# jsonschema
# referencing
cachetools==5.5.2
# via google-auth
certifi==2025.1.31
# via requests
chardet==5.2.0
Expand All @@ -34,15 +36,30 @@ et-xmlfile==2.0.0
# via openpyxl
frictionless==5.18.0
# via hdx-python-utilities
google-auth==2.38.0
# via
# google-auth-oauthlib
# gspread
google-auth-oauthlib==1.2.1
# via gspread
gspread==6.2.0
# via hdx-python-scraper
hdx-python-api==6.3.8
# via hdx-scraper-wfp-rainfall (pyproject.toml)
# via
# hdx-scraper-wfp-rainfall (pyproject.toml)
# hdx-python-scraper
hdx-python-country==3.8.8
# via hdx-python-api
# via
# hdx-python-api
# hdx-python-scraper
hdx-python-scraper==2.6.3
# via hdx-scraper-wfp-rainfall (pyproject.toml)
hdx-python-utilities==3.8.4
# via
# hdx-scraper-wfp-rainfall (pyproject.toml)
# hdx-python-api
# hdx-python-country
# hdx-python-scraper
humanize==4.12.1
# via frictionless
idna==3.10
Expand All @@ -67,6 +84,8 @@ jsonschema==4.23.0
# tableschema-to-template
jsonschema-specifications==2024.10.1
# via jsonschema
kalendar==0.1.1
# via hdx-scraper-wfp-rainfall (pyproject.toml)
libhxl==5.2.2
# via
# hdx-python-api
Expand All @@ -87,6 +106,8 @@ more-itertools==10.6.0
# via inflect
num2words==0.5.14
# via quantulum3
oauthlib==3.2.2
# via requests-oauthlib
openpyxl==3.1.5
# via hdx-python-utilities
petl==1.7.15
Expand All @@ -97,6 +118,12 @@ ply==3.11
# libhxl
pockets==0.9.1
# via sphinxcontrib-napoleon
pyasn1==0.6.1
# via
# pyasn1-modules
# rsa
pyasn1-modules==0.4.1
# via google-auth
pydantic==2.10.6
# via frictionless
pydantic-core==2.27.2
Expand Down Expand Up @@ -128,15 +155,20 @@ referencing==0.36.2
# via
# jsonschema
# jsonschema-specifications
regex==2024.11.6
# via hdx-python-scraper
requests==2.32.3
# via
# ckanapi
# frictionless
# hdx-python-api
# libhxl
# requests-file
# requests-oauthlib
requests-file==2.1.0
# via hdx-python-utilities
requests-oauthlib==2.0.0
# via google-auth-oauthlib
rfc3986==2.0.0
# via frictionless
rich==13.9.4
Expand All @@ -145,6 +177,8 @@ rpds-py==0.23.1
# via
# jsonschema
# referencing
rsa==4.9
# via google-auth
ruamel-yaml==0.18.10
# via hdx-python-utilities
ruamel-yaml-clib==0.2.12
Expand Down
89 changes: 86 additions & 3 deletions src/hdx/scraper/wfp_rainfall/wfp_rainfall.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,29 @@
"""wfp-rainfall scraper"""

import logging
from datetime import timedelta
from typing import List, Optional

from hdx.api.configuration import Configuration
from hdx.api.utilities.hdx_error_handler import HDXErrorHandler
from hdx.data.dataset import Dataset
from hdx.location.adminlevel import AdminLevel
from hdx.location.country import Country
from hdx.scraper.framework.utilities.hapi_admins import complete_admins
from hdx.utilities.dateparse import iso_string_from_datetime, parse_date_range
from hdx.utilities.dateparse import iso_string_from_datetime, parse_date
from hdx.utilities.retriever import Retrieve
from kalendar import Dekad

logger = logging.getLogger(__name__)


# Maps the infix of the source rainfall columns (``r<key>h``, ``r<key>h_avg``,
# ``r<key>q``) to the ``time_period`` label written to each output row.
_TIME_PERIODS = {
"f": "dekad",
"1": "1-month",
"3": "3-month",
}


class WFPRainfall:
def __init__(
self,
Expand All @@ -27,7 +38,7 @@ def __init__(
self._temp_dir = temp_dir
self._error_handler = error_handler
self._admins = []
self.data = {}
self.data = []
self.dates = []

def get_pcodes(self) -> None:
Expand All @@ -38,8 +49,80 @@ def get_pcodes(self) -> None:
admin.load_pcode_formats()
self._admins.append(admin)

def download_data(self) -> None:
def download_data(self, countryiso3s: Optional[List] = None) -> None:
    """Collect subnational rainfall rows from HDX into ``self.data``.

    For every country, the dataset named ``<iso3>-rainfall-subnational`` is
    read from HDX and the first of its resources whose name contains
    ``5ytd`` is fetched. Each data row of that resource is expanded into
    one output row per entry of ``_TIME_PERIODS``.

    Source columns read from each row: ``ADM2_PCODE``, ``adm2_id``,
    ``date`` and, for each time-period key ``k``, ``r{k}h``, ``r{k}h_avg``
    and ``r{k}q``.

    Args:
        countryiso3s: ISO3 codes to process. When empty or ``None``, every
            key of ``Country.countriesdata()["countries"]`` is used.

    Returns:
        None. Output rows are appended to ``self.data``. A warning is
        registered with the error handler for any dataset that has no
        ``5ytd`` resource.
    """
    self.get_pcodes()
    if not countryiso3s:
        countryiso3s = list(Country.countriesdata()["countries"])

    for iso3 in countryiso3s:
        dataset_name = f"{iso3.lower()}-rainfall-subnational"
        dataset = Dataset.read_from_hdx(dataset_name)
        if not dataset:
            # No dataset under that name on HDX: nothing to do.
            continue

        dataset_id = dataset["id"]
        has_hrp = "Y" if Country.get_hrp_status_from_iso3(iso3) else "N"
        in_gho = "Y" if Country.get_gho_status_from_iso3(iso3) else "N"

        matching = [
            candidate
            for candidate in dataset.get_resources()
            if "5ytd" in candidate["name"]
        ]
        if not matching:
            self._error_handler.add_message(
                "Rainfall",
                dataset_name,
                "Could not find resource",
                message_type="warning",
            )
            continue

        resource = matching[0]
        resource_id = resource["id"]
        _headers, rows = self._retriever.get_tabular_rows(
            resource["url"], dict_form=True
        )

        for row in rows:
            pcode = row["ADM2_PCODE"]
            if "#" in pcode:
                # Presumably the HXL hashtag row rather than data -- confirm.
                continue

            # These lists are handed to complete_admins and read back
            # afterwards, so they must stay as separate mutable lists.
            provider_adm_names = ["", ""]
            adm_codes = ["", pcode]
            adm_names = ["", ""]
            # NOTE(review): the level returned here is not used; the output
            # row hard-codes admin_level 2 -- confirm that is intended.
            _adm_level, warnings = complete_admins(
                self._admins,
                iso3,
                provider_adm_names,
                adm_codes,
                adm_names,
            )
            errors = []

            period_start = parse_date(row["date"])
            next_dekad = Dekad.fromdatetime(period_start) + 1
            # One day before the date of the following dekad.
            period_end = next_dekad.todate() - timedelta(days=1)

            # Fields shared by every time period of this row. They are
            # split into leading and trailing parts so the merged dict
            # keeps the original key order.
            leading = {
                "location_code": iso3,
                "has_hrp": has_hrp,
                "in_gho": in_gho,
                "provider_admin1_name": provider_adm_names[0],
                "provider_admin2_name": provider_adm_names[1],
                "admin1_code": adm_codes[0],
                "admin1_name": adm_names[0],
                "admin2_code": adm_codes[1],
                "admin2_name": adm_names[1],
                "admin_level": 2,
                "wfp_id": row["adm2_id"],
            }
            trailing = {
                "reference_period_start": iso_string_from_datetime(period_start),
                "reference_period_end": period_end.isoformat(),
                "dataset_hdx_id": dataset_id,
                "resource_hdx_id": resource_id,
                "warning": "|".join(warnings),
                "error": "|".join(errors),
            }

            for column_key, period_label in _TIME_PERIODS.items():
                self.data.append(
                    {
                        **leading,
                        "time_period": period_label,
                        "rainfall": row[f"r{column_key}h"],
                        "rainfall_long_term_average": row[f"r{column_key}h_avg"],
                        "rainfall_anomaly_pct": row[f"r{column_key}q"],
                        **trailing,
                    }
                )

def generate_dataset(self) -> Dataset:
dataset = Dataset(
Expand Down
Loading

0 comments on commit 1ea436a

Please sign in to comment.