diff --git a/Makefile b/Makefile index 8f824d7..7dd862c 100644 --- a/Makefile +++ b/Makefile @@ -19,8 +19,7 @@ # ############################################################################### -DOCKER_COMPOSE_ARGS=--file docker/docker-compose.yml --file docker/docker-compose.override.yml --project-name wis2-gdc -DOCKER_COMPOSE_DEV_ARGS=--file docker/docker-compose.yml --file docker/docker-compose.override.yml --file docker/docker-compose.dev.yml --project-name wis2-gdc +DOCKER_COMPOSE_ARGS=--project-name wis2-gdc --file docker/docker-compose.yml --file docker/docker-compose.override.yml build: docker compose $(DOCKER_COMPOSE_ARGS) build @@ -29,7 +28,7 @@ up: docker compose $(DOCKER_COMPOSE_ARGS) up --detach dev: - docker compose $(DOCKER_COMPOSE_DEV_ARGS) up --detach + docker compose $(DOCKER_COMPOSE_ARGS) --file docker/docker-compose.dev.yml up --detach login: docker exec -it wis2-gdc-management /bin/bash diff --git a/README.md b/README.md index 7426b07..bb8dc55 100644 --- a/README.md +++ b/README.md @@ -10,15 +10,15 @@ wis2-gdc is a Reference Implementation of a WIS2 Global Discovery Catalogue. ## Workflow -- connects to a WIS2 Global Broker, subscribed to the following: +- connects to a WIS2 Global Broker, subscribed to the following topic: - `origin/a/wis2/+/metadata/#` -- on discovery metadata notifications, run the WCMP2 ATS via [pywcmp](https://github.com/wmo-im/pywcmp) - - ETS - - KPIs -- publish ETS and KPI reports to local broker under `gdc-reports` +- on discovery metadata notifications, run the WCMP2 ETS and KPIs via [pywcmp](https://github.com/wmo-im/pywcmp) +- publish ETS and KPI reports to local broker under `monitor/a/wis2/$WIS2_GDC_CENTRE_ID/centre-id` - publish to a WIS2 GDC (OGC API - Records) using one of the supported transaction backends: - [OGC API - Features - Part 4: Create, Replace, Update and Delete](https://docs.ogc.org/DRAFTS/20-002.html) - Elasticsearch direct (default) +- collect real-time and offline GDC metrics and make them available as [OpenMetrics](https://openmetrics.io) +- produce a metadata zipfile for download (daily) ## Installation diff --git a/docker/wis2-gdc-api/wis2-gdc-api.yml b/docker/wis2-gdc-api/wis2-gdc-api.yml index 009510c..2e24eb8 100644 --- a/docker/wis2-gdc-api/wis2-gdc-api.yml +++ b/docker/wis2-gdc-api/wis2-gdc-api.yml @@ -64,11 +64,6 @@ resources: crs: - CRS84 links: - - type: text/html - rel: canonical - title: WMO Information System (WIS) | World Meteorological Organization - href: https://community.wmo.int/en/activity-areas/wis - hreflang: en-CA - type: application/zip rel: enclosure title: WIS2 discovery metadata archive (generated daily) @@ -80,7 +75,11 @@ resources: title: WIS2 Global Discovery Catalogue metrics href: ${WIS2_GDC_API_URL}/wis2-gdc-metrics.txt hreflang: en-CA - extents: + - type: text/html + rel: canonical + title: WMO Information System (WIS) | World Meteorological Organization + href: https://community.wmo.int/en/activity-areas/wis + hreflang: en-CA extents: spatial: bbox: [-180, -90, 180, 90] diff --git a/wis2_gdc/__init__.py b/wis2_gdc/__init__.py index e7fb0ec..401cf7d 100644 --- a/wis2_gdc/__init__.py +++ b/wis2_gdc/__init__.py @@ -23,7 +23,8 @@ from wis2_gdc.registrar import register, setup, teardown from wis2_gdc.monitor import monitor -from wis2_gdc.sync import archive, sync +from wis2_gdc.archive import archive +from wis2_gdc.sync import sync __version__ = '0.1.dev0' diff --git a/wis2_gdc/archive.py b/wis2_gdc/archive.py new file mode 100644 index 0000000..1078396 --- /dev/null +++ b/wis2_gdc/archive.py @@ -0,0 +1,103 @@ +############################################################################### +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +############################################################################### + +import json +import logging +import zipfile + +import click +import requests +from typing import Union + +from pywis_pubsub import cli_options +from pywis_pubsub.mqtt import MQTTPubSubClient + +from wis2_gdc.env import API_URL, API_URL_DOCKER, BROKER_URL, CENTRE_ID + +LOGGER = logging.getLogger(__name__) + + +def archive_metadata(url: str, archive_zipfile: str) -> None: + """ + Archive all discovery metadata from a GDC to an archive zipfile + + :param url: `str` of GDC API URL + :param archive_zipfile: `str` of filename of zipfile + + :returns: `None` + """ + + def _get_next_link(links) -> Union[str, None]: + """ + Inner helper function to derive rel=next link from GDC response + + :param links: `list` of links array + + :returns: `str` of next link or `None` + """ + + for link in links: + if link['rel'] == 'next': + return link['href'] + + return None + + end = False + gdc_items_url = f'{url}/collections/wis2-discovery-metadata/items' + response = None + + with zipfile.ZipFile(archive_zipfile, 'w') as zf: + while not end: + if response is None: + gdc_items_url2 = gdc_items_url + else: + gdc_items_url2 = _get_next_link(response['links']) + + LOGGER.info(f'Querying GDC with {gdc_items_url2}') + response = requests.get(gdc_items_url2).json() + + for feature in response['features']: + LOGGER.debug(f"Saving {feature['id']} to archive") + filename = f"{feature['id']}.json" + zf.writestr(filename, json.dumps(feature)) + + if _get_next_link(response['links']) is None: + end = True + + msg = { + 'type': 'archive-published', + 'description': f'Archive published at {API_URL}/wis2-gdc-archive.zip' + } + + m = MQTTPubSubClient(BROKER_URL) + m.pub(f'monitor/a/wis2/{CENTRE_ID}', json.dumps(msg)) + m.close() + + +@click.command() +@click.pass_context +@click.argument('archive-zipfile') +@cli_options.OPTION_VERBOSITY +def archive(ctx, archive_zipfile, verbosity='NOTSET'): + """Archive discovery metadata records""" + + click.echo(f'Achiving metadata from GDC {API_URL}') + archive_metadata(API_URL_DOCKER, archive_zipfile) diff --git a/wis2_gdc/sync.py b/wis2_gdc/sync.py index 0deb316..c34ce72 100644 --- a/wis2_gdc/sync.py +++ b/wis2_gdc/sync.py @@ -19,86 +19,17 @@ # ############################################################################### -import json import logging -import zipfile import click -import requests -from typing import Union from pywis_pubsub import cli_options -from pywis_pubsub.mqtt import MQTTPubSubClient -from wis2_gdc.env import API_URL, API_URL_DOCKER, BROKER_URL from wis2_gdc.harvester import HARVESTERS LOGGER = logging.getLogger(__name__) -def archive_metadata(url: str, archive_zipfile: str) -> None: - """ - Archive all discovery metadata from a GDC to an archive zipfile - - :param url: `str` of GDC API URL - :param archive_zipfile: `str` of filename of zipfile - - :returns: `None` - """ - - def _get_next_link(links) -> Union[str, None]: - """ - Inner helper function to derive rel=next link from GDC response - - :param links: `list` of links array - - :returns: `str` of next link or `None` - """ - - for link in links: - if link['rel'] == 'next': - return link['href'] - - return None - - end = False - gdc_items_url = f'{url}/collections/wis2-discovery-metadata/items' - response = None - - with zipfile.ZipFile(archive_zipfile, 'w') as zf: - while not end: - if response is None: - gdc_items_url2 = gdc_items_url - else: - gdc_items_url2 = _get_next_link(response['links']) - - LOGGER.info(f'Querying GDC with {gdc_items_url2}') - response = requests.get(gdc_items_url2).json() - - for feature in response['features']: - LOGGER.debug(f"Saving {feature['id']} to archive") - filename = f"{feature['id']}.json" - zf.writestr(filename, json.dumps(feature)) - - if _get_next_link(response['links']) is None: - end = True - - m = MQTTPubSubClient(BROKER_URL) - m.pub('gdc-reports/archive', f'Archive published at {API_URL}/wis2-gdc-archive.zip') # noqa - m.close() - - -@click.command() -@click.pass_context -@click.argument('archive-zipfile') -@cli_options.OPTION_VERBOSITY -def archive(ctx, archive_zipfile, verbosity='NOTSET'): - """Archive discovery metadata records""" - - click.echo(f'Achiving metadata from GDC {API_URL}') - archive_metadata(API_URL_DOCKER, archive_zipfile) - - @click.command @click.argument('harvest_type', nargs=1, type=click.Choice(list(HARVESTERS.keys())))