diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..5890b09
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1 @@
+test.py
diff --git a/.github/workflows/unit_test.yml b/.github/workflows/unit_test.yml
new file mode 100644
index 0000000..1c91a8c
--- /dev/null
+++ b/.github/workflows/unit_test.yml
@@ -0,0 +1,19 @@
+name: Python package
+
+on:
+  push:
+    branches:
+      - 'main'
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+      - name: Run unit tests
+        run: ./test.py
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..026bbc3
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.json
+__pycache__
diff --git a/Dockerfile b/Dockerfile
index 03ecbcc..fe3af3d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,8 +1,9 @@
 FROM node:20-alpine
 
-RUN apk add --no-cache jq=1.7.1-r0 curl=8.8.0-r0 && \
+RUN apk add --no-cache curl=8.8.0-r0 python3=3.12.3-r1 && \
     npm install -g wikibase-cli@18.0.3
 
 COPY --chmod=755 ./transferbot.sh /usr/bin/transferbot
+COPY --chmod=755 ./mangle_data.py /usr/bin/mangle_data
 
 ENTRYPOINT ["transferbot"]
diff --git a/mangle_data.py b/mangle_data.py
new file mode 100755
index 0000000..e782589
--- /dev/null
+++ b/mangle_data.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+"""Filter wb-cli entity JSON so it can be created on a target wiki."""
+
+import argparse
+import json
+import fileinput
+from urllib import request
+
+parser = argparse.ArgumentParser(
+    description="Adjust a line from wb-cli so it can be fed into the target wiki"
+)
+
+parser.add_argument("-t", "--target", action="store", dest="target", required=True)
+parser.add_argument("-p", "--pick", action="append", dest="pick", required=True)
+
+
+def process_entity(line, pick=(), select_languages=frozenset()):
+    """Reduce one entity to the picked keys and the selected languages.
+
+    Args:
+        line: A decoded entity dict.
+        pick: Top-level keys to keep; all other keys are dropped.
+        select_languages: Language codes to keep inside dict-valued keys.
+
+    Returns:
+        A new dict. Picked values that are not dicts are passed through
+        unchanged; picked dicts keep only entries whose key is a selected
+        language.
+    """
+    out = {}
+    for key, value in line.items():
+        if key not in pick:
+            continue
+
+        if not isinstance(value, dict):
+            out[key] = value
+            continue
+
+        out[key] = {
+            lang: term for lang, term in value.items() if lang in select_languages
+        }
+
+    return out
+
+
+def get_contentlanguages(target_origin):
+    """Return the content language codes reported by the target wiki.
+
+    Queries ``meta=wbcontentlanguages`` at ``{target_origin}/w/api.php`` and
+    returns the keys of the ``wbcontentlanguages`` object as a set.
+    """
+    with request.urlopen(
+        f"{target_origin}/w/api.php?action=query&meta=wbcontentlanguages&format=json"
+    ) as response:
+        body = json.loads(response.read())
+    return set(body["query"]["wbcontentlanguages"])
+
+
+def main():
+    """Filter entities read from stdin, one JSON document per line."""
+    args = parser.parse_args()
+    target_languages = get_contentlanguages(args.target)
+
+    for line in fileinput.input("-"):
+        # A blank line is not a JSON document; skip it instead of crashing.
+        if not line.strip():
+            continue
+        out = process_entity(
+            json.loads(line), pick=args.pick, select_languages=target_languages
+        )
+        print(json.dumps(out, ensure_ascii=False))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test.py b/test.py
new file mode 100755
index 0000000..cf4598e
--- /dev/null
+++ b/test.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+
+import unittest
+import urllib
+from unittest.mock import MagicMock, patch
+
+from mangle_data import process_entity, get_contentlanguages
+
+
+class TestProcessEntity(unittest.TestCase):
+    def test_empty(self):
+        result = process_entity({})
+        self.assertEqual(result, {})
+
+    def test_skip_languages(self):
+        result = process_entity(
+            {
+                "labels": {
+                    "en": {
+                        "language": "en",
+                        "value": "pipe",
+                    },
+                    "fr": {
+                        "language": "fr",
+                        "value": "pipe",
+                    },
+                    "de": {
+                        "language": "de",
+                        "value": "Pfeife",
+                    },
+                }
+            },
+            pick=["labels"],
+            select_languages=["en", "fr"],
+        )
+        self.assertEqual(
+            result,
+            {
+                "labels": {
+                    "en": {
+                        "language": "en",
+                        "value": "pipe",
+                    },
+                    "fr": {
+                        "language": "fr",
+                        "value": "pipe",
+                    },
+                }
+            },
+        )
+
+    def test_pick_keys(self):
+        result = process_entity(
+            {
+                "labels": {
+                    "en": {
+                        "language": "en",
+                        "value": "pipe",
+                    },
+                    "fr": {
+                        "language": "fr",
+                        "value": "pipe",
+                    },
+                    "de": {
+                        "language": "de",
+                        "value": "Pfeife",
+                    },
+                },
+                "other": {
+                    "en": {
+                        "language": "en",
+                        "value": "pipe",
+                    },
+                    "fr": {
+                        "language": "fr",
+                        "value": "pipe",
+                    },
+                    "de": {
+                        "language": "de",
+                        "value": "Pfeife",
+                    },
+                },
+            },
+            pick=["labels"],
+            select_languages=["en", "fr", "de"],
+        )
+        self.assertEqual(
+            result,
+            {
+                "labels": {
+                    "en": {
+                        "language": "en",
+                        "value": "pipe",
+                    },
+                    "fr": {
+                        "language": "fr",
+                        "value": "pipe",
+                    },
+                    "de": {
+                        "language": "de",
+                        "value": "Pfeife",
+                    },
+                },
+            },
+        )
+
+    def test_pick_non_dict(self):
+        result = process_entity(
+            {
+                "type": "item",
+                "labels": {
+                    "en": {
+                        "language": "en",
+                        "value": "pipe",
+                    },
+                    "fr": {
+                        "language": "fr",
+                        "value": "pipe",
+                    },
+                    "de": {
+                        "language": "de",
+                        "value": "Pfeife",
+                    },
+                },
+            },
+            pick=["type"],
+            select_languages=["en", "fr", "de"],
+        )
+        self.assertEqual(
+            result,
+            {
+                "type": "item",
+            },
+        )
+
+
+class TestGetContentlanguages(unittest.TestCase):
+    @patch("urllib.request.urlopen")
+    def test_success(self, mock_urlopen):
+        cm = MagicMock()
+        cm.read.return_value = b'{"batchcomplete":"","query":{"wbcontentlanguages":{"aa":{"code":"aa"},"bb":{"code":"bb"}}}}'
+        cm.__enter__.return_value = cm
+        mock_urlopen.return_value = cm
+        result = get_contentlanguages("https://test.wikibase.cloud")
+        mock_urlopen.assert_called_once_with(
+            "https://test.wikibase.cloud/w/api.php?action=query&meta=wbcontentlanguages&format=json"
+        )
+        self.assertSetEqual(result, {"aa", "bb"})
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/transferbot.sh b/transferbot.sh
index 1048c5c..abda3b2 100644
--- a/transferbot.sh
+++ b/transferbot.sh
@@ -37,5 +37,5 @@ CREDS
 } > $(wb config path)
 
 wb data $@ --instance "$source_wiki_origin" |\
-  jq --compact-output '{type,labels,descriptions,aliases,datatype}' |\
+  mangle_data -t "$target_wiki_origin" -p type -p labels -p descriptions -p aliases -p datatype |\
   wb create-entity --batch --instance "$target_wiki_origin"