Skip to content

Commit

Permalink
fix: contentlanguages need to be matched before piping output to targ…
Browse files Browse the repository at this point in the history
…et (#5)

* fix: contentlanguages need to be matched before piping output to target

* refactor: calling source is not required

* fix: dict check never passes

* test: add basic setup for running unit tests
  • Loading branch information
m90 authored Jul 24, 2024
1 parent 125ff34 commit a7bb0b1
Show file tree
Hide file tree
Showing 7 changed files with 232 additions and 2 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
test.py
19 changes: 19 additions & 0 deletions .github/workflows/unit_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Runs the unit tests in test.py on pushes to main and on every pull request.
name: Python package

on:
  push:
    branches:
      - 'main'
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the version a string; an unquoted value
          # such as 3.10 would be parsed as the number 3.1.
          python-version: '3.12'
      - name: Run unit tests
        run: ./test.py
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.json
__pycache__
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
FROM node:20-alpine

RUN apk add --no-cache jq=1.7.1-r0 curl=8.8.0-r0 && \
RUN apk add --no-cache curl=8.8.0-r0 python3=3.12.3-r1 && \
npm install -g wikibase-cli@18.0.3

COPY --chmod=755 ./transferbot.sh /usr/bin/transferbot
COPY --chmod=755 ./mangle_data.py /usr/bin/mangle_data

ENTRYPOINT ["transferbot"]
55 changes: 55 additions & 0 deletions mangle_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/usr/bin/env python3

import argparse
import json
import fileinput
from urllib import request

# Command-line interface. Both options are mandatory; --pick may be given
# several times and its values are collected into a list.
parser = argparse.ArgumentParser(
    description="Adjust a line from wb-cli so it can be fed into the target wiki"
)

parser.add_argument(
    "-t",
    "--target",
    action="store",
    dest="target",
    required=True,
    help="origin of the target wiki whose content languages are queried",
)
parser.add_argument(
    "-p",
    "--pick",
    action="append",
    dest="pick",
    required=True,
    help="top-level entity key to keep in the output; repeat to keep several",
)


def process_entity(line, pick=(), select_languages=frozenset()):
    """Reduce a decoded entity to the picked keys and the selected languages.

    Args:
        line: Mapping of top-level entity keys to their values (one decoded
            JSON line).
        pick: Container of top-level keys to keep; every other key is
            dropped.
        select_languages: Container of language codes to keep inside
            dict-valued entries.

    Returns:
        A new dict. Non-dict values of picked keys are carried over
        unchanged; dict values are rebuilt with only the entries whose key
        is in ``select_languages``. ``line`` itself is not modified.
    """
    out = {}
    for key, value in line.items():
        if key not in pick:
            continue

        if not isinstance(value, dict):
            # Non-dict values have no per-language entries to filter.
            out[key] = value
            continue

        # Dict values are treated as keyed by language code; keep only the
        # selected languages (possibly leaving an empty dict).
        out[key] = {
            lang: term for lang, term in value.items() if lang in select_languages
        }

    return out


def get_contentlanguages(target_origin, timeout=30):
    """Fetch the content language codes reported by the target wiki.

    Sends ``action=query&meta=wbcontentlanguages`` to the wiki's
    ``/w/api.php`` endpoint and collects the keys of the
    ``query.wbcontentlanguages`` object in the JSON response.

    Args:
        target_origin: Origin of the target wiki, e.g.
            ``https://test.wikibase.cloud``. A trailing slash is tolerated.
        timeout: Seconds to wait on the connection before giving up, so an
            unresponsive wiki cannot stall the caller indefinitely.

    Returns:
        A set of language code strings.

    Raises:
        OSError: The request failed or timed out (this includes
            ``urllib.error.URLError``).
        ValueError: The response body is not valid JSON.
        KeyError: The response lacks ``query.wbcontentlanguages``.
    """
    # Avoid a double slash in the path when the origin ends with "/".
    origin = target_origin.rstrip("/")
    url = f"{origin}/w/api.php?action=query&meta=wbcontentlanguages&format=json"
    with request.urlopen(url, timeout=timeout) as response:
        body = json.loads(response.read())
    return set(body["query"]["wbcontentlanguages"])


def main():
    """Filter JSON lines from stdin for the target wiki and print them.

    Parses the command line, fetches the target wiki's content languages
    once, then handles stdin line by line: each line is decoded as JSON,
    reduced with ``process_entity`` and written to stdout as one JSON line.
    """
    args = parser.parse_args()
    target_languages = get_contentlanguages(args.target)

    for line in fileinput.input("-"):
        if not line.strip():
            # A blank line is not a JSON document; skip it instead of
            # letting json.loads abort the whole run.
            continue
        out = process_entity(
            json.loads(line), pick=args.pick, select_languages=target_languages
        )
        # ensure_ascii=False keeps non-ASCII text readable in the output.
        print(json.dumps(out, ensure_ascii=False))


if __name__ == "__main__":
main()
152 changes: 152 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#!/usr/bin/env python3

import unittest
import urllib
from unittest.mock import MagicMock, patch

from mangle_data import process_entity, get_contentlanguages


class TestProcessEntity(unittest.TestCase):
    """Checks the key picking and language filtering of ``process_entity``."""

    # Term text per language code, shared by every fixture in this class.
    _VALUES = {"en": "pipe", "fr": "pipe", "de": "Pfeife"}

    @classmethod
    def _terms(cls, *codes):
        """Build a fresh language-keyed term mapping for the given codes."""
        return {
            code: {"language": code, "value": cls._VALUES[code]} for code in codes
        }

    def test_empty(self):
        """An empty entity produces an empty result."""
        self.assertEqual(process_entity({}), {})

    def test_skip_languages(self):
        """Languages not listed in ``select_languages`` are dropped."""
        entity = {"labels": self._terms("en", "fr", "de")}

        filtered = process_entity(
            entity,
            pick=["labels"],
            select_languages=["en", "fr"],
        )

        self.assertEqual(filtered, {"labels": self._terms("en", "fr")})

    def test_pick_keys(self):
        """Only keys named in ``pick`` appear in the result."""
        entity = {
            "labels": self._terms("en", "fr", "de"),
            "other": self._terms("en", "fr", "de"),
        }

        filtered = process_entity(
            entity,
            pick=["labels"],
            select_languages=["en", "fr", "de"],
        )

        self.assertEqual(filtered, {"labels": self._terms("en", "fr", "de")})

    def test_pick_non_dict(self):
        """A picked non-dict value is copied through unchanged."""
        entity = {
            "type": "item",
            "labels": self._terms("en", "fr", "de"),
        }

        filtered = process_entity(
            entity,
            pick=["type"],
            select_languages=["en", "fr", "de"],
        )

        self.assertEqual(filtered, {"type": "item"})


class TestGetContentlanguages(unittest.TestCase):
    """Tests ``get_contentlanguages`` with the HTTP request mocked out."""

    @patch("urllib.request.urlopen")
    def test_success(self, mock_urlopen):
        """The keys of ``query.wbcontentlanguages`` come back as a set."""
        response = MagicMock()
        # A real HTTP response body is bytes, so the mock returns bytes too.
        response.read.return_value = b'{"batchcomplete":"","query":{"wbcontentlanguages":{"aa":{"code":"aa"},"bb":{"code":"bb"}}}}'
        # urlopen() is used as a context manager by the code under test.
        response.__enter__.return_value = response
        mock_urlopen.return_value = response

        result = get_contentlanguages("https://test.wikibase.cloud")

        self.assertSetEqual(result, {"aa", "bb"})
        # Exactly one request, addressed to the wbcontentlanguages query.
        # Only the URL (first positional argument) is pinned down.
        mock_urlopen.assert_called_once()
        self.assertEqual(
            mock_urlopen.call_args[0][0],
            "https://test.wikibase.cloud/w/api.php?action=query&meta=wbcontentlanguages&format=json",
        )


if __name__ == "__main__":
unittest.main()
2 changes: 1 addition & 1 deletion transferbot.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,5 @@ CREDS
} > $(wb config path)

wb data $@ --instance "$source_wiki_origin" |\
jq --compact-output '{type,labels,descriptions,aliases,datatype}' |\
mangle_data -t "$target_wiki_origin" -p type -p labels -p descriptions -p aliases -p datatype |\
wb create-entity --batch --instance "$target_wiki_origin"

0 comments on commit a7bb0b1

Please sign in to comment.