-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: contentlanguages need to be matched before piping output to target (#5)
* fix: contentlanguages need to be matched before piping output to target
* refactor: calling source is not required
* fix: dict check never passes
* test: add basic setup for running unit tests
- Loading branch information
Showing
7 changed files
with
232 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
test.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# CI workflow: run the unit tests on pushes to main and on every pull request.
name: Python package

on:
  push:
    branches:
      - 'main'
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the version a string; a bare 3.10 would be
          # read as the float 3.1.
          python-version: '3.12'
      - name: Run unit tests
        # test.py is executed directly, so it relies on its shebang line and
        # on the file being checked in with the executable bit set.
        run: ./test.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
*.json | ||
__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,9 @@ | ||
FROM node:20-alpine | ||
|
||
RUN apk add --no-cache jq=1.7.1-r0 curl=8.8.0-r0 && \ | ||
RUN apk add --no-cache curl=8.8.0-r0 python3=3.12.3-r1 && \ | ||
npm install -g wikibase-cli@18.0.3 | ||
|
||
COPY --chmod=755 ./transferbot.sh /usr/bin/transferbot | ||
COPY --chmod=755 ./mangle_data.py /usr/bin/mangle_data | ||
|
||
ENTRYPOINT ["transferbot"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import json | ||
import fileinput | ||
from urllib import request | ||
|
||
# Command line interface: both options are mandatory, and --pick may be given
# several times (each occurrence is appended to the resulting list).
parser = argparse.ArgumentParser(
    description="Adjust a line from wb-cli so it can be fed into the target wiki"
)

parser.add_argument(
    "-t",
    "--target",
    action="store",
    dest="target",
    required=True,
    help="origin of the target wiki, e.g. https://test.wikibase.cloud",
)
parser.add_argument(
    "-p",
    "--pick",
    action="append",
    dest="pick",
    required=True,
    help="top-level entity key to keep; repeat the option to keep several",
)
|
||
|
||
def process_entity(line, pick=(), select_languages=frozenset()):
    """Reduce one entity to the picked keys and the selected languages.

    Args:
        line: Decoded JSON object of a single entity, mapping top-level keys
            (such as ``labels`` or ``type``) to their values.
        pick: Top-level keys to keep; every other key is dropped.
        select_languages: Language codes to keep inside dict-valued keys.
            Any container supporting ``in`` works.

    Returns:
        A new dict. Non-dict values of picked keys are carried over as they
        are; dict values are treated as language-keyed maps and reduced to
        the entries whose key is in ``select_languages``. The input is not
        modified.
    """
    out = {}
    for key, value in line.items():
        if key not in pick:
            continue

        if not isinstance(value, dict):
            # Scalars such as the entity type carry no language information.
            out[key] = value
            continue

        out[key] = {
            lang: term for lang, term in value.items() if lang in select_languages
        }

    return out
|
||
|
||
def get_contentlanguages(target_origin, timeout=30):
    """Fetch the content language codes reported by the target wiki.

    Requests ``action=query&meta=wbcontentlanguages`` from the ``/w/api.php``
    endpoint below ``target_origin`` and reads the answer as JSON.

    Args:
        target_origin: Origin of the target wiki, for example
            ``https://test.wikibase.cloud``. Trailing slashes are ignored so
            the request path never starts with ``//``.
        timeout: Seconds to wait on the request before giving up, so an
            unresponsive target cannot stall the caller indefinitely.

    Returns:
        The set of keys of the ``query.wbcontentlanguages`` object in the
        response.

    Raises:
        urllib.error.URLError: If the request fails.
        KeyError: If the response lacks ``query.wbcontentlanguages``.
    """
    origin = target_origin.rstrip("/")
    url = f"{origin}/w/api.php?action=query&meta=wbcontentlanguages&format=json"
    with request.urlopen(url, timeout=timeout) as response:
        body = json.loads(response.read())
    return set(body["query"]["wbcontentlanguages"])
|
||
|
||
def main():
    """Filter entities read from stdin and print them for the target wiki.

    Parses the command line, fetches the content languages of the target
    once, then handles standard input line by line: every non-blank line is
    decoded as one JSON entity, reduced with ``process_entity`` and written to
    standard output as a single JSON line.
    """
    args = parser.parse_args()
    target_languages = get_contentlanguages(args.target)

    # "-" makes fileinput read from standard input; the context manager
    # closes the stream once the input is exhausted.
    with fileinput.input("-") as lines:
        for line in lines:
            if not line.strip():
                # Blank lines are not JSON documents; skip them instead of
                # letting json.loads fail.
                continue
            out = process_entity(
                json.loads(line), pick=args.pick, select_languages=target_languages
            )
            # ensure_ascii=False keeps non-ASCII text readable in the output.
            print(json.dumps(out, ensure_ascii=False))


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import unittest | ||
import urllib | ||
from unittest.mock import MagicMock, patch | ||
|
||
from mangle_data import process_entity, get_contentlanguages | ||
|
||
|
||
class TestProcessEntity(unittest.TestCase):
    """Checks key picking and language filtering in ``process_entity``."""

    # Term text used for each language code in the fixtures below.
    _VALUES = {"en": "pipe", "fr": "pipe", "de": "Pfeife"}

    @classmethod
    def _terms(cls, *langs):
        """Build a fresh language-keyed term map for the given codes."""
        return {
            lang: {"language": lang, "value": cls._VALUES[lang]} for lang in langs
        }

    def test_empty(self):
        """An empty entity yields an empty result."""
        self.assertEqual(process_entity({}), {})

    def test_skip_languages(self):
        """Languages missing from ``select_languages`` are removed."""
        entity = {"labels": self._terms("en", "fr", "de")}

        result = process_entity(
            entity,
            pick=["labels"],
            select_languages=["en", "fr"],
        )

        self.assertEqual(result, {"labels": self._terms("en", "fr")})

    def test_pick_keys(self):
        """Keys that are not picked do not appear in the output."""
        entity = {
            "labels": self._terms("en", "fr", "de"),
            "other": self._terms("en", "fr", "de"),
        }

        result = process_entity(
            entity,
            pick=["labels"],
            select_languages=["en", "fr", "de"],
        )

        self.assertEqual(result, {"labels": self._terms("en", "fr", "de")})

    def test_pick_non_dict(self):
        """Picked values that are not dicts are passed through unchanged."""
        entity = {
            "type": "item",
            "labels": self._terms("en", "fr", "de"),
        }

        result = process_entity(
            entity,
            pick=["type"],
            select_languages=["en", "fr", "de"],
        )

        self.assertEqual(result, {"type": "item"})
|
||
|
||
class TestGetContentlanguages(unittest.TestCase):
    """Checks ``get_contentlanguages`` with the HTTP request mocked out."""

    @patch("urllib.request.urlopen")
    def test_success(self, mock_urlopen):
        """The language codes are read from ``query.wbcontentlanguages``."""
        response = MagicMock()
        # A real response body is bytes, so the fake one is bytes as well.
        response.read.return_value = b'{"batchcomplete":"","query":{"wbcontentlanguages":{"aa":{"code":"aa"},"bb":{"code":"bb"}}}}'
        # urlopen() is used as a context manager; hand back the same object.
        response.__enter__.return_value = response
        mock_urlopen.return_value = response

        result = get_contentlanguages("https://test.wikibase.cloud")

        self.assertSetEqual(result, {"aa", "bb"})
        # Exactly one request, aimed at the wbcontentlanguages API endpoint.
        mock_urlopen.assert_called_once()
        self.assertEqual(
            mock_urlopen.call_args.args[0],
            "https://test.wikibase.cloud/w/api.php?action=query&meta=wbcontentlanguages&format=json",
        )


if __name__ == "__main__":
    unittest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters