From 80111f0b7e1f0ddc287fa79e342504fe3828247f Mon Sep 17 00:00:00 2001 From: Ashot Janibekyan <31385299+ashotjanibekyan@users.noreply.github.com> Date: Thu, 2 Sep 2021 14:53:27 +0400 Subject: [PATCH 1/5] Add an exception for Wiki Loves Monuments 2021 in Armenia. (#22) Since e810058 the expected format is `in Armenia_&_Nagorno-Karabakh`. But the category for WLM 2021 is `Images_from_Wiki_Loves_Monuments_2021_in_Armenia` Same as aebc1f9 for WLE 2021 See https://phabricator.wikimedia.org/T290234 Pull-request #22 Bug: T290234 --- functions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/functions.py b/functions.py index f9f23ec..90faf7b 100644 --- a/functions.py +++ b/functions.py @@ -135,6 +135,7 @@ def get_event_category_template(): ("Monuments", "2019", "Austria"): 'Media_from_WikiDaheim_2019_in_Austria/Cultural_heritage_monuments', ("Monuments", "2018", "Austria"): 'Media_from_WikiDaheim_2018_in_Austria/Cultural_heritage_monuments', ("Monuments", "2017", "Austria"): 'Media_from_WikiDaheim_2017_in_Austria/Cultural_heritage_monuments', + ("Monuments", "2021", "Armenia"): 'Images_from_Wiki_Loves_Monuments_2021_in_Armenia', ("Monuments", "2020", "Armenia"): 'Images_from_Wiki_Loves_Monuments_2020_in_Armenia', ("Monuments", "2019", "Armenia"): 'Images_from_Wiki_Loves_Monuments_2019_in_Armenia', ("Monuments", "2013", "Armenia"): 'Images_from_Wiki_Loves_Monuments_2013_in_Armenia', From 0d60f912e5ba5f03252d58e93a9806509b4281cf Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Thu, 2 Sep 2021 15:01:07 +0200 Subject: [PATCH 2/5] Upgrade to GitHub-native Dependabot (#20) Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com> --- .github/dependabot.yml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..491deae --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,7 @@ +version: 2 +updates: +- package-ecosystem: pip + directory: "/" + schedule: + interval: daily + open-pull-requests-limit: 10 From 55f1fd692fdf6735b50c6487bb85862c8cb0abd0 Mon Sep 17 00:00:00 2001 From: Jean-Frederic Date: Thu, 3 Mar 2022 17:51:25 +0100 Subject: [PATCH 3/5] Extract configuration-related bits from database.py This class was handling two distinct things: - the handling of the on-wiki configuration file - the commons database querying Let's have each in its own file for clarity. --- configuration.py | 65 +++++++++++++++++++ database.py | 63 +------------------ tests/test_configuration.py | 121 ++++++++++++++++++++++++++++++++++++ tests/test_database.py | 111 --------------------------------- 4 files changed, 188 insertions(+), 172 deletions(-) create mode 100644 configuration.py create mode 100644 tests/test_configuration.py diff --git a/configuration.py b/configuration.py new file mode 100644 index 0000000..e3efede --- /dev/null +++ b/configuration.py @@ -0,0 +1,65 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import json +import re +from urllib import urlopen + +from functions import EVENTS + + +def reData(txt, year): + """ + Parser para linha da configuração + """ + events = '|'.join(EVENTS) + regex = ur''' + \s*wl\["(?P%s)"\]\[(?P20\d\d)]\ ?=\ ?\{| + \s*\["(?P[-a-z]+)"\]\ =\ \{\["start"\]\ =\ (?P%s\d{10}),\ \["end"\]\ =\ (?P%s\d\d{10})\} + ''' % (events, year, str(year)[:3]) + m = re.search(regex, txt, re.X) + return m and m.groupdict() + + +def re_prefix(txt): + return re.search(r'\s*\["(?P[\w-]+)"\] = "(?P[\w\-\' ]+)"|(?P\})', txt, re.UNICODE) + + +def get_config_from_commons(page): + api = urlopen('https://commons.wikimedia.org/w/api.php?action=query&format=json&prop=revisions&titles=%s&rvprop=content' % page) + text = json.loads(api.read())['query']['pages'].values()[0]['revisions'][0]['*'] + return unicode(text) + + +def parse_config(text): + data, event, prefixes = {}, None, {} + lines = iter(text.split(u'\n')) + for line in lines: + m = re_prefix(line) + if prefixes and m and m.group('close'): + break + elif m and m.group('prefix'): + prefixes[m.group('prefix')] = m.group('name') + + for line in lines: + g = reData(line, event[-4:] if event else ur'20\d\d') + if not g: + continue + if g['event']: + event = g['event'] + g['year'] + data[event] = {} + elif g['country'] and event: + if g['country'] not in prefixes: + # updateLog.append(u'Unknown prefix: ' + g['country']) + continue + data[event][prefixes[g['country']]] = {'start': int(g['start']), 'end': int(g['end'])} + + return {name: config for name, config in data.items() if config} + + +def getConfig(page): + """ + Lê a configuração da página de configuração no Commons + """ + text = get_config_from_commons(page) + return parse_config(text) diff --git a/database.py b/database.py index 5d659d7..9db4e46 100755 --- a/database.py +++ b/database.py @@ -3,73 +3,14 @@ import io import json -import re import time -from urllib import urlopen +from configuration import getConfig from commons_database import DB -from functions import EVENTS, get_wikiloves_category_name +from functions import get_wikiloves_category_name updateLog = [] - -def reData(txt, year): - """ - Parser para linha da configuração - """ - events = '|'.join(EVENTS) - regex = ur''' - \s*wl\["(?P%s)"\]\[(?P20\d\d)]\ ?=\ ?\{| - \s*\["(?P[-a-z]+)"\]\ =\ \{\["start"\]\ =\ (?P%s\d{10}),\ \["end"\]\ =\ (?P%s\d\d{10})\} - ''' % (events, year, str(year)[:3]) - m = re.search(regex, txt, re.X) - return m and m.groupdict() - - -def re_prefix(txt): - return re.search(r'\s*\["(?P[\w-]+)"\] = "(?P[\w\-\' ]+)"|(?P\})', txt, re.UNICODE) - - -def get_config_from_commons(page): - api = urlopen('https://commons.wikimedia.org/w/api.php?action=query&format=json&prop=revisions&titles=%s&rvprop=content' % page) - text = json.loads(api.read())['query']['pages'].values()[0]['revisions'][0]['*'] - return unicode(text) - - -def parse_config(text): - data, event, prefixes = {}, None, {} - lines = iter(text.split(u'\n')) - for line in lines: - m = re_prefix(line) - if prefixes and m and m.group('close'): - break - elif m and m.group('prefix'): - prefixes[m.group('prefix')] = m.group('name') - - for line in lines: - g = reData(line, event[-4:] if event else ur'20\d\d') - if not g: - continue - if g['event']: - event = g['event'] + g['year'] - data[event] = {} - elif g['country'] and event: - if g['country'] not in prefixes: - updateLog.append(u'Unknown prefix: ' + g['country']) - continue - data[event][prefixes[g['country']]] = {'start': int(g['start']), 'end': int(g['end'])} - - return {name: config for name, config in data.items() if config} - - -def getConfig(page): - """ - Lê a configuração da página de configuração no Commons - """ - text = get_config_from_commons(page) - return parse_config(text) - - dbquery = u'''SELECT img_timestamp, img_name IN (SELECT DISTINCT gil_to FROM globalimagelinks) AS image_in_use, diff --git a/tests/test_configuration.py b/tests/test_configuration.py new file mode 100644 index 0000000..2ac413b --- /dev/null +++ b/tests/test_configuration.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +"""Unit tests for configuration.py.""" + +import unittest + +import configuration + + +class TestReData(unittest.TestCase): + + def test_reData_event_line(self): + input_data = 'wl["monuments"][2010] = {' + result = configuration.reData(input_data, 2014) + expected = { + u'country': None, + u'year': '2010', + u'end': None, + u'event': 'monuments', + u'start': None + } + self.assertEqual(result, expected) + + def test_reData_event_line_public_art(self): + input_data = 'wl["public_art"][2012] = {' + result = configuration.reData(input_data, 2014) + expected = { + u'country': None, + u'year': '2012', + u'end': None, + u'event': 'public_art', + u'start': None + } + self.assertEqual(result, expected) + + def test_reData_country_line(self): + input_data = ''' + ["az"] = {["start"] = 20170430200000, ["end"] = 20170531195959}, + ''' + result = configuration.reData(input_data, 2017) + expected = { + u'country': 'az', + u'year': None, + u'end': '20170531195959', + u'event': None, + u'start': '20170430200000' + } + self.assertEqual(result, expected) + + +class TestRePrefix(unittest.TestCase): + + def test_re_prefix_match_ascii_line(self): + self.assertIsNotNone(configuration.re_prefix(u' ["az"] = "Azerbaijan",')) + + def test_re_prefix_match_ascii_line_with_space(self): + self.assertIsNotNone(configuration.re_prefix(u' ["gq"] = "Equatorial Guinea",')) + + def test_re_prefix_match_ascii_line_with_dash(self): + self.assertIsNotNone(configuration.re_prefix(u' ["gw"] = "Guinea-Bissau",')) + + def test_re_prefix_match_ascii_line_with_accents(self): + self.assertIsNotNone(configuration.re_prefix(u' ["re"] = "Réunion",')) + + def test_re_prefix_match_ascii_line_with_apostrophe(self): + self.assertIsNotNone(configuration.re_prefix(u' ["ci"] = "Côte d\'Ivoire",')) + + +class TestParseConfig(unittest.TestCase): + + def test_parse_config_empty(self): + config = '' + result = configuration.parse_config(config) + expected = {} + self.assertEquals(result, expected) + + def test_parse_config(self): + config = ''' +wl["prefixes"] = { + ["az"] = "Azerbaijan", + ["gw"] = "Guinea-Bissau" +} + +wl["monuments"][2017] = { + ["az"] = {["start"] = 20170430200000, ["end"] = 20170531195959}, + ["gw"] = {["start"] = 20170430200000, ["end"] = 20170531195959}, +} + +wl["monuments"][2018] = { + ["az"] = {["start"] = 20180430200000, ["end"] = 20180531195959}, + ["gw"] = {["start"] = 20180430200000, ["end"] = 20180531195959}, +} + +''' + result = configuration.parse_config(config) + expected = { + u'monuments2017': { + u'Azerbaijan': { + 'start': 20170430200000, + 'end': 20170531195959, + }, + u'Guinea-Bissau': { + 'start': 20170430200000, + 'end': 20170531195959, + }, + }, + u'monuments2018': { + u'Azerbaijan': { + 'start': 20180430200000, + 'end': 20180531195959, + }, + u'Guinea-Bissau': { + 'start': 20180430200000, + 'end': 20180531195959, + }, + } + } + self.assertEquals(result, expected) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_database.py b/tests/test_database.py index 4174f10..5b469ba 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -8,65 +8,6 @@ import database -class TestReData(unittest.TestCase): - - def test_reData_event_line(self): - input_data = 'wl["monuments"][2010] = {' - result = database.reData(input_data, 2014) - expected = { - u'country': None, - u'year': '2010', - u'end': None, - u'event': 'monuments', - u'start': None - } - self.assertEqual(result, expected) - - def test_reData_event_line_public_art(self): - input_data = 'wl["public_art"][2012] = {' - result = database.reData(input_data, 2014) - expected = { - u'country': None, - u'year': '2012', - u'end': None, - u'event': 'public_art', - u'start': None - } - self.assertEqual(result, expected) - - def test_reData_country_line(self): - input_data = ''' - ["az"] = {["start"] = 20170430200000, ["end"] = 20170531195959}, - ''' - result = database.reData(input_data, 2017) - expected = { - u'country': 'az', - u'year': None, - u'end': '20170531195959', - u'event': None, - u'start': '20170430200000' - } - self.assertEqual(result, expected) - - -class TestRePrefix(unittest.TestCase): - - def test_re_prefix_match_ascii_line(self): - self.assertIsNotNone(database.re_prefix(u' ["az"] = "Azerbaijan",')) - - def test_re_prefix_match_ascii_line_with_space(self): - self.assertIsNotNone(database.re_prefix(u' ["gq"] = "Equatorial Guinea",')) - - def test_re_prefix_match_ascii_line_with_dash(self): - self.assertIsNotNone(database.re_prefix(u' ["gw"] = "Guinea-Bissau",')) - - def test_re_prefix_match_ascii_line_with_accents(self): - self.assertIsNotNone(database.re_prefix(u' ["re"] = "Réunion",')) - - def test_re_prefix_match_ascii_line_with_apostrophe(self): - self.assertIsNotNone(database.re_prefix(u' ["ci"] = "Côte d\'Ivoire",')) - - class TestConvertDatabaseRecord(unittest.TestCase): def test_convert_database_record(self): @@ -226,57 +167,5 @@ def test_udpate_event_data(self): self.mock_write_database_as_json.assert_called_once_with(expected) -class TestParseConfig(unittest.TestCase): - - def test_parse_config_empty(self): - config = '' - result = database.parse_config(config) - expected = {} - self.assertEquals(result, expected) - - def test_parse_config(self): - config = ''' -wl["prefixes"] = { - ["az"] = "Azerbaijan", - ["gw"] = "Guinea-Bissau" -} - -wl["monuments"][2017] = { - ["az"] = {["start"] = 20170430200000, ["end"] = 20170531195959}, - ["gw"] = {["start"] = 20170430200000, ["end"] = 20170531195959}, -} - -wl["monuments"][2018] = { - ["az"] = {["start"] = 20180430200000, ["end"] = 20180531195959}, - ["gw"] = {["start"] = 20180430200000, ["end"] = 20180531195959}, -} - -''' - result = database.parse_config(config) - expected = { - u'monuments2017': { - u'Azerbaijan': { - 'start': 20170430200000, - 'end': 20170531195959, - }, - u'Guinea-Bissau': { - 'start': 20170430200000, - 'end': 20170531195959, - }, - }, - u'monuments2018': { - u'Azerbaijan': { - 'start': 20180430200000, - 'end': 20180531195959, - }, - u'Guinea-Bissau': { - 'start': 20180430200000, - 'end': 20180531195959, - }, - } - } - self.assertEquals(result, expected) - - if __name__ == "__main__": unittest.main() From ab234c718bc170d38ca12156cd1962e50d80c1a4 Mon Sep 17 00:00:00 2001 From: Jean-Frederic Date: Thu, 3 Mar 2022 17:51:30 +0100 Subject: [PATCH 4/5] Add SOURCE_PATH override in docker-compose.yml That way we can run the `update-database.sh` Bash script inside the docker-compose environment. --- docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yml b/docker-compose.yml index 78c0974..6ebc136 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,6 +17,7 @@ services: - DB_USERNAME=$TOOLFORGE_DB_USERNAME - DB_PASSWORD=$TOOLFORGE_DB_PASSWORD - DB_HOST=db_commons + - SOURCE_PATH=/code/ db_commons: image: jeanfred/wiki-replicas-proxy volumes: From 6bea36179de480a716675993a1e546da26987f0c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 29 Apr 2022 15:28:35 +0000 Subject: [PATCH 5/5] Bump click from 7.1.2 to 8.1.3 Bumps [click](https://github.com/pallets/click) from 7.1.2 to 8.1.3. - [Release notes](https://github.com/pallets/click/releases) - [Changelog](https://github.com/pallets/click/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/click/compare/7.1.2...8.1.3) --- updated-dependencies: - dependency-name: click dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 475debc..c3cf143 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -i https://pypi.org/simple -click==7.1.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' +click==8.1.3; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' flask==1.1.2 itsdangerous==1.1.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' jinja2==2.11.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'