This repository has been archived by the owner on Feb 19, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathreposlist.py
62 lines (49 loc) · 1.52 KB
/
reposlist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
"""
Refresh the repository list.

Usage:
    python reposlist.py GITHUB_ACCESS_TOKEN

We may want to replace this script with a fab task, or call it from one.
"""
import re
import json
import base64
import pathlib
from github import Github, GithubException
# Names of the GitHub organizations whose repositories are scanned by `iter_repos`.
# Each name is passed to `gh.get_organization()`; results are reported per organization.
ORGS = [
'lexibank',
'numeralbank',
'cldf-datasets',
'dictionaria',
'intercontinental-dictionary-series',
]
def cldfbench_curated(repo):
    """
    Determine which curation framework a repository's `setup.py` registers a dataset with.

    The top-level directory listing of `repo` is inspected for a file called `setup.py`.
    Its base64-encoded content is decoded as UTF-8 text and searched for a quoted
    `<curator>.dataset` string.

    :param repo: object providing `get_contents(path)`, which yields entries with `name` \
    and base64-encoded `content` attributes.
    :return: the matched curator name, or `None` if there is no `setup.py` or it contains \
    no such string.
    """
    # NOTE(review): the last alternative reads 'international-dictionary-series', whereas the
    # organization listed in ORGS is 'intercontinental-dictionary-series' - confirm which
    # spelling the entry point group actually uses.
    entry_point = re.compile(
        r"""["'](?P<curator>lexibank|cldfbench|international-dictionary-series)\.dataset["']""")
    setup_files = (entry for entry in repo.get_contents('.') if entry.name == 'setup.py')
    for setup_file in setup_files:
        source = base64.b64decode(setup_file.content).decode('utf8')
        found = entry_point.search(source)
        if found is not None:
            return found.group('curator')
    return None
def iter_repos(gh):
    """
    Yield one record per non-private repository of the organizations listed in `ORGS`.

    Each record is a 4-tuple of
    - the organization name,
    - the repository's clone URL,
    - the paths of all entries in the repository's `cldf` directory whose name ends in
      `metadata.json`,
    - the result of `cldfbench_curated` for the repository.

    Repositories for which any of these lookups raises `GithubException` are skipped.

    :param gh: client object providing `get_organization(name)`.
    """
    for org_name in ORGS:
        organization = gh.get_organization(org_name)
        for repository in organization.get_repos():
            if not repository.private:
                try:
                    # Evaluated in the same order as the fields of the yielded tuple.
                    clone_url = repository.clone_url
                    metadata_paths = [
                        entry.path
                        for entry in repository.get_contents('cldf')
                        if entry.name.endswith('metadata.json')
                    ]
                    curator = cldfbench_curated(repository)
                    yield org_name, clone_url, metadata_paths, curator
                except GithubException:
                    # Best effort: silently move on to the next repository.
                    continue
def main(gh):
    """
    Write the repository list to `reposlist.json` in the current working directory.

    The records produced by `iter_repos` are sorted by organization name and clone URL;
    records with an empty list of metadata paths are dropped. The result is serialized as
    JSON with an indent of 4.

    :param gh: client object, passed on unchanged to `iter_repos`.
    """
    # Collect everything before touching the output file: opening with mode 'w' truncates,
    # so a failure while querying the API must not wipe out an existing list.
    repos = [
        repo for repo in sorted(iter_repos(gh), key=lambda t: (t[0], t[1])) if repo[2]]
    with pathlib.Path('reposlist.json').open('w', encoding='utf8') as fp:
        json.dump(repos, fp, indent=4)
if __name__ == '__main__':
    import sys

    # The access token is the single required command line argument; exit with the usage
    # line rather than an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit('Usage: python reposlist.py GITHUB_ACCESS_TOKEN')
    main(Github(sys.argv[1]))