Skip to content

Commit

Permalink
Merge pull request #74 from lexibank/minutia
Browse files Browse the repository at this point in the history
The usual pre-release minutia
  • Loading branch information
LinguList authored Jan 10, 2025
2 parents 2691a61 + 2cb440a commit 367c67a
Show file tree
Hide file tree
Showing 10 changed files with 126 additions and 39 deletions.
84 changes: 84 additions & 0 deletions .github/workflows/cldf-validation.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Validates the CLDF datasets in cldf/ on every push to main and on every
# pull request targeting main. Each job installs pytest-cldf and runs test.py
# against one metadata file, so a failure points at a single dataset.
name: CLDF-validation

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# The jobs only check out the repository and run tests, so a read-only
# token is sufficient.
permissions:
  contents: read

jobs:
  validate-lexicon:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so the version stays a string; an unquoted 3.10 would be
        # parsed as the float 3.1.
        python-version: ["3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pytest-cldf
      - name: Validate lexicon
        run: |
          pytest --cldf-metadata=cldf/lexicon-metadata.json test.py

  validate-phonemes:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted for the same reason as in validate-lexicon.
        python-version: ["3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pytest-cldf
      - name: Validate phonemes
        run: |
          pytest --cldf-metadata=cldf/phonemes-metadata.json test.py

  validate-phonology:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted for the same reason as in validate-lexicon.
        python-version: ["3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pytest-cldf
      - name: Validate phonology
        run: |
          pytest --cldf-metadata=cldf/phonology-metadata.json test.py

  validate-wordlist:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted for the same reason as in validate-lexicon.
        python-version: ["3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pytest-cldf
      - name: Validate wordlist
        run: |
          pytest --cldf-metadata=cldf/wordlist-metadata.json test.py
6 changes: 3 additions & 3 deletions analysis/comrie-query/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
concept,
mappings[0][0],
mappings[0][1]
]]
]]

# cursor.execute("insert into LanguageTable(cldf_id) values ('proto');")
# for i, row in enumerate(data):
Expand Down Expand Up @@ -93,8 +93,8 @@
print(tabulate(
table[:10],
tablefmt="pipe",
headers=header)
)
headers=header,
))

with open('matches.tsv', 'w', encoding='utf8', newline='') as f:
writer = csv.writer(f, delimiter='\t')
Expand Down
4 changes: 2 additions & 2 deletions analysis/word_families/cognateset_diversity.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
print(tabulate(
table[:10],
tablefmt="pipe",
headers=header)
)
headers=header,
))

with open('cognateset_diversity.tsv', 'w', encoding='utf8', newline='') as f:
writer = csv.writer(f, delimiter='\t')
Expand Down
4 changes: 2 additions & 2 deletions analysis/word_families/colexifications.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
print(tabulate(
table[:10],
tablefmt="pipe",
headers=header)
)
headers=header,
))

with open('colex.tsv', 'w', encoding='utf8', newline='') as f:
writer = csv.writer(f, delimiter='\t')
Expand Down
4 changes: 2 additions & 2 deletions analysis/word_families/lb_release.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
print(tabulate(
table[:10],
tablefmt="pipe",
headers=header)
)
headers=header,
))

with open('lb_1.tsv', 'w', encoding='utf8', newline='') as f:
writer = csv.writer(f, delimiter='\t')
Expand Down
Empty file.
4 changes: 2 additions & 2 deletions lexibank_analysed_commands/correlations.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from cldfzenodo import Record
from clldutils.clilib import Table, add_format

from lexibank_lexibank_analysed import Dataset as LB, CLTS_2_1
from lexibank_lexibank_analysed import Dataset as LB, CLTS_2_3


def register(parser):
Expand All @@ -30,7 +30,7 @@ def run(args):
lba = LB()

args.log.info('Loading data ...')
clts = CLTS(lba.raw_dir / CLTS_2_1[1])
clts = CLTS(lba.raw_dir / CLTS_2_3[1])
clts2phoible = clts.transcriptiondata_dict["phoible"]

# WALS Online v2020.1
Expand Down
46 changes: 22 additions & 24 deletions lexibank_lexibank_analysed.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,12 @@ def cmd_download(self, args):
shutil.rmtree(dest)

args.log.info(f"Downloading {dataset}")
record = cldfzenodoapi.get_record(row["Zenodo"])
record = cldfzenodoapi.get_record(row["Zenodo"])
# check if record is most recent one
rec_new = record.from_concept_doi(record.concept_doi)
if rec_new.doi != record.doi:
record = rec_new
args.log.warn(f"DOI for datasets {row["ID"]} is not the latest version!")
args.log.warn(f'DOI for datasets {row["ID"]} is not the latest version!')
record.download(dest)

# load zenodo info to make a new bibtex and doi
Expand All @@ -188,21 +188,19 @@ def cmd_download(self, args):
meta = json.load(f)
description = meta["citation"]
# create bibtex and write to new file
bib = dict(author=" and ".join(record.creators),
title=record.title,
publisher="Zenodo",
year=record.year,
address="Geneva",
doi=record.doi)
bib = dict(
author=" and ".join(record.creators),
title=record.title,
publisher="Zenodo",
year=record.year,
address="Geneva",
doi=record.doi)
if editors:
bib["editor"] = " and ".join(editors)
if description:
bib["citation"] = description

sources.add(pycldf.Source(
"book",
row["ID"],
**bib))
sources.add(pycldf.Source("book", row["ID"], **bib))

# check if source is in sources
for src_key in row["Source"].split(" "):
Expand Down Expand Up @@ -252,7 +250,7 @@ def _schema(self, writer, with_stats=False, collstats=None):
{'name': 'Forms', 'datatype': 'integer', 'dc:description': 'Number of forms'},
{'name': "FormsWithSounds", "datatype": "integer", "dc:description": "Number of forms with sounds"},
{'name': 'Concepts', 'datatype': 'integer', 'dc:description': 'Number of concepts'},
{'name': 'Incollections', 'datatype': "string", "separator": " ",
{'name': 'Incollections', 'datatype': "string", "separator": " ",
"dc:description": "Subselections of Lexibank"},
'Subgroup',
'Family',
Expand All @@ -277,11 +275,11 @@ def _schema(self, writer, with_stats=False, collstats=None):
'Senses',
'Forms',
{
"name": 'Source',
"name": 'Source',
"propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#source",
"datatype": "string",
"separator": ";"
},
"separator": ";",
},
)
writer.cldf.add_foreign_key('ContributionTable', 'Collection_IDs', 'collections.csv', 'ID')

Expand Down Expand Up @@ -314,7 +312,7 @@ def _schema(self, writer, with_stats=False, collstats=None):
def cmd_makecldf(self, args):
cid2gls = {c.id: c.gloss for c in
self.concepticon.conceptsets.values()}
languoids = self.glottolog.cached_languoids
languoids = self.glottolog.cached_languoids
visited = set()
collstats = collections.OrderedDict()
for cid, (desc, name) in COLLECTIONS.items():
Expand Down Expand Up @@ -381,7 +379,7 @@ def _add_language(
args.log.warn(f"{language.name} / {language.dataset} / {language.glottocode}")
return False
else:
langs['Incollections'] = langs['Incollections'] + [collection]
langs['Incollections'].append(collection)
if language.id not in visited:
for cid in ["ClicsCore", "LexiCore", "CogCore", "ProtoCore"]:
try:
Expand Down Expand Up @@ -464,8 +462,8 @@ def _add_languages(
duplicates = set()
for form in language.forms_with_sounds:
form_check = "{0}-{1}".format(
form.concept.concepticon_id if form.concept else "",
str(form.sounds))
form.concept.concepticon_id if form.concept else "",
str(form.sounds))
if form.concept and \
form.concept.concepticon_id and \
form.concept.concepticon_id in cid2gls and \
Expand All @@ -488,7 +486,7 @@ def _add_languages(
SCA_Sound_Classes="".join(
clts.soundclass("sca")(form.sounds)),
Source=self.dataset_meta[language.dataset]["ID"],
)
)
visited_concepts.add(cgls)
elif form_check in duplicates:
excluded.append(form)
Expand All @@ -499,11 +497,11 @@ def _add_languages(
f.write("--- | --- | --- | --- | ---\n")
for form in excluded:
f.write(" | ".join([
form.id,
form.id,
form.language.name,
form.concept.concepticon_gloss,
form.form, str(form.sounds)]) + "\n")

args.log.info('added lexibank forms')
# retrieve central concept from Rzymski concept list
central_concepts = {
Expand Down Expand Up @@ -598,7 +596,7 @@ def _add_languages(
args.log.info("write information on selected languages")
for lid, language in languages.items():
if lid in best_languages:
languages[lid]["Incollections"] += ["Selexion"]
languages[lid]["Incollections"].append("Selexion")
collstats["Selexion"]["Glottocodes"].add(best_languages[lid].glottocode)
collstats["Selexion"]["Varieties"] += 1
collstats["Selexion"]["Forms"] += len(best_languages[lid].forms)
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Build configuration: setuptools is the only build requirement and
# setuptools.build_meta is the build backend.
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
10 changes: 6 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
license='MIT',
url='https://github.com/lexibank/lexibank-analysed',
py_modules=['lexibank_lexibank_analysed'],
packages=find_packages(where='.'),
packages=['lexibank_analysed_commands'],
include_package_data=True,
zip_safe=False,
entry_points={
Expand All @@ -25,7 +25,7 @@
],
},
platforms='any',
python_requires='>=3.6',
python_requires='>=3.8',
install_requires=[
'collabutils[googlesheets]',
'cldfbench>=1.7.2',
Expand Down Expand Up @@ -63,10 +63,12 @@
'Natural Language :: English',
'Operating System :: OS Independent',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
'Programming Language :: Python :: 3.13',
'Programming Language :: Python :: Implementation :: CPython',
'Programming Language :: Python :: Implementation :: PyPy'
],
Expand Down

0 comments on commit 367c67a

Please sign in to comment.