Skip to content

Commit

Permalink
fix problem with db tables derived from files with underscores in the…
Browse files Browse the repository at this point in the history
… filename
  • Loading branch information
xrotwang committed Mar 18, 2024
1 parent 944a86e commit 27b44ce
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 6 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

The `pycldf` package adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## Unreleased

Fixed a bug whereby the names of components whose CSV filenames contain underscores were not
translated appropriately when creating the SQLite db. (Note that this fix is required for the
ParameterNetwork component in CLDF 1.3.)


## [1.37.0] - 2024-01-22

Expand Down
31 changes: 29 additions & 2 deletions src/pycldf/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,39 @@ def translate(d: typing.Dict[str, TableTranslation], table: str, col=None) -> st
# A simple, translatable column name.
return d[table].columns[col]
if '_' in col:
t, _, c = col.partition('_')
parts = col.split('_')
t = '_'.join(parts[:-1])
c = parts[-1]
if t in table and t in d and c in d[t].columns:
# A generated column name of an association table.
return '_'.join([d[t].name or t, d[t].columns[c]])
return col
return '_'.join([(d[t].name or t) if t in d else t for t in table.split('_')])

# To handle association tables, we proceed as follows:
# 1. We split the full table name on underscores - the separators of sub-table names in
# association tables.
# 2. Since regular table names may contain underscores as well, we try to find the longest
# concatenation of _-separated name parts which appears in the translation dict.
# 3. We repeat step 2 until all name parts have been consumed.
def t(n):
    """Translate the (possibly composite) table name `n` using the translation dict `d`.

    A name that is itself a key of `d` is translated directly. Otherwise the name is
    split on underscores and consumed left to right, each time taking the longest run
    of leading parts which - joined with underscores - is a key of `d`. A single
    trailing part that is not a key of `d` is kept as is.

    :param n: Table name, e.g. the name of an association table such as `a_b.csv_c_d.csv`.
    :return: The translated sub-table names, joined with underscores.
    :raises ValueError: If more than one part is left and no leading run of parts is a \
    key of `d`.
    """
    def label(key):
        # The translated name, falling back to the original if no name is specified.
        return d[key].name or key

    if n in d:
        return label(n)

    translated = []
    remaining = n.split('_')
    while len(remaining) > 1:
        # Size of the longest prefix of the remaining parts that names a known table.
        size = next(
            (k for k in range(len(remaining), 0, -1) if '_'.join(remaining[:k]) in d),
            None)
        if size is None:
            raise ValueError(n)  # pragma: no cover
        translated.append(label('_'.join(remaining[:size])))
        remaining = remaining[size:]

    if remaining:
        assert len(remaining) == 1
        last = remaining[0]
        translated.append(label(last) if last in d else last)
    return '_'.join(translated)

return t(table)


def clean_bibtex_key(s):
Expand Down
32 changes: 28 additions & 4 deletions tests/test_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,35 @@ def test_db_write(tmp_path, data):

def test_db_write_extra_tables(md):
ds = Generic.in_dir(md.parent)
ds.add_table('extra.csv', 'ID', 'Name', {'name': 'x', 'separator': '#'})
ds.write(md, **{'extra.csv': [dict(ID=1, Name='Name', x=['a', 'b', 'c'])]})
ds.add_table(
'ext_ra.csv',
{'name': 'ID', 'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#id'},
'Name', {'name': 'x', 'separator': '#'})
ds.add_component('ParameterTable')
ds.add_component(
'ParameterNetwork',
{
'name': 'Source',
'separator': ';',
'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#source'},
{'name': 'ex', 'separator': ' '},
)
ds.add_foreign_key('ParameterNetwork', 'ex', 'ext_ra.csv', 'ID')
ds.write(md, **{
'ParameterTable': [dict(ID='p')],
'ParameterNetwork': [
dict(ID='e', Target_Parameter_ID='p', Source_Parameter_ID='p', ex=['1'])],
'ext_ra.csv': [dict(ID='1', Name='Name', x=['a', 'b', 'c'])]})

db = Database(ds, fname=md.parent / 'db.sqlite')
db.write_from_tg()
rows = db.query("""select x from "extra.csv" """)
rows = db.query("""select x from "ext_ra.csv" """)
assert len(rows) == 1
assert rows[0][0] == 'a#b#c'
assert db.split_value('extra.csv', 'x', rows[0][0]) == ['a', 'b', 'c']
assert db.split_value('ext_ra.csv', 'x', rows[0][0]) == ['a', 'b', 'c']
# We check that association tables where filenames for both components contain underscores work:
rows = db.query("""select count(*) from "ParameterNetwork_ext_ra.csv" """)
assert len(rows) == 1


def test_db_write_extra_columns(md):
Expand Down Expand Up @@ -130,6 +150,8 @@ def translations():
return {
'forms.csv': TableTranslation(columns={'pk': 'id'}),
'parameters.csv': TableTranslation(name='PTable', columns={'pk': 'id'}),
'a_b.csv': TableTranslation(name='ab'),
'c_d.csv': TableTranslation(name='cd'),
}


Expand All @@ -141,6 +163,8 @@ def translations():
('forms.csv_parameters.csv', None, 'forms.csv_PTable'),
('forms.csv_parameters.csv', 'parameters.csv_pk', 'PTable_id'),
('forms.csv_parameters.csv', 'forms.csv_pk', 'forms.csv_id'),
('a_b.csv_c_d.csv', None, 'ab_cd'),
('a_b.csv_cd.csv', None, 'ab_cd.csv'),
]
)
def test_translate(translations, table, col, expected):
Expand Down

0 comments on commit 27b44ce

Please sign in to comment.