From 42c0da7acc8c1de087e46f788974f71afdc9d4f2 Mon Sep 17 00:00:00 2001 From: Robert Forkel Date: Wed, 30 Nov 2022 15:52:36 +0100 Subject: [PATCH] closes #106 --- RELEASING.md | 2 -- cldf/StructureDataset-metadata.json | 9 ++++++++- cldf/docs/chapter_108.html | 2 +- cldf/requirements.txt | 2 +- cldfbench_wals.py | 8 ++++++-- setup.py | 2 ++ 6 files changed, 18 insertions(+), 7 deletions(-) diff --git a/RELEASING.md b/RELEASING.md index 82554e5..6342455 100644 --- a/RELEASING.md +++ b/RELEASING.md @@ -7,5 +7,3 @@ - Run `pytest` - Update `CHANGES.md` - - diff --git a/cldf/StructureDataset-metadata.json b/cldf/StructureDataset-metadata.json index dec1f1c..49759c3 100644 --- a/cldf/StructureDataset-metadata.json +++ b/cldf/StructureDataset-metadata.json @@ -16,7 +16,7 @@ { "rdf:about": "https://github.com/cldf-datasets/wals", "rdf:type": "prov:Entity", - "dc:created": "v2020.2-1-g2034005", + "dc:created": "v2020.2-5-g836d933", "dc:title": "Repository" }, { @@ -170,6 +170,7 @@ }, "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#id", "required": true, + "valueUrl": "docs/chapter_{ID}.html", "name": "ID" }, { @@ -631,6 +632,9 @@ "dc:description": "The sequence of words of the primary text to be aligned with glosses", "dc:extent": "multivalued", "datatype": "string", + "null": [ + "_____" + ], "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#analyzedWord", "required": false, "separator": "\t", @@ -640,6 +644,9 @@ "dc:description": "The sequence of glosses aligned with the words of the primary text", "dc:extent": "multivalued", "datatype": "string", + "null": [ + "_____" + ], "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#gloss", "required": false, "separator": "\t", diff --git a/cldf/docs/chapter_108.html b/cldf/docs/chapter_108.html index e5033e1..1c7095d 100644 --- a/cldf/docs/chapter_108.html +++ b/cldf/docs/chapter_108.html @@ -60,7 +60,7 @@

youth-abs

-

antip-carry-aor.3sg.subj-pl

+

antip-carry-aor.3.subj-pl

load-instr

diff --git a/cldf/requirements.txt b/cldf/requirements.txt index a21993d..83fa6ea 100644 --- a/cldf/requirements.txt +++ b/cldf/requirements.txt @@ -5,7 +5,7 @@ bs4==0.0.1 certifi==2019.11.28 chardet==3.0.4 -e git+https://github.com/cldf/cldfbench@94d5137c50a1dc5ee057865ed171bb4421aa2ff8#egg=cldfbench --e git+https://github.com/cldf-datasets/wals@2034005e0aafbb197dc55c4d4786885df5df0d3e#egg=cldfbench_wals +-e git+https://github.com/cldf-datasets/wals@836d933d6b6ca37e82afab5fd2fc0e7f4200d359#egg=cldfbench_wals cldfcatalog==1.5.1 cldfviz==0.10.0 clldutils==3.14.0 diff --git a/cldfbench_wals.py b/cldfbench_wals.py index 8e52541..19bb3da 100644 --- a/cldfbench_wals.py +++ b/cldfbench_wals.py @@ -6,6 +6,7 @@ import collections from csvw import dsv +from csvw.metadata import URITemplate from cldfbench import Dataset as BaseDataset from cldfbench import CLDFSpec from clldutils.misc import data_url, slug @@ -308,8 +309,8 @@ def cmd_makecldf(self, args): 'Language_ID': pk2id['language'][ex['language_pk']], 'Primary_Text': ex['name'], 'Translated_Text': ex['description'], - 'Analyzed_Word': a, - 'Gloss': g, + 'Analyzed_Word': ['…' if t is None else t for t in a], + 'Gloss': ['…' if t is None else t for t in g], }) example_by_value = { vpk: [r['sentence_pk'] for r in rows] @@ -471,6 +472,8 @@ def create_schema(self, cldf): "the taxonomic units of the Genealogical Language List have IDs prefixed with " \ "'family-', 'subfamily-' or 'genus-'." cldf.add_component('ExampleTable') + cldf[('ExampleTable', 'Gloss')].null = ['_____'] + cldf[('ExampleTable', 'Analyzed_Word')].null = ['_____'] t = cldf.add_table( 'language_names.csv', { @@ -534,6 +537,7 @@ def create_schema(self, cldf): 'separator': ' ', }, ) + cldf[('ContributionTable', 'ID')].valueUrl = URITemplate('docs/chapter_{ID}.html') t = cldf.add_table( 'areas.csv', { diff --git a/setup.py b/setup.py index e8a110a..e9afb08 100644 --- a/setup.py +++ b/setup.py @@ -15,6 +15,8 @@ ], }, install_requires=[ + 'python-nexus', + 'newick', 'cldfbench>=1.6.0', 'clldutils>=3.7.0', 'pycldf>=1.19.0',