From 42c0da7acc8c1de087e46f788974f71afdc9d4f2 Mon Sep 17 00:00:00 2001
From: Robert Forkel <xrotwang@googlemail.com>
Date: Wed, 30 Nov 2022 15:52:36 +0100
Subject: [PATCH] closes #106

---
 RELEASING.md                        | 2 --
 cldf/StructureDataset-metadata.json | 9 ++++++++-
 cldf/docs/chapter_108.html          | 2 +-
 cldf/requirements.txt               | 2 +-
 cldfbench_wals.py                   | 8 ++++++--
 setup.py                            | 2 ++
 6 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/RELEASING.md b/RELEASING.md
index 82554e5..6342455 100644
--- a/RELEASING.md
+++ b/RELEASING.md
@@ -7,5 +7,3 @@
 
 - Run `pytest`
 - Update `CHANGES.md`
-
-
diff --git a/cldf/StructureDataset-metadata.json b/cldf/StructureDataset-metadata.json
index dec1f1c..49759c3 100644
--- a/cldf/StructureDataset-metadata.json
+++ b/cldf/StructureDataset-metadata.json
@@ -16,7 +16,7 @@
         {
             "rdf:about": "https://github.com/cldf-datasets/wals",
             "rdf:type": "prov:Entity",
-            "dc:created": "v2020.2-1-g2034005",
+            "dc:created": "v2020.2-5-g836d933",
             "dc:title": "Repository"
         },
         {
@@ -170,6 +170,7 @@
                         },
                         "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#id",
                         "required": true,
+                        "valueUrl": "docs/chapter_{ID}.html",
                         "name": "ID"
                     },
                     {
@@ -631,6 +632,9 @@
                         "dc:description": "The sequence of words of the primary text to be aligned with glosses",
                         "dc:extent": "multivalued",
                         "datatype": "string",
+                        "null": [
+                            "_____"
+                        ],
                         "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#analyzedWord",
                         "required": false,
                         "separator": "\t",
@@ -640,6 +644,9 @@
                         "dc:description": "The sequence of glosses aligned with the words of the primary text",
                         "dc:extent": "multivalued",
                         "datatype": "string",
+                        "null": [
+                            "_____"
+                        ],
                         "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#gloss",
                         "required": false,
                         "separator": "\t",
diff --git a/cldf/docs/chapter_108.html b/cldf/docs/chapter_108.html
index e5033e1..1c7095d 100644
--- a/cldf/docs/chapter_108.html
+++ b/cldf/docs/chapter_108.html
@@ -60,7 +60,7 @@ <h2 class="heading-1">
 <p class="Standard">youth-<span class="T3"><span class="hint--bottom" data-hint="absolutive">abs</span></span></p>
 </td>
 <td style="text-align:left;">
-<p class="Standard"><span class="T3"><span class="hint--bottom" data-hint="antipassive">antip</span></span>-carry-<span class="T3">aor.<span class="hint--bottom" data-hint="singular">3sg</span>.subj-pl</span></p>
+<p class="Standard"><span class="T3"><span class="hint--bottom" data-hint="antipassive">antip</span></span>-carry-<span class="T3">aor.<span class="hint--bottom" data-hint="singular">3</span>.subj-pl</span></p>
 </td>
 <td style="text-align:left;">
 <p class="Standard">load-<span class="T3">instr</span></p>
diff --git a/cldf/requirements.txt b/cldf/requirements.txt
index a21993d..83fa6ea 100644
--- a/cldf/requirements.txt
+++ b/cldf/requirements.txt
@@ -5,7 +5,7 @@ bs4==0.0.1
 certifi==2019.11.28
 chardet==3.0.4
 -e git+https://github.com/cldf/cldfbench@94d5137c50a1dc5ee057865ed171bb4421aa2ff8#egg=cldfbench
--e git+https://github.com/cldf-datasets/wals@2034005e0aafbb197dc55c4d4786885df5df0d3e#egg=cldfbench_wals
+-e git+https://github.com/cldf-datasets/wals@836d933d6b6ca37e82afab5fd2fc0e7f4200d359#egg=cldfbench_wals
 cldfcatalog==1.5.1
 cldfviz==0.10.0
 clldutils==3.14.0
diff --git a/cldfbench_wals.py b/cldfbench_wals.py
index 8e52541..19bb3da 100644
--- a/cldfbench_wals.py
+++ b/cldfbench_wals.py
@@ -6,6 +6,7 @@
 import collections
 
 from csvw import dsv
+from csvw.metadata import URITemplate
 from cldfbench import Dataset as BaseDataset
 from cldfbench import CLDFSpec
 from clldutils.misc import data_url, slug
@@ -308,8 +309,8 @@ def cmd_makecldf(self, args):
                     'Language_ID': pk2id['language'][ex['language_pk']],
                     'Primary_Text': ex['name'],
                     'Translated_Text': ex['description'],
-                    'Analyzed_Word': a,
-                    'Gloss': g,
+                    'Analyzed_Word': ['…' if t is None else t for t in a],
+                    'Gloss': ['…' if t is None else t for t in g],
                 })
         example_by_value = {
             vpk: [r['sentence_pk'] for r in rows]
@@ -471,6 +472,8 @@ def create_schema(self, cldf):
             "the taxonomic units of the  Genealogical Language List have IDs prefixed with " \
             "'family-', 'subfamily-' or 'genus-'."
         cldf.add_component('ExampleTable')
+        cldf[('ExampleTable', 'Gloss')].null = ['_____']
+        cldf[('ExampleTable', 'Analyzed_Word')].null = ['_____']
         t = cldf.add_table(
             'language_names.csv',
             {
@@ -534,6 +537,7 @@ def create_schema(self, cldf):
                 'separator': ' ',
             },
         )
+        cldf[('ContributionTable', 'ID')].valueUrl = URITemplate('docs/chapter_{ID}.html')
         t = cldf.add_table(
             'areas.csv',
             {
diff --git a/setup.py b/setup.py
index e8a110a..e9afb08 100644
--- a/setup.py
+++ b/setup.py
@@ -15,6 +15,8 @@
         ],
     },
     install_requires=[
+        'python-nexus',
+        'newick',
         'cldfbench>=1.6.0',
         'clldutils>=3.7.0',
         'pycldf>=1.19.0',