diff --git a/.gitignore b/.gitignore index 7aff1ea..2f7e12b 100644 --- a/.gitignore +++ b/.gitignore @@ -61,3 +61,5 @@ coverage.xml venv/ ENV/ +.DS_Store + diff --git a/FORMS.md b/FORMS.md index 17d556b..c5c7c03 100644 --- a/FORMS.md +++ b/FORMS.md @@ -13,7 +13,7 @@ These methods use the attributes of a `FormSpec` instance to configure their beh Pairs of strings that should be recognized as brackets, specified as `dict` mapping opening string to closing string - `separators`: `(';', '/', ',')` Iterable of single character tokens that should be recognized as word separator -- `missing_data`: `('?', '-')` +- `missing_data`: `()` Iterable of strings that are used to mark missing data - `strip_inside_brackets`: `True` Flag signaling whether to strip content in brackets (**and** strip leading and trailing whitespace) diff --git a/README.md b/README.md index b0ac247..8ee5491 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ This dataset is licensed under a CC-BY-4.0 license - **Varieties:** 2,511 - **Concepts:** 180 -- **Lexemes:** 33,507 +- **Lexemes:** 33,517 - **Sources:** 408 - **Synonymy:** 1.09 diff --git a/cldf/README.md b/cldf/README.md index 6a1d68f..069a2e9 100644 --- a/cldf/README.md +++ b/cldf/README.md @@ -12,7 +12,7 @@ property | value [dc:conformsTo](http://purl.org/dc/terms/conformsTo) | [CLDF Wordlist](http://cldf.clld.org/v1.0/terms.rdf#Wordlist) [dc:license](http://purl.org/dc/terms/license) | https://creativecommons.org/licenses/by/4.0/ [dcat:accessURL](http://www.w3.org/ns/dcat#accessURL) | https://github.com/numeralbank/barlowpacific -[prov:wasDerivedFrom](http://www.w3.org/ns/prov#wasDerivedFrom) |
  1. numeralbank/barlowpacific v1.3.1-11-g127ad28
  2. Glottolog v4.8
  3. Concepticon v3.1.0
  4. CLTS v2.2.0
+[prov:wasDerivedFrom](http://www.w3.org/ns/prov#wasDerivedFrom) |
  1. numeralbank/barlowpacific v1.3.1-12-g2f1cdda
  2. Glottolog v4.8
  3. Concepticon v3.1.0
  4. CLTS v2.2.0
[prov:wasGeneratedBy](http://www.w3.org/ns/prov#wasGeneratedBy) |
  1. lingpy-rcParams: lingpy-rcParams.json
  2. python: 3.11.4
  3. python-packages: requirements.txt
[rdf:ID](http://www.w3.org/1999/02/22-rdf-syntax-ns#ID) | barlowpacific [rdf:type](http://www.w3.org/1999/02/22-rdf-syntax-ns#type) | http://www.w3.org/ns/dcat#Distribution @@ -23,7 +23,7 @@ property | value property | value --- | --- [dc:conformsTo](http://purl.org/dc/terms/conformsTo) | [CLDF FormTable](http://cldf.clld.org/v1.0/terms.rdf#FormTable) -[dc:extent](http://purl.org/dc/terms/extent) | 33507 +[dc:extent](http://purl.org/dc/terms/extent) | 33517 ### Columns diff --git a/cldf/cldf-metadata.json b/cldf/cldf-metadata.json index c57dc6f..e87ea99 100644 --- a/cldf/cldf-metadata.json +++ b/cldf/cldf-metadata.json @@ -13,7 +13,7 @@ { "rdf:about": "https://github.com/numeralbank/barlowpacific", "rdf:type": "prov:Entity", - "dc:created": "v1.3.1-11-g127ad28", + "dc:created": "v1.3.1-12-g2f1cdda", "dc:title": "Repository" }, { @@ -54,7 +54,7 @@ "tables": [ { "dc:conformsTo": "http://cldf.clld.org/v1.0/terms.rdf#FormTable", - "dc:extent": 33507, + "dc:extent": 33517, "tableSchema": { "columns": [ { diff --git a/cldf/forms.csv b/cldf/forms.csv index 9f513e1..3be3b2f 100644 --- a/cldf/forms.csv +++ b/cldf/forms.csv @@ -590,6 +590,7 @@ akkk1240-1-5-1,,akkk1240-1,5,mɑ·ᵘkʷ,mɑ·ᵘkʷ,,,Laycock1970D09,,false,,,p akkk1240-1-6-1,,akkk1240-1,6,kənəri fu,kənəri fu,,,Laycock1970D09,,false,,,p.1232 akkk1240-1-7-1,,akkk1240-1,7,tә́rᵊwɑ,tә́rᵊwɑ,,"[tərɑf ~ tɛr- = arm, hand]",Laycock1970D09,,false,,,p.1232 akkk1240-1-8-1,,akkk1240-1,8,ɑləmp,ɑləmp,,,Laycock1970D09,,false,,,p.1232 +akkk1240-1-9-1,,akkk1240-1,9,?,?,,,Laycock1970D09,,false,,,p.1232 akkk1240-1-10-1,,akkk1240-1,10,tərə pis,tərə pis,,"[tərɑf ~ tɛr- = arm, hand]",Laycock1970D09,,false,,,p.1232 akkk1240-1-20-1,,akkk1240-1,20,tərə pis tuwɑ pis,tərə pis tuwɑ pis,,"[tərɑf ~ tɛr- = arm, hand]",Laycock1970D09,,false,,,p.1232 akol1237-1-1-1,,akol1237-1,1,ke,ke,,,Lean1985Vol04,,false,,,p.61 @@ -2371,6 +2372,7 @@ awun1245-1-11-1,,awun1245-1,11,ilʸi kəl mu witǽ,ilʸi kəl mu witǽ,,,Lay awun1245-1-12-1,,awun1245-1,12,ilʸi kəl mu kəlí,ilʸi kəl mu kəlí,,,Laycock1970D09,,false,,,p.1280 awun1245-1-13-1,,awun1245-1,13,ilʸi kəl mu kəlí tʸówɔ,ilʸi kəl mu kəlí tʸówɔ,,,Laycock1970D09,,false,,,p.1280 awun1245-1-14-1,,awun1245-1,14,ilʸi kəl mu ɑrəkownei,ilʸi kəl mu ɑrəkownei,,,Laycock1970D09,,false,,,p.1280 +awun1245-1-15-1,,awun1245-1,15,?,?,,,Laycock1970D09,,false,,,p.1280 awun1245-1-20-1,,awun1245-1,20,ilʸtikəl ɜ̀lwukí,ilʸtikəl ɜ̀lwukí,,Tested for classes,Laycock1970D09,,false,,,p.1280 awyi1241-1-1-1,,awyi1241-1,1,nanggare,nanggare,,,Stokhof1983Vol5.2,,false,,,p.146 awyi1241-1-2-1,,awyi1241-1,2,nangarre,nangarre,,,Stokhof1983Vol5.2,,false,,,p.146 @@ -8268,6 +8270,7 @@ erok1237-2-10-1,,erok1237-2,10,sambura,sambura,,,Grace1956pdf31,,false,,,p.2 [of ewag1241-1-1-1,,ewag1241-1,1,da,da,,,Lean1985Vol05,,false,,,p.32 ewag1241-1-2-1,,ewag1241-1,2,etoto,etoto,,,Lean1985Vol05,,false,,,p.32 ewag1241-1-3-1,,ewag1241-1,3,tamonde,tamonde,,,Lean1985Vol05,,false,,,p.32 +ewag1241-1-4-1,,ewag1241-1,4,?,?,,,Lean1985Vol05,,false,,,p.32 ewag1241-1-5-1,,ewag1241-1,5,ingo da,ingo da,,,Lean1985Vol05,,false,,,p.32 ewag1241-1-10-1,,ewag1241-1,10,ingo etoto,ingo etoto,,,Lean1985Vol05,,false,,,p.32 ewag1241-2-1-1,,ewag1241-2,1,da,da,,,Lean1985Vol05,,false,,,p.32 @@ -14817,6 +14820,7 @@ lang1328-2-21-1,,lang1328-2,21,ipï nanïnge kwandap mat pï,ipï nanïnge kwand lang1328-2-22-1,,lang1328-2,22,ipï nanïnge inin mat pï,ipï nanïnge inin mat pï,,,Barlow2020Pondi,,false,,,pp.103-105 lang1328-2-23-1,,lang1328-2,23,ipï nanïnge yawle mat pï,ipï nanïnge yawle mat pï,,,Barlow2020Pondi,,false,,,pp.103-105 lang1328-2-24-1,,lang1328-2,24,ipï nanïnge nanïnge mat pï,ipï nanïnge nanïnge mat pï,,,Barlow2020Pondi,,false,,,pp.103-105 +lang1328-2-25-1,,lang1328-2,25,?,?,,,Barlow2020Pondi,,false,,,pp.103-105 lang1328-2-30-1,,lang1328-2,30,yalïme yawle,yalïme yawle,,"Pondi speakers can refer to multiples of ten by using the form yalïme, which may derive from yalïm ‘ironwood tree’.",Barlow2020Pondi,,false,,,pp.103-105 lang1328-2-40-1,,lang1328-2,40,yalïme nanïnge,yalïme nanïnge,,,Barlow2020Pondi,,false,,,pp.103-105 lang1328-2-50-1,,lang1328-2,50,yalïme ipï kwandap,yalïme ipï kwandap,,,Barlow2020Pondi,,false,,,pp.103-105 @@ -19543,6 +19547,7 @@ morb1239-4-5-1,,morb1239-4,5,ˈtobena,ˈtobena,,,Grace1955pdf50,,false,,,p.55 [o morb1239-4-6-1,,morb1239-4,6,toˈbnadua,toˈbnadua,,,Grace1955pdf50,,false,,,p.55 [of pdf] morb1239-4-7-1,,morb1239-4,7,ˈtidagena,ˈtidagena,,,Grace1955pdf50,,false,,,p.55 [of pdf] morb1239-4-8-1,,morb1239-4,8,naudagena,naudagena,,,Grace1955pdf50,,false,,,p.55 [of pdf] +morb1239-4-9-1,,morb1239-4,9,_,_,,[arrow drawn from <ˈtidagena> to <9>],Grace1955pdf50,,false,,,p.55 [of pdf] morb1239-4-10-1,,morb1239-4,10,dˈnea,dˈnea,,,Grace1955pdf50,,false,,,p.55 [of pdf] more1258-1-1-1,,more1258-1,1,mindjak,mindjak,,,Capell1951,,false,,,p.144 more1258-1-2-1,,more1258-1,2,airai,airai,,,Capell1951,,false,,,p.144 @@ -21777,6 +21782,8 @@ nucl1594-3-4-1,,nucl1594-3,4,ai,ai,,,Grace1955pdf50,,false,,,p.55 [of pdf] nucl1594-3-5-1,,nucl1594-3,5,iˈvor,iˈvor,,,Grace1955pdf50,,false,,,p.55 [of pdf] nucl1594-3-6-1,,nucl1594-3,6,ˈiptan,ˈiptan,,,Grace1955pdf50,,false,,,p.55 [of pdf] nucl1594-3-7-1,,nucl1594-3,7,ivoro amoi,ivoro amoi,,,Grace1955pdf50,,false,,,p.55 [of pdf] +nucl1594-3-8-1,,nucl1594-3,8,_,_,,etc.,Grace1955pdf50,,false,,,p.55 [of pdf] +nucl1594-3-9-1,,nucl1594-3,9,_,_,,etc.,Grace1955pdf50,,false,,,p.55 [of pdf] nucl1594-3-10-1,,nucl1594-3,10,or pamo·i,or pamo·i,,,Grace1955pdf50,,false,,,p.55 [of pdf] nucl1595-1-1-1,,nucl1595-1,1,[ou'ri'as],[ou'ri'as],,the same as the names of the fingers,Stokhof1983Vol5.2,,false,,,p.82 nucl1595-1-2-1,,nucl1595-1,2,[nen'no'manna],[nen'no'manna],,the same as the names of the fingers,Stokhof1983Vol5.2,,false,,,p.82 @@ -23220,6 +23227,7 @@ pauw1243-3-8-1,,pauw1243-3,8,jimbā gāg̱u̱ssie,jimbā gāg̱u̱ssie,,,Stroeve pauw1243-3-9-1,,pauw1243-3,9,bĕsiera̱h,bĕsiera̱h,,,Stroeve1911,,false,,,p.28 pauw1243-3-10-1,,pauw1243-3,10,tā̱u̱nsie,tā̱u̱nsie,,,Stroeve1911,,false,,,p.28 pauw1243-3-20-1,,pauw1243-3,20,āsu̱mbjē,āsu̱mbjē,,,Stroeve1911,,false,,,p.28 +pauw1243-3-100-1,,pauw1243-3,100,?,?,,,Stroeve1911,,false,,,p.28 pauw1243-4-1-1,,pauw1243-4,1,oschénu,oschénu,,,Moszkowski1913,,false,,,p.258 pauw1243-4-2-1,,pauw1243-4,2,kaiámba,kaiámba,,,Moszkowski1913,,false,,,p.258 pauw1243-4-3-1,,pauw1243-4,3,biméssi,biméssi,,,Moszkowski1913,,false,,,p.258 @@ -32129,6 +32137,7 @@ west2594-4-10-1,,west2594-4,10,hanènggu,hanènggu,,,Galis1955,,false,,,p.165 west2594-5-1-1,,west2594-5,1,ambuét,ambuét,,,Galis1955,,false,,,p.165 west2594-5-2-1,,west2594-5,2,bere,bere,,,Galis1955,,false,,,p.165 west2594-5-3-1,,west2594-5,3,ambuét-bere,ambuét-bere,,,Galis1955,,false,,,p.165 +west2594-5-4-1,,west2594-5,4,?,?,,,Galis1955,,false,,,p.165 west2594-5-5-1,,west2594-5,5,anom,anom,,,Galis1955,,false,,,p.165 west2594-5-10-1,,west2594-5,10,linin ere,linin ere,,,Galis1955,,false,,,p.165 west2594-5-30-1,,west2594-5,30,ero-vid-ènom,ero-vid-ènom,,,Galis1955,,false,,,p.165 @@ -32525,6 +32534,7 @@ wutu1244-3-2-1,,wutu1244-3,2,nyumo,nyumo,,,Laycock1970D11,,false,,,p.1486 wutu1244-3-3-1,,wutu1244-3,3,hɛ·n,hɛ·n,,,Laycock1970D11,,false,,,p.1486 wutu1244-3-4-1,,wutu1244-3,4,nou,nou,,,Laycock1970D11,,false,,,p.1486 wutu1244-3-5-1,,wutu1244-3,5,wi,wi,,,Laycock1970D11,,false,,,p.1486 +wutu1244-3-6-1,,wutu1244-3,6,?,?,,,Laycock1970D11,,false,,,p.1486 wutu1244-3-7-1,,wutu1244-3,7,no· tʸinyu,no· tʸinyu,,"[nɔ ~ nõ = arm, hand]",Laycock1970D11,,false,,,p.1486 wutu1244-3-8-1,,wutu1244-3,8,no· tʸihɛwɔ,no· tʸihɛwɔ,,,Laycock1970D11,,false,,,p.1486 wutu1244-3-9-1,,wutu1244-3,9,no· tʸinou,no· tʸinou,,,Laycock1970D11,,false,,,p.1486 diff --git a/cldf/lingpy-rcParams.json b/cldf/lingpy-rcParams.json index 0a39318..a961473 100644 --- a/cldf/lingpy-rcParams.json +++ b/cldf/lingpy-rcParams.json @@ -64,7 +64,7 @@ 10, 10 ], - "filename": "lingpy-2023-09-27", + "filename": "lingpy-2023-09-28", "gap_symbol": "-", "gap_weight": 0.5, "gop": -2, @@ -123,7 +123,7 @@ "scorer": {}, "sonar": true, "stress": "\u02c8\u02cc'", - "timestamp": "2023-09-27 14:13", + "timestamp": "2023-09-28 08:58", "tones": "\u00b9\u00b2\u00b3\u2074\u2075\u2076\u2077\u2078\u2079\u2070\u2081\u2082\u2083\u2084\u2085\u2086\u2087\u2088\u2089\u20800123456789\u02e5\u02e6\u02e7\u02e8\u02e9\u02ea\u02eb-\ua708-\ua709-\ua70a-\ua70b-\ua70c-\ua70d-\ua70e-\ua70f-\ua710-\ua711-\ua712-\ua713-\ua714-\ua715-\ua716-\ua717-\ua718-\ua719-\ua71a-\ua700-\ua701-\ua702-\ua703-\ua704-\ua705-\ua706-\ua707", "tree_calc": "neighbor", "unique_sequences": true, diff --git a/lexibank_barlowpacific.py b/lexibank_barlowpacific.py index ce57b88..c6e69f3 100644 --- a/lexibank_barlowpacific.py +++ b/lexibank_barlowpacific.py @@ -17,6 +17,10 @@ class CustomLanguage(pylexibank.Language): class Dataset(pylexibank.Dataset): + form_spec = pylexibank.FormSpec( + missing_data=() + ) + dir = Path(__file__).parent id = "barlowpacific" lexeme_class = CustomLexeme