-
Notifications
You must be signed in to change notification settings - Fork 20
/
codemeta.json
156 lines (156 loc) · 7.57 KB
/
codemeta.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
{
"@context": [
"https://doi.org/10.5063/schema/codemeta-2.0",
"http://schema.org",
"https://w3id.org/software-types"
],
"@type": "SoftwareSourceCode",
"identifier": "colibricore",
"name": "Colibri Core",
"version": "2.5.9",
"description": "Colibri core is an NLP tool as well as a C++ and Python library for working with basic linguistic constructions such as n-grams and skipgrams (i.e patterns with one or more gaps, either of fixed or dynamic size) in a quick and memory-efficient way. ",
"license": "https://spdx.org/licenses/GPL-3.0-only",
"url": "https://proycon.github.io/colibri-core",
"author": [
{
"@id": "https://orcid.org/0000-0002-1046-0006",
"@type": "Person",
"givenName": "Maarten",
"familyName": "van Gompel",
"email": "[email protected]",
"affiliation": {
"@id": "https://www.ru.nl/cls",
"@type": "Organization",
"name": "Centre for Language Studies",
"url": "https://www.ru.nl/cls",
"parentOrganization": {
"@id": "https://www.ru.nl",
"name": "Radboud University",
"@type": "Organization",
"url": "https://www.ru.nl",
"location": {
"@type": "Place",
"name": "Nijmegen"
}
}
}
}
],
"programmingLanguage": [{
"@type": "ComputerLanguage",
"identifier": "c++",
"name": "C++"
},
{
"@type": "ComputerLanguage",
"identifier": "cython",
"name": "Cython"
}
],
"operatingSystem": [ "Linux", "BSD" ,"macOS" ],
"codeRepository": "https://github.com/proycon/colibri-core",
"softwareRequirements": [ ],
"softwareHelp": "https://proycon.github.io/colibri-core/doc/",
"readme": "https://github.com/proycon/colibri-core/blob/master/README.md",
"issueTracker": "https://github.com/proycon/colibri-core/issues",
"contIntegration": "https://travis-ci.org/proycon/colibri-core",
"releaseNotes": "https://github.com/proycon/colibri-core/releases",
"developmentStatus": "https://www.repostatus.org/#active",
"keywords": [ "nlp", "natural language processing", "ngrams", "skipgrams", "pattern recognition", "language modelling" ],
"referencePublication": [
{
"@id": "https://dx.doi.org/10.5334/jors.105",
"@type": "TechArticle",
"name": "Efficient n-gram, Skipgram and Flexgram Modelling with Colibri Core",
"author": [ "Maarten van Gompel", "Antal van den Bosch" ],
"isPartOf": {
"@type": "PublicationIssue",
"datePublised": "2016",
"name": "Journal of Open Research Software",
"issue": "4"
},
"url": "https://dx.doi.org/10.5334/jors.105"
}
],
"dateCreated": "2013-09-15",
"targetProduct": [
{
"@type": "CommandLineApplication",
"name": "colibri-classencode",
"executableName": "colibri-classencode",
"description": "Encodes a plain text corpus to a binary encoded corpus and a class file"
},
{
"@type": "CommandLineApplication",
"name": "colibri-classdecode",
"executableName": "colibri-classdecode",
"description": "Decodes a binary encoded corpus and a class file to a plain text corpus"
},
{
"@type": "CommandLineApplication",
"name": "colibri-patternmodeller",
"executableName": "colibri-patternmodeller",
"description": "Extract, model and compare recurring patterns (n-grams, skipgrams, flexgrams) and their frequencies in text corpus data. This is the main tool of Colibri Core."
},
{
"@type": "CommandLineApplication",
"name": "colibri-cooc",
"executableName": "colibri-cooc",
"description": "Computes co-occurrence statistics (absolute co-cooccurrence or pointwise mutual information) between patterns in a corpus"
},
{
"@type": "CommandLineApplication",
"name": "colibri-ngrams",
"executableName": "colibri-ngrams",
"description": "Extract n-grams of a particular size by moving a sliding window over the corpus. This is a high-level convenience script over underlying tools."
},
{
"@type": "CommandLineApplication",
"name": "colibri-freqlist",
"executableName": "colibri-freqlist",
"description": "Extract n-grams (and optionally skipgrams) with their counts from one or more plain-text corpus files. This is a high-level convenience script over underlying tools."
},
{
"@type": "CommandLineApplication",
"name": "colibri-coverage",
"executableName": "colibri-coverage",
"description": "Computes the coverage of training/background corpus on a particular test/foreground corpus, i.e how many of the patterns in the test corpus were found during training, how many tokens are covered, and how is this all distributed?. This is a high-level convenience script over underlying tools."
},
{
"@type": "CommandLineApplication",
"name": "colibri-findpatterns",
"executableName": "colibri-findpatterns",
"description": "Find patterns in corpus data based on a presupplied list of patterns (one per line). This is a high-level convenience script over underlying tools."
},
{
"@type": "CommandLineApplication",
"name": "colibri-histogram",
"executableName": "colibri-histogram",
"description": "Computes a histogram for ngram occurrences (and optionally skipgrams) in the corpus. This is a high-level convenience script over underlying tools."
},
{
"@type": "CommandLineApplication",
"name": "colibri-loglikelihood",
"executableName": "colibri-loglikelihood",
"description": "Compares the frequency of patterns between two or more corpus files (plain text) by computing log likelihood, following the methodology of Rayson and Garside (2000), Comparing corpora using frequency profiling. In proceedings of the workshop on Comparing Corpora, held in conjunction with the 38th annual meeting of the Association for Computational Linguistics (ACL 2000). 1-8 October 2000, Hong Kong, pp. 1 - 6: http://www.comp.lancs.ac.uk/~paul/publications/rg_acl2000.pdf. This is a high-level convenience script over underlying tools."
},
{
"@type": "CommandLineApplication",
"name": "colibri-ngramstats",
"executableName": "colibri-ngramstats",
"description": "Computes a summary report on the count of ngrams (and optionally skipgrams) in the corpus. This is a high-level convenience script over underlying tools."
},
{
"@type": "CommandLineApplication",
"name": "colibri-queryngrams",
"executableName": "colibri-queryngrams",
"description": "Interactive command line tool to n-grams with their counts from one or more plain-text corpus files. This is a high-level convenience script over underlying tools."
},
{
"@type": "CommandLineApplication",
"name": "colibri-reverseindex",
"executableName": "colibri-reverseindex",
"description": "Computes and prints reverse index of the corpus, for each token position in the corpus, all patterns that start at that position are shown. This is a high-level convenience script over underlying tools."
}
]
}