-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfieldedDataFile.icn
508 lines (480 loc) · 18.8 KB
/
fieldedDataFile.icn
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
$ifndef _fieldedDataFile_
$define _fieldedDataFile_
############################################################################
#
# File: fieldedDataFile.icn
#
# Subject: Procedures to produce logical lines or fields from formatted
# data files
#
# Author: Arthur C. Eschenlauer
#
# Date: January 20, 2023
#
############################################################################
#
# SPDX-License-Identifier: CC-PDDC
# https://spdx.org/licenses/CC-PDDC.html
#
# This file is in the public domain. Art Eschenlauer has waived all
# copyright and related or neighboring rights to:
# fieldedDataFile.icn - Procedures to produce logical lines or fields
# from formatted data files
# For details, see:
# https://creativecommons.org/publicdomain/zero/1.0/
#
# If you require a specific license and public domain status is not suffi-
# cient for your needs, please substitute the MIT license (see below), bearing
# in mind that the copyright "claim" is solely to meet your requirements
# and does not imply any restriction on use or copying by the author:
#
# Copyright (c) 2022, Arthur Eschenlauer
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject
# to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
############################################################################
#
# record FieldedData(lines, fields)
# A record that holds two co-expression factories
# - lines === csvLines | tabularLines | iniLines
# - fields === csvFields | tabularFields | iniFields
#
# procedure FieldedDataFactory(format, filePath) : FieldedData
# Returns FieldedData for file at filePath according to format;
# formula == ( "csv" | "tabular" | "ini" | stop() )
#
# procedure csvLines(f) : C
# Produce co-expression that produces logical lines of a CSV file
# - f: file opened for reading
# - Actually, this is a synonym for tabularLines(f)
#
# procedure csvFields(line, sep) : C
# Produce co-expression that produces fields from a logical line of a
# CSV file
# - line: logical line, s
# - sep : (optional) separator, c; defaults to ','
#
# procedure getCSV(typeName, csvPath, colL, sep, dflt)
# Produce L of VNom from a CSV file
# - typeName: the string produced by vmsg(V, "type"), s
# - csvPath : the path to the comma-separated-values data file, s
# - colL : (optional) columns to select, L of i
# - sep : (optional) separators, c; defaults to ","
# - dflt : (optional) default value for VNom fields, x
#
# procedure tabularLines(f) : C
# Produce co-expression that produces logical lines of a tabular file
# - f: file opened for reading
#
# procedure tabularFields(line, sep) : C
# Produce co-expression that produces fields from a logical line of a
# tabular file
# - line: logical line, s
# - sep : (optional) separator, c; defaults to '\t'
#
# procedure getTabular(typeName, tsvPath, colL, sep, dflt)
# Produce L of VNom from a tabular file
# - typeName: the string produced by vmsg(V, "type"), s
# - tsvPath : the path to the tabular data file, s
# - colL : (optional) columns to select, L of i
# - sep : (optional) separators, c; defaults to '\t'
# - dflt : (optional) default value for VNom fields, x
#
# procedure iniLines(f) : C
# Produce co-expression that produces logical lines of an INI file
#
# procedure iniFields(line) : C
# Produce co-expression that produces fields from a logical line
# of an INI file
#
# procedure getIni(ini) : T (string -> T (string -> x))
# Produce a table of table from an INI file
#
############################################################################
# globals:
# g_Cerror - if defined, this C receives tables transmitted by _getTabular
# e.g., see procedure _flddDtFl_error(comment)
############################################################################
$ifndef VNOM
$include "vnom.icn"
$endif # VNOM
#$define DEBUG
# FieldedData organizes functions that create co-expressions producing
# logical lines and fields within a logical line from tabular data
record FieldedData(lines, fields) #: record holding two co-expression factories
global csvLines
# FieldedDataFactory produces FieldedData populated according to format;
# as of now, only "tabular" format is supported
procedure FieldedDataFactory(format, filePath) #: produce FieldedData for format
initial {
csvLines := tabularLines
}
close(open(\filePath, "r")) |
stop( "FieldedDataFactory: Cannot open file for reading: " ||
(\filePath | "&null") )
return case format of {
# eventually add support for CSV and who knows what else
"csv" : FieldedData(csvLines, csvFields)
"tabular" : FieldedData(tabularLines, tabularFields)
"ini" : FieldedData(iniLines, iniFields)
default : stop("FieldedDataFactory: Unknown format \"" || format || "\" for file " || filePath)
}
end
# create co-expression that, for each activation, transmits to &source a
# field from a logical line of a comma-separated-values file
procedure csvFields(line, sep) #: factory for C producing fields from a logical line of a CSV file
local field, source
/sep := ','
return create {
source := &source
every field := csvField(line, sep)
do {
field @source
}
# HACK transmit main to source to prevent resumption of csvField
# because Icon has no cofail(C) function
repeat {
main @source
}
}
end
# create co-expression that, for each activation, transmits to &source a
# logical line of a tab-separated-values file
procedure tabularLines(f) #: factory for C producing logical lines of a tabular file
local line, frag, in_dquote
frag := line := ""
return create
every frag := !f
do {
line ||:=
if line == ""
then frag
else "\n" || frag
in_dquote := &null
line ? every upto('"')
do in_dquote := if /in_dquote then 1 else &null
if /in_dquote
then {
line @ &source
line := ""
}
}
end
# create co-expression that, for each activation, transmits to &source a
# field from a logical line of a tab-separated-values file
procedure tabularFields(line, sep) #: factory for C producing fields from a logical line of a tabular file
local field, source
/sep := '\t'
return create {
source := &source
line ? while ( not pos(0)
, field := tab(upto(sep) | 0)
, field @ source
, move(1)
)
if line[-1:0] == sep
then "" @ source
&fail
}
end
procedure getCSV(typeName, csvPath, colL, sep, dflt) #: Produce L of VNom from a CSV file
# - typeName: the first result to be produced by vmsg(V, "type"), s
# - csvPath : the path to the comma-separated-values data file, s
# - colL : (optional) columns to use, L of i; default: use all columns
# - sep : (optional) separators, c; default: ','
# - dflt : (optional) default value for VNom fields, x
return _getSeparatedValues(typeName, csvPath, colL, sep, dflt, "csv")
end
procedure getTabular(typeName, tsvPath, colL, sep, dflt) #: Produce L of VNom from a tabular file
# - typeName: the first result to be produced by vmsg(V, "type"), s
# - tsvPath : the path to the tabular data file, s
# - colL : (optional) columns to use, L of i; default: use all columns
# - sep : (optional) separators, c; default: '\t'
# - dflt : (optional) default value for VNom fields, x
return _getSeparatedValues(typeName, tsvPath, colL, sep, dflt, "tabular")
end
procedure _flddDtFl_error(comment)
local result, Cerror, imager
&error := 0
comment := \comment || "; " | ""
Cerror := variable("g_Cerror") | fail
imager := variable("ximage") | image
result := table()
result["comment"] := comment
result["errornumber"] := &errornumber # i: run-time error number
result["errortext"] := &errortext # s: run-time error message text
result["errorvalue"] := &errorvalue # x: run-time error offending value
result["string"] :=
comment || "error " || &errornumber || " - \"" || &errortext ||
"\"; offending value `" || imager(&errorvalue) || "`"
errorclear()
result @Cerror
end
procedure _getSeparatedValues(typeName, inptPath, colL, sep, dflt, fmt) #: Helper to produce L of VNom from a CSV or tabular file
# - typeName: the first result to be produced by vmsg(V, "type"), s
# - inptPath : the path to the tabular data file, s
# - colL : (optional) columns to use, L of i; default: use all columns
# - sep : (optional) separators, c; default: '\t'
# - dflt : (optional) default value for VNom fields, x
# - fmt : (optional) "tabular" | "csv"; default: "tabular"
local fieldedData # a FieldedData record having two C members: lines, fields
local resultL # result, L of VNom
local lineC # C producing logical lines of the tabular file
local lineL # one result produced by lineC
local fieldC # C producing logial fields of lineL
local rsltV # a VNom
local f # input file specified by inptPath
local kL, kLtmp # lists of result fields
local i # scratch i for index
local comment
/sep := '\t'
$ifdef DEBUG
write(&errout, "getTabular parsing ", inptPath)
$endif # DEBUG
# produce record FieldedData(lines, fields)
# A record that holds two co-expression factories
# - lines === csvLines | tabularLines | iniLines
# - fields === csvFields | tabularFields | iniFields
fieldedData := FieldedDataFactory(fmt, inptPath)
# create lineC which produces lines from inptPath
lineC := fieldedData.lines(f := open(inptPath,"r"), sep) |
stop(&progname, ": failed to open tabular file ",inptPath)
# initialize list for results
resultL := []
# Using first line, construct a list of fields
# from which to construct VNom instances
# fetch first line
lineL := @lineC
# create fieldC that produces fields parsed from line
fieldC := fieldedData.fields(lineL)
# create kLtmp to receive the produced fields
kLtmp := []
# put the fields generated by fieldC onto kLtmp
while put(kLtmp, main ~=== \@fieldC)
# create kL to be length of colL if supplied; else, length of kLtmp
kL := list(*(\colL) | *kLtmp)
# if caller did not supply colL (the field selector), fetch all columns
if /colL
then {
colL := list()
every i := 1 to *kL
do put(colL, i)
}
# fetch the column names, or fail after transmitting error to g_Cerror
&error := 1
every i := 1 to *kL
do kL[i] := kLtmp[colL[i]] | {
_flddDtFl_error(
"_getSeparatedValues(" || image(typeName) ||
", " || inptPath ||
", ...)@fieldedDataFile.icn: colL invalid or out of range"
)
fail
}
&error := 0
# Using subsequent lines and colL, produce VNom instances
# while next line may be generated
while lineL := @lineC
do {
# create fieldC that produces fields parsed from line
fieldC := fieldedData.fields(lineL)
# create kLtmp to receive the produced fields
kLtmp := []
# put the fields generated by fieldC onto kLtmp
while put(kLtmp, main ~=== \@fieldC)
# generate the fields from the line, selected and ordered by colL
fieldC := create kLtmp[!colL]
# instantiate VNom to hold the selected field values
rsltV := vnew(table(dflt), typeName)
# assign each (selected and ordered) key with the corresponding value
every vmsg(rsltV, "put", !kL, main ~=== \@fieldC)
# add to the result list, except discard when length check fails
if *kL = vmsg(rsltV, "*")
then put(resultL, rsltV)
else {
comment :=
"_getSeparatedValues(" || image(typeName) ||
", " || inptPath ||
", ...)@fieldedDataFile.icn: structure\n " ||
image(lineL) ||
" (expected " ||
*kL ||
" terms but found " ||
vmsg(rsltV, "*") ||
")"
every comment ||:= "\n " || vmsg(rsltV, "strings")
_flddDtFl_error(comment)
}
$ifdef DEBUG
write(&errout, ximage(resultL[-1]))
write(&errout, "fields:")
every write(&errout, "# ",vmsg(resultL[-1], "key"))
write(&errout, "values:")
every write(&errout, "! ",vmsg(resultL[-1], "!"))
$endif # DEBUG
}
# close input file and produce list of results, if any
close(f)
if *resultL > 0
then return resultL
end
$ifdef DEBUG
link ximage
$endif # DEBUG
# create co-expression that, for each activation, transmits to &source a
# "logical line" of an INI file; note that this is in fact a list
# whose first member is the section title (if any) and the rest of the
# members are each key=value pair in the section.
# Note well:
# - Values must either be numeric or begin and end with a double quote.
# - There is no need to escape internal double quotes.
# - Because there are no escapes at all, values must be contained on a
# single line.
$ifndef ini_line_default
$define ini_line_default "default"
$endif # ini_line_default
procedure iniLines(f) #: factory for C producing logical lines of an INI file
local line, heading, logical
heading := ini_line_default
logical := [ini_line_default]
return create {
every line := !f
do {
# comments must have semicolon in first column
line ? match(";") & next
# produce sections (logical lines) having at least one key-value-pair
( line ? tab(match("[")) || (heading <- tab(upto(']'))) || move(1)
, if *logical > 1
then ( logical @ &source
, logical := [heading]
)
else logical := [heading]
) & next
# put key value pairs (unparsed) onto list, allowing empty values
line ? put( logical
, tab(upto('= \t')) || ("=" == move(1)) || (tab(0))
) & next
# non-blank lines cause abort
line ? upto(&cset -- ' \t') & stop("iniLines: bad INI line:\n" || line)
# blank lines should be ignored
}
if *logical > 1
then logical @ &source
&fail @ &source
}
end
# create co-expression that, for each activation, transmits to &source a
# key-value pair from a logical line of an INI file, producing the first
# field as a string and the rest as [string key, string value]
procedure iniFields(line) #: factory for C producing fields from a logical line of an INI file
local kvp_key, kvp_value, kvp, section
return create {
(section := get(line)) @ &source
while kvp := get(line) do kvp ? {
if (kvp_key := tab(upto('='))) || move(1) ||
( ="\"" || (kvp_value := tab(-1)) || "\"" | # match string
(kvp_value := numeric(tab(0))) # match numeric
)
then [kvp_key, kvp_value] @ &source
else stop("iniFields: bad key-value pair in section [" ||
section || "]: " || kvp)
}
}
end
$ifndef tbl_ini_default
$define tbl_ini_default "default"
$endif
procedure getIni(ini) #: parse an INI file into a table of table
# ini # the path to the INI named after the &progname
local f # the file returned by open(ini)
local l # a line from f
local tbl_ini # the table-of-tables holding the parsed INI, maping
# section names to tables of key-value pairs
local section_ce # co-expression producing logical lines from f
local line # logical line produced by section_ce
local kvp_ce # coexpression producing fields from line
local section # first member of logical line
local kvp # key-value pair from non-first members of logical line
local fieldedData # a FieldedData record having two members:
# lines - co-expression producing logical lines from source data file
# fields - co-expression producing fields from source data file lines
tbl_ini := table()
tbl_ini[tbl_ini_default] := table()
fieldedData := FieldedDataFactory("ini", ini)
if f := open(ini, "r") | stop("getIni: failed to open file " || ini)
then {
section_ce := fieldedData.lines(f)
while line := @section_ce do {
$ifdef DEBUG_getIni
write("*line = ", *line)
$endif
kvp_ce := fieldedData.fields(line)
tbl_ini[@kvp_ce] := section := table()
while kvp := @kvp_ce do section[kvp[1]] := kvp[2]
}
}
close(f)
$ifdef DEBUG_getIni
write("*tbl_ini = ", *tbl_ini)
every f := key(tbl_ini)
do write("*tbl_ini[", f, "] = ", *(tbl_ini[f]))
write(xencode(tbl_ini,StringFile(""),WriteString).s)
$endif
return tbl_ini
end
# procedure csvField is hacked out of Unicon procedure genCSV
# by Steve Wampler, Rob Parlett, and others
# http://unicon.org/api/uni-api/file_str_util.html#genCSV.html
procedure csvField(s, sep)
local WS, r, rslt
/sep := ","
WS := ' \t\n' -- sep # Can't include separators in whitespace
# (The '\n' is a trick so there's always
# something at the end of the last field.)
(s || WS) ? {
while not pos(0)
do {
tab(many(WS))
if ="\""
then { # Quoted string
r := ""
while r ||:= 1(tab(find("\"")), move(1))
do {
if not ="\"" then break # End of quoted string
r ||:= "\""
}
suspend 1(
rslt := (tab((upto(sep))|0)\1, r),
not (
pos(0),
move(-(1 + *WS)),
("\"" || WS) ~== move(1 + *WS),
rslt == ""
)
)
do move(1)
}
else
suspend trim(tab((upto(sep))|0)\1, WS)
do move(1)
}
}
end
$endif # _fieldedDataFile_