This repository has been archived by the owner on Oct 15, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 123
/
Copy pathkeynames.py
executable file
·490 lines (401 loc) · 16.4 KB
/
keynames.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
#!/usr/bin/env python3
"""
This file contains a reference implementation of Elektra's key name processing.
It contains commented functions for:
- Checking, whether a string is a valid Key Name and if so turning it into its canonical form.
- Splitting (canonical) Key Names into a Namespace and a list of (unescaped) key name parts.
In addition, we provide a command line interface to experiment with the implementation.
Execute the file with the argument '--help' to find out more about the CLI.
"""
import re
import sys
from itertools import takewhile
from typing import List, Tuple, Match, Optional, Iterator
from enum import Enum
import argparse
def check_array_part(part: str) -> Tuple[bool, Optional[str]]:
"""
Checks whether `part` is a valid array part.
If so, the tuple `(True, digits)` is returned where `digits` is a string
containing the digits part of the Array Part, i.e. the part after the `#` and any `_`.
Otherwise the tuple `(False, None)` is returned.
"""
underscores = sum(
1 for _ in takewhile(lambda x: x == "_", part[1:])
)
digits = part[underscores + 1:]
if any(not x.isdigit() for x in digits):
return False, None
if underscores > 0 and underscores != len(digits) - 1:
return False, None
if len(digits) > 19 or (len(digits) == 19 and digits > str(2**63-1)):
return False, None
if len(digits) > 1 and digits[0] == "0":
return False, None
return True, digits
class Namespace(Enum):
"""
Enum for Elektra's namespaces. Values are used as the first byte in unescaped names.
"""
CASCADING = 1
META = 2
SPEC = 3
PROC = 4
DIR = 5
USER = 6
SYSTEM = 7
DEFAULT = 8
# String names for the namespaces
NAMESPACES = {"meta", "spec", "proc", "dir", "user", "system", "default"}
# Map between string namespaces and enum values
NAMESPACES_MAP = {
"meta": Namespace.META,
"spec": Namespace.SPEC,
"proc": Namespace.PROC,
"dir": Namespace.DIR,
"user": Namespace.USER,
"system": Namespace.SYSTEM,
"default": Namespace.DEFAULT,
}
# Characters that can be escaped anywhere in a Key Name Part
ESCAPES = {"/", "\\"}
# Characters that can be escaped under special conditions
# The value in the dictionary is a function that will be called
# with the Key Name Part that contained the escape sequence.
# The function should return True, iff the escape sequence is valid.
ESCAPES_SPECIAL = {
".": lambda part: re.match(r"^\\\.{1,2}$", part),
"#": lambda part: (len(part) < 21 or part[2:] <= str(2**63-1)) and re.match(r"^\\#[1-9][0-9]{1,18}$", part),
"%": lambda part: re.match(r"^\\%$", part),
}
# This RegEx defines what a Key Name Part can be.
# A string must match this RegEx to be a Key Name Part, but not all matching strings necessarily are valid Key Name Parts.
#
# The RegEx looks for a leading slash `/`, optionally followed by more slashes `/`.
# All of these slashes will be treated as a single separator.
# After that comes the actual content of the Key Name Part, a series of
# - characters other than slash `/` and backslash `\`
# - OR backslash `\` followed by any other character (an escape sequence).
PART_REGEX = re.compile(r"/(?P<leadingSlashes>/*)(?P<content>(?:[^\\/]|\\.)*)")
# This RegEx is used to find escape sequences within a Key Name Part.
ESCAPE_REGEX = re.compile(r"\\(.)")
class KeyNameException(Exception):
pass
def canonicalize(name: str, prefix: str = "", verbose: bool = False) -> str:
"""
Returns the canonical version of the Key Name `name`.
If `name` is not a valid Key Name, a `KeyNameException` will be raised.
If `prefix` is not empty, `name` is canonicalized under the assumption that it is being appended to the existing Key Name `prefix`.
It is similar to (but not necessarily the same as) calling `canonicalize (name + "/" + prefix)`.
If `verbose` is set to `True`, additional information about the procedure is printed to `sys.stderr`.
"""
# First check for an empty Key Name
if len(name) == 0:
if len(prefix) == 0:
raise KeyNameException("Empty name is invalid")
return prefix
# Then find and validate the Namespace
namespace = None
namespace_offset = 0
# This will contain the Key Name without the Namespace (and Namespace separator `:`)
fullname = None
# Namespace is everything before the first colon `:`, or empty if there is no colon `:`
if len(prefix) == 0:
# No prefix, so we look in name
colon_index = name.find(":")
slash_index = name.find("/")
# There was a slash before the first colon, but not at the start of the string
# -> this is not a valid Key Name
if slash_index < colon_index and slash_index != 0:
raise KeyNameException(
"No namespace (and not cascading) or missing colon (:) after namespace"
)
if colon_index > 0:
namespace = name[:colon_index]
namespace_offset = len(namespace)
if namespace not in NAMESPACES:
raise KeyNameException(
f"Illegal namespace '{namespace}'. " +
f"Allowed namespaces: {NAMESPACES}"
)
fullname = name[colon_index + 1:]
else:
fullname = name
else:
# We have a prefix, so we extract the Namespace from there
colon_index = prefix.find(":")
slash_index = name.find("/")
# There was a slash before the first colon, but not at the start of the string
# -> this is not a valid Key Name
if slash_index < colon_index and slash_index != 0:
raise KeyNameException(
"No namespace (and not cascading) or missing colon (:) after namespace"
)
if colon_index > 0:
namespace = prefix[:colon_index]
namespace_offset = len(namespace)
if namespace not in NAMESPACES:
raise KeyNameException(
f"Illegal namespace '{namespace}'. " +
f"Allowed namespaces: {NAMESPACES}"
)
fullname = prefix[colon_index + 1:] + "/" + name
else:
fullname = prefix + "/" + name
# Check if Key Name starts correctly after Namespace
if len(fullname) == 0 or fullname[0] != "/":
raise KeyNameException(
f"Key must start with '<NAMESPACE>:/' or just '/'. " +
f"Allowed values for <NAMESPACE>: {NAMESPACES}"
)
# Check if Key Name starts correctly after Namespace
if fullname == "/%":
raise KeyNameException(
f"Key must NOT be '<NAMESPACE>:/%' or just '/%' (collides with root key). " +
f"Allowed values for <NAMESPACE>: {NAMESPACES}"
)
# Check for dangling escapes
if sum(1 for _ in takewhile(lambda x: x == "\\", reversed(fullname))) % 2 != 0:
raise KeyNameException(
f"The key must not end with an unescaped backslash '\\'."
)
# Split Key Name into Key Name Parts and process separately
parts = list(PART_REGEX.finditer(fullname))
part_count = len(parts)
# Will contain a Tuple for each Key Name Part in the canonical Key Name.
# The first element of the Tuple is the RegEx Mach object. This is used for processing `..` parts.
# The second element is either None, meaning this part will be skipped when creating the final Key Name or a str, i.e. a canonical Key Name Part.
key_parts: List[Tuple[Match[str], Optional[str]]] = []
for index, part in enumerate(parts):
full_part = part.group(0)
leading_slashes = len(part.group('leadingSlashes'))
actual_part = part.group('content')
part_start = part.start(0) + namespace_offset
part_end = part.end(0) + namespace_offset
# Report leading slashes in verbose mode.
if verbose and leading_slashes > 0:
print(
f"[DEBUG] removing {leading_slashes} leading slashes " +
f"from part '{full_part}' ({part_start}:{part_end})",
file=sys.stderr
)
# If this is the last part and we have no content, we are done. A trailing slash is explicitly allowed.
if index == part_count - 1:
if len(actual_part) == 0:
if verbose:
print(
f"[DEBUG] removing empty last part ({part_start}:{part_end})",
file=sys.stderr
)
break
# Validate escape sequences
for esc in ESCAPE_REGEX.finditer(actual_part):
escape = esc.group(1)
escape_valid = escape in ESCAPES or (
escape in ESCAPES_SPECIAL and
ESCAPES_SPECIAL[escape](actual_part)
)
if not escape_valid:
raise KeyNameException(
f"Illegal escape sequence '\\{escape}'. " +
f"Invalid part '{full_part}' ({part_start}:{part_end})."
)
if actual_part == ".":
# A `.` is skipped for the canonical Key Name.
if verbose:
print(
f"[DEBUG] skipping part '.' ({part_start}:{part_end})",
file=sys.stderr
)
elif actual_part == "..":
# A `..` removes a part from the canonical Key Name, so we need to check if there is a part left.
# If there is no part left, `..` behaves like `.`
if len(key_parts) == 0:
print(
f"[DEBUG] ignoring additional part '..' ({part_start}:{part_end})",
file=sys.stderr
)
continue
if verbose:
previous_part, _ = key_parts[-1]
full_previous_part = previous_part.group(0)
previous_start = previous_part.start(0) + namespace_offset
previous_end = previous_part.end(0) + namespace_offset
print(
f"[DEBUG] found part '..' ({part_start}:{part_end}) " +
f"removing previous part '{full_previous_part}' ({previous_start}:{previous_end})",
file=sys.stderr
)
key_parts.pop()
elif actual_part[0] == "#":
# This may be an Array part, so we need to check the structure.
valid, digits = check_array_part(actual_part)
if not valid:
key_parts.append((part, actual_part))
continue
# The canonical version of an Array Part always has the underscores.
canonical_part = f"#{'_' * (len(digits) - 1)}{digits}"
if verbose:
print(
f"[DEBUG] found array part '{full_part}'" +
f" canoncalized to '{canonical_part}' ({part_start}:{part_end})",
file=sys.stderr
)
key_parts.append((part, canonical_part))
elif actual_part[0] == "%":
# Parts starting with `%` are reserved, ...
if verbose:
if len(actual_part) == 1:
print(
f"[DEBUG] part '%' represents empty part ({part_start}:{part_end})",
file=sys.stderr
)
# ... but do not make Key Names invalid.
key_parts.append((part, actual_part))
else:
# Basic Key Name Part, just append to list
key_parts.append((part, actual_part))
# Create the Key Name without the namespace
key = "/".join(p for (_, p) in key_parts if p is not None)
# Add the Namespace, if this is not a cascading Key Name
if namespace is not None:
fullkey = f"{namespace}:/{key}"
else:
fullkey = f"/{key}"
return fullkey
def unescape(canonical: str, verbose: bool = False) -> Tuple[Namespace, Iterator[str]]:
"""
Returns the Namespace enum value and list of Key Name Parts for a given canonical Key Name.
`canonical` MUST be a canonical Key Name. If you are not sure about this, call `canonicalize` first.
"""
# Extract the Namespace string value and convert it into an enum value.
colon_index = canonical.find(":")
if colon_index > 0:
namespace = NAMESPACES_MAP[canonical[:colon_index]]
else:
namespace = Namespace.CASCADING
def unescape_part(part: Match[str]) -> str:
"""
Returns the unescaped version of a single Key Name Part.
`part` MUST NOT contain unescaped slashes `/` for this function to be correct.
"""
actual_part = part.group("content")
if actual_part == "%":
# Found an empty part, replace with empty string
if verbose:
part_start = part.start(0)
part_end = part.end(0)
print(
f"[DEBUG] '%' represents an empty part. ({part_start}:{part_end})",
file=sys.stderr
)
return ""
else:
# Simply remove the backslashes `\` from the escape sequences
return ESCAPE_REGEX.sub("\\1", actual_part)
# Use the RegEx to separate the parts and then unescape each one individually
unescaped = (
unescape_part(part) for part in PART_REGEX.finditer(canonical)
)
return (namespace, unescaped)
"""
START OF MAIN FUNCTION
"""
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Check and convert Elektra Key Names."
)
parser.add_argument(
"--verbose", "-v",
help="Show more info about what is happening.",
action="store_true"
)
subparsers = parser.add_subparsers(
title="commands",
dest="command"
)
p_canonicalize = subparsers.add_parser(
"canonicalize",
description="canonicalize Key Name and print result"
)
p_canonicalize.add_argument(
"--prefix", "-p",
nargs="?",
default="",
help="existing Key Name to which 'name' should be added"
)
p_canonicalize.add_argument(
"name",
help="the Key Name to canonicalize"
)
p_unescape = subparsers.add_parser(
"unescape",
description="unescape Key Name and print one part per line"
)
p_unescape.add_argument(
"--raw", "-r",
action="store_true",
help="print the raw unescaped Key Name instead of one part per line"
)
p_unescape.add_argument(
"--python-bytes", "-b",
action="store_true",
help="print the raw unescaped Key Name as a python bytes object"
)
p_unescape.add_argument(
"name",
help="the Key Name to unescape"
)
args = parser.parse_args()
if args.command == "canonicalize":
try:
if "prefix" in args:
key_canonical = canonicalize(
args.name,
args.prefix,
verbose=args.verbose
)
else:
key_canonical = canonicalize(
args.name,
verbose=args.verbose
)
print(key_canonical)
except KeyNameException as e:
print("ERROR:", e, file=sys.stderr)
exit(1)
elif args.command == "unescape":
try:
if "prefix" in args:
key_canonical = canonicalize(args.name, args.prefix)
else:
key_canonical = canonicalize(args.name)
if args.verbose:
print(f"[DEBUG] canonical version of name: {key_canonical}")
namespace, unescaped = unescape(
key_canonical,
verbose=args.verbose
)
if args.raw:
sys.stdout.buffer.write(
bytes([namespace.value]) +
b"\0" +
b"\0".join(bytes(p, encoding="utf-8") for p in unescaped) +
b"\0"
)
elif args.python_bytes:
print(
bytes([namespace.value]) +
b"\0" +
b"\0".join(bytes(p, encoding="utf-8") for p in unescaped) +
b"\0"
)
else:
print(str(namespace))
for part in unescaped:
print(part)
except KeyNameException as e:
print("ERROR:", e, file=sys.stderr)
exit(1)
else:
parser.print_usage()