-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathITextParsers.h
462 lines (430 loc) · 14 KB
/
ITextParsers.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
/**
* vim: set ts=4 :
* =============================================================================
* SourceMod
* Copyright (C) 2004-2008 AlliedModders LLC. All rights reserved.
* =============================================================================
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License, version 3.0, as published by the
* Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
* As a special exception, AlliedModders LLC gives you permission to link the
* code of this program (as well as its derivative works) to "Half-Life 2," the
* "Source Engine," the "SourcePawn JIT," and any Game MODs that run on software
* by the Valve Corporation. You must obey the GNU General Public License in
* all respects for all other code used. Additionally, AlliedModders LLC grants
* this exception to all derivative works. AlliedModders LLC defines further
* exceptions, found in LICENSE.txt (as of this writing, version JULY-31-2007),
* or <http://www.sourcemod.net/license.php>.
*
* Version: $Id$
*/
#ifndef _INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_
#define _INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_
#include <string.h> // size_t
/**
* @file ITextParsers.h
* @brief Defines various text/file parsing functions, as well as UTF-8 support code.
*/
//namespace SourceMod
//{
/** Interface name string; returned by ITextParsers::GetInterfaceName(). */
#define SMINTERFACE_TEXTPARSERS_NAME "ITextParsers"
/**
 * Interface version number; returned by ITextParsers::GetInterfaceVersion() and by the
 * default GetTextParserVersion1()/GetTextParserVersion2() listener implementations.
 */
#define SMINTERFACE_TEXTPARSERS_VERSION 4
/**
* The INI file format is defined as:
* WHITESPACE: 0x20, \n, \t, \r
* IDENTIFIER: A-Z a-z 0-9 _ - , + . $ ? /
* STRING: Any set of symbols
*
* Basic syntax is comprised of SECTIONs.
* A SECTION is defined as:
* [SECTIONNAME]
* OPTION
* OPTION
* OPTION...
*
* SECTIONNAME is an IDENTIFIER.
* OPTION can be repeated any number of times, once per line.
* OPTION is defined as one of:
* KEY = "VALUE"
* KEY = VALUE
* KEY
* Where KEY is an IDENTIFIER and VALUE is a STRING.
*
* WHITESPACE should always be omitted.
* COMMENTS should be stripped, and are defined as text occurring in:
* ;<TEXT>
*
* Example file below. Note that the second line is technically
* invalid. The event handler must decide whether this should be
* allowed.
* --FILE BELOW--
* [gaben]
* hi = clams
* bye = "NO CLAMS"
*
* [valve]
* cannot
* maintain
* products
*/
/**
 * @brief Event listener for INI file parsing.
 *
 * Every event has a default implementation, so a listener only needs to
 * override the events it is interested in. The declaration order of the
 * virtual functions is kept as-is because it determines the vtable layout.
 */
class ITextListener_INI
{
public:
	/**
	 * @brief Version query.
	 *
	 * @return				SMINTERFACE_TEXTPARSERS_VERSION.
	 */
	virtual unsigned int GetTextParserVersion1() { return SMINTERFACE_TEXTPARSERS_VERSION; }

	/**
	 * @brief Invoked when parsing begins. Does nothing by default.
	 */
	virtual void ReadINI_ParseStart() {}

	/**
	 * @brief Invoked when parsing finishes. Does nothing by default.
	 *
	 * @param halted		True if parsing was halted abnormally, false otherwise.
	 */
	virtual void ReadINI_ParseEnd(bool halted) {}

	/**
	 * @brief Invoked for each new section found in the INI file.
	 *
	 * @param section		Section name, i.e. the text between the [ and ] characters.
	 * @param invalid_tokens	True if the name contained invalid tokens.
	 * @param close_bracket	True if a closing bracket was found, false otherwise.
	 * @param extra_tokens	True if there were extra tokens on the line.
	 * @param curtok		Token position in the line at which the section name starts.
	 * 						When failing, this offset may be advanced to point at a token.
	 * @return				True to keep parsing, false otherwise (default: true).
	 */
	virtual bool ReadINI_NewSection(const char *section,
									bool invalid_tokens,
									bool close_bracket,
									bool extra_tokens,
									unsigned int *curtok)
	{
		return true;
	}

	/**
	 * @brief Invoked for each key/value pair found in the INI file.
	 *
	 * @param key			Key name.
	 * @param value			Value string, with surrounding quotes (if any) stripped.
	 * @param invalid_tokens	True if the key contained invalid tokens.
	 * @param equal_token	True if an '=' sign was present (useful when the value is missing).
	 * @param quotes		True if the value was enclosed in quotes.
	 * @param curtok		Token index of the start of the value string.
	 * 						May be changed when returning false.
	 * @return				True to keep parsing, false otherwise (default: true).
	 */
	virtual bool ReadINI_KeyValue(const char *key,
								  const char *value,
								  bool invalid_tokens,
								  bool equal_token,
								  bool quotes,
								  unsigned int *curtok)
	{
		return true;
	}

	/**
	 * @brief Invoked after a line has been preprocessed, provided it has text.
	 *
	 * @param line			Contents of the line.
	 * @param curtok		Optional output: position in the string at which a failure occurred.
	 * @return				True to keep parsing, false otherwise (default: true).
	 */
	virtual bool ReadINI_RawLine(const char *line, unsigned int *curtok)
	{
		return true;
	}
};
/**
* :TODO: write this in CFG (context free grammar) format so it makes sense
*
* The SMC file format is defined as:
* WHITESPACE: 0x20, \n, \t, \r
* IDENTIFIER: Any ASCII character EXCLUDING ", {, }, ;, //, / *, or WHITESPACE.
* STRING: Any set of symbols enclosed in quotes.
* Note: if a STRING does not have quotes, it is parsed as an IDENTIFIER.
*
* Basic syntax is comprised of SECTIONBLOCKs.
* A SECTIONBLOCK is defined as:
*
* SECTIONNAME
* {
* OPTION
* }
*
* OPTION can be repeated any number of times inside a SECTIONBLOCK.
* A new line will terminate an OPTION, but there can be more than one OPTION per line.
* OPTION is defined as any of:
* "KEY" "VALUE"
* SECTIONBLOCK
*
* SECTIONNAME, KEY, VALUE, and SINGLEKEY are strings
* SECTIONNAME cannot have trailing characters if quoted, but the quotes can be optionally removed.
* If SECTIONNAME is not enclosed in quotes, the entire sectionname string is used (minus surrounding whitespace).
* If KEY is not enclosed in quotes, the key is terminated at first whitespace.
* If VALUE is not properly enclosed in quotes, the entire value string is used (minus surrounding whitespace).
* The VALUE may have inner quotes, but the key string may not.
*
* For an example, see configs/permissions.cfg
*
* WHITESPACE should be ignored.
* Comments are text occurring inside the following tokens, and should be stripped
* unless they are inside literal strings:
* ;<TEXT>
* //<TEXT>
* / *<TEXT> */
/**
 * @brief Directives an SMC parse hook can return to tell the parser what to do next.
 */
enum SMCResult
{
	/** Continue parsing */
	SMCResult_Continue = 0,

	/** Stop parsing here */
	SMCResult_Halt = 1,

	/** Stop parsing and return SMCError_Custom */
	SMCResult_HaltFail = 2
};
/**
 * @brief Error codes that parsing an SMC file can produce.
 */
enum SMCError
{
	/** No error */
	SMCError_Okay = 0,

	/** Stream failed to open */
	SMCError_StreamOpen = 1,

	/** The stream died... somehow */
	SMCError_StreamError = 2,

	/** A custom handler threw an error */
	SMCError_Custom = 3,

	/** A section was declared without quotes, and had extra tokens */
	SMCError_InvalidSection1 = 4,

	/** A section was declared without any header */
	SMCError_InvalidSection2 = 5,

	/** A section ending was declared with too many unknown tokens */
	SMCError_InvalidSection3 = 6,

	/** A section ending has no matching beginning */
	SMCError_InvalidSection4 = 7,

	/** A section beginning has no matching ending */
	SMCError_InvalidSection5 = 8,

	/** There were too many unidentifiable strings on one line */
	SMCError_InvalidTokens = 9,

	/** The token buffer overflowed */
	SMCError_TokenOverflow = 10,

	/** A property was declared outside of any section */
	SMCError_InvalidProperty1 = 11
};
/**
 * @brief Line/column position state of the SMC parser.
 */
struct SMCStates
{
	/** Current line */
	unsigned int line;

	/** Current col */
	unsigned int col;
};
/**
 * @brief Event listener for reading an SMC stream.
 *
 * Every event has a default implementation, so a listener only needs to
 * override the events it is interested in. The declaration order of the
 * virtual functions is kept as-is because it determines the vtable layout.
 */
class ITextListener_SMC
{
public:
	/**
	 * @brief Version query.
	 *
	 * @return				SMINTERFACE_TEXTPARSERS_VERSION.
	 */
	virtual unsigned int GetTextParserVersion2() { return SMINTERFACE_TEXTPARSERS_VERSION; }

	/**
	 * @brief Invoked when parsing begins. Does nothing by default.
	 */
	virtual void ReadSMC_ParseStart() {}

	/**
	 * @brief Invoked when parsing finishes. Does nothing by default.
	 *
	 * @param halted		True if parsing was halted abnormally, false otherwise.
	 * @param failed		True if parsing failed, false otherwise.
	 */
	virtual void ReadSMC_ParseEnd(bool halted, bool failed) {}

	/**
	 * @brief Invoked when a new section is entered.
	 *
	 * @param states		Parsing states.
	 * @param name			Name of the section, with the colon omitted.
	 * @return				SMCResult directive (default: SMCResult_Continue).
	 */
	virtual SMCResult ReadSMC_NewSection(const SMCStates *states, const char *name)
	{
		return SMCResult_Continue;
	}

	/**
	 * @brief Invoked for each key/value pair found inside a section.
	 *
	 * @param states		Parsing states.
	 * @param key			Key string.
	 * @param value			Value string. NULL if no quotes were specified, in which
	 * 						case key holds the entire string.
	 * @return				SMCResult directive (default: SMCResult_Continue).
	 */
	virtual SMCResult ReadSMC_KeyValue(const SMCStates *states, const char *key, const char *value)
	{
		return SMCResult_Continue;
	}

	/**
	 * @brief Invoked when the current section is left.
	 *
	 * @param states		Parsing states.
	 * @return				SMCResult directive (default: SMCResult_Continue).
	 */
	virtual SMCResult ReadSMC_LeavingSection(const SMCStates *states)
	{
		return SMCResult_Continue;
	}

	/**
	 * @brief Invoked after an input line has been preprocessed.
	 *
	 * @param states		Parsing states.
	 * @param line			Contents of the line, null terminated at the position of
	 * 						the newline character (so it contains no newline).
	 * @return				SMCResult directive (default: SMCResult_Continue).
	 */
	virtual SMCResult ReadSMC_RawLine(const SMCStates *states, const char *line)
	{
		return SMCResult_Continue;
	}
};
/**
 * @brief Interface exposing text stream parsing functions (INI, SMC) and UTF-8 helpers.
 *
 * The SMInterface base class is commented out in this header, so the
 * interface name/version functions are implemented directly here.
 */
class ITextParsers /*: public SMInterface*/
{
public:
	/**
	 * @brief Returns the interface name.
	 *
	 * @return				SMINTERFACE_TEXTPARSERS_NAME.
	 */
	virtual const char *GetInterfaceName() { return SMINTERFACE_TEXTPARSERS_NAME; }

	/**
	 * @brief Returns the interface version.
	 *
	 * @return				SMINTERFACE_TEXTPARSERS_VERSION.
	 */
	virtual unsigned int GetInterfaceVersion() { return SMINTERFACE_TEXTPARSERS_VERSION; }

	/**
	 * @brief Checks whether a requested interface version is accepted.
	 *
	 * @param version		Version to check.
	 * @return				False for versions below 2, true otherwise.
	 */
	virtual bool IsVersionCompatible(unsigned int version)
	{
		// Checked locally; the SMInterface::IsVersionCompatible call is disabled in this header.
		return version >= 2;
	}

	/**
	 * @brief Parses a file in INI format.
	 *
	 * @param file			Path to file.
	 * @param ini_listener	Event handler for reading the file.
	 * @param line			If non-NULL, receives the last line parsed (0 if the file could not be opened).
	 * @param col			If non-NULL, receives the last column parsed (undefined if the file could not be opened).
	 * @param inline_comment	Whether inline comments are allowed.
	 * @return				True if parsing succeeded, false if the file could not be
	 * 						opened or there was a syntax error.
	 */
	virtual bool ParseFile_INI(const char *file,
							   ITextListener_INI *ini_listener,
							   unsigned int *line,
							   unsigned int *col,
							   bool inline_comment = true) = 0;

	/**
	 * @brief Parses a text file in SMC format.
	 *
	 * The parser makes every effort to obey broken syntax. For example, if an
	 * open brace is missing but the section name has a colon, it will let you
	 * know. Whether to be strict or not is left to the event handlers.
	 *
	 * @param file			Path to file.
	 * @param smc_listener	Event handler for reading the file.
	 * @param states		Optional pointer to store last known states.
	 * @return				An SMCError result code.
	 */
	virtual SMCError ParseFile_SMC(const char *file,
								   ITextListener_SMC *smc_listener,
								   SMCStates *states) = 0;

	/**
	 * @brief Translates an SMCError into a string.
	 *
	 * @param err			SMCError.
	 * @return				String error message, or NULL if none.
	 */
	virtual const char *GetSMCErrorString(SMCError err) = 0;

	/**
	 * @brief Returns how many bytes a multi-byte character occupies in a UTF-8 stream.
	 * For a character that is not multi-byte the result is 1.
	 *
	 * @param stream		Pointer to multi-byte ANSI character string.
	 * @return				Number of bytes in the current character.
	 */
	virtual unsigned int GetUTF8CharBytes(const char *stream) = 0;

	/**
	 * @brief Tells whether the first multi-byte character of the given stream
	 * is a whitespace character.
	 *
	 * @param stream		Pointer to multi-byte character string.
	 * @return				True if the first character is whitespace, false otherwise.
	 */
	virtual bool IsWhitespace(const char *stream) = 0;

	/**
	 * @brief Same as ParseFile_SMC, but with an extended error buffer.
	 *
	 * @param file			Path to file.
	 * @param smc_listener	Event handler for reading the file.
	 * @param states		Optional pointer to store last known states.
	 * @param buffer		Error message buffer.
	 * @param maxsize		Maximum size of the error buffer.
	 * @return				Error code.
	 */
	virtual SMCError ParseSMCFile(const char *file,
								  ITextListener_SMC *smc_listener,
								  SMCStates *states,
								  char *buffer,
								  size_t maxsize) = 0;

	/**
	 * @brief Parses a raw UTF8 stream as an SMC file.
	 *
	 * @param stream		Memory containing data.
	 * @param length		Number of bytes in the stream.
	 * @param smc_listener	Event handler for reading the stream.
	 * @param states		Optional pointer to store last known states.
	 * @param buffer		Error message buffer.
	 * @param maxsize		Maximum size of the error buffer.
	 * @return				Error code.
	 */
	virtual SMCError ParseSMCStream(const char *stream,
									size_t length,
									ITextListener_SMC *smc_listener,
									SMCStates *states,
									char *buffer,
									size_t maxsize) = 0;
};
/**
 * @brief Returns the byte length of the UTF-8 character that starts at the
 * given position, determined from its first byte only.
 *
 * The lead byte is classified by its full bit prefix:
 *   0xxxxxxx -> 1, 110xxxxx -> 2, 1110xxxx -> 3, 11110xxx -> 4.
 * A byte that cannot start a sequence (a stray continuation byte 10xxxxxx, or
 * 11111xxx) is reported as a single byte, so a caller advancing by the
 * returned count moves exactly one byte past it instead of skipping up to
 * three unrelated bytes.
 *
 * Only the first byte is read; the function does not verify that the
 * expected continuation bytes actually follow.
 *
 * @param stream		Pointer to the first byte of the character (must be readable).
 * @return				Number of bytes in the current character (1 to 4).
 */
inline unsigned int _GetUTF8CharBytes(const char *stream)
{
	const unsigned char lead = static_cast<unsigned char>(*stream);

	if (lead < 0x80)
	{
		return 1;	/* 0xxxxxxx: single byte */
	}
	if ((lead & 0xE0) == 0xC0)
	{
		return 2;	/* 110xxxxx */
	}
	if ((lead & 0xF0) == 0xE0)
	{
		return 3;	/* 1110xxxx */
	}
	if ((lead & 0xF8) == 0xF0)
	{
		return 4;	/* 11110xxx */
	}

	/* 10xxxxxx or 11111xxx: not a valid lead byte, consume just this byte */
	return 1;
}
//}
/**
 * Global pointer to the ITextParsers interface. This is only a declaration
 * (extern); the definition must be provided by another translation unit.
 */
extern ITextParsers *textparsers;
#endif //_INCLUDE_SOURCEMOD_TEXTPARSERS_INTERFACE_H_