-
-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathgrammar.js
116 lines (104 loc) · 2.34 KB
/
grammar.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
// Characters treated as "end characters" by the URI regex and the `_end_char` rule.
const END_CHARS = [
  // Sentence punctuation.
  ".", ",", ":", ";", "!", "?",
  // Backslash and quotes.
  "\\", "'", '"',
  // Closing brackets.
  "}", "]", ")", ">",
];
// Characters that are emitted as single-character text tokens (`_stop_char`) and
// that are excluded from longer text runs (`notmatching(STOP_CHARS)`).
const STOP_CHARS = [
  // Slash and quotes.
  "/", "'", '"',
  // Opening brackets.
  "<", "(", "[", "{",
  // Sentence punctuation.
  ".", ",", ":", ";", "!", "?",
  // Backslash.
  "\\",
  // Closing brackets.
  "}", "]", ")", ">",
  // Kept last so that, once the list is joined into a regex character class,
  // the hyphen isn't interpreted as a range.
  "-",
];
// Grammar definition for the "comment" language, built with the `grammar` DSL
// function. `grammar`, `repeat`, `choice`, `seq`, `alias` and `optional` are not
// defined in this file; presumably they are globals supplied by the tree-sitter
// tooling that evaluates it.
module.exports = grammar({
name: "comment",
// `name` and `invalid_token` have no rule below. NOTE(review): tokens listed
// under `externals` are presumably produced by an external scanner — confirm
// against the scanner source.
externals: ($) => [
$.name,
$.invalid_token
],
rules: {
// A source is any number of tags, URIs (with their terminator), and text
// tokens, in any order. Text tokens are aliased to "text".
source: ($) => repeat(
choice(
$.tag,
$._full_uri,
alias($._text, "text"),
),
),
// A tag is a name, an optional parenthesized user, and a trailing colon,
// e.g. `NAME:` or `NAME(user):`.
tag: ($) => seq(
$.name,
optional($._user),
":",
),
// The user part of a tag: one or more characters that are not parentheses,
// wrapped in "(" and ")". Only the inner text is aliased to `user`.
_user: ($) => seq(
"(",
alias(/[^()]+/, $.user),
")",
),
// This token is split into two parts so the end character isn't included in the URI itself.
// The URI must be followed by either an end character (aliased to "text") or a whitespace character.
_full_uri: ($) => seq($.uri, choice(alias($._end_char, "text"), /\s/)),
// This token needs to be a single regex, otherwise a partial match will result in an error.
uri: ($) => get_uri_regex(),
// Text tokens can be a single stop character, or a sequence of characters that are neither whitespace nor stop characters.
_text: ($) => choice($._stop_char, notmatching(STOP_CHARS)),
// Any one of the characters listed in STOP_CHARS.
_stop_char: ($) => choice(...STOP_CHARS),
// Any one of the characters listed in END_CHARS.
_end_char: ($) => choice(...END_CHARS),
},
});
/**
 * Build the regular expression used for the `uri` token.
 *
 * The pattern matches:
 *
 * - A leading `http://` or `https://`.
 * - Followed by one or more "units", where each unit is either
 *   - a single character that is neither whitespace nor an end character, or
 *   - an end character immediately followed by an ASCII letter or digit
 *     (as in `.com`).
 *
 * Because an end character is only consumed when a letter or digit follows
 * it, a match never ends on whitespace or on an end character; that trailing
 * character marks the end of the URI instead.
 *
 * End characters are the entries of END_CHARS.
 *
 * @returns {RegExp} The URI pattern.
 */
function get_uri_regex() {
  // END_CHARS joined and escaped so it can be embedded in a character class.
  const endClass = escapeRegExp(END_CHARS.join(""));
  const plainChar = `[^\\s${endClass}]`;
  const endCharThenAlnum = `[${endClass}][a-zA-Z0-9]`;
  return new RegExp(`https?://(${plainChar}|${endCharThenAlnum})+`);
}
/**
 * Build a regex matching a run of one or more characters, none of which is
 * whitespace or one of the given characters.
 *
 * `escapeRegExp` does not escape "-", so a hyphen anywhere but the end of a
 * character class would be read as a range (or make the class invalid). To
 * avoid depending on the order of `chars`, a hyphen is always emitted as the
 * last member of the class, where it is taken literally.
 *
 * @param {string[]} chars - Single characters that must not appear in a match.
 * @returns {RegExp} A pattern of the form `[^\s...]+`.
 */
function notmatching(chars) {
  const others = chars.filter((char) => char !== "-");
  // Re-append the hyphen (if one was present) at the very end of the class.
  const hyphen = others.length < chars.length ? "-" : "";
  return new RegExp(`[^\\s${escapeRegExp(others.join(""))}${hyphen}]+`);
}
/**
 * Backslash-escape the regular-expression special characters in a string so
 * that it can be embedded in a pattern and matched literally.
 *
 * The escaped characters are: . * + ? ^ $ { } ( ) | [ ] \
 * Note that "-" is not in this set and is returned unchanged.
 *
 * Taken from https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping.
 *
 * @param {string} string - The text to escape.
 * @returns {string} The text with each special character prefixed by a backslash.
 */
function escapeRegExp(string) {
  const specialChars = /[.*+?^${}()|[\]\\]/g;
  return string.replace(specialChars, (match) => `\\${match}`);
}