-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.js
119 lines (108 loc) · 3.02 KB
/
lexer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
// lexer.js
// functions to tokenize a Scheme expression
// debugging alias for console.log
// NOTE(review): `print` is not referenced anywhere in this file — dead
// debug helper, candidate for removal.
var print = console.log;
// Identity handler: hands the matched lexeme back unchanged. Used by
// match_table entries whose tokens need no conversion (symbols,
// operators, punctuation).
function token_identity(tok) {
    return tok;
}
// Token pattern table, tried in order. Each entry contributes exactly one
// capture group to the combined alternation regex built in tokenize(), so
// an entry's position in this array determines which capture-group index
// maps back to its handler_f. Do NOT reorder or remove entries without
// auditing tokenize().
var match_table = [
    {
        regex_str: "([a-zA-Z_]+)", // symbol
        handler_f: token_identity
    },
    {
        regex_str: "(-?\\d+)", // number (optionally negative integer)
        // Radix 10 is passed explicitly so older engines never fall into
        // legacy octal parsing for leading-zero input.
        handler_f: function (token) { return parseInt(token, 10); }
    },
    {
        regex_str: "(\\+)", // addition
        handler_f: token_identity
    },
    {
        regex_str: "(\\-)", // subtraction
        handler_f: token_identity
    },
    {
        regex_str: "(\\*)", // multiplication
        handler_f: token_identity
    },
    {
        regex_str: "(\\/)", // division
        handler_f: token_identity
    },
    {
        regex_str: "(\\>)", // greater than
        handler_f: token_identity
    },
    {
        regex_str: "(\\<)", // less than
        handler_f: token_identity
    },
    {
        regex_str: "(\\=)", // equal sign
        handler_f: token_identity
    },
    {
        regex_str: "(\\@)", // at sign (comment previously said "equal sign" — copy-paste error)
        handler_f: token_identity
    },
    {
        regex_str: "(\\!)", // exclamation mark
        handler_f: token_identity
    },
    {
        regex_str: "(\\;)", // semi-colon
        handler_f: token_identity
    },
    {
        regex_str: "(\\|)", // pipe
        handler_f: token_identity
    },
    {
        regex_str: "(\\()", // left paren
        handler_f: token_identity
    },
    {
        regex_str: "(\\))", // right paren
        handler_f: token_identity
    },
    {
        regex_str: "(\\')", // single quote
        handler_f: token_identity
    },
    {
        regex_str: '(\\")', // double quote
        handler_f: token_identity
    },
    {
        regex_str: "(\\`)", // backquote
        handler_f: token_identity
    },
    {
        regex_str: "(\\,)", // comma
        handler_f: token_identity
    }
];
// TODO: better way to push tokens
//
// NOTE(review): neither `match_indexer` nor `match_index_regex` is
// referenced anywhere in this file (tokenize builds its own inline /\d+/
// regex) — both look like leftovers from an earlier revision and are
// candidates for removal.
var match_indexer = match_table.length; // first and last two elements are not needed
var match_index_regex = /\d+/;
// Tokenize a Scheme expression string.
//
// Builds a single alternation regex from match_table — each table entry
// is one capture group — and walks every match in the input. For each
// match, exactly one capture group is defined; its 1-based position
// identifies the match_table entry whose handler_f converts the lexeme
// (e.g. numeric literals become ints via parseInt). Characters that match
// no pattern (whitespace, etc.) are silently skipped.
//
// Fixes a bug in the previous implementation: it located the fired group
// by regex-searching JSON.stringify(arguments) after `delete`-ing keys
// 0, 12, and 13 — so any token captured by group 12 (semicolon) or 13
// (pipe) lost its key, and the first surviving numeric key became the
// match *offset*, indexing the wrong table entry (or crashing on
// `undefined.handler_f`). Indices 12/13 were stale remnants of a smaller
// table; the trailing offset/string arguments now sit at 20/21.
//
// @param {string} expression - source text to tokenize
// @returns {Array} tokens: strings, or numbers for numeric literals
var tokenize = exports.tokenize = function tokenize(expression) {
    var tokens = [];
    var regex_string = match_table.map(function (elem) {
        return elem.regex_str;
    }).join('|');
    var regex = new RegExp(regex_string, 'g');
    // String.replace is used purely to iterate matches with a callback;
    // the replacement result is discarded.
    expression.replace(regex, function (match) {
        // Callback arguments: full match, group 1..N, offset, input string.
        // Scan groups 1..N for the one that is defined; it selects the
        // table entry whose handler converts this lexeme.
        for (var i = 1; i <= match_table.length; i++) {
            if (arguments[i] !== undefined) {
                tokens.push(match_table[i - 1].handler_f(match));
                break;
            }
        }
        return '';
    });
    return tokens;
};