]> git.kianting.info Git - clo/blob - src/index.js
411e166d95894b49998c1f4b59f51f567f5a11d5
[clo] / src / index.js
1 "use strict";
2 Object.defineProperty(exports, "__esModule", { value: true });
3 exports.tokenize = exports.zeroOrOnceDo = exports.notDo = exports.zeroOrMoreDo = exports.orDo = exports.thenDo = exports.charToCodepoint = exports.matchRange = exports.matchAny = exports.match1Char = exports.TokenType = void 0;
4 var fs = require('fs');
/**
 * Build the `Some` variant of the option type around a value.
 * @param x : the value to wrap.
 * @returns an object tagged `"Some"` whose `value` field is `x`.
 */
function toSome(x) {
    const wrapped = { _tag: "Some", value: x };
    return wrapped;
}
/**
 * Enumeration of the token kinds, numbered 0..32 in declaration order.
 *
 * The object maps both ways: `TokenType.NL === 0` and `TokenType[0] === "NL"`.
 *
 * NL        newline
 * SP        half-width space and tab
 * ID        identifier
 * STR       string
 * FLO       float number
 * INT       integer
 * F_*       float arithmetic operators
 * I_*       integer arithmetic operators
 * L_* / R_* opening / closing parenthesis, bracket and brace
 * COMMA, DOT, COLON, SEMI_C (semi-colon), AT, HASH
 * EQ, SET, GT, LT, GE, LE, R_ARROW
 */
var TokenType;
(function (TokenType) {
    // The position in this list is the numeric value of the member.
    const memberNames = [
        "NL", "SP", "ID", "STR", "FLO", "INT",
        "F_ADD", "F_SUB", "F_MUL", "F_DIV",
        "I_ADD", "I_SUB", "I_MUL", "I_DIV",
        "L_PAREN", "R_PAREN", "L_BRACK", "R_BRACK", "L_BRACE", "R_BRACE",
        "COMMA", "DOT", "COLON", "SEMI_C", "AT", "HASH",
        "EQ", "SET", "GT", "LT", "GE", "LE", "R_ARROW",
    ];
    memberNames.forEach((memberName, ordinal) => {
        // forward mapping name -> number, then reverse mapping number -> name
        TokenType[memberName] = ordinal;
        TokenType[ordinal] = memberName;
    });
})(TokenType || (exports.TokenType = TokenType = {}));
/**
 * @description
 * Build a matcher for one specific character.
 * The returned function looks at the first char of `remained`; when it equals
 * `c`, that char is moved from `remained` to the end of `matched` and the new
 * pair is returned wrapped in `Some`. In every other case (including an empty
 * `remained`) the result is `None`.
 * @param c : the char to be tested.
 * @returns a function from a `MatcheePair` to `Some(updated pair)` or `None`.
 */
function match1Char(c) {
    return (m) => {
        const rest = m.remained;
        if (rest.length !== 0 && rest[0] === c) {
            const consumed = {
                matched: m.matched + rest[0],
                remained: rest.substring(1)
            };
            return { _tag: "Some", value: consumed };
        }
        return { _tag: "None" };
    };
}
96 exports.match1Char = match1Char;
97 ;
/**
 * Consume exactly one char, whatever it is.
 * @param m : the `MatcheePair` to be consumed.
 * @returns `Some` of the pair with the first char of `remained` appended to
 * `matched` when `remained` holds at least one char; `None` otherwise.
 */
function matchAny(m) {
    const hasInput = m.remained.length >= 1;
    return hasInput
        ? { _tag: "Some", value: {
                matched: m.matched + m.remained[0],
                remained: m.remained.substring(1)
            } }
        : { _tag: "None" };
}
115 exports.matchAny = matchAny;
/**
 * @description
 * Build a matcher for a character range, like `[l-u]` in a regex.
 * The returned function converts the first char of `remained` with
 * `charToCodepoint` and, when the result lies between the values of `l` and
 * `u` (both inclusive), moves that char from `remained` to `matched` and
 * returns the new pair wrapped in `Some`. Otherwise it returns `None`.
 * @param l : lower bound char, 1-char string
 * @param u : upper bound char, 1-char string
 * @returns a function from a `MatcheePair` to `Some(updated pair)` or `None`.
 * @throws Error when `l > u`, or when `charToCodepoint` rejects `l` or `u`.
 */
function matchRange(l, u) {
    const lowerBound = charToCodepoint(l);
    const upperBound = charToCodepoint(u);
    if (l > u) {
        throw new Error("Error: the codepoint of `" + l + "` is not smaller than `" + u + "`)");
    }
    return (m) => {
        if (m.remained.length < 1) {
            return { _tag: "None" };
        }
        const head = m.remained[0];
        const headCode = charToCodepoint(head);
        const inRange = lowerBound <= headCode && headCode <= upperBound;
        return inRange
            ? { _tag: "Some", value: {
                    matched: m.matched + head,
                    remained: m.remained.substring(1)
                } }
            : { _tag: "None" };
    };
}
148 exports.matchRange = matchRange;
149 ;
/**
 * Convert a one-char string to its numeric character code.
 *
 * The value comes from `String.prototype.charCodeAt(0)`, i.e. it is the
 * UTF-16 code unit of the char.
 * @param s : the string to convert; its length must be exactly 1.
 * @returns the character code of `s`.
 * @throws Error when `s.length` is not 1. The empty string is rejected too,
 * because `"".charCodeAt(0)` would silently yield `NaN`.
 */
function charToCodepoint(s) {
    if (s.length !== 1) {
        throw new Error("Error: the length of input string for `" + s + "` is " + s.length
            + ", however, it should be 1.");
    }
    return s.charCodeAt(0);
}
164 exports.charToCodepoint = charToCodepoint;
/**
 * @description sequencing helper: `thenDo(input, f)` reads as
 * input ==> f
 * A `None` input short-circuits and is handed back unchanged; a `Some` input
 * is unwrapped and its value is passed to `f`.
 * @param input: the wrapped input.
 * @param f: the function to be applied to the unwrapped value.
 *
 * @returns: `input` itself when it is `None`, otherwise whatever `f` returns.
 */
function thenDo(input, f) {
    return input._tag === "None" ? input : f(input.value);
}
182 exports.thenDo = thenDo;
/**
 * @description alternation, like the regex `( f1 | f2 )`.
 * The returned function `F` tries `f1(x)` first and keeps its result unless
 * that result is `None`, in which case `F` returns `f2(x)` instead.
 * `f2` is only called when `f1` failed.
 * @param f1 : the alternative tried first
 * @param f2 : the fallback alternative
 * @returns: the combined function
 */
function orDo(f1, f2) {
    return (x) => {
        const firstAttempt = f1(x);
        return firstAttempt._tag === "None" ? f2(x) : firstAttempt;
    };
}
205 exports.orDo = orDo;
/**
 * @description repeat the matching function `f` zero or more times,
 * like the asterisk `*` in the regex `f*`.
 *
 * `f` is applied again and again to the latest successful result. The loop
 * ends when `f` returns `None` (the last successful result is returned), or
 * when `f` succeeds without shortening `remained`. The second condition
 * prevents an endless loop for matchers that can succeed on empty input,
 * e.g. `zeroOrMoreDo(zeroOrOnceDo(p))`.
 * @param f : the function to be repeated 0+ times; it takes a `MatcheePair`
 * and returns it wrapped in `Some`, or `None`.
 * @returns: the combined function; it never returns `None`.
 */
function zeroOrMoreDo(f) {
    return (x) => {
        let lastSuccess = { _tag: "Some", value: x };
        for (;;) {
            const attempt = f(lastSuccess.value);
            if (attempt._tag === "None") {
                return lastSuccess;
            }
            const consumedInput = attempt.value.remained.length < lastSuccess.value.remained.length;
            lastSuccess = attempt;
            if (!consumedInput) {
                // `f` matched the empty string: repeating it cannot make progress.
                return lastSuccess;
            }
        }
    };
}
224 exports.zeroOrMoreDo = zeroOrMoreDo;
/**
 * @description negation, like the `^` inside the regex `[^f]`.
 * The returned function `F(x)` first runs `f(x)`:
 * - when `f(x)` is `None`, `F(x)` is `matchAny(x)`, i.e. `x` with one char
 *   consumed (or `None` if nothing is left to consume);
 * - when `f(x)` is not `None`, `F(x)` is `None`.
 * @param f: the function forbidden to be matched.
 * @returns: combined function `F`.
 */
function notDo(f) {
    return (x) => {
        const forbidden = f(x);
        if (forbidden._tag === "None") {
            return matchAny(x);
        }
        return { _tag: "None" };
    };
}
248 exports.notDo = notDo;
/**
 * Optional match, similar to `?` in the regex `f?`.
 * When `f(x)` succeeds its result is returned; when `f(x)` is `None`,
 * `x` is returned unchanged, wrapped in `Some`.
 * @param f : the function to be matched at most once
 * @returns a function that never returns `None`
 */
function zeroOrOnceDo(f) {
    return (x) => {
        const attempt = f(x);
        return attempt._tag === "None" ? { _tag: "Some", value: x } : attempt;
    };
}
268 exports.zeroOrOnceDo = zeroOrOnceDo;
/**
 * Split `input` into a flat list of tokens.
 *
 * Every token is an object `{ text, type, ln, col }`:
 * - `text` : the matched source text;
 * - `type` : a `TokenType` member;
 * - `ln`   : line counter, starting at 1 and incremented by each newline token
 *            (the newline token itself already carries the incremented value);
 * - `col`  : the summed length of all tokens on the current line up to and
 *            including this one; it is reset to 0 by a newline token.
 *
 * The token list is written to the console and returned.
 * @param input : the source text to tokenize.
 * @returns the list of tokens.
 * @throws Error when some part of `input` cannot be matched by any token
 * pattern; the tokens recognised so far are logged first.
 */
function tokenize(input) {
    const input_matchee_pair = toSome({ matched: "", remained: input });
    /**
     * generate a parser of a basic term (b_term)
     * @param pattern : the pattern parser; it takes a wrapped `MatcheePair`
     * @param token_type : the token type stored on a successful match
     * @returns a parser that takes an unwrapped `MatcheePair` and, on success,
     * sets `matched_type` on the resulting pair.
     */
    function bTerm(pattern, token_type) {
        return (x) => {
            const result = pattern(toSome(x));
            if (result._tag === "Some") {
                result.value.matched_type = token_type;
            }
            return result;
        };
    }
    /**
     * two-char operator : pattern matching `first` immediately followed by `second`
     * @param first : 1st char of the operator
     * @param second : 2nd char of the operator
     * @param token_type : the corresponding token_type
     */
    function twoCharOp(first, second, token_type) {
        return bTerm((x) => thenDo(thenDo(x, match1Char(first)), match1Char(second)), token_type);
    }
    /**
     * unary operator : generating the pattern of a basic one-char operator
     * @param char : the single char of the operator
     * @param token_type : the corresponding token_type
     */
    function unaryOp(char, token_type) {
        return bTerm((x) => thenDo(x, match1Char(char)), token_type);
    }
    const d = matchRange('0', '9'); // \d
    // [+-]
    const plusMinus = orDo(match1Char('+'), match1Char('-'));
    const s_aux = orDo(match1Char(' '), match1Char('\t')); // (" " | "\t")
    // integer = ([+]|[-])?\d\d*
    const integer = bTerm((x) => thenDo(thenDo(thenDo(x, zeroOrOnceDo(plusMinus)), d), zeroOrMoreDo(d)), TokenType.INT);
    // space = [ \t]+   (tagged SP; it used to be tagged INT by mistake)
    const space = bTerm((x) => thenDo(thenDo(x, s_aux), zeroOrMoreDo(s_aux)), TokenType.SP);
    // newline = \r?\n
    const newline = bTerm((x) => thenDo(thenDo(x, zeroOrOnceDo(match1Char('\r'))), match1Char('\n')), TokenType.NL);
    // [_A-Za-z]
    const idHead = orDo(orDo(matchRange('a', 'z'), matchRange('A', 'Z')), match1Char('_'));
    const idRemained = orDo(idHead, matchRange('0', '9')); // [_A-Za-z0-9]
    // id = [_A-Za-z][_A-Za-z0-9]*
    const id = bTerm((x) => thenDo(thenDo(x, idHead), zeroOrMoreDo(idRemained)), TokenType.ID);
    const doublequote = match1Char("\"");
    // [\\][\"]
    const escapeReverseSlash = (x) => thenDo(thenDo(toSome(x), match1Char("\\")), doublequote);
    // ([\\]["]|[^\"])*
    const stringInnerPattern = zeroOrMoreDo(orDo(escapeReverseSlash, notDo(match1Char("\""))));
    // str = ["]([\\]["]|[^"])*["]
    const str = bTerm((x) => thenDo(thenDo(thenDo(x, doublequote), stringInnerPattern), doublequote), TokenType.STR);
    // float = [+-]?\d+[.]\d+
    function floatPattern(x) {
        return thenDo(thenDo(thenDo(thenDo(thenDo(thenDo(x, zeroOrOnceDo(plusMinus)), d), zeroOrMoreDo(d)), match1Char(".")), d), zeroOrMoreDo(d));
    }
    const float = bTerm(floatPattern, TokenType.FLO);
    // two-char operators
    const floatAdd = twoCharOp("+", ".", TokenType.F_ADD); // +.
    const floatSub = twoCharOp("-", ".", TokenType.F_SUB); // -.
    const floatMul = twoCharOp("*", ".", TokenType.F_MUL); // *.
    const floatDiv = twoCharOp("/", ".", TokenType.F_DIV); // /.
    const eq = twoCharOp("=", "=", TokenType.EQ); // ==
    const ge = twoCharOp(">", "=", TokenType.GE); // >=
    const le = twoCharOp("<", "=", TokenType.LE); // <=
    const rightArrow = twoCharOp("-", ">", TokenType.R_ARROW); // ->
    // one-char operators
    const intAdd = unaryOp('+', TokenType.I_ADD);
    const intSub = unaryOp('-', TokenType.I_SUB);
    const intMul = unaryOp('*', TokenType.I_MUL);
    const intDiv = unaryOp('/', TokenType.I_DIV);
    const lParen = unaryOp('(', TokenType.L_PAREN);
    const rParen = unaryOp(')', TokenType.R_PAREN);
    const lBracket = unaryOp('[', TokenType.L_BRACK);
    const rBracket = unaryOp(']', TokenType.R_BRACK);
    const lBrace = unaryOp('{', TokenType.L_BRACE);
    const rBrace = unaryOp('}', TokenType.R_BRACE);
    const comma = unaryOp(',', TokenType.COMMA);
    const dot = unaryOp('.', TokenType.DOT);
    const colon = unaryOp(':', TokenType.COLON);
    const semicolon = unaryOp(';', TokenType.SEMI_C);
    const at = unaryOp('@', TokenType.AT);
    const hash = unaryOp('#', TokenType.HASH);
    const set = unaryOp('=', TokenType.SET);
    const greaterthan = unaryOp('>', TokenType.GT);
    // `<` is LT (it used to be tagged LE by mistake)
    const lessthan = unaryOp('<', TokenType.LT);
    /**
     * repeatedly match one term at the head of the remaining input and
     * append the resulting token to `token_list`.
     * @param token_list : the list receiving the tokens
     * @param x : the wrapped `MatcheePair` to start from
     * @returns `token_list`
     */
    const term = (token_list, x) => {
        let ln = 1;
        let col = 0;
        let old_x = x;
        // `orDo` keeps the FIRST alternative that matches, so every two-char
        // operator has to be listed before the one-char operator sharing its
        // first char; otherwise `->` would be split into `-` and `>`.
        const term_list = [float, newline, space, integer, str, id,
            floatAdd, floatSub, floatMul, floatDiv,
            eq, ge, le, rightArrow,
            intAdd, intSub, intMul, intDiv,
            lParen, rParen, lBracket, rBracket, lBrace, rBrace,
            comma, dot, colon, semicolon, at, hash,
            set, greaterthan, lessthan];
        const term_aux = term_list.reduce((x, y) => orDo(x, y));
        let new_x = thenDo(old_x, term_aux);
        while (new_x._tag !== "None") {
            const matched_pair = new_x.value;
            if (matched_pair.matched_type !== TokenType.NL) {
                col += matched_pair.matched.length;
            }
            else {
                col = 0;
                ln += 1;
            }
            token_list.push({ text: matched_pair.matched,
                type: matched_pair.matched_type,
                ln: ln,
                col: col });
            // restart matching on the rest of the input with an empty `matched`
            old_x = toSome({ matched: "",
                remained: matched_pair.remained });
            new_x = thenDo(old_x, term_aux);
        }
        if (old_x.value.remained.length) {
            console.log(token_list);
            throw new Error("the code can't be tokenized is near Ln. " + ln + ", Col." + col
                + ", starting with " + old_x.value.remained.substring(0, 10));
        }
        return token_list;
    };
    const tokens = term([], input_matchee_pair);
    console.log(tokens);
    // TODO: id, string, space, basic operator, 3 marks: @, {, }.
    return tokens;
}
404 exports.tokenize = tokenize;