]>
git.kianting.info Git - uann/blob - index.js
411e166d95894b49998c1f4b59f51f567f5a11d5
2 Object
.defineProperty(exports
, "__esModule", { value
: true });
3 exports
.tokenize
= exports
.zeroOrOnceDo
= exports
.notDo
= exports
.zeroOrMoreDo
= exports
.orDo
= exports
.thenDo
= exports
.charToCodepoint
= exports
.matchRange
= exports
.matchAny
= exports
.match1Char
= exports
.TokenType
= void 0;
4 var fs
= require('fs');
/**
 * Wrap a value in a `Some` option object.
 *
 * NOTE(review): the function header was missing from the damaged listing; the
 * name and single parameter are taken from the `toSome(x)` call sites — confirm
 * against the repository.
 *
 * @param x the value to be wrapped.
 * @returns `{ _tag: "Some", value: x }`.
 */
function toSome(x) {
    return { _tag: "Some", value: x };
}
/**
 * Token kinds produced by the tokenizer.
 *
 * Each name maps to its number and each number maps back to its name
 * (`TokenType.NL === 0`, `TokenType[0] === "NL"`).
 *
 * - SP  : half-width space and tab
 * - I_* : integer manipulation
 * - F_* : float manipulation
 *
 * NOTE(review): the `var TokenType;` declaration was not visible in the damaged
 * listing and is restored here; redeclaring with `var` is harmless if the
 * declaration also exists elsewhere in the file.
 */
var TokenType;
(function (TokenType) {
    TokenType[TokenType["NL"] = 0] = "NL";
    TokenType[TokenType["SP"] = 1] = "SP";
    TokenType[TokenType["ID"] = 2] = "ID";
    TokenType[TokenType["STR"] = 3] = "STR";
    TokenType[TokenType["FLO"] = 4] = "FLO";
    TokenType[TokenType["INT"] = 5] = "INT";
    TokenType[TokenType["F_ADD"] = 6] = "F_ADD";
    TokenType[TokenType["F_SUB"] = 7] = "F_SUB";
    TokenType[TokenType["F_MUL"] = 8] = "F_MUL";
    TokenType[TokenType["F_DIV"] = 9] = "F_DIV";
    TokenType[TokenType["I_ADD"] = 10] = "I_ADD";
    TokenType[TokenType["I_SUB"] = 11] = "I_SUB";
    TokenType[TokenType["I_MUL"] = 12] = "I_MUL";
    TokenType[TokenType["I_DIV"] = 13] = "I_DIV";
    TokenType[TokenType["L_PAREN"] = 14] = "L_PAREN";
    TokenType[TokenType["R_PAREN"] = 15] = "R_PAREN";
    TokenType[TokenType["L_BRACK"] = 16] = "L_BRACK";
    TokenType[TokenType["R_BRACK"] = 17] = "R_BRACK";
    TokenType[TokenType["L_BRACE"] = 18] = "L_BRACE";
    TokenType[TokenType["R_BRACE"] = 19] = "R_BRACE";
    TokenType[TokenType["COMMA"] = 20] = "COMMA";
    TokenType[TokenType["DOT"] = 21] = "DOT";
    TokenType[TokenType["COLON"] = 22] = "COLON";
    TokenType[TokenType["SEMI_C"] = 23] = "SEMI_C";
    TokenType[TokenType["AT"] = 24] = "AT";
    TokenType[TokenType["HASH"] = 25] = "HASH";
    TokenType[TokenType["EQ"] = 26] = "EQ";
    TokenType[TokenType["SET"] = 27] = "SET";
    TokenType[TokenType["GT"] = 28] = "GT";
    TokenType[TokenType["LT"] = 29] = "LT";
    TokenType[TokenType["GE"] = 30] = "GE";
    TokenType[TokenType["LE"] = 31] = "LE";
    TokenType[TokenType["R_ARROW"] = 32] = "R_ARROW";
})(TokenType || (exports.TokenType = TokenType = {}));
/**
 * Build a matcher for one specific character.
 *
 * The returned function tests whether the first char of the `remained` part of
 * its argument is `c`. If so, it moves that char from `remained` to the end of
 * `matched` and returns the updated pair wrapped in `Some`. Otherwise (including
 * when `remained` is empty) it returns `None`.
 *
 * @param c the char to be tested.
 * @returns a function from a `{ matched, remained }` pair to `Some(pair)` or `None`.
 */
function match1Char(c) {
    return (m) => {
        if (m.remained.length === 0) {
            return { _tag: "None" };
        }
        const charToBeMatched = m.remained[0];
        if (charToBeMatched !== c) {
            return { _tag: "None" };
        }
        return {
            _tag: "Some",
            value: {
                matched: m.matched + charToBeMatched,
                remained: m.remained.substring(1),
            },
        };
    };
}
96 exports
.match1Char
= match1Char
;
/**
 * Consume any single char.
 *
 * @param m the `{ matched, remained }` pair to be consumed.
 * @returns if the length of `m.remained` >= 1, the pair with its first remaining
 * char moved onto `matched`, wrapped in `Some`; otherwise `None`.
 */
function matchAny(m) {
    if (m.remained.length < 1) {
        return { _tag: "None" };
    }
    return {
        _tag: "Some",
        value: {
            matched: m.matched + m.remained[0],
            remained: m.remained.substring(1),
        },
    };
}
115 exports
.matchAny
= matchAny
;
/**
 * Build a matcher for a char range, like the regex class `[l-u]`.
 *
 * The returned function tests whether the first char of the `remained` part of
 * its argument lies between `l` and `u` (both inclusive, compared by code unit).
 * If so, it moves that char onto `matched` and returns the updated pair wrapped
 * in `Some`. Otherwise (including when `remained` is empty) it returns `None`.
 *
 * @param l lower bound char, 1-char string.
 * @param u upper bound char, 1-char string.
 * @returns a function from a `{ matched, remained }` pair to `Some(pair)` or `None`.
 * @throws {Error} if the bounds are reversed (or a bound is rejected by
 * `charToCodepoint`).
 */
function matchRange(l, u) {
    const lCodepoint = charToCodepoint(l);
    const uCodepoint = charToCodepoint(u);
    // NOTE(review): the guard condition was missing from the damaged listing and
    // is inferred from the error message; confirm against the repository.
    if (lCodepoint > uCodepoint) {
        throw new Error("Error: the codepoint of `" + l + "` is not smaller than `" + u + "`)");
    }
    return (m) => {
        if (m.remained.length < 1) {
            return { _tag: "None" };
        }
        const charToBeMatched = m.remained[0];
        const codePointToBeMatched = charToCodepoint(charToBeMatched);
        const inRange = codePointToBeMatched >= lCodepoint
            && codePointToBeMatched <= uCodepoint;
        if (!inRange) {
            return { _tag: "None" };
        }
        return {
            _tag: "Some",
            value: {
                matched: m.matched + charToBeMatched,
                remained: m.remained.substring(1),
            },
        };
    };
}
148 exports
.matchRange
= matchRange
;
/**
 * Convert a one-char string to its UTF-16 code unit.
 *
 * The empty string is rejected as well as longer strings: `"".charCodeAt(0)` is
 * `NaN`, which would make every later range comparison silently false.
 *
 * @param s the one-char string to convert.
 * @returns the code unit of `s` (`s.charCodeAt(0)`).
 * @throws {Error} if `s.length` is not exactly 1.
 */
function charToCodepoint(s) {
    if (s.length !== 1) {
        throw new Error(`Error: the length of input string for \`${s}\` is ${s.length}, however, it should be 1.`);
    }
    return s.charCodeAt(0);
}
164 exports
.charToCodepoint
= charToCodepoint
;
/**
 * @description Chain a matcher onto a wrapped input: `thenDo(input, f)` applies
 * `f` to the value inside `input` when `input` is `Some`, and passes `None`
 * through untouched without calling `f`.
 *
 * NOTE(review): both return statements were missing from the damaged listing
 * and are reconstructed from the doc comment and the call sites.
 *
 * @param input the wrapped input.
 * @param f the function to be applied to the unwrapped value.
 * @returns the wrapped result of `f`, or `input` itself when it is `None`.
 */
function thenDo(input, f) {
    if (input._tag === "None") {
        return input;
    }
    return f(input.value);
}
182 exports
.thenDo
= thenDo
;
/**
 * @description "or", like the regex `( f1 | f2 )`.
 * It returns a function `F` whose argument is `x`.
 * If `f1(x)` is `None`, `F` returns `f2(x)`; otherwise `F` returns `f1(x)`.
 * `f2` is only called when `f1` fails.
 *
 * NOTE(review): most of the body was missing from the damaged listing and is
 * reconstructed from this contract.
 *
 * @param f1 1st function to be tried.
 * @param f2 2nd function to be tried.
 * @returns the combined function.
 */
function orDo(f1, f2) {
    return (x) => {
        const f1x = f1(x);
        if (f1x._tag === "None") {
            return f2(x);
        }
        return f1x;
    };
}
/**
 * @description Repeat the matching function `f` zero or more times, like the
 * asterisk `*` in the regex `f*`.
 *
 * The returned function always yields `Some`: the result of the last successful
 * application of `f`, or the input pair itself when `f` never matches.
 * Repetition also stops as soon as `f` succeeds without shortening `remained`,
 * because such a matcher would otherwise succeed forever.
 *
 * @param f the function to be repeated 0+ times.
 * @returns the combined function.
 */
function zeroOrMoreDo(f) {
    return (x) => {
        let last = { _tag: "Some", value: x };
        for (;;) {
            const next = f(last.value);
            if (next._tag === "None") {
                return last;
            }
            // No input consumed: stop here instead of looping forever.
            if (next.value.remained.length >= last.value.remained.length) {
                return next;
            }
            last = next;
        }
    };
}
224 exports
.zeroOrMoreDo
= zeroOrMoreDo
;
/**
 * @description Not, like the `^` inside the regex `[^f]`.
 * Returns a function `F(x)` such that if `f(x)` is `None`, `F` returns `x`
 * with one char consumed (via `matchAny`); if `f(x)` is not `None`, `F(x)`
 * returns `None`.
 *
 * NOTE(review): the function header was missing from the damaged listing; the
 * name and parameter are taken from the doc comment and the export statement.
 *
 * @param f the function forbidden to be matched.
 * @returns the combined function `F`.
 */
function notDo(f) {
    return (x) => {
        const wrapped_x = { _tag: "Some", value: x };
        const f_x = thenDo(wrapped_x, f);
        if (f_x._tag !== "None") {
            return { _tag: "None" };
        }
        return thenDo(wrapped_x, matchAny);
    };
}
248 exports
.notDo
= notDo
;
/**
 * If `x` is matched by `f` once, returns `f(x)`. Otherwise, returns `x`
 * unchanged, wrapped in `Some`. Similar to `?` in the regex `f?`.
 *
 * @param f the function to be matched.
 * @returns a function returning the wrapped `f(x)`, or the wrapped `x` when `f`
 * does not match.
 */
function zeroOrOnceDo(f) {
    return (x) => {
        const attempt = f(x);
        if (attempt._tag !== "None") {
            return attempt;
        }
        return { _tag: "Some", value: x };
    };
}
268 exports
.zeroOrOnceDo
= zeroOrOnceDo
;
/**
 * Split `input` into tokens.
 *
 * Token patterns are tried in a fixed order and the first one that matches
 * wins, so multi-char operators must be listed before the single-char operators
 * they start with. The resulting token list is logged to the console and
 * returned.
 *
 * NOTE(review): this block was rebuilt from a damaged listing. The line/column
 * bookkeeping (`ln` starting at 1, `col` starting at 0, both stored on every
 * token) is inferred from the visible error message and gaps — confirm against
 * the repository.
 *
 * @param input the source text to be tokenized.
 * @returns the list of tokens, each `{ text, type, ln, col }`.
 * @throws {Error} if some part of `input` matches no token pattern.
 */
function tokenize(input) {
    const input_matchee_pair = toSome({ matched: "", remained: input });

    /**
     * generate a parser of a basic term (b_term)
     * @param pattern : the pattern parser
     * @param token_type : the returning token type
     * @returns a wrapped parser that tags a successful match with `token_type`.
     */
    function bTerm(pattern, token_type) {
        return (x) => {
            const wrapped_x = toSome(x);
            const result = pattern(wrapped_x);
            if (result._tag === "Some") {
                result.value.matched_type = token_type;
            }
            return result;
        };
    }

    /**
     * two-char operator : generating the pattern of an operator made of two chars
     * @param first : 1st char of the operator
     * @param second : 2nd char of the operator
     * @param token_type : the corresponding token_type
     */
    function twoCharOp(first, second, token_type) {
        return bTerm((x) => thenDo(thenDo(x, match1Char(first)), match1Char(second)), token_type);
    }

    /**
     * unary operator : generating the pattern of basic unary operator
     * @param char : unary char for the operator
     * @param token_type : the corresponding token_type
     */
    function unaryOp(char, token_type) {
        return bTerm((x) => thenDo(x, match1Char(char)), token_type);
    }

    const d = matchRange('0', '9'); // \d
    const plusMinus = orDo(match1Char('+'), match1Char('-')); // [+-]
    const s_aux = orDo(match1Char(' '), match1Char('\t')); // (" " | "\t")

    // integer = ([+]|[-])?\d\d*
    const integer = bTerm((x) => thenDo(thenDo(thenDo(x, zeroOrOnceDo(plusMinus)), d), zeroOrMoreDo(d)), TokenType.INT);
    // space = (" " | "\t")+ ; tagged SP (it used to be tagged INT by mistake)
    const space = bTerm((x) => thenDo(thenDo(x, s_aux), zeroOrMoreDo(s_aux)), TokenType.SP);
    // newline = \r?\n
    const newline = bTerm((x) => thenDo(thenDo(x, zeroOrOnceDo(match1Char('\r'))), match1Char('\n')), TokenType.NL);

    // [_A-Za-z]
    const idHead = orDo(orDo(matchRange('a', 'z'), matchRange('A', 'Z')), match1Char('_'));
    const idRemained = orDo(idHead, matchRange('0', '9')); // [_A-Za-z0-9]
    // id = [_A-Za-z][_A-Za-z0-9]*
    const id = bTerm((x) => thenDo(thenDo(x, idHead), zeroOrMoreDo(idRemained)), TokenType.ID);

    const doublequote = match1Char("\"");
    // [\\]["]
    const escapeReverseSlash = (x) => thenDo(thenDo(toSome(x), match1Char("\\")), doublequote);
    // ([\\]["]|[^"])*
    const stringInnerPattern = zeroOrMoreDo(orDo(escapeReverseSlash, notDo(match1Char("\""))));
    // str = ["]([\\]["]|[^"])*["]
    const str = bTerm((x) => thenDo(thenDo(thenDo(x, doublequote), stringInnerPattern), doublequote), TokenType.STR);

    // float = [+-]?\d+[.]\d+
    function floatPattern(x) {
        return thenDo(thenDo(thenDo(thenDo(thenDo(thenDo(x, zeroOrOnceDo(plusMinus)), d), zeroOrMoreDo(d)), match1Char(".")), d), zeroOrMoreDo(d));
    }
    const float = bTerm(floatPattern, TokenType.FLO);

    // float operators: "+." "-." "*." "/."
    const floatAdd = twoCharOp("+", ".", TokenType.F_ADD);
    const floatSub = twoCharOp("-", ".", TokenType.F_SUB);
    const floatMul = twoCharOp("*", ".", TokenType.F_MUL);
    const floatDiv = twoCharOp("/", ".", TokenType.F_DIV);

    // two-char comparison operators and the arrow: "==" ">=" "<=" "->"
    const eq = twoCharOp("=", "=", TokenType.EQ);
    const ge = twoCharOp(">", "=", TokenType.GE);
    const le = twoCharOp("<", "=", TokenType.LE);
    const rightArrow = twoCharOp("-", ">", TokenType.R_ARROW);

    const intAdd = unaryOp('+', TokenType.I_ADD);
    const intSub = unaryOp('-', TokenType.I_SUB);
    const intMul = unaryOp('*', TokenType.I_MUL);
    const intDiv = unaryOp('/', TokenType.I_DIV);
    const lParen = unaryOp('(', TokenType.L_PAREN);
    const rParen = unaryOp(')', TokenType.R_PAREN);
    const lBracket = unaryOp('[', TokenType.L_BRACK);
    const rBracket = unaryOp(']', TokenType.R_BRACK);
    const lBrace = unaryOp('{', TokenType.L_BRACE);
    const rBrace = unaryOp('}', TokenType.R_BRACE);
    const comma = unaryOp(',', TokenType.COMMA);
    const dot = unaryOp('.', TokenType.DOT);
    const colon = unaryOp(':', TokenType.COLON);
    const semicolon = unaryOp(';', TokenType.SEMI_C);
    const at = unaryOp('@', TokenType.AT);
    const hash = unaryOp('#', TokenType.HASH);
    const set = unaryOp('=', TokenType.SET);
    const greaterthan = unaryOp('>', TokenType.GT);
    // "<" is LT (it used to be tagged LE by mistake)
    const lessthan = unaryOp('<', TokenType.LT);

    const term = (token_list, x) => {
        let ln = 1;
        let col = 0;
        let old_x = x;
        // First match wins: "->" (rightArrow) and the other two-char operators
        // must come before "-" (intSub) and the remaining single-char operators,
        // otherwise "->" is split into I_SUB followed by GT.
        const term_list = [float, newline, space, integer, str, id,
            floatAdd, floatSub, floatMul, floatDiv,
            eq, ge, le, rightArrow,
            intAdd, intSub, intMul, intDiv,
            lParen, rParen, lBracket, rBracket, lBrace, rBrace,
            comma, dot, colon, semicolon, at, hash,
            set, greaterthan, lessthan];
        const term_aux = term_list.reduce((x, y) => orDo(x, y));
        let new_x = thenDo(old_x, term_aux);
        while (new_x._tag !== "None") {
            if (new_x.value.matched_type !== TokenType.NL) {
                // `col` is advanced before the token is recorded.
                col += new_x.value.matched.length;
            }
            else {
                // A newline starts the next line at column 0.
                col = 0;
                ln += 1;
            }
            token_list.push({ text: new_x.value.matched,
                type: new_x.value.matched_type,
                ln: ln,
                col: col });
            // Restart matching on whatever is left, with an empty `matched`.
            old_x = toSome({ matched: "",
                remained: new_x.value.remained });
            new_x = thenDo(old_x, term_aux);
        }
        // Leftover input means no pattern matched at this position.
        if (old_x.value.remained.length) {
            console.log(token_list);
            throw new Error("the code can't be tokenized is near Ln. " + ln + ", Col." + col
                + ", starting with " + old_x.value.remained.substring(0, 10));
        }
        return token_list;
    };

    const tokens = term([], input_matchee_pair);
    console.log(tokens);
    // TODO: id, string, space, basic operator, 3 marks: @, {, }.
    return tokens;
}
404 exports
.tokenize
= tokenize
;