]>
git.kianting.info Git - uann/blob - src/index.ts
5ca3a12b008cf290bfdc3a21675da93324132912
1 import { match
} from
"assert";
3 var fs
= require('fs');
5 export type Some
<T
> = { _tag
: "Some"; value
: T
};
6 export type None
= {_tag
: "None"};
10 * wrap a x in a `Some(T)`
11 * @param x : variable to be wrapped.
12 * @returns wrapped `x`.
14 function toSome
<T
>(x
: T
): Some
<T
>{
15 return { _tag
: "Some", value
: x
};
18 * @description Like the `Some(a)` and `None` in Rust.
22 * let exam1 : Maybe<Number> = { _tag: "Some", value: 12 };
23 * let exam2 : Maybe<Number> = None;
26 export type Maybe
<T
> = Some
<T
> | None
;
31 * the pair of the string to be matched later and the string that have been matched
32 * @var matched : have been matched
33 * @var remained : will be tested whether it'll be matched.
34 * @var matched_type (optional): the type of the matched string
36 export interface MatcheePair
{
39 matched_type
?: TokenType
46 * SP, // half-width space and tab
52 * OP, // operator or something like it
58 * I_* // integer manipulation
60 * F_* // float manipulation
64 export enum TokenType
{
66 SP
, // half-width space and tab
103 * @var text : the content text
104 * @var type (optional): the type of the token
105 * @var col : the column number
106 * @var ln : the line number
108 export interface Token
{
117 * it returns a function which test if the first char of the `remained` part of
118 * the argument of the function is `c`, if it's true, update the `MatchedPair` wrapped
119 * in `Some`. Otherwise, it returns `None`.
120 * * @param c : the char to be test.
121 * @returns the updated `MatchedPair` wrapped in `Some(x)` or `None`.
123 export function match1Char(c
: string) : (m
: MatcheePair
) => Maybe
<MatcheePair
> {
124 return (m
: MatcheePair
)=>{
125 if (m
.remained
.length
== 0){
126 return { _tag
: "None" };
128 const charToBeMatched
= m
.remained
[0];
129 if (charToBeMatched
=== c
){
130 return {_tag
: "Some", value
:{
131 matched
: m
.matched
+ charToBeMatched
,
132 remained
: m
.remained
.substring(1)}};
135 return {_tag
: "None"};
142 * @param m : the `MatcheePair` to be consumed.
143 * @returns if the length of `m.remained` >= 1; consumes the matchee by 1 char and wraps it in `Some`,
144 * otherwise, returns `None`.
146 export function matchAny(m
: MatcheePair
) : Maybe
<MatcheePair
>{
147 if (m
.remained
.length
>= 1){
148 return {_tag
: "Some", value
:{
149 matched
: m
.matched
+ m
.remained
[0],
150 remained
: m
.remained
.substring(1)}};
152 return {_tag
: "None"};
158 * it returns a function which test if the first char of the `remained` part of
159 * the argument of the function is between `l` and `u`, if it's true, update the `MatchedPair` wrapped
160 * in `Some`. Otherwise, it returns `None`.
161 * * @param l : lower bound char, 1-char string
162 * * @param u : upper bound char, 1-char string
163 * @returns the updated `MatchedPair` wrapped in `Some(x)` or `None`.
165 export function matchRange(l
: string, u
: string) : (m
: MatcheePair
) => Maybe
<MatcheePair
> {
166 let lCodepoint
= charToCodepoint(l
);
167 let uCodepoint
= charToCodepoint(u
);
169 throw new Error("Error: the codepoint of `"+l
+"` is not smaller than `"+u
+"`)");
171 return (m
: MatcheePair
)=>{
172 if (m
.remained
.length
< 1){
173 return {_tag
: "None"};
175 const charToBeMatched
= m
.remained
[0];
176 const codePointToBeMatched
= charToCodepoint(charToBeMatched
);
177 if (codePointToBeMatched
>= lCodepoint
&& codePointToBeMatched
<= uCodepoint
){
178 return {_tag
: "Some", value
:{
179 matched
: m
.matched
+ charToBeMatched
,
180 remained
: m
.remained
.substring(1)}};
183 return {_tag
: "None"};
189 * convert the one-char string to codepoint.
190 * @param s : the string to code point.
191 * @returns if `s.length > 1` return error; otherwise, return the codepoint of `s`.
193 export function charToCodepoint(s
: string): number{
195 throw new Error("Error: the length of input string for "+s
+ "is "+s
.length
+`,
196 however, it should be 1.`);
198 return s
.charCodeAt(0);
203 * @description thendo(input, f, ...) like
205 * @param input: the wrapped input.
206 * @param f: the function to be applied.
208 * @returns:the applied wrapped result `MatcheePair`.
210 export function thenDo
<T
>(input
: Maybe
<T
>, f
: Function) : Maybe
<T
>{
211 if (input
._tag
== "None"){
215 let inner
= input
.value
;
221 * @description "or", like the regex `( f1 | f2 )` .
222 * It returns a function `f` of which the argument is`x`.
223 * if `f1(x)` is None, then `f` returns `f2(x)`. Otherwise,
224 * `F` returns `f1(x)`.
225 * @param f1 : 1st function to be compared
226 * @param f2 : 2nd function to be compared
227 * @returns:the combined function
229 export function orDo
<T
>(f1
: Function, f2
: Function) : (x
: T
) => Maybe
<T
>{
231 let f1x
: Maybe
<T
> = (f1(x
));
233 if (f1x
._tag
== "None"){
245 * @description repeating matching function `f`
246 * zero or more times, like the asterisk `*` in regex `f*` .
247 * @param f : the function to be repeated 0+ times.
248 * @returns:the combined function
250 export function zeroOrMoreDo
<T
>(f
: Function): (x
: T
) => Maybe
<T
>{
252 var wrapped_old_x
: Maybe
<T
> = {_tag
: "Some", value
: x
};
253 var wrapped_new_x
: Maybe
<T
> = wrapped_old_x
;
255 while (wrapped_new_x
._tag
!= "None"){
256 wrapped_old_x
= wrapped_new_x
;
257 wrapped_new_x
= thenDo(wrapped_old_x
, f
);
260 return wrapped_old_x
;
265 * @description Not. like the `^` inside regex of [^f].
266 * returns a function `F(x)` such that if `f(x)` is `None`,
267 * returns the x consuming a char; if `f(x)` is not None, F(x)
269 * @param f: the function forbidden to be matched.
270 * @returns: combined function `F`.
272 export function notDo
<T
>(f
: Function): (x
: T
) => Maybe
<T
>{
274 let wrapped_x
: Maybe
<T
> = {
278 let f_x
= thenDo(wrapped_x
, f
);
280 if (f_x
._tag
!= "None"){
281 return {_tag
:"None"};
283 return thenDo(wrapped_x
, matchAny
);
289 * if `x` is matched by `f` once, returns `f(x)`. Otherwise,
291 * similar to `?` in regex `f?`.
292 * @param f : the function to be matched
293 * @returns return wrapped f(x)
295 export function zeroOrOnceDo
<T
>(f
: Function): (x
: T
) => Maybe
<T
>{
297 var wrapped_old_x
: Maybe
<T
> = {_tag
: "Some", value
: x
};
298 var wrapped_new_x
= thenDo(wrapped_old_x
, f
);
300 if (wrapped_new_x
._tag
!= "None"){
301 return wrapped_new_x
;
303 return wrapped_old_x
;
309 export function tokenize(input
: string){
310 var input_matchee_pair
: Maybe
<MatcheePair
> = toSome(
315 * generate a parser of a basic term (b_term)
316 * @param pattern : the pattern parser
317 * @param token_type : the returning token type
318 * @returns a wrapped parser.
320 function bTerm(pattern
: Function, token_type
: TokenType
){
321 return (x
: MatcheePair
) =>{
322 let wrapped_x
= toSome(x
);
323 let result
= pattern(wrapped_x
);
324 if (result
._tag
== "Some") {
325 result
.value
.matched_type
= token_type
;
331 let d
= matchRange('0','9'); // \d
333 let plusMinus
= orDo(match1Char('+'), match1Char('-'));
334 let s_aux
= orDo(match1Char(' '), match1Char('\t')); // (" " | "\t")
336 // integer = ([+]|[-])?\d\d*
337 let integer
= bTerm((x
: Maybe
<MatcheePair
>)=>
338 thenDo(thenDo(thenDo(x
,
339 zeroOrOnceDo(plusMinus
)),d
),
343 let space
= bTerm((x
: Maybe
<MatcheePair
>)=>
344 thenDo(thenDo(x
, s_aux
), zeroOrMoreDo(s_aux
)),
348 let newline
= bTerm((x
: Maybe
<MatcheePair
>)=>
350 zeroOrOnceDo(match1Char('\r'))),
355 let idHead
= orDo(orDo(matchRange('a','z'),matchRange('A','Z')), match1Char('_'));
356 let idRemained
= orDo(idHead
, matchRange('0','9')); // [_A-Za-z0-9]
358 // id = [_A-Za-z][_A-Za-z0-9]*
359 let id
= bTerm((x
: Maybe
<MatcheePair
>)=>
362 zeroOrMoreDo(idRemained
)),
364 let doublequote
= match1Char("\"");
366 let escapeReverseSlash
= (x
:MatcheePair
)=>
367 thenDo(thenDo(toSome(x
), match1Char("\\")), doublequote
);
369 let stringInnerPattern
= zeroOrMoreDo(
370 orDo(escapeReverseSlash
, notDo(match1Char("\""))));
372 // str = ["]([\\]["]|[^"])*["]
373 let str
= bTerm((x
: Maybe
<MatcheePair
>)=>
374 thenDo(thenDo(thenDo(x
,doublequote
),
375 stringInnerPattern
),doublequote
),
378 // float = [+-]?\d+[.]\d+
379 function floatPattern(x
: Maybe
<MatcheePair
>){
380 return thenDo(thenDo(thenDo(thenDo(thenDo(thenDo(x
,
381 zeroOrOnceDo(plusMinus
)),d
),
385 let float = bTerm(floatPattern
, TokenType
.FLO
);
389 let floatAdd
= bTerm((x
: Maybe
<MatcheePair
>)=>
390 thenDo(thenDo(x
, match1Char("+")),match1Char(".")),
393 let floatSub
= bTerm((x
: Maybe
<MatcheePair
>)=>
394 thenDo(thenDo(x
, match1Char("-")),match1Char(".")),
398 let floatMul
= bTerm((x
: Maybe
<MatcheePair
>)=>
399 thenDo(thenDo(x
, match1Char("*")),match1Char(".")),
403 let floatDiv
= bTerm((x
: Maybe
<MatcheePair
>)=>
404 thenDo(thenDo(x
, match1Char("/")),match1Char(".")),
408 let eq
= bTerm((x
: Maybe
<MatcheePair
>)=>
409 thenDo(thenDo(x
, match1Char("=")),match1Char("=")),
413 let ge
= bTerm((x
: Maybe
<MatcheePair
>)=>
414 thenDo(thenDo(x
, match1Char(">")),match1Char("=")),
418 let le
= bTerm((x
: Maybe
<MatcheePair
>)=>
419 thenDo(thenDo(x
, match1Char("<")),match1Char("=")),
423 let rightArrow
= bTerm((x
: Maybe
<MatcheePair
>)=>
424 thenDo(thenDo(x
, match1Char("-")),match1Char(">")),
428 * unary operator : generating the pattern of basic unary operator
429 * @param char : uniry char for the operator
430 * @param token_type : the corresponding token_type
432 function unaryOp(char: string, token_type
: TokenType
) {
433 return bTerm((x
: Maybe
<MatcheePair
>)=>thenDo(x
, match1Char(char)),
436 let intAdd
= unaryOp('+', TokenType
.I_ADD
);
437 let intSub
= unaryOp('-', TokenType
.I_SUB
);
438 let intMul
= unaryOp('*', TokenType
.I_MUL
);
439 let intDiv
= unaryOp('/', TokenType
.I_DIV
);
440 let lParen
= unaryOp('(', TokenType
.L_PAREN
);
441 let rParen
= unaryOp(')', TokenType
.R_PAREN
);
442 let lBracket
= unaryOp('[', TokenType
.L_BRACK
);
443 let rBracket
= unaryOp(']', TokenType
.R_BRACK
);
444 let lBrace
= unaryOp('{', TokenType
.L_BRACE
);
445 let rBrace
= unaryOp('}', TokenType
.R_BRACE
);
446 let comma
= unaryOp(',', TokenType
.COMMA
);
447 let dot
= unaryOp('.', TokenType
.DOT
);
448 let colon
= unaryOp(':', TokenType
.COLON
);
449 let semicolon
= unaryOp(';', TokenType
.SEMI_C
);
450 let at
= unaryOp('@', TokenType
.AT
);
451 let hash
= unaryOp('#', TokenType
.HASH
);
452 let set
= unaryOp('=', TokenType
.SET
);
453 let greaterthan
= unaryOp('>', TokenType
.GT
);
454 let lessthan
= unaryOp('<', TokenType
.LE
);
457 let term
= (token_list
: Array<Token
>, x
: Some
<MatcheePair
>)=>{
461 let term_list
= [float, newline
, space
, integer
,str
, id
,
462 floatAdd
, floatSub
, floatMul
, floatDiv
,
463 intAdd
, intSub
, intMul
, intDiv
,
464 eq
, ge
, le
, rightArrow
,
465 lParen
, rParen
, lBracket
, rBracket
, lBrace
, rBrace
,
466 comma
, dot
, colon
, semicolon
, at
, hash
,
467 set
,greaterthan
, lessthan
];
468 let term_aux
= term_list
.reduce((x
,y
)=> orDo(x
,y
));
470 var new_x
: Maybe
<MatcheePair
> = thenDo(old_x
, term_aux
);
471 while (new_x
._tag
!= "None"){
472 if (new_x
.value
.matched_type
!= TokenType
.NL
){
473 col
+= new_x
.value
.matched
.length
;
474 token_list
.push({text
: new_x
.value
.matched
,
475 type: new_x
.value
.matched_type
,
484 token_list
.push({text
: new_x
.value
.matched
,
485 type: new_x
.value
.matched_type
,
492 old_x
= toSome({matched
: "",
493 remained
: new_x
.value
.remained
});
494 new_x
= thenDo(old_x
, term_aux
);
497 if (old_x
.value
.remained
.length
){
498 console
.log(token_list
);
499 throw new Error("the code can't be tokenized is near Ln. "+ln
+", Col."+col
500 +", starting with "+ old_x
.value
.remained
.substring(0,10));
506 console
.log(term([], input_matchee_pair
));
508 // TODO: id, string, space, basic operator, 3 marks: @, {, }.