X-Git-Url: https://git.kianting.info/?a=blobdiff_plain;f=src%2Findex.ts;h=883d94b742145129ff948e0325d94457b274b8f1;hb=5fbb467bdf501dee03c78817242581d962547c4f;hp=e22fdbbdd6550fe81c5779546472dd388927d95a;hpb=c236ca23a513d0a456add8c98401af10cfbdb295;p=clo diff --git a/src/index.ts b/src/index.ts index e22fdbb..883d94b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -27,10 +27,55 @@ export type Maybe = Some | None; /** * @description * the pair of the string to be matched later and the string that have been matched - * @param matched : string have been matched - * @param remained : string will be tested whether it'll be matched. + * @var matched : have been matched + * @var remained : will be tested whether it'll be matched. + * @var matched_type (optional): the type of the matched string +*/ +export interface MatcheePair { + matched : string + remained : string + matched_type?: TokenType +} + +/** + * The types of Token + * NL, // newline + * + * SP, // half-width space and tab + * + * ID, // identifier + * + * STR, // string + * + * OP, // operator or something like it + * + * FLO, // float num + * + * INT, // Integer */ -export type MatcheePair = {matched : string; remained : string}; +export enum TokenType{ + NL, // newlinw + SP, // half-width space and tab + ID, // identifier + STR, // string + OP, // operator + FLO, // float num + INT, // integer +} + +/** + * tokenized token. + * @var text : the content text + * @var type (optional): the type of the token + * @var col : the column number + * @var ln : the line number + */ +export interface Token{ + text: string, + type?: TokenType, + col: number, + ln: number, +} /** * @description @@ -231,23 +276,87 @@ export function tokenize(input : string){ {matched:"", remained: input}); - // integer = ([+]|[-])\d\d? + // integer = ([+]|[-])?\d\d* let integer = (x : MatcheePair) => { let wrapped_x = toSome(x); let plusMinus = orDo(match1Char('+'), match1Char('-')); // ([+]|[-]) let d = matchRange('0','9'); // \d - return thenDo(thenDo(thenDo(wrapped_x, + var result = thenDo(thenDo(thenDo(wrapped_x, zeroOrOnceDo(plusMinus)),d), zeroOrMoreDo(d)); + + if (result._tag == "Some"){ + result.value.matched_type = TokenType.INT; + } + return result; + } + let space = (x : MatcheePair) =>{ + let wrapped_x = toSome(x); + let s_aux = orDo(match1Char(' '), match1Char('\t')); // (" " | "\t") + var result = thenDo(thenDo(wrapped_x, s_aux), zeroOrMoreDo(s_aux)); + if (result._tag == "Some"){ + result.value.matched_type = TokenType.SP; + } + return result; + } + let newline = (x : MatcheePair) =>{ + let wrapped_x = toSome(x); + // nl = \r?\n + let result = thenDo(thenDo(wrapped_x, + zeroOrOnceDo(match1Char('\r'))), match1Char('\n')); + if (result._tag == "Some"){ + result.value.matched_type = TokenType.NL; + } + return result; + } + + let term = (token_list : Array, x : Some)=>{ + var ln = 1; + var col = 0; + var old_x = x; + let term_list = [newline, space, integer]; + let term_aux = term_list.reduce((x,y)=> orDo(x,y)); + + var new_x : Maybe = thenDo(old_x, term_aux); + while (new_x._tag != "None"){ + if (new_x.value.matched_type != TokenType.NL){ + col += new_x.value.matched.length; + token_list.push({text : new_x.value.matched, + type: new_x.value.matched_type, + ln : ln, + col : col}); + + } + else{ + col = 0; + ln += 1; + + token_list.push({text : new_x.value.matched, + type: new_x.value.matched_type, + ln : ln, + col : col}); + + } + + + old_x = toSome({matched : "", + remained : new_x.value.remained}); + new_x = thenDo(old_x, term_aux); + } + + if (old_x.value.remained.length){ + console.log(token_list); + throw new Error("the code can't be tokenized is near Ln. "+ln+", Col."+col + +", starting with "+ old_x.value.remained.substring(0,10)); + } + + return token_list; } - console.log(input+", result: "); - console.log(thenDo(input_matchee_pair, integer)); + + console.log(term([], input_matchee_pair)); + // TODO: id, string, space, basic operator, 3 marks: @, {, }. } -tokenize("+123"); -tokenize("123"); -tokenize("-123"); -tokenize(" 123"); -tokenize("c123"); \ No newline at end of file +