X-Git-Url: https://git.kianting.info/?a=blobdiff_plain;f=src%2Findex.ts;h=26a690eb3b71659a119668c7e4c5f202faf04a39;hb=d4dfd2e99f564ca880d40172687986ea3ce757f0;hp=8cbd145671c4508ca55759035ba2da57c1edcccb;hpb=e2668789e238707fa38ce3e724e3b64fba5d53b3;p=clo diff --git a/src/index.ts b/src/index.ts index 8cbd145..26a690e 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,256 +1,265 @@ var fs = require('fs'); - -export type Some = { _tag: "Some"; value: T }; -export type None = {_tag: "None"}; - - +import jsTokens from "js-tokens"; +import * as util from 'util'; +import * as p from 'typescript-parsec'; +import { Token } from 'typescript-parsec'; +import { TokenType } from "./tokenize"; /** - * wrap a x in a `Some(T)` - * @param x : variable to be wrapped. - * @returns wrapped `x`. + * + * # REPRESENTATION */ -function toSome(x: T): Some{ - return { _tag: "Some", value: x}; -} /** - * @description Like the `Some(a)` and `None` in Rust. - * - * @example - * ```ts - * let exam1 : Maybe = { _tag: "Some", value: 12 }; - * let exam2 : Maybe = None; - * ``` + * convert a `tkTree` AST to S-expr string + * @param t the `tkTree` + * @returns S-expr String */ -export type Maybe = Some | None; +export function tkTreeToSExp(t: tkTree): string{ + var str = ""; + + if (Array.isArray(t)){ + let strArray = t.map((x)=>tkTreeToSExp(x)); + str = "(" + strArray.join(" ") + ")"; + }else{ + if (t=== undefined){ + str = "%undefined" + }else{ + str = t; + } + } + return str; +} +/**inspect the inner of the representation. */ +let repr = (x : any)=>{return util.inspect(x, {depth: null})}; /** - * @description - * the pair of the string to be matched later and the string that have been matched -*/ -export interface MatcheePair { - /** have been matched */ - matched : string - /** will be tested whether it'll be matched. */ - remained : string + * + * # TYPES + */ + + +type tkTree = string | tkTree[]; + +enum TokenKind { + Seperator, // --- + Semicolon, // ; + Number, + Op, + ExprMark, // @ + ExcapeAt, // \@ + Paren, + SpaceNL, // \s\t\n\r + Id, + Str, + Comment, // /* ooo */ } /** - * @description - * it returns a function which test if the first char of the `remained` part of - * the argument of the function is `c`, if it's true, update the `MatchedPair` wrapped - * in `Some`. Otherwise, it returns `None`. - * * @param c : the char to be test. - * @returns the updated `MatchedPair` wrapped in `Some(x)` or `None`. + * Parsing */ -export function match1Char(c : string) : (m: MatcheePair) => Maybe { - return (m : MatcheePair)=>{ - if (m.remained.length == 0){ - return { _tag: "None" }; - } - const charToBeMatched = m.remained[0]; - if (charToBeMatched === c){ - return {_tag: "Some", value :{ - matched : m.matched + charToBeMatched, - remained : m.remained.substring(1)}}; - } - else{ - return {_tag: "None"}; - } - } -}; +const lexer = p.buildLexer([ + [true, /^\d+(\.\d+)?/g, TokenKind.Number], + [true, /^\\\@/g, TokenKind.ExcapeAt], + [true, /^\/\*([^/]|\/[^*])*\*\//g, TokenKind.Comment], + [true, /^\;/g, TokenKind.Semicolon], + [true, /^[-][-][-]/g, TokenKind.Seperator], + [true, /^[\+\-\*\/\&\|\!\^\<\>\~\=\?]+/g, TokenKind.Op], + [true, /^\@/g, TokenKind.ExprMark], + [true, /^[()\[\]{}]/g, TokenKind.Paren], + [true, /^["]([\"]|[\\].)*["]/g, TokenKind.Str], + [true, /^[']([\']|[\\].)*[']/g, TokenKind.Str], + [true, /^[()\[\]{}]/g, TokenKind.Paren], + [true, /^[^\/\\\@\s\n\t\r;]+/g, TokenKind.Id], + [true, /^(\s|\n|\r|\t)+/g, TokenKind.SpaceNL], + +]); /** * - * @param m : the `MatcheePair` to be consumed. - * @returns if the length of `m.remained` >= 1; consumes the matchee by 1 char and wraps it in `Some`, - * otherwise, returns `None`. + * # TEST */ -export function matchAny(m : MatcheePair) : Maybe{ - if (m.remained.length >= 1){ - return {_tag: "Some", value :{ - matched : m.matched + m.remained[0], - remained : m.remained.substring(1)}}; - }else{ - return {_tag: "None"}; - } +const inputTxt= +`import a as b; /*bacourt*/ +/* ba choir +ipsum lorem*/ + +import you as john; +--- + +臺中市\\\@ + +公園 +@1+2==3; + +console.log("122");@ + +山頂 +`; + + +const PROG = p.rule(); +const SEGMENT = p.rule(); +const IMPORT = p.rule(); +const IMPORTS = p.rule(); +const SEMICOLON = p.rule(); +const EXCAPE_AT = p.rule(); +const NOT_AT_TEXT = p.rule(); +const CONTENT = p.rule(); + +let doubleMinus = { type: 'Punctuator', value: '--' }; +let doubleMinus2 = p.str('--'); +const TERM = p.rule(); + +function applySegment(input: [Token, Token[], + Token]): tkTree[]{ + let unpackedInnerExprs = input[1].map((x)=>{return x.text}); + return ["%exprs", unpackedInnerExprs]; } -/** - * @description - * it returns a function which test if the first char of the `remained` part of - * the argument of the function is between `l` and `u`, if it's true, update the `MatchedPair` wrapped - * in `Some`. Otherwise, it returns `None`. - * * @param l : lower bound char, 1-char string - * * @param u : upper bound char, 1-char string - * @returns the updated `MatchedPair` wrapped in `Some(x)` or `None`. - */ -export function matchRange(l : string, u : string) : (m: MatcheePair) => Maybe { - let lCodepoint = charToCodepoint(l); - let uCodepoint = charToCodepoint(u); - if (l > u){ - throw new Error("Error: the codepoint of `"+l+"` is not smaller than `"+u+"`)"); - } - return (m : MatcheePair)=>{ - if (m.remained.length < 1){ - return {_tag : "None"}; - } - const charToBeMatched = m.remained[0]; - const codePointToBeMatched = charToCodepoint(charToBeMatched); - if (codePointToBeMatched >= lCodepoint && codePointToBeMatched <= uCodepoint){ - return {_tag: "Some", value :{ - matched : m.matched + charToBeMatched, - remained : m.remained.substring(1)}}; - } - else{ - return {_tag: "None"}; - } +function applySemiColon(value: Token): tkTree{ + return value.text; +} + +function applyParts(first: tkTree, + second: [Token, tkTree]):tkTree { + return ["%clo", first , second[1]]; +} + + +function applyComment(value: Token): tkTree[]{ + return [value.text]; +} + + +function applyImport(input: [Token,Token[], tkTree]) : tkTree{ + let importTail = input[1].map(x=>x.text); + return ["import"].concat(importTail); +}; + + +/* +function applyImportComment(input: [Token,Token[], + tkTree, Token]) : tkTree{ + let importTail = input[1].map(x=>x.text); + let comment = [input[3].text]; + return ["import"].concat(importTail).concat(comment); +};*/ + +function applyImports(input : [tkTree, tkTree[]]): tkTree{ + let resultBody = [input[0]].concat(input[1]); + let resultWrapper = ["%import", resultBody]; + return resultWrapper; +}; + + + + +function applyNotAtText(value : Token): tkTree{ + if (value.text == "\\\@"){ + return '@'; } + else{return value.text;} }; +function applyText (input : tkTree): tkTree[]{ + return ["%text", input]; +}; + +function applyContent(input : tkTree[]): tkTree[]{ + return ["%content", input]; +}; + +function applySpaceNL(value : Token): tkTree{ + return value.text; +} + /** - * convert the one-char string to codepoint. - * @param s : the string to code point. - * @returns if `s.length > 1` return error; otherwise, return the codepoint of `s`. + * IMPORTEE: Number, Op, Paren, Id, Str, Comment, */ -export function charToCodepoint(s : string): number{ - if (s.length > 1){ - throw new Error("Error: the length of input string for "+s+ "is "+s.length+`, - however, it should be 1.`); - }else{ - return s.charCodeAt(0); - } -} +let IMPORTEE = p.alt(p.tok(TokenKind.Number), + p.tok(TokenKind.Op), + p.tok(TokenKind.Paren), + p.tok(TokenKind.Id), + p.tok(TokenKind.Str), + p.tok(TokenKind.SpaceNL), + p.tok(TokenKind.Comment)); + +let NOT_AT = p.alt(p.tok(TokenKind.Seperator), + p.tok(TokenKind.Semicolon), + p.tok(TokenKind.Number), + p.tok(TokenKind.ExcapeAt), + p.tok(TokenKind.Op), + p.tok(TokenKind.Paren), + p.tok(TokenKind.SpaceNL), + p.tok(TokenKind.Id), + p.tok(TokenKind.Str), + p.tok(TokenKind.Comment), + ); /** - * @description thendo(input, f, ...) like - * a ==> f - * @param input: the wrapped input. - * @param f: the function to be applied. - * - * @returns:the applied wrapped result `MatcheePair`. + * PROG : IMPORTS '---' CONTENT; */ -export function thenDo(input : Maybe, f : Function) : Maybe{ - if (input._tag == "None"){ - return input; - } - else{ - let inner = input.value; - return f(inner); - } -} +PROG.setPattern( + p.lrec_sc(IMPORTS, p.seq(p.str('---'), CONTENT), applyParts) + +) /** - * @description "or", like the regex `( f1 | f2 )` . - * It returns a function `f` of which the argument is`x`. - * if `f1(x)` is None, then `f` returns `f2(x)`. Otherwise, - * `F` returns `f1(x)`. - * @param f1 : 1st function to be compared - * @param f2 : 2nd function to be compared - * @returns:the combined function + * NOT_AT_TEXT : NOT_AT */ -export function orDo(f1 : Function, f2: Function) : (x : T ) => Maybe{ - return (x) => { - let f1x : Maybe = (f1(x)); - { - if (f1x._tag == "None"){ - return f2(x); - } - else{ - return f1x; - } - } - }; -} +NOT_AT_TEXT.setPattern( + p.apply(NOT_AT, applyNotAtText) +); +IMPORTS.setPattern( + p.apply( p.seq(IMPORT, p.rep(IMPORT)), applyImports) +); /** -* @description repeating matching function `f` -* zero or more times, like the asterisk `*` in regex `f*` . -* @param f : the function to be repeated 0+ times. -* @returns:the combined function -*/ -export function zeroOrMoreDo(f : Function): (x : T) => Maybe{ - return (x)=>{ - var wrapped_old_x : Maybe = {_tag: "Some", value : x}; - var wrapped_new_x : Maybe = wrapped_old_x; - - while (wrapped_new_x._tag != "None"){ - wrapped_old_x = wrapped_new_x; - wrapped_new_x = thenDo(wrapped_old_x, f); - }; - - return wrapped_old_x; - }; -} + * IMPORT : + * 'import' IMPORTEE* SEMICOLON | + * COMMENT | + */ +IMPORT.setPattern( + p.alt( + p.apply(p.seq(p.str('import'), p.rep_sc(IMPORTEE), SEMICOLON), + applyImport), + p.apply(p.tok(TokenKind.Comment), applyComment), + p.apply(p.tok(TokenKind.SpaceNL), applySpaceNL) + + ) +); /** -* @description Not. like the `^` inside regex of [^f]. -* returns a function `F(x)` such that if `f(x)` is `None`, -* returns the x consuming a char; if `f(x)` is not None, F(x) -* returns `None`. -* @param f: the function forbidden to be matched. -* @returns: combined function `F`. -*/ -export function notDo(f : Function): (x : T) => Maybe{ - return (x)=>{ - let wrapped_x : Maybe = { - _tag : "Some", - value : x - }; - let f_x = thenDo(wrapped_x, f); - - if (f_x._tag != "None"){ - return {_tag:"None"}; - }else{ - return thenDo(wrapped_x, matchAny); - } - }; -} + * SEMICOLON : ';'; + */ +SEMICOLON.setPattern( + p.apply(p.tok(TokenKind.Semicolon), applySemiColon) +); + + /** - * if `x` is matched by `f` once, returns `f(x)`. Otherwise, - * returns x - * similar to `?` in regex `f?`. - * @param f : the function to be matched - * @returns return wrapped f(x) + * SEGMENT : '@' NOT_AT* '@' | + * (NOT_AT_TEXT | EXCAPE_AT)* */ -export function zeroOrOnceDo(f : Function): (x : T) => Maybe{ - return (x)=>{ - var wrapped_old_x : Maybe = {_tag: "Some", value : x}; - var wrapped_new_x = thenDo(wrapped_old_x, f); +SEGMENT.setPattern( + p.alt( + p.apply(p.rep_sc(NOT_AT_TEXT), applyText), + p.apply(p.seq(p.str('@'), p.rep(NOT_AT), p.str('@')), applySegment), + ) +); - if (wrapped_new_x._tag != "None"){ - return wrapped_new_x; - }else{ - return wrapped_old_x; - } - }; -} +/** + * CONTENT : SEGMENT* + */ +CONTENT.setPattern( + p.apply(p.rep(SEGMENT), applyContent) +); -export function tokenize(input : string){ - var input_matchee_pair : Maybe = toSome( - {matched:"", - remained: input}); +let tree = p.expectSingleResult(p.expectEOF(PROG.parse(lexer.parse(inputTxt)))); - // integer = ([+]|[-])?\d\d* - let integer = (x : MatcheePair) => - { let wrapped_x = toSome(x); - let plusMinus = orDo(match1Char('+'), match1Char('-')); // ([+]|[-]) - let d = matchRange('0','9'); // \d - return thenDo(thenDo(thenDo(wrapped_x, - zeroOrOnceDo(plusMinus)),d), - zeroOrMoreDo(d)); - } - console.log(input+", result: "); - console.log(thenDo(input_matchee_pair, integer)); - // TODO: id, string, space, basic operator, 3 marks: @, {, }. -} -tokenize("+123"); -tokenize("123"); -tokenize("-123"); -tokenize(" 123"); -tokenize("c123"); +console.log("RESULT="+tkTreeToSExp(tree));