X-Git-Url: https://git.kianting.info/?a=blobdiff_plain;f=src%2Findex.ts;h=26a690eb3b71659a119668c7e4c5f202faf04a39;hb=d4dfd2e99f564ca880d40172687986ea3ce757f0;hp=3bf281fddc0d462beb6ab89500def42091181a13;hpb=d3447bfe8439ab0ecf7cc6cb51ddb065abeca47d;p=clo diff --git a/src/index.ts b/src/index.ts index 3bf281f..26a690e 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,61 +1,13 @@ var fs = require('fs'); -import { argv, resourceUsage } from 'node:process'; -import * as tk from './tokenize.js'; +import jsTokens from "js-tokens"; import * as util from 'util'; -import { drawEllipsePath, reduceRotation } from 'pdf-lib'; -import { isAnyArrayBuffer, isTypedArray } from 'node:util/types'; -import { error } from 'node:console'; -import { isUndefined } from 'node:util'; - +import * as p from 'typescript-parsec'; +import { Token } from 'typescript-parsec'; +import { TokenType } from "./tokenize"; /** - * debug reprensenting - */ -let repr = (x : any)=>{return util.inspect(x, {depth: null})}; - -/** - * token tree type. - */ -type tkTree = tkTree[] | tk.Token - -/** - * concated 2 `tkTree`s - * @param x the array to be concated - * @param y the item or array to ve concated - * @returns concated tkTree array, or thrown error if can't be concated. - */ -function concat(x: tkTree, y:tkTree): tkTree[] { - if (Array.isArray(x)){ - return x.concat(y); - }else{ - throw new Error("the tkTree can't be concated, because it's not an array."); - - } -} - -function slice(x: tkTree, index?:number, end?:number): tkTree[] { - if (Array.isArray(x)){ - return x.slice(index,end); - }else{ - throw new Error("the tkTree can't be concated, because it's not an array."); - - } -} - -/** - * TokenMatcheePair for tokens' parser combinator - * - * matched: the matched (now and before) tokens - * - * remained: tokens to be matched * - * ast: abstract syntax tree + * # REPRESENTATION */ -export interface TokenMatcheePair { - matched: tk.Token[] - remained: tk.Token[] - ast : tkTree[] -} - /** * convert a `tkTree` AST to S-expr string * @param t the `tkTree` @@ -71,400 +23,243 @@ export function tkTreeToSExp(t: tkTree): string{ if (t=== undefined){ str = "%undefined" }else{ - str = t.text; + str = t; } } return str; } +/**inspect the inner of the representation. */ +let repr = (x : any)=>{return util.inspect(x, {depth: null})}; /** - * @description - * match one token type. * - * it returns a function which test if the type of first token of the `remained` part of - * the argument of the function is `typ` , if it's true, update the `TokenMatcheePair` wrapped - * in `Some`. Otherwise, it returns `None`. - * * @param typ : the type to be test. - * @returns the updated `TokenMatcheePair` wrapped in `Some(x)` or `None`. 
+ * # TYPES */ -export function m1TType(typ: tk.TokenType): - (m: TokenMatcheePair) => tk.Maybe { - return (m: TokenMatcheePair) => { - if (m.remained.length == 0) { - return { _tag: "None" }; - } - /** - * token to be matched - * */ - const ttbm = m.remained[0]; - - if (ttbm.type == typ) { - let new_matched = m.matched.concat(ttbm); - let result : tk.Some = { - _tag: "Some", value: { - matched: new_matched, - remained: m.remained.slice(1), - ast: ([ttbm]), - } - }; - return result; - } - else { - return { _tag: "None" }; - } - } -}; - -/** - * type int - */ -let tInt = m1TType(tk.TokenType.INT); -let tId = m1TType(tk.TokenType.ID); - -let tAdd = m1TType(tk.TokenType.I_ADD); -let tSub = m1TType(tk.TokenType.I_SUB); -let tMul = m1TType(tk.TokenType.I_MUL); -let tDiv = m1TType(tk.TokenType.I_DIV); -let tLParen = m1TType(tk.TokenType.L_PAREN); -let tRParen = m1TType(tk.TokenType.R_PAREN); -let tComma = m1TType(tk.TokenType.COMMA); -let toSome = tk.toSome; - - -argv.forEach((val, index) => { - console.log(`${index}=${val}`); -}); - - -/** - * like `m ==> f` in ocaml - * @param m matchee wrapped - * @param f matching function - * @returns wrapped result - */ -function thenDo(m : tk.Maybe, f : Function){ - if (m._tag == "None"){ - return m; - }else{ - var a : tk.Maybe = f(m.value); - if (a._tag == "Some"){ - a.value.ast = concat(m.value.ast, a.value.ast); - } - return a; - } +type tkTree = string | tkTree[]; + +enum TokenKind { + Seperator, // --- + Semicolon, // ; + Number, + Op, + ExprMark, // @ + ExcapeAt, // \@ + Paren, + SpaceNL, // \s\t\n\r + Id, + Str, + Comment, // /* ooo */ } /** - * like `f1 | f2` in regex - * @param f1 the first tried function - * @param f2 the second tried function - * @returns wrapped result + * Parsing */ -function orDo(f1 : Function, f2 : Function){ - return (x : TokenMatcheePair) =>{ - let res1 : tk.Maybe = f1(x); - if (res1._tag == "Some"){ - return res1; - }else{ - let res2 : tk.Maybe = f2(x); - return res2; - } - } -} - +const lexer = p.buildLexer([ + [true, /^\d+(\.\d+)?/g, TokenKind.Number], + [true, /^\\\@/g, TokenKind.ExcapeAt], + [true, /^\/\*([^/]|\/[^*])*\*\//g, TokenKind.Comment], + [true, /^\;/g, TokenKind.Semicolon], + [true, /^[-][-][-]/g, TokenKind.Seperator], + [true, /^[\+\-\*\/\&\|\!\^\<\>\~\=\?]+/g, TokenKind.Op], + [true, /^\@/g, TokenKind.ExprMark], + [true, /^[()\[\]{}]/g, TokenKind.Paren], + [true, /^["]([\"]|[\\].)*["]/g, TokenKind.Str], + [true, /^[']([\']|[\\].)*[']/g, TokenKind.Str], + [true, /^[()\[\]{}]/g, TokenKind.Paren], + [true, /^[^\/\\\@\s\n\t\r;]+/g, TokenKind.Id], + [true, /^(\s|\n|\r|\t)+/g, TokenKind.SpaceNL], + +]); /** * - * @param m : the `MatcheePair` to be consumed. - * @returns if the length of `m.remained` >= 1; consumes the matchee by 1 token - * and wraps it in `Some`, - * otherwise, returns `None`. - */ -export function matchAny(m: TokenMatcheePair): tk.Maybe { - if (m.remained.length >= 1) { - return { - _tag: "Some", value: { - matched: m.matched.concat(m.remained[0]), - remained: m.remained.slice(1), - ast : [m.remained[0]], - } - }; - } else { - return { _tag: "None" }; - } -} - -/** - * Danger : Maybe it's not enough to work. -* @description repeating matching function `f` -* zero or more times, like the asterisk `*` in regex `f*` . -* @param f : the function to be repeated 0+ times. 
-* @returns:the combined function -*/ -export function OnceOrMoreDo(f: Function): (x: TokenMatcheePair) => - tk.Maybe { - return (x) => { - var wrappedOldX: tk.Maybe = { _tag: "Some", value: x }; - var wrappedNewX: tk.Maybe = wrappedOldX; - - var counter = -1; - - while (wrappedNewX._tag != "None") { - wrappedOldX = wrappedNewX; - wrappedNewX = thenDo(wrappedOldX, f); - counter += 1; - - }; - - if (counter <= 0){ - return { _tag: "None"}; - } - let ast = wrappedOldX.value.ast ; - wrappedOldX.value.ast =ast.slice(ast.length-counter); - console.log(repr(wrappedOldX.value.ast)); - - return wrappedOldX; }; -} - -/** - * aux function for midfix operator - * @param f function - * @param signal the rule name - * @returns + * # TEST */ -let midfix = (f : Function, signal? : string) => (x : TokenMatcheePair)=>{ - var a = f(x); - if (a._tag == "Some"){ - let ast_tail : tkTree[] = slice(a.value.ast,a.value.ast.length-3); - let new_ast = [ast_tail]; - a.value.ast = new_ast; +const inputTxt= +`import a as b; /*bacourt*/ +/* ba choir +ipsum lorem*/ - // console.log("+"+signal+"+"+repr(a)); +import you as john; +--- - - } - return a; -} +臺中市\\\@ -let circumfix = (f : Function, signal? : string) => (x : TokenMatcheePair)=>{ - var a = f(x); - if (a._tag == "Some"){ - console.log("$$$"+repr(a.value.ast)); - let inner = a.value.ast[a.value.ast.length-2]; - var ast_middle : tkTree[]; - if (Array.isArray(inner)){ - ast_middle = inner; - } - else{ - ast_middle = [inner]; - } - let new_ast = [ast_middle]; - a.value.ast = new_ast; - } - return a; -} +公園 +@1+2==3; -/** single1 = tInt | "(" expr ")"*/ -let single1 = circumfix((x : TokenMatcheePair) => - thenDo(thenDo(thenDo(toSome(x), tLParen), expr), tRParen), "fac1"); -let single2= tInt; -let single = orDo(single1, single2); - -/** args = single "," args | single */ -let args1 = (x: TokenMatcheePair)=>{ - var ret = thenDo(thenDo(thenDo(toSome(x), single), tComma), args); - if (ret._tag == "Some"){ - let retLength = ret.value.ast.length; - ret.value.ast = [[ret.value.ast[retLength-3]].concat(ret.value.ast[retLength-1])]; - console.log("$$"+repr(ret.value.ast)); - } - return ret; -}; +console.log("122");@ -let args2 = single; +山頂 +`; -let args = orDo(args1, args2); +const PROG = p.rule(); +const SEGMENT = p.rule(); +const IMPORT = p.rule(); +const IMPORTS = p.rule(); +const SEMICOLON = p.rule(); +const EXCAPE_AT = p.rule(); +const NOT_AT_TEXT = p.rule(); +const CONTENT = p.rule(); -/** callees = "(" args ")" | "(" ")" */ +let doubleMinus = { type: 'Punctuator', value: '--' }; +let doubleMinus2 = p.str('--'); +const TERM = p.rule(); +function applySegment(input: [Token, Token[], + Token]): tkTree[]{ + let unpackedInnerExprs = input[1].map((x)=>{return x.text}); + return ["%exprs", unpackedInnerExprs]; +} -let callees1 = circumfix((x : TokenMatcheePair) => - thenDo(thenDo(thenDo(toSome(x), tLParen), args), tRParen), "callees1"); -let callees2 = (x: TokenMatcheePair)=>{ - let ret = thenDo(thenDo(toSome(x), tLParen), tRParen); - if (ret._tag == "Some"){ - let new_ast : tkTree[] = [[]]; - ret.value.ast = new_ast; - } - - return ret}; - -let callees = orDo(callees1, callees2); +function applySemiColon(value: Token): tkTree{ + return value.text; +} +function applyParts(first: tkTree, + second: [Token, tkTree]):tkTree { + return ["%clo", first , second[1]]; +} -/** %apply R combinating token */ -let applyToken = { - text: "%apply", - type: tk.TokenType.ID, - col: 0, - ln: 0, +function applyComment(value: Token): tkTree[]{ + return [value.text]; } -/** facAux = callees 
facAux | callees */ -let facAux1 = (x: TokenMatcheePair)=>{ - var ret = thenDo(thenDo(toSome(x), callees), facAux); - if (ret._tag == "Some"){ - console.log("1232345"+repr(tkTreeToSExp(ret.value.ast[ret.value.ast.length-1]))); - let last1 = ret.value.ast[ret.value.ast.length-1]; - let last2 = ret.value.ast[ret.value.ast.length-2]; - - let b : tkTree[] = [applyToken]; - ret.value.ast = [b.concat([last2, last1])]; - console.log("11111"+repr(tkTreeToSExp(ret.value.ast))); +function applyImport(input: [Token,Token[], tkTree]) : tkTree{ + let importTail = input[1].map(x=>x.text); + return ["import"].concat(importTail); +}; + - }; +/* +function applyImportComment(input: [Token,Token[], + tkTree, Token]) : tkTree{ + let importTail = input[1].map(x=>x.text); + let comment = [input[3].text]; + return ["import"].concat(importTail).concat(comment); +};*/ -return ret;} -let facAux2 = callees; -let facAux = orDo(facAux1, facAux2); +function applyImports(input : [tkTree, tkTree[]]): tkTree{ + let resultBody = [input[0]].concat(input[1]); + let resultWrapper = ["%import", resultBody]; + return resultWrapper; +}; -/** fac = single facAux | single - * Issue1 to be fixed. - */ -let fac1 = (x: TokenMatcheePair)=>{ - var ret = thenDo(thenDo(toSome(x), single),facAux); - if(ret._tag == "Some"){ - console.log("777"+repr(tkTreeToSExp(ret.value.ast))); - ret.value.ast = [applyToken, ret.value.ast[ret.value.ast.length-2], - ret.value.ast[ret.value.ast.length-1]]; - ret.value.ast; - rearrangeTree(ret.value.ast); - console.log("888"+repr(tkTreeToSExp(ret.value.ast))); +function applyNotAtText(value : Token): tkTree{ + if (value.text == "\\\@"){ + return '@'; } + else{return value.text;} +}; - return ret;}; -let fac2 = single; -let fac = orDo(fac1, fac2); +function applyText (input : tkTree): tkTree[]{ + return ["%text", input]; +}; +function applyContent(input : tkTree[]): tkTree[]{ + return ["%content", input]; +}; + +function applySpaceNL(value : Token): tkTree{ + return value.text; +} /** - * rearrangeTree : for applyToken subtree from right-combination to - * left-combination - * @input x a ast - * @return another ast + * IMPORTEE: Number, Op, Paren, Id, Str, Comment, */ -function rearrangeTree(x: any) : any { - - if (x !== undefined){ - for (var i=1;i - thenDo(thenDo(thenDo(toSome(x), fac), orDo(tMul,tDiv)), fac), "term1"); +) - /** - * - * term2 = int MUL int + * NOT_AT_TEXT : NOT_AT */ -let term2 = fac; +NOT_AT_TEXT.setPattern( + p.apply(NOT_AT, applyNotAtText) +); -/** - * term = term1 | term2 - */ -let term = orDo(term1, term2); - +IMPORTS.setPattern( + p.apply( p.seq(IMPORT, p.rep(IMPORT)), applyImports) +); /** - * - * expr1 = term ADD term - */ -let expr1 = midfix((x : TokenMatcheePair)=> - thenDo(thenDo(thenDo(toSome(x), term), orDo(tAdd,tSub)), term), "expr1"); -/** - * expr2 = term + * IMPORT : + * 'import' IMPORTEE* SEMICOLON | + * COMMENT | */ -let expr2 = term; +IMPORT.setPattern( + p.alt( + p.apply(p.seq(p.str('import'), p.rep_sc(IMPORTEE), SEMICOLON), + applyImport), + p.apply(p.tok(TokenKind.Comment), applyComment), + p.apply(p.tok(TokenKind.SpaceNL), applySpaceNL) + + ) +); /** - * expr = expr1 | expr2 + * SEMICOLON : ';'; */ -let expr = orDo(expr1, expr2); - - - -let tokens = tk.tokenize("1"); -let tokens2 = tk.tokenize("1(2)"); -let tokens3 = tk.tokenize("1(2)(3)"); -let tokens4 = tk.tokenize("2()(4)"); - -//let tokens = tk.tokenize("(4-(3/4))"); -//tk.tokenize(argv[2]); +SEMICOLON.setPattern( + p.apply(p.tok(TokenKind.Semicolon), applySemiColon) +); -let tokensFiltered = tokens4.filter( - 
(x)=>{return (x.type != tk.TokenType.NL - && x.type != tk.TokenType.SP)}); +/** + * SEGMENT : '@' NOT_AT* '@' | + * (NOT_AT_TEXT | EXCAPE_AT)* + */ +SEGMENT.setPattern( + p.alt( + p.apply(p.rep_sc(NOT_AT_TEXT), applyText), + p.apply(p.seq(p.str('@'), p.rep(NOT_AT), p.str('@')), applySegment), + ) +); -let beta = expr({ - matched : [] , - remained : tokensFiltered, - ast : []}); - +/** + * CONTENT : SEGMENT* + */ +CONTENT.setPattern( + p.apply(p.rep(SEGMENT), applyContent) +); -if (beta._tag == "Some"){ - beta.value.ast = rearrangeTree(beta.value.ast); - console.log(tkTreeToSExp(beta.value.ast)); +let tree = p.expectSingleResult(p.expectEOF(PROG.parse(lexer.parse(inputTxt)))); -} -console.log("RESULT="+repr(beta)); +console.log("RESULT="+tkTreeToSExp(tree));
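
Note for readers unfamiliar with typescript-parsec: the standalone sketch below is not part of the commit above. It shows, on a deliberately tiny grammar, the same buildLexer / rule / setPattern / apply / expectEOF / expectSingleResult pattern that the new code relies on. The token kinds and rule names here (NumTok, SUM, miniLexer) are illustrative assumptions, not identifiers from this repository.

// Minimal sketch of the typescript-parsec pattern used in the new index.ts.
import * as p from 'typescript-parsec';
import { Token } from 'typescript-parsec';

// Hypothetical token kinds for this example only.
enum NumTok { Number, Comma, Space }

// buildLexer takes [keep, regex, kind] triples, like the lexer above.
// keep=false drops the token before it ever reaches the parser.
const miniLexer = p.buildLexer([
    [true,  /^\d+/g, NumTok.Number],
    [true,  /^,/g,   NumTok.Comma],
    [false, /^\s+/g, NumTok.Space],
]);

// A rule is declared first and given its pattern later with setPattern,
// which is what lets mutually recursive rules (like PROG/CONTENT/SEGMENT
// in the diff) refer to each other.
const SUM = p.rule<NumTok, number>();

// apply(parser, f) runs the parser, then maps its result through f;
// seq of three token parsers yields a tuple of three matched tokens.
SUM.setPattern(
    p.apply(
        p.seq(p.tok(NumTok.Number), p.tok(NumTok.Comma), p.tok(NumTok.Number)),
        ([a, , b]: [Token<NumTok>, Token<NumTok>, Token<NumTok>]) =>
            Number(a.text) + Number(b.text)
    )
);

// expectEOF rejects parses that leave input unconsumed;
// expectSingleResult unwraps the single successful parse or throws.
const result = p.expectSingleResult(p.expectEOF(SUM.parse(miniLexer.parse("12, 34"))));
console.log(result); // 46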