From: Tan Kian-ting Date: Thu, 5 Oct 2023 17:09:28 +0000 (+0800) Subject: rewrite parser X-Git-Url: https://git.kianting.info/?a=commitdiff_plain;h=8d03cc503c747bb974c75d39f8b9c0678a9cc91f;p=clo rewrite parser --- diff --git a/README.md b/README.md index 9372723..69d3c81 100644 --- a/README.md +++ b/README.md @@ -23,4 +23,5 @@ License: MIT ``` - 20230928:basically fix `issue1`。其他ê物件猶著做。 - 20230929:add multi args parsing for `callee`. - - 20230930:tîng-khí parser, using `js-token`. \ No newline at end of file + - 20230930:tîng khí parser, using `js-token`. + - 20231016: tîng siá parser, using `ts-parsec`. \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 410edf1..f1c2f4a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,7 +13,8 @@ "harfbuzzjs": "^0.3.3", "js-tokens": "^8.0.2", "npx": "^3.0.0", - "pdf-lib": "^1.17.1" + "pdf-lib": "^1.17.1", + "typescript-parsec": "^0.3.4" }, "devDependencies": { "@types/chai": "^4.3.5", @@ -6411,6 +6412,11 @@ "node": ">=14.17" } }, + "node_modules/typescript-parsec": { + "version": "0.3.4", + "resolved": "https://registry.npmjs.org/typescript-parsec/-/typescript-parsec-0.3.4.tgz", + "integrity": "sha512-6RD4xOxp26BTZLopNbqT2iErqNhQZZWb5m5F07/UwGhldGvOAKOl41pZ3fxsFp04bNL+PbgMjNfb6IvJAC/uYQ==" + }, "node_modules/unbox-primitive": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.0.2.tgz", @@ -11275,6 +11281,11 @@ "integrity": "sha512-mI4WrpHsbCIcwT9cF4FZvr80QUeKvsUsUvKDoR+X/7XHQH98xYD8YHZg7ANtz2GtZt/CBq2QJ0thkGJMHfqc1w==", "dev": true }, + "typescript-parsec": { + "version": "0.3.4", + "resolved": "https://registry.npmjs.org/typescript-parsec/-/typescript-parsec-0.3.4.tgz", + "integrity": "sha512-6RD4xOxp26BTZLopNbqT2iErqNhQZZWb5m5F07/UwGhldGvOAKOl41pZ3fxsFp04bNL+PbgMjNfb6IvJAC/uYQ==" + }, "unbox-primitive": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.0.2.tgz", diff --git a/package.json b/package.json index c1c8661..fbb9cfc 100644 --- a/package.json +++ b/package.json @@ -41,6 +41,7 @@ "harfbuzzjs": "^0.3.3", "js-tokens": "^8.0.2", "npx": "^3.0.0", - "pdf-lib": "^1.17.1" + "pdf-lib": "^1.17.1", + "typescript-parsec": "^0.3.4" } } diff --git a/src/index.js b/src/index.js index 3816130..8fc8c39 100644 --- a/src/index.js +++ b/src/index.js @@ -22,14 +22,11 @@ var __importStar = (this && this.__importStar) || function (mod) { __setModuleDefault(result, mod); return result; }; -var __importDefault = (this && this.__importDefault) || function (mod) { - return (mod && mod.__esModule) ? mod : { "default": mod }; -}; Object.defineProperty(exports, "__esModule", { value: true }); -exports.matchAny = exports.tkTreeToSExp = void 0; +exports.tkTreeToSExp = void 0; var fs = require('fs'); -const js_tokens_1 = __importDefault(require("js-tokens")); const util = __importStar(require("util")); +const p = __importStar(require("typescript-parsec")); /** * * # REPRESENTATION @@ -50,7 +47,7 @@ function tkTreeToSExp(t) { str = "%undefined"; } else { - str = t.value; + str = t; } } return str; @@ -58,129 +55,65 @@ function tkTreeToSExp(t) { exports.tkTreeToSExp = tkTreeToSExp; /**inspect the inner of the representation. */ let repr = (x) => { return util.inspect(x, { depth: null }); }; +var TokenKind; +(function (TokenKind) { + TokenKind[TokenKind["Seperator"] = 0] = "Seperator"; + TokenKind[TokenKind["Semicolon"] = 1] = "Semicolon"; + TokenKind[TokenKind["Number"] = 2] = "Number"; + TokenKind[TokenKind["Op"] = 3] = "Op"; + TokenKind[TokenKind["ExprMark"] = 4] = "ExprMark"; + TokenKind[TokenKind["Paren"] = 5] = "Paren"; + TokenKind[TokenKind["SpaceNL"] = 6] = "SpaceNL"; + TokenKind[TokenKind["Id"] = 7] = "Id"; + TokenKind[TokenKind["Str"] = 8] = "Str"; +})(TokenKind || (TokenKind = {})); /** - * - * # PARSER UNITS + * Parsing */ -function toSome(x) { - return { _tag: "Some", value: x }; -} -/** - * like `m ==> f` in ocaml - * @param m matchee wrapped - * @param f matching function - * @returns wrapped result - */ -function thenDo(m, f) { - if (m._tag == "None") { - return m; - } - else { - var a = f(m.value); - if (a._tag == "Some") { - a.value.ast = m.value.ast.concat(a.value.ast); - } - return a; - } -} +const lexer = p.buildLexer([ + [true, /^\d+(\.\d+)?/g, TokenKind.Number], + [true, /^\;/g, TokenKind.Semicolon], + [true, /^[-][-][-]/g, TokenKind.Seperator], + [true, /^[\+\-\*\/\&\|\!\^\<\>\~\=\?]+/g, TokenKind.Op], + [true, /^\@+/g, TokenKind.ExprMark], + [true, /^[()\[\]{}]/g, TokenKind.Paren], + [true, /^["]([\"]|[\\].)*["]/g, TokenKind.Str], + [true, /^[']([\']|[\\].)*[']/g, TokenKind.Str], + [true, /^[()\[\]{}]/g, TokenKind.Paren], + [true, /^[^\s\n\t\r;]+/g, TokenKind.Id], + [false, /^(\s|\n|\r|\t)+/g, TokenKind.SpaceNL] +]); /** * - * @param m : the `TokenPair` to be consumed. - * @returns if the length of `m.remained` >= 1; consumes the matchee by 1 token - * and wraps it in `Some`, - * otherwise, returns `None`. + * # TEST */ -function matchAny(m) { - if (m.remained.length >= 1) { - return { - _tag: "Some", value: { - matched: m.matched.concat(m.remained[0]), - remained: m.remained.slice(1), - ast: [m.remained[0]], - } - }; - } - else { - return { _tag: "None" }; - } +const inputTxt = `import ast; +--- +122`; +const PROG = p.rule(); +const UNIT = p.rule(); +const IMPORTS = p.rule(); +const SEMICOLON = p.rule(); +let doubleMinus = { type: 'Punctuator', value: '--' }; +let doubleMinus2 = p.str('--'); +const TERM = p.rule(); +function applyUnit(value) { + return value.text; } -exports.matchAny = matchAny; -/** - * like `f1 | f2` in regex - * @param f1 the first tried function - * @param f2 the second tried function - * @returns wrapped result - */ -function orDo(f1, f2) { - return (x) => { - let res1 = f1(x); - if (res1._tag == "Some") { - return res1; - } - else { - let res2 = f2(x); - return res2; - } - }; +function applySemiColon(value) { + return value.text; } -/** - * like regex [^c] - * @param f input token function. one token only. - * @returns combined finction - */ -function notDo(f) { - return (x) => { - let res1 = f(x); - if (res1._tag == "Some") { - return { _tag: "None" }; - } - else { - let res2 = matchAny(x); - return res2; - } - }; +function applyParts(first, second) { + return ["%clo", first, second[1]]; } -function matchToken(typeName, value) { - return (t) => { - let headToken = t.remained[0]; - if (headToken.type != typeName) { - return { _tag: "None" }; - } - else { - if (value === undefined || value == headToken.value) { - let newTokenPair = { - matched: t.matched.concat(headToken), - remained: t.remained.slice(1), - ast: [headToken] - }; - return { _tag: "Some", value: newTokenPair }; - } - else { - return { _tag: "None" }; - } - } - ; - }; +PROG.setPattern(p.lrec_sc(IMPORTS, p.seq(p.str('---'), UNIT), applyParts)); +function applyImports(input) { + let importTail = input[1].map(x => x.text); + return ["import"].concat(importTail); } ; -/** - * - * # TEST - */ -const tokens = Array.from((0, js_tokens_1.default)(`import; foo from\t 'bar'; -import * as util from 'util'; - - -花非花,霧\\{非霧 。{{foo();}}下 -一句`)); -console.log("RESULT=" + repr(tokens)); -var mainTokenPair = { - matched: [], - remained: tokens, - ast: [] -}; -let a = thenDo(thenDo(toSome(mainTokenPair), matchToken('IdentifierName')), notDo(matchToken('Punctuator', ';'))); -console.log("RESULT=" + repr(a)); -if (a._tag == "Some") { - console.log("SEXP=" + tkTreeToSExp(a.value.ast)); -} +IMPORTS.setPattern(p.apply(p.seq(p.str('import'), p.rep_sc(p.tok(TokenKind.Id)), SEMICOLON), applyImports)); +SEMICOLON.setPattern(p.apply(p.tok(TokenKind.Semicolon), applySemiColon)); +UNIT.setPattern(p.apply(p.tok(TokenKind.Number), applyUnit)); +let tree = p.expectSingleResult(p.expectEOF(PROG.parse(lexer.parse(inputTxt)))); +console.log("RESULT=" + tkTreeToSExp(tree)); diff --git a/src/index.ts b/src/index.ts index 60e7487..157b160 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,7 +1,8 @@ var fs = require('fs'); import jsTokens from "js-tokens"; import * as util from 'util'; - +import * as p from 'typescript-parsec'; +import { Token } from 'typescript-parsec'; /** * * # REPRESENTATION @@ -21,7 +22,7 @@ export function tkTreeToSExp(t: tkTree): string{ if (t=== undefined){ str = "%undefined" }else{ - str = t.value; + str = t; } } @@ -35,156 +36,110 @@ let repr = (x : any)=>{return util.inspect(x, {depth: null})}; * # TYPES */ -/** - * TokenPair for tokens' parser combinator - * - * matched: the matched (now and before) tokens - * - * remained: tokens to be matched - * - * ast: abstract syntax tree - */ -export interface TokenPair { - matched: jsTokens.Token[] - remained: jsTokens.Token[] - ast : tkTree[] -} -export type Some = { _tag: "Some"; value: T }; -export type None = { _tag: "None" }; -export type Maybe = Some | None; -type Token = jsTokens.Token; -type tkTree = Token | tkTree[]; +type tkTree = string | tkTree[]; -/** - * - * # PARSER UNITS - */ -function toSome(x:T): Maybe{ - return {_tag: "Some", value: x}; +enum TokenKind { + Seperator, + Semicolon, + Number, + Op, + ExprMark, + Paren, + SpaceNL, + Id, + Str, } /** - * like `m ==> f` in ocaml - * @param m matchee wrapped - * @param f matching function - * @returns wrapped result + * Parsing */ -function thenDo(m : Maybe, f : Function){ - if (m._tag == "None"){ - return m; - }else{ - var a : Maybe = f(m.value); - if (a._tag == "Some"){ - a.value.ast = m.value.ast.concat(a.value.ast); - } +const lexer = p.buildLexer([ + [true, /^\d+(\.\d+)?/g, TokenKind.Number], + [true, /^\;/g, TokenKind.Semicolon], + [true, /^[-][-][-]/g, TokenKind.Seperator], + [true, /^[\+\-\*\/\&\|\!\^\<\>\~\=\?]+/g, TokenKind.Op], + [true, /^\@+/g, TokenKind.ExprMark], + [true, /^[()\[\]{}]/g, TokenKind.Paren], + [true, /^["]([\"]|[\\].)*["]/g, TokenKind.Str], + [true, /^[']([\']|[\\].)*[']/g, TokenKind.Str], + [true, /^[()\[\]{}]/g, TokenKind.Paren], + [true, /^[^\s\n\t\r;]+/g, TokenKind.Id], + [false, /^(\s|\n|\r|\t)+/g, TokenKind.SpaceNL] +]); - return a; - } -} /** * - * @param m : the `TokenPair` to be consumed. - * @returns if the length of `m.remained` >= 1; consumes the matchee by 1 token - * and wraps it in `Some`, - * otherwise, returns `None`. + * # TEST */ -export function matchAny(m: TokenPair): Maybe { - if (m.remained.length >= 1) { - return { - _tag: "Some", value: { - matched: m.matched.concat(m.remained[0]), - remained: m.remained.slice(1), - ast : [m.remained[0]], - } - }; - } else { - return { _tag: "None" }; - } +const inputTxt= +`import ast; +--- +122`; + + +const PROG = p.rule(); +const UNIT = p.rule(); +const IMPORTS = p.rule(); +const SEMICOLON = p.rule(); + + +let doubleMinus = { type: 'Punctuator', value: '--' }; +let doubleMinus2 = p.str('--'); +const TERM = p.rule(); + +function applyUnit(value: Token): tkTree{ + return value.text; } -/** - * like `f1 | f2` in regex - * @param f1 the first tried function - * @param f2 the second tried function - * @returns wrapped result - */ -function orDo(f1 : Function, f2 : Function){ - return (x : TokenPair) =>{ - let res1 : Maybe = f1(x); - if (res1._tag == "Some"){ - return res1; - }else{ - let res2 : Maybe = f2(x); - return res2; - } - } +function applySemiColon(value: Token): tkTree{ + return value.text; } -/** - * like regex [^c] - * @param f input token function. one token only. - * @returns combined finction - */ -function notDo(f : Function){ - return (x : TokenPair) =>{ - let res1 : Maybe = f(x); - if (res1._tag == "Some"){ - return {_tag:"None"}; - }else{ - let res2 = matchAny(x); - return res2; - } - } +function applyParts(first: tkTree, + second: [Token, tkTree]):tkTree { + return ["%clo", first , second[1]]; } -function matchToken(typeName : string, value? : string): - (t : TokenPair) => Maybe{ - return (t)=>{ - let headToken = t.remained[0]; - if (headToken.type != typeName){ - return {_tag:"None"}; - }else{ - if (value === undefined || value == headToken.value){ - let newTokenPair = { - matched: t.matched.concat(headToken), - remained: t.remained.slice(1), - ast : [headToken] - }; - return {_tag : "Some", value : newTokenPair}; - }else{ - return {_tag:"None"}; - } - }; - } -}; + +function applyImports(input: [Token,Token[], tkTree]):tkTree{ + let importTail = input[1].map(x=>x.text); + return ["import"].concat(importTail); +}; + /** - * - * # TEST + * PROG : IMPORTS '---' UNIT; */ -const tokens = Array.from(jsTokens( -`import foo from\t 'bar'; -import * as util from 'util'; +PROG.setPattern( + p.lrec_sc(IMPORTS, p.seq(p.str('---'), UNIT), applyParts) +) -花非花,霧\\{非霧 。{{foo();}}下 -一句`)); +/** + * PROG : 'import' Id* SEMICOLON; + */ +IMPORTS.setPattern( + p.apply(p.seq(p.str('import'), p.rep_sc(p.tok(TokenKind.Id)), SEMICOLON) , applyImports) +); -console.log("RESULT="+repr(tokens)); +/** + * SEMICOLON : ';'; + */ +SEMICOLON.setPattern( + p.apply(p.tok(TokenKind.Semicolon), applySemiColon) +); +/** + * UNIT : Number; + */ +UNIT.setPattern( + p.apply(p.tok(TokenKind.Number), applyUnit) +); -var mainTokenPair : TokenPair = { - matched : [] , - remained : tokens, - ast : []}; +let tree = p.expectSingleResult(p.expectEOF(PROG.parse(lexer.parse(inputTxt)))); -let a = thenDo(thenDo(toSome(mainTokenPair), matchToken('IdentifierName')), - notDo(matchToken('Punctuator', ';'))); -console.log("RESULT="+repr(a)); -if (a._tag == "Some"){ - console.log("SEXP="+tkTreeToSExp(a.value.ast)); -} +console.log("RESULT="+tkTreeToSExp(tree));