From 68a21c773db9a912ec9a5717fba1787135ab9dda Mon Sep 17 00:00:00 2001
From: Tan Kian-ting
Date: Sat, 30 Mar 2024 17:34:51 +0800
Subject: [PATCH] modify the language

---
 src/index.js | 106 ++++++++++++++++++++-------
 src/index.ts | 200 +++++++++++++++++++++++++++++++--------------------
 2 files changed, 203 insertions(+), 103 deletions(-)

diff --git a/src/index.js b/src/index.js
index b771819..3c0e3a1 100644
--- a/src/index.js
+++ b/src/index.js
@@ -10,10 +10,12 @@ var TokenKind;
     TokenKind[TokenKind["Str"] = 3] = "Str";
     TokenKind[TokenKind["LParen"] = 4] = "LParen";
     TokenKind[TokenKind["RParen"] = 5] = "RParen";
-    TokenKind[TokenKind["SpaceNL"] = 6] = "SpaceNL";
-    TokenKind[TokenKind["At"] = 7] = "At";
-    TokenKind[TokenKind["BSlash"] = 8] = "BSlash";
-    TokenKind[TokenKind["Other"] = 9] = "Other";
+    TokenKind[TokenKind["LBrack"] = 6] = "LBrack";
+    TokenKind[TokenKind["RBrack"] = 7] = "RBrack";
+    TokenKind[TokenKind["SpaceNL"] = 8] = "SpaceNL";
+    TokenKind[TokenKind["BSlash"] = 9] = "BSlash";
+    TokenKind[TokenKind["Apos"] = 10] = "Apos";
+    TokenKind[TokenKind["Other"] = 11] = "Other";
 })(TokenKind || (TokenKind = {}));
 var ItemType;
 (function (ItemType) {
@@ -25,18 +27,43 @@ var ItemType;
 const tokenizer = (0, typescript_parsec_1.buildLexer)([
     [true, /^\d+/g, TokenKind.Int],
     [true, /^\d+\.\d+/g, TokenKind.Flo],
-    [true, /^[\+\-\*\\\w_][0-9\+\-\*\\\w]*/g, TokenKind.Id],
+    [true, /^[+\-*/a-zA-Z_][0-9+\-*/a-zA-Z_]*/g, TokenKind.Id],
     [true, /^\"([^\"]|\\\")+\"/g, TokenKind.Str],
-    [true, /^\(/g, TokenKind.LParen],
-    [true, /^\)/g, TokenKind.RParen],
+    [true, /^[(]/g, TokenKind.LParen],
+    [true, /^[)]/g, TokenKind.RParen],
+    [true, /^\[/g, TokenKind.LBrack],
+    [true, /^\]/g, TokenKind.RBrack],
+    [true, /^'/g, TokenKind.Apos],
     [true, /^(\s|\t|\r?\n)+/g, TokenKind.SpaceNL],
-    [true, /^\@/g, TokenKind.At],
     [true, /^\\/g, TokenKind.BSlash],
-    [true, /^[.]+/g, TokenKind.Other],
+    [true, /^([^+\-*/a-zA-Z_0-9\[\]()'\s\t\r\n\\]+)/g, TokenKind.Other],
 ]);
+/**
+ * ## BNF
+LISP = SINGLE | LISPS | CON_STR
+LISPS = "(" LISP* ")" | "'" "(" LISP* ")"
+SINGLE = INT | FLO | STR | ID
+CON_STR = "[" CON_STR_INNER* "]"
+CON_STR_INNER = ID | INT | FLO | STR | OTHER | SPACE_NL
+              | "\" ("(" | ")" | "[" | "]" | "'" | "\") | LISPS
+ */
 const SINGLE = (0, typescript_parsec_1.rule)();
-const SINGLES = (0, typescript_parsec_1.rule)();
-const PROG_INNER = (0, typescript_parsec_1.rule)();
+const LISPS = (0, typescript_parsec_1.rule)();
+const LISP = (0, typescript_parsec_1.rule)();
+const CON_STR = (0, typescript_parsec_1.rule)();
+const CON_STR_INNER = (0, typescript_parsec_1.rule)();
+function tokenToStr(value) {
+    return {
+        type: ItemType.Str,
+        str: value.text
+    };
+}
+function bSlashTokenToStr(value) {
+    return {
+        type: ItemType.Str,
+        str: value.text
+    };
+}
 function applyId(value) {
     return {
         type: ItemType.Id,
@@ -58,29 +85,56 @@ function applyFlo(value) {
 function applyStr(value) {
     return {
         type: ItemType.Str,
-        str: value.text
+        str: value.text.slice(1, value.text.length - 1)
     };
 }
 function applyList(value) {
     return value;
 }
+function applyQuoted(value) {
+    let head = { type: ItemType.Id,
+        id: "quote" };
+    let merged = [head, value];
+    return merged;
+}
+function applyStrings(value) {
+    let head = [{ type: ItemType.Id,
+            id: "%concat" }];
+    let merged = head.concat(value);
+    return merged;
+}
 /** for convinence to omit the spaces and newlines */
 let __ = (0, typescript_parsec_2.opt)((0, typescript_parsec_2.tok)(TokenKind.SpaceNL));
-function getInsideParathesis(value) {
-    return value[2];
-}
-function giveAST(value) {
-    return value;
-}
+LISP.setPattern((0, typescript_parsec_2.alt)((0, typescript_parsec_2.kleft)(SINGLE, __), (0, typescript_parsec_2.kleft)(LISPS, __), (0, typescript_parsec_2.kleft)(CON_STR, __)));
+SINGLE.setPattern((0, typescript_parsec_2.alt)((0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Id), applyId), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Int), applyInt), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Flo), applyFlo), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Str), applyStr)));
+LISPS.setPattern((0, typescript_parsec_2.alt)((0, typescript_parsec_2.apply)((0, typescript_parsec_2.kmid)((0, typescript_parsec_2.seq)((0, typescript_parsec_2.str)("("), __), (0, typescript_parsec_2.rep_sc)(LISP), (0, typescript_parsec_2.str)(")")), applyList), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.kright)((0, typescript_parsec_2.str)("'"), (0, typescript_parsec_2.kmid)((0, typescript_parsec_2.seq)((0, typescript_parsec_2.str)("("), __), (0, typescript_parsec_2.rep_sc)(LISP), (0, typescript_parsec_2.str)(")"))), applyQuoted)));
+CON_STR_INNER.setPattern((0, typescript_parsec_2.alt)((0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Id), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Int), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Flo), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Str), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Other), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.SpaceNL), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.kright)((0, typescript_parsec_2.tok)(TokenKind.BSlash), (0, typescript_parsec_2.tok)(TokenKind.LParen)), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.kright)((0, typescript_parsec_2.tok)(TokenKind.BSlash), (0, typescript_parsec_2.tok)(TokenKind.RParen)), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.kright)((0, typescript_parsec_2.tok)(TokenKind.BSlash), (0, typescript_parsec_2.tok)(TokenKind.LBrack)), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.kright)((0, typescript_parsec_2.tok)(TokenKind.BSlash), (0, typescript_parsec_2.tok)(TokenKind.RBrack)), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.kright)((0, typescript_parsec_2.tok)(TokenKind.BSlash), (0, typescript_parsec_2.tok)(TokenKind.Apos)), tokenToStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.kright)((0, typescript_parsec_2.tok)(TokenKind.BSlash), (0, typescript_parsec_2.tok)(TokenKind.BSlash)), bSlashTokenToStr), LISPS));
+CON_STR.setPattern((0, typescript_parsec_2.apply)((0, typescript_parsec_2.kmid)((0, typescript_parsec_2.str)("["), (0, typescript_parsec_2.rep_sc)(CON_STR_INNER), (0, typescript_parsec_2.str)("]")), applyStrings));
+function printAST(ast) {
+    if (Array.isArray(ast)) {
+        let ast2 = ast.map(printAST);
+        return "(" + ast2.join(" ") + ")";
+    }
+    else {
+        if (ast.type == ItemType.Str) {
+            return "`" + ast.str + "`";
+        }
+        else if (ast.type == ItemType.Id) {
+            return ast.id;
+        }
+        else if (ast.type == ItemType.Flo) {
+            return ast.flo.toString();
+        }
+        else {
+            return ast.int.toString();
+        }
+    }
+}
-/** SINGLE ::= Int| Flo | Str | Id */
-SINGLE.setPattern((0, typescript_parsec_2.alt)((0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Id), applyId), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Int), applyInt), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Flo), applyFlo), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.tok)(TokenKind.Str), applyStr), (0, typescript_parsec_2.apply)((0, typescript_parsec_2.seq)((0, typescript_parsec_2.tok)(TokenKind.LParen), __, SINGLES, (0, typescript_parsec_2.tok)(TokenKind.RParen)), getInsideParathesis)));
-/** SINGLES ::= SINGLE SP_NL? */
-SINGLES.setPattern((0, typescript_parsec_2.apply)((0, typescript_parsec_2.rep_sc)((0, typescript_parsec_2.kleft)(SINGLE, __)), applyList));
-/** PROG_INNER ::= "(" SP_NL? SINGLES ")" */
-PROG_INNER.setPattern((0, typescript_parsec_2.apply)((0, typescript_parsec_2.kmid)((0, typescript_parsec_2.str)('@'), SINGLE, (0, typescript_parsec_2.str)('@')), giveAST));
 function evaluate(expr) {
-    let a = (0, typescript_parsec_1.expectSingleResult)((0, typescript_parsec_1.expectEOF)(PROG_INNER.parse(tokenizer.parse(expr))));
-    console.log(a);
+    let a = (0, typescript_parsec_1.expectSingleResult)((0, typescript_parsec_1.expectEOF)(LISP.parse(tokenizer.parse(expr))));
+    const util = require('util');
+    console.log(printAST(a));
     return a;
 }
-evaluate("@(let (a 17) (+ a 10))@");
+evaluate(`(main '((text 12)) [ 快狐跳懶狗\\\\\\\[\\\]\\\(\\\)(italic "fox and dog") (bold [OK])])`);
+//evaluate("@(let (a 17) (+ a 10))@")
diff --git a/src/index.ts b/src/index.ts
index db125c4..46a318a 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -9,9 +9,11 @@ enum TokenKind{
   Str,
   LParen,
   RParen,
+  LBrack,
+  RBrack,
   SpaceNL,
-  At,
   BSlash,
+  Apos,
   Other
 }
 
@@ -23,13 +25,29 @@ enum ItemType{
   Str,
 }
 
-interface Item{
-  type : ItemType,
-  int? : BigInt,
-  flo? : number,
-  str? : string,
-  id? : string,
+type Item = ItemStr | ItemInt | ItemId | ItemFlo;
+
+
+interface ItemStr{
+  type : ItemType.Str,
+  str : string,
+}
+
+interface ItemInt{
+  type : ItemType.Int,
+  int : BigInt,
+}
+
+interface ItemId{
+  type : ItemType.Id,
+  id : string,
+}
+
+
+interface ItemFlo{
+  type : ItemType.Flo,
+  flo : number,
 }
 
 type AST = Item | AST[];
@@ -37,48 +55,56 @@ type AST = Item | AST[];
 const tokenizer = buildLexer([
   [true, /^\d+/g, TokenKind.Int],
   [true, /^\d+\.\d+/g, TokenKind.Flo],
-  [true, /^[\+\-\*\\\w_][0-9\+\-\*\\\w]*/g, TokenKind.Id],
+  [true, /^[+\-*/a-zA-Z_][0-9+\-*/a-zA-Z_]*/g, TokenKind.Id],
   [true, /^\"([^\"]|\\\")+\"/g, TokenKind.Str],
-  [true, /^\(/g, TokenKind.LParen],
-  [true, /^\)/g, TokenKind.RParen],
+  [true, /^[(]/g, TokenKind.LParen],
+  [true, /^[)]/g, TokenKind.RParen],
+  [true, /^\[/g, TokenKind.LBrack],
+  [true, /^\]/g, TokenKind.RBrack],
+  [true, /^'/g, TokenKind.Apos],
   [true, /^(\s|\t|\r?\n)+/g, TokenKind.SpaceNL],
-  [true, /^\@/g, TokenKind.At],
   [true, /^\\/g, TokenKind.BSlash],
-  [true, /^[.]+/g, TokenKind.Other],
+  [true, /^([^+\-*/a-zA-Z_0-9\[\]()'\s\t\r\n\\]+)/g, TokenKind.Other],
 ]);
 
+/**
+ * ## BNF
+LISP = SINGLE | LISPS | CON_STR
+LISPS = "(" LISP* ")" | "'" "(" LISP* ")"
+SINGLE = INT | FLO | STR | ID
+CON_STR = "[" CON_STR_INNER* "]"
+CON_STR_INNER = ID | INT | FLO | STR | OTHER | SPACE_NL
+              | "\" ("(" | ")" | "[" | "]" | "'" | "\") | LISPS
+ */
+
 const SINGLE = rule();
-const SINGLES = rule();
-const PROG_INNER = rule();
-const STRINGS = rule();
-const STRING = rule();
+const LISPS = rule();
+const LISP = rule();
+const CON_STR = rule();
+const CON_STR_INNER = rule();
 
-function applyId(value: Token): Item {
-  return {
-    type : ItemType.Id,
-    id : value.text};
-}
-function applyInt(value: Token): Item {
+function tokenToStr(value: Token): Item {
   return {
-    type : ItemType.Int,
-    int : BigInt(value.text)};
+    type : ItemType.Str,
+    str : value.text};
 }
-function applyFlo(value: Token): Item {
-  return {
-    type : ItemType.Flo,
-    flo : +value.text};
-}
-function applyStr(value: Token): Item {
+function bSlashTokenToStr(value: Token): Item {
   return {
     type : ItemType.Str,
     str : value.text};
 }
+function applyId(value: Token): Item {
+  return {
+    type :ItemType.Id,
+    id : value.text};
+}
+
 function applyInt(value: Token): Item {
   return {
     type : ItemType.Int,
@@ -94,86 +120,106 @@ function applyFlo(value: Token): Item {
 function applyStr(value: Token): Item {
   return {
     type : ItemType.Str,
-    str : value.text};
+    str : value.text.slice(1,value.text.length-1)};
 }
 function applyList(value: AST[]):AST{
   return value;
 }
 
-/** for convinence to omit the spaces and newlines */
-let __ = opt(tok(TokenKind.SpaceNL))
-
-function getInsideParathesis (value: [Token, Token|undefined, AST, Token]){
-  return value[2];
+function applyQuoted(value: AST[]):AST{
+  let head : Item = {type : ItemType.Id,
+    id:"quote"}
+  let merged = [head, value];
+  return merged;
 }
 
-function giveAST (value: AST){
-  return value;
+function applyStrings(value: AST[]):AST{
+  let head : AST[] = [{type : ItemType.Id,
+    id:"%concat"}]
+  let merged = head.concat(value);
+  return merged;
 }
 
-/** SINGLE ::= Int| Flo | Str | Id */
+/** for convinence to omit the spaces and newlines */
+let __ = opt(tok(TokenKind.SpaceNL))
+
+LISP.setPattern(
+  alt(
+    kleft(SINGLE, __),
+    kleft(LISPS, __),
+    kleft(CON_STR, __)
+  ))
+
 SINGLE.setPattern(
   alt(
     apply(tok(TokenKind.Id), applyId),
     apply(tok(TokenKind.Int), applyInt),
     apply(tok(TokenKind.Flo), applyFlo),
     apply(tok(TokenKind.Str), applyStr),
-    apply(seq(tok(TokenKind.LParen),__, SINGLES,tok(TokenKind.RParen)),getInsideParathesis),
   ))
 
-/** SINGLES ::= SINGLE SP_NL? */
-SINGLES.setPattern(
-  apply(rep_sc(kleft(SINGLE, __)), applyList))
+LISPS.setPattern(
+alt(
+  apply(kmid(seq(str("("), __),rep_sc(LISP),str(")")), applyList),
+  apply(kright(str("'"),
+    kmid(seq(str("("), __),rep_sc(LISP),str(")"))), applyQuoted),
+))
 
-
-/** PROG_INNER ::= "(" SP_NL? SINGLES ")" */
-PROG_INNER.setPattern(
-  apply(
-    kmid(str('@'), SINGLE, str('@')),
-    giveAST
-  )
-)
-
-/** PROG_INNER ::= "(" SP_NL? SINGLES ")" */
-STRING.setPattern(
+CON_STR_INNER.setPattern(
   alt(
-    apply(tok(TokenKind.Id),idToStr),
-    apply(tok(TokenKind.Float),fLoatToStr),
+    apply(tok(TokenKind.Id),tokenToStr),
+    apply(tok(TokenKind.Int),tokenToStr),
+    apply(tok(TokenKind.Flo),tokenToStr),
+    apply(tok(TokenKind.Str),tokenToStr),
+    apply(tok(TokenKind.Other),tokenToStr),
+    apply(tok(TokenKind.SpaceNL), tokenToStr),
+    apply(kright(tok(TokenKind.BSlash),tok(TokenKind.LParen)), tokenToStr),
+    apply(kright(tok(TokenKind.BSlash),tok(TokenKind.RParen)), tokenToStr),
+    apply(kright(tok(TokenKind.BSlash),tok(TokenKind.LBrack)), tokenToStr),
+    apply(kright(tok(TokenKind.BSlash),tok(TokenKind.RBrack)), tokenToStr),
+    apply(kright(tok(TokenKind.BSlash),tok(TokenKind.Apos)), tokenToStr),
+    apply(kright(tok(TokenKind.BSlash),tok(TokenKind.BSlash)), bSlashTokenToStr),
+    LISPS
   ))
+
+CON_STR.setPattern(
+  apply(kmid(str("["),
+    rep_sc(CON_STR_INNER),
+    str("]")), applyStrings)
 )
 
+function printAST(ast : AST): string{
+  if (Array.isArray(ast)){
+    let ast2 = ast.map(printAST);
+    return "(" + ast2.join(" ") + ")";
+  }else{
+    if (ast.type==ItemType.Str){
+      return "`" + ast.str + "`";
+    }else if (ast.type==ItemType.Id){
+      return ast.id;
+    }else if (ast.type== ItemType.Flo){
+      return ast.flo.toString();
+    }else{
+      return ast.int.toString();
+    }
+  }
+}
 function evaluate(expr: string): AST {
-  let a = expectSingleResult(expectEOF(PROG_INNER.parse(tokenizer.parse(expr))));
-  console.log(a);
+  let a = expectSingleResult(expectEOF(LISP.parse(tokenizer.parse(expr))));
+  const util = require('util')
+  console.log(printAST(a))
   return a;
 }
-
-[true, /^\d+/g, TokenKind.Int],
-[true, /^\d+\.\d+/g, TokenKind.Flo],
-[true, /^[\+\-\*\\\w_][0-9\+\-\*\\\w]*/g, TokenKind.Id],
-[true, /^\"([^\"]|\\\")+\"/g, TokenKind.Str],
-[true, /^\(/g, TokenKind.LParen],
-[true, /^\)/g, TokenKind.RParen],
-[true, /^(\s|\t|\r?\n)+/g, TokenKind.SpaceNL],
-[true, /^\@/g, TokenKind.At],
-[true, /^\\/g, TokenKind.BSlash],
-[true, /^[.]+/g, TokenKind.Other],
-
-
-
-
-
-
-
-evaluate("@(let (a 17) (+ a 10))@")
\ No newline at end of file
+evaluate(`(main '((text 12)) [ 快狐跳懶狗\\\\\\\[\\\]\\\(\\\)(italic "fox and dog") (bold [OK])])`)
+//evaluate("@(let (a 17) (+ a 10))@")
\ No newline at end of file
-- 
2.39.2