From 6f2e788329da7702ea96dc28ae04499917ec8152 Mon Sep 17 00:00:00 2001 From: Tan Kian-ting Date: Sun, 10 Sep 2023 23:45:01 +0800 Subject: [PATCH 1/1] 20230910 : add basic parser `CONST` rule, and add the grammar rule. --- README.md | 1 + package.json | 2 +- parser_rule.txt | 66 ++++++++++++++++++++++++++++++ src/index.js | 84 ++++++++++++++++++++++++++++++++------ src/index.ts | 104 +++++++++++++++++++++++++++++++++++++++++------- src/tokenize.ts | 26 +++++++++++- 6 files changed, 252 insertions(+), 31 deletions(-) create mode 100644 parser_rule.txt diff --git a/README.md b/README.md index 067b36a..4d8beeb 100644 --- a/README.md +++ b/README.md @@ -7,3 +7,4 @@ another personal draught of a typesetting language and engine. `matchAny`, `notDo`, `orDo`, `zeroOrMoreDo`, `zeroOrOnceDo` - 20230905-07:強化`tokenize`, 加強功能,加`Token`界面。 - 20230907-08:強化`tokenize`。 + - 20230910 : add basic parser `CONST` rule, and add the grammar rule. diff --git a/package.json b/package.json index 58a28b7..0f7c93e 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "clo", "version": "0.0.1", "description": "a little typesetting engine in TypeScript", - "main": "index.js", + "main": "src/index.js", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" }, diff --git a/parser_rule.txt b/parser_rule.txt new file mode 100644 index 0000000..65ad348 --- /dev/null +++ b/parser_rule.txt @@ -0,0 +1,66 @@ +let sqrtSum = (int x, int y) -> int { +let x2 = x * x; +let y2 = y * y; +return x2+y2; +} + +let pi = 3.14159; +let _2p = (intToFLo 2) *. 
pi; + +let c = if (2 == 2) then 2 else 3; + +let aStr = "hello"; + +let rec fac = (int n)-> int { + if n == 0 then 1 else (fac (n - 1));}; + + +type student = Student {int id, string name}; + +let alice = Student {id=20, name="Alice"}; + +alice.name = "Siobhan"; + +let customAnd = (@ 'a has age) ('a x, 'a y) => {'a > 'b}; + +type list 'a = (Cons 'a (List 'a)) | Nil; + +import("alifbata.clo"); # 匯入檔案 alifbata # + +t of import :== string -> Option string string +Error("string") | Ok("import aaa") +# 型別構造子統一大寫,型別小寫 # + + + +PROG ::= (STMT | COMMENT | STMT_COMMENT)* +COMMENT ::= # COMMENT_INNER # +COMMENT_INNER ::= [^#]+ +STMT ::= (TYPE_DEF | VAR_DEF | SET | EXPR ) ";" +TYPE_DEF ::= type ID "=" UNION + | type ID TYPE_VARS+ "=" UNION +TYPE_VARS ::= ' ID +UNION ::= (REC "|" UNION) | REC +REC ::= ID ( TYPES ) +TYPES ::= TYPE+ +TYPE ::= ID + +EXPR ::= if SUB_EXPR then IF_BRANCH else IF_BRANCH | SUB_EXPR +IF_BRANCH ::= EXPR | { BLOCK } +SUB_EXPR ::= COMPAREE| COMPAREE (LE|GE|LT|GT|EQ|NE) EXPR +COMPAREE ::= FAC| (FAC(ADD | SUB) FAC) +FAC ::= APPLY | (APPLIER (MUL | DIV) APPLY) +APPLY ::= "(" ID APPLYEE* ")" | APPLYEE +APPLYEE ::= REF | CONST | EXPR | FUNC +CONST ::= INT | STR | FLO | BOOL +BOOL ::= "true" | "false" +FUNC ::= FUNC_OPTION? ( ARGS? ) -> TYPE {BLOCK} +BLOCK ::= PROG (return ID |noReturn) ; +ARGS ::= TYPE (TYPE_VARS | ID) + | TYPE (TYPE_VARS | TYPE_VARS) , ARGS +REF ::= VAR "." 
ID | VAR +VAR ::= ID +VAR_DEF ::= "let" VAR "=" EXPR +SET ::= VAR "=" EXPR +FUNC_OPTION ::= ( @ TYPE_HAS (, TYPE_HAS)* ) +TYPE_HAS ::= TYPE_VAR "has" ID \ No newline at end of file diff --git a/src/index.js b/src/index.js index fb88770..e18ebd5 100644 --- a/src/index.js +++ b/src/index.js @@ -23,26 +23,32 @@ var __importStar = (this && this.__importStar) || function (mod) { return result; }; Object.defineProperty(exports, "__esModule", { value: true }); -exports.match1token = void 0; +exports.m1TType = void 0; var fs = require('fs'); +const node_process_1 = require("node:process"); const tk = __importStar(require("./tokenize.js")); -let b = tk.tokenize("2+2"); +const util = __importStar(require("util")); /** * @description - * it returns a function which test if the first char of the `remained` part of - * the argument of the function is `c`, if it's true, update the `MatchedPair` wrapped + * match one token type. + * + * it returns a function which test if the type of first token of the `remained` part of + * the argument of the function is `typ` , if it's true, update the `TokenMatcheePair` wrapped * in `Some`. Otherwise, it returns `None`. - * * @param t : the char to be test. - * @returns the updated `MatchedPair` wrapped in `Some(x)` or `None`. + * * @param typ : the type to be test. + * @returns the updated `TokenMatcheePair` wrapped in `Some(x)` or `None`. 
*/ -function match1token(t) { +function m1TType(typ) { return (m) => { if (m.remained.length == 0) { return { _tag: "None" }; } - const tokenToBeMatched = m.remained[0]; - if (tokenToBeMatched === t) { - m.matched.push(tokenToBeMatched); + /** + * token to be matched + * */ + const ttbm = m.remained[0]; + if (ttbm.type == typ) { + m.matched.push(ttbm); return { _tag: "Some", value: { matched: m.matched, @@ -55,7 +61,59 @@ function match1token(t) { } }; } -exports.match1token = match1token; +exports.m1TType = m1TType; ; -let c = tk.toSome(b); -console.log(thenDo(c, match1token(tk.tokenize("+")[0]))); +let toSome = tk.toSome; +let thenDo = tk.thenDo; +let orDo = tk.orDo; +node_process_1.argv.forEach((val, index) => { + console.log(`${index}=${val}`); +}); +let commandInput = node_process_1.argv[2]; +let commandInputTokenized = tk.tokenize(commandInput); +console.log(commandInputTokenized); +/** + * matchee pair of commandInputTokenized + */ +let commandTPair = { matched: [], + remained: commandInputTokenized }; +let tInt = m1TType(tk.TokenType.INT); +let tFlo = m1TType(tk.TokenType.FLO); +let tStr = m1TType(tk.TokenType.STR); +function tBool(x) { + let text = x.remained[0].text; + if (text == "true" || text == "false") { + return thenDo(toSome(x), m1TType(tk.TokenType.ID)); + } + else { + return { _tag: "None" }; + } +} +/** + * define the right hand side of a grammar + * eg. `LHS ::= a + b` + * @param process the right hand side processing : eg. `a + b` in `LHS` + * @param arrange define the order (0 starting) of the elements of the result. + * ast. : eg. `a + c` is `1 0 2` `(+ a c)` + * @returns the processed ast. 
+ */ +function gramRHS(process, arrange) { + return (m) => { + let result = process(m); + console.log(`result ${result}`); + if (result._tag == "None") { + return result; + } + else { + let matched = result.value.matched; + let return_array = Array(arrange.length); + arrange.forEach((val, index) => { + return_array[arrange[index]] = matched[index]; + }); + return return_array; + } + }; +} +var constParser = gramRHS((x) => { return thenDo(toSome(x), orDo(orDo(orDo(tInt, tFlo), tStr), tBool)); }, [0]); +let tree = constParser(commandTPair); +console.log(util.inspect(tree, { showHidden: true, depth: null })); diff --git a/src/index.ts b/src/index.ts index 55e3d65..a8d103b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,32 +1,41 @@ var fs = require('fs'); - +import { argv } from 'node:process'; import * as tk from './tokenize.js'; +import * as util from 'util'; - - -let b : Array = tk.tokenize("2+2"); +/** + * token tree type. + */ +type tkTree = tk.Token[] | tk.Token export interface TokenMatcheePair { - matched: tk.Token[] + matched: tkTree[] remained: tk.Token[] } /** * @description - * it returns a function which test if the first char of the `remained` part of - * the argument of the function is `c`, if it's true, update the `MatchedPair` wrapped + * match one token type. + * + * it returns a function which test if the type of first token of the `remained` part of + * the argument of the function is `typ` , if it's true, update the `TokenMatcheePair` wrapped * in `Some`. Otherwise, it returns `None`. - * * @param t : the char to be test. - * @returns the updated `MatchedPair` wrapped in `Some(x)` or `None`. + * * @param typ : the type to be test. + * @returns the updated `TokenMatcheePair` wrapped in `Some(x)` or `None`. 
*/ -export function match1token(t: tk.Token): (m: TokenMatcheePair) => tk.Maybe { +export function m1TType(typ: tk.TokenType): + (m: TokenMatcheePair) => tk.Maybe { return (m: TokenMatcheePair) => { if (m.remained.length == 0) { return { _tag: "None" }; } - const tokenToBeMatched = m.remained[0]; - if (tokenToBeMatched === t) { - m.matched.push(tokenToBeMatched); + /** + * token to be matched + * */ + const ttbm = m.remained[0]; + + if (ttbm.type == typ) { + m.matched.push(ttbm); return { _tag: "Some", value: { matched: m.matched, @@ -40,7 +49,72 @@ export function match1token(t: tk.Token): (m: TokenMatcheePair) => tk.Maybe { + console.log(`${index}=${val}`); +}); +let commandInput = argv[2]; +let commandInputTokenized = tk.tokenize(commandInput); +console.log(commandInputTokenized); + +/** + * matchee pair of commandInputTokenized + */ +let commandTPair : TokenMatcheePair = {matched:[], + remained: commandInputTokenized}; + + +let tInt = m1TType(tk.TokenType.INT); +let tFlo = m1TType(tk.TokenType.FLO); +let tStr = m1TType(tk.TokenType.STR); +function tBool (x : TokenMatcheePair) :tk.Maybe { + let text = x.remained[0].text + if (text == "true" || text == "false"){ + return thenDo(toSome(x), m1TType(tk.TokenType.ID)); + }else{ + return {_tag : "None"}; + } +} + +/** + * define the right hand side of a grammar + * eg. `LHS ::= a + b` + * @param process the right hand side processing : eg. `a + b` in `LHS` + * @param arrange define the order (0 starting) of the elements of the result. + * ast. : eg. `a + c` is `1 0 2` `(+ a c)` + * @returns the processed ast. 
+ */ +function gramRHS (process: Function, arrange : number[]){ + return (m : TokenMatcheePair)=>{ + + let result : tk.Maybe = process(m); + console.log(`result ${result}`) + if (result._tag == "None"){ + return result; + } + else{ + let matched = result.value.matched; + let return_array : tkTree[] = Array(arrange.length); + + arrange.forEach((val, index) => { + return_array[arrange[index]] = matched[index]; + }); + + return return_array; + } + } +} + +/** + * CONST ::= INT | STR | FLO | BOOL + */ +var constParser = gramRHS((x : TokenMatcheePair)=> + {return thenDo(toSome(x),orDo(orDo(orDo(tInt,tFlo),tStr),tBool))}, [0]); -let c = tk.toSome(b); -console.log(thenDo(c,match1token(tk.tokenize("+")[0]))); \ No newline at end of file +let tree = constParser(commandTPair); +console.log(util.inspect(tree, { showHidden: true, depth: null })); diff --git a/src/tokenize.ts b/src/tokenize.ts index 861b638..e597a9e 100644 --- a/src/tokenize.ts +++ b/src/tokenize.ts @@ -98,7 +98,9 @@ export enum TokenType { NE, // <> APOS, // ' R_ARROW, // -> - + TRUE, // true + FALSE, // false + IF, // if } /** @@ -197,6 +199,25 @@ export function matchRange(l: string, u: string): (m: MatcheePair) => Maybe Maybe { + return (m)=>{ + if (s.length==0){ + return { _tag: "None" }; + } + var someM : Maybe = toSome(m); + for (var idx : number=0; idx { thenDo(thenDo(x, match1Char("-")), match1Char(">")), TokenType.R_ARROW); + /** * unary operator : generating the pattern of basic unary operator * @param char : uniry char for the operator @@ -488,7 +510,7 @@ export function tokenize(input: string): Array { lParen, rParen, lBracket, rBracket, lBrace, rBrace, comma, dot, colon, semicolon, at, hash, set, greaterthan, lessthan, apos, - float, newline, space, integer, str, id]; + float, newline, space, id, integer, str]; let term_aux = term_list.reduce((x, y) => orDo(x, y)); var new_x: Maybe = thenDo(old_x, term_aux); -- 2.39.2