From 4064e597019df41eabbe7bf662a8f2ba16bda44a Mon Sep 17 00:00:00 2001 From: Tan Kian-ting Date: Thu, 12 Oct 2023 00:39:32 +0800 Subject: [PATCH] 20231012: clo->js converter successfully (maybe.) --- README.md | 3 +- a.clo | 13 + a.js | 27 ++ package-lock.json | 41 ++- package.json | 3 +- parserRule.txt | 33 --- parserRuleRefOnly.txt | 65 ----- src/index.js | 208 +++------------- src/index.ts | 273 +++----------------- src/parser.ts | 343 +++++++++++++++++++++++++ src/tokenize.ts | 565 ------------------------------------------ 11 files changed, 487 insertions(+), 1087 deletions(-) create mode 100644 a.clo create mode 100644 a.js delete mode 100644 parserRule.txt delete mode 100644 parserRuleRefOnly.txt create mode 100644 src/parser.ts delete mode 100644 src/tokenize.ts diff --git a/README.md b/README.md index a11749a..8c72072 100644 --- a/README.md +++ b/README.md @@ -25,4 +25,5 @@ License: MIT - 20230929:add multi args parsing for `callee`. - 20230930:tîng khí parser, using `js-token`. - 20231006: tîng siá parser, using `ts-parsec`. - - 20231010: 初步完成tsit ê階段ê Parser`。 \ No newline at end of file + - 20231010: 初步完成tsit ê階段ê Parser`。 + - 20231012: clo->js converter successfully (maybe.) \ No newline at end of file diff --git a/a.clo b/a.clo new file mode 100644 index 0000000..f98a5c3 --- /dev/null +++ b/a.clo @@ -0,0 +1,13 @@ +/* ba choir +ipsum lorem*/ + +--- +我是一隻古怪的虎斑貓。擅長喵喵叫。 +the quick brown fox jumps over the lazy dog. +臺中市\\\@ + +兩個反斜線打出\\;斜線加小老鼠打出\@。 +"公\\\\園" +@repr()@ + +山頂 diff --git a/a.js b/a.js new file mode 100644 index 0000000..317dd04 --- /dev/null +++ b/a.js @@ -0,0 +1,27 @@ + +/* clo, a typesetting engine, generated JS file*/ +/* CLO: beginning of head*/ +import * as clo from "clo"; + +cl = clo.initClo(); +/* CLO: end of head*/ +/* ba choir +ipsum lorem*/ + + +/* CLO: beginning of middle part*/ +cl.mainText = /* CLO: end of middle part*/ +([` +`, `我是一隻古怪的虎斑貓。擅長喵喵叫。`, ` +`, `the`, ` `, `quick`, ` `, `brown`, ` `, `fox`, ` `, `jumps`, ` `, `over`, ` `, `the`, ` `, `lazy`, ` `, `dog.`, ` +`, `臺中市`, `\\`, `@`, ` + +`, `兩個反斜線打出`, `\\`, `;斜線加小老鼠打出`, `@`, `。`, ` +`, `"公\\\\園"`, ` +`]).concat(repr()).concat([` + +`, `山頂`, ` +`]); +/* CLO: beginning of end part*/ +cl.generatePdf(); +/*CLO : end of end part*/ diff --git a/package-lock.json b/package-lock.json index f1c2f4a..4c1706f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,6 +12,7 @@ "@pdf-lib/fontkit": "^1.1.1", "harfbuzzjs": "^0.3.3", "js-tokens": "^8.0.2", + "minimist": "^1.2.8", "npx": "^3.0.0", "pdf-lib": "^1.17.1", "typescript-parsec": "^0.3.4" @@ -19,7 +20,7 @@ "devDependencies": { "@types/chai": "^4.3.5", "@types/mocha": "^10.0.1", - "@types/node": "^20.5.7", + "@types/node": "^20.8.4", "@typescript-eslint/eslint-plugin": "^6.5.0", "chai": "^4.3.8", "eslint": "^8.48.0", @@ -775,10 +776,13 @@ "dev": true }, "node_modules/@types/node": { - "version": "20.5.7", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.5.7.tgz", - "integrity": "sha512-dP7f3LdZIysZnmvP3ANJYTSwg+wLLl8p7RqniVlV7j+oXSXAbt9h0WIBFmJy5inWZoX9wZN6eXx+YXd9Rh3RBA==", - "dev": true + "version": "20.8.4", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.8.4.tgz", + "integrity": "sha512-ZVPnqU58giiCjSxjVUESDtdPk4QR5WQhhINbc9UBrKLU68MX5BF6kbQzTrkwbolyr0X8ChBpXfavr5mZFKZQ5A==", + "dev": true, + "dependencies": { + "undici-types": "~5.25.1" + } }, "node_modules/@types/semver": { "version": "7.5.1", @@ -3701,7 +3705,6 @@ "version": "1.2.8", "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", 
"integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", - "dev": true, "funding": { "url": "https://github.com/sponsors/ljharb" } @@ -6432,6 +6435,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/undici-types": { + "version": "5.25.3", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.25.3.tgz", + "integrity": "sha512-Ga1jfYwRn7+cP9v8auvEXN1rX3sWqlayd4HP7OKk4mZWylEmu3KzXDUGrQUN6Ol7qo1gPvB2e5gX6udnyEPgdA==", + "dev": true + }, "node_modules/update-browserslist-db": { "version": "1.0.11", "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.11.tgz", @@ -7304,10 +7313,13 @@ "dev": true }, "@types/node": { - "version": "20.5.7", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.5.7.tgz", - "integrity": "sha512-dP7f3LdZIysZnmvP3ANJYTSwg+wLLl8p7RqniVlV7j+oXSXAbt9h0WIBFmJy5inWZoX9wZN6eXx+YXd9Rh3RBA==", - "dev": true + "version": "20.8.4", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.8.4.tgz", + "integrity": "sha512-ZVPnqU58giiCjSxjVUESDtdPk4QR5WQhhINbc9UBrKLU68MX5BF6kbQzTrkwbolyr0X8ChBpXfavr5mZFKZQ5A==", + "dev": true, + "requires": { + "undici-types": "~5.25.1" + } }, "@types/semver": { "version": "7.5.1", @@ -9384,8 +9396,7 @@ "minimist": { "version": "1.2.8", "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", - "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", - "dev": true + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==" }, "mkdirp": { "version": "0.5.6", @@ -11298,6 +11309,12 @@ "which-boxed-primitive": "^1.0.2" } }, + "undici-types": { + "version": "5.25.3", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.25.3.tgz", + "integrity": "sha512-Ga1jfYwRn7+cP9v8auvEXN1rX3sWqlayd4HP7OKk4mZWylEmu3KzXDUGrQUN6Ol7qo1gPvB2e5gX6udnyEPgdA==", + "dev": true + }, "update-browserslist-db": { "version": "1.0.11", "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.11.tgz", diff --git a/package.json b/package.json index fbb9cfc..90a18bc 100644 --- a/package.json +++ b/package.json @@ -22,7 +22,7 @@ "devDependencies": { "@types/chai": "^4.3.5", "@types/mocha": "^10.0.1", - "@types/node": "^20.5.7", + "@types/node": "^20.8.4", "@typescript-eslint/eslint-plugin": "^6.5.0", "chai": "^4.3.8", "eslint": "^8.48.0", @@ -40,6 +40,7 @@ "@pdf-lib/fontkit": "^1.1.1", "harfbuzzjs": "^0.3.3", "js-tokens": "^8.0.2", + "minimist": "^1.2.8", "npx": "^3.0.0", "pdf-lib": "^1.17.1", "typescript-parsec": "^0.3.4" diff --git a/parserRule.txt b/parserRule.txt deleted file mode 100644 index 8fceff9..0000000 --- a/parserRule.txt +++ /dev/null @@ -1,33 +0,0 @@ -import foo from 'bar'; - -行到水窮處坐看雲起時@blah()@下一句是什麼? - -我沒背唐詩。 - -#h1() -@h1()@ ------- -要變成 -import 'clo'; -import foo from 'bar'; - -clo = clo(); - -clo.mainText = `行到水窮處坐看雲起時`+blah()+`下一句是什麼? 
- -我沒背唐詩。` - -clo.genPdf(path); ----- - -BODY = IMPORTS SENTENCES | SENTENCES -IMPORTS = import SP IMPORTINNER ; NL IMPORTS | import IMPORTINNER ; -IMPORTINNER = NOT[;] IMPORTINNER | NOT[;] - -SENTENCES = SENTENCE SENTENCES -SENTENCE = TEXT | STMT | None -TEXT = TEXT_SEG TEXT | TEXT_SEG -TEXT_SEG = ([^@\] | \ @ | \ \ ) - -STMT = @ STMTINNER @ -STMTINNER = [^@] STMTINNER | [^@] \ No newline at end of file diff --git a/parserRuleRefOnly.txt b/parserRuleRefOnly.txt deleted file mode 100644 index efff906..0000000 --- a/parserRuleRefOnly.txt +++ /dev/null @@ -1,65 +0,0 @@ -let sqrtSum = (int x, int y) -> int { -let x2 = x * x; -let y2 = y * y; -return x2+y2; -} - -let pi = 3.14159; -let _2p = (intToFLo 2) *. pi; - -let c = if (2 == 2) then 2 else 3; - -let aStr = "hello"; - -let rec fac = (int n)-> int { - if n == 0 then 1 else (fac (n - 1));}; - - -type student = Student {int id, string name}; - -let alice = Student {id=20, name="Alice"}; - -alice.name = "Siobhan"; - -let customAnd = (@ 'a has age) ('a x, 'a y) => {'a > 'b}; - -type list 'a = (Cons 'a (List 'a)) | Nil; - -import("alifbata.clo"); # 匯入檔案 alifbata # - -t of import :== string -> Option string string -Error("string") | Ok("import aaa") -# 型別構造子統一大寫,型別小寫 # - - - -PROG ::= (STMT | COMMENT | STMT_COMMENT)* -COMMENT ::= # COMMENT_INNER # -COMMENT_INNER ::= [^#]+ -STMT ::= (TYPE_DEF | VAR_DEF | SET | EXPR ) ";" -TYPE_DEF ::= type ID "=" UNION - | type ID TYPE_VARS+ "=" UNIOM -TYPE_VARS = ' ID -UNION ::= (REC "|" UNION) | REC -REC ::= ID ( TYPES ) -TYPES ::= TYPE+ -TYPE ::= ID - -EXPR ::= if SUB_EXPR then IF_BRANCH else IF_BRANCH | SUB_EXPR -IF_BRANCH ::= EXPR | { BLOCK } -SUB_EXPR ::= COMPAREE| COMPAREE (LE|GE|LT|GT|EQ|NE) EXPR -COMPAREE ::= FAC| (FAC(ADD | SUB) FAC) -FAC ::= APPLY | (APPLIER (MUL | DIV) APPLY) -APPLY ::= "(" ID APPLYEE* ")" | APPLYEE -APPLYEE ::= REF | CONST | EXPR | FUNC -* CONST ::= INT | STR | FLO | BOOL -BOOL ::= "true" | "false" -FUNC ::= FUNC_OPTION? ( ARGS? ) -> TYPE {BLOCK} -BLOCK ::= PROG (return ID |noReturn) ; - -REF ::= VAR "." ID | VAR -VAR ::= ID -VAR_DEF ::= "let" VAR "=" EXPR -SET ::= VAR "=" EXPR -FUNC_OPTION ::= ( @ TYPE_HAS (, TYPE_HAS)* ) -TYPE_HAS ::= TYPE_VAR "has" ID \ No newline at end of file diff --git a/src/index.js b/src/index.js index 15e1862..53df24c 100644 --- a/src/index.js +++ b/src/index.js @@ -23,184 +23,48 @@ var __importStar = (this && this.__importStar) || function (mod) { return result; }; Object.defineProperty(exports, "__esModule", { value: true }); -exports.tkTreeToSExp = void 0; var fs = require('fs'); -const util = __importStar(require("util")); -const p = __importStar(require("typescript-parsec")); -/** - * - * # REPRESENTATION - */ -/** - * convert a `tkTree` AST to S-expr string - * @param t the `tkTree` - * @returns S-expr String - */ -function tkTreeToSExp(t) { - var str = ""; - if (Array.isArray(t)) { - let strArray = t.map((x) => tkTreeToSExp(x)); - str = "(" + strArray.join(" ") + ")"; - } - else { - if (t === undefined) { - str = "%undefined"; - } - else { - str = t; - } - } - return str; -} -exports.tkTreeToSExp = tkTreeToSExp; -/**inspect the inner of the representation. 
*/ -let repr = (x) => { return util.inspect(x, { depth: null }); }; -var TokenKind; -(function (TokenKind) { - TokenKind[TokenKind["Seperator"] = 0] = "Seperator"; - TokenKind[TokenKind["Semicolon"] = 1] = "Semicolon"; - TokenKind[TokenKind["Number"] = 2] = "Number"; - TokenKind[TokenKind["Op"] = 3] = "Op"; - TokenKind[TokenKind["ExprMark"] = 4] = "ExprMark"; - TokenKind[TokenKind["ExcapeAt"] = 5] = "ExcapeAt"; - TokenKind[TokenKind["Paren"] = 6] = "Paren"; - TokenKind[TokenKind["SpaceNL"] = 7] = "SpaceNL"; - TokenKind[TokenKind["Id"] = 8] = "Id"; - TokenKind[TokenKind["Str"] = 9] = "Str"; - TokenKind[TokenKind["Comment"] = 10] = "Comment"; -})(TokenKind || (TokenKind = {})); -/** - * Parsing - */ -const lexer = p.buildLexer([ - [true, /^\d+(\.\d+)?/g, TokenKind.Number], - [true, /^\\\@/g, TokenKind.ExcapeAt], - [true, /^\/\*([^/]|\/[^*])*\*\//g, TokenKind.Comment], - [true, /^\;/g, TokenKind.Semicolon], - [true, /^[-][-][-]/g, TokenKind.Seperator], - [true, /^[\+\-\*\/\&\|\!\^\<\>\~\=\?]+/g, TokenKind.Op], - [true, /^\@/g, TokenKind.ExprMark], - [true, /^[()\[\]{}]/g, TokenKind.Paren], - [true, /^["]([\"]|[\\].)*["]/g, TokenKind.Str], - [true, /^[']([\']|[\\].)*[']/g, TokenKind.Str], - [true, /^[()\[\]{}]/g, TokenKind.Paren], - [true, /^[^\/\\\@\s\n\t\r;]+/g, TokenKind.Id], - [true, /^(\s|\n|\r|\t)+/g, TokenKind.SpaceNL], -]); -/** - * - * # TEST - */ -const inputTxt = `import a as b; /*bacourt*/ -/* ba choir -ipsum lorem*/ - -import you as john; ---- +var argv = require('minimist')(process.argv.slice(2)); +const parser = __importStar(require("./parser.js")); +let helpDesc = ` +clo: clo INPUT_FILE --output-js OUTPUT_JS_FILE -臺中市\\\@ +\ta little typesetter powered by TypeScript/Javascript. -公園 -@1+2==3; +## Arguments +INPUT_FILE\tan input .clo file -console.log("122");@ - -山頂 +## Parameters +--- +--output-js\tgenerated the output middle JS file `; -const PROG = p.rule(); -const SEGMENT = p.rule(); -const IMPORT = p.rule(); -const IMPORTS = p.rule(); -const SEMICOLON = p.rule(); -const EXCAPE_AT = p.rule(); -const NOT_AT_TEXT = p.rule(); -const CONTENT = p.rule(); -let doubleMinus = { type: 'Punctuator', value: '--' }; -let doubleMinus2 = p.str('--'); -const TERM = p.rule(); -function applySegment(input) { - let unpackedInnerExprs = input[1].map((x) => { return x.text; }); - return ["%exprs", unpackedInnerExprs]; -} -function applySemiColon(value) { - return value.text; -} -function applyParts(first, second) { - return ["%clo", first, second[1]]; -} -function applyComment(value) { - return [value.text]; -} -function applyImport(input) { - let importTail = input[1].map(x => x.text); - return ["import"].concat(importTail); -} -; -/* -function applyImportComment(input: [Token,Token[], - tkTree, Token]) : tkTree{ - let importTail = input[1].map(x=>x.text); - let comment = [input[3].text]; - return ["import"].concat(importTail).concat(comment); -};*/ -function applyImports(input) { - let resultBody = [input[0]].concat(input[1]); - let resultWrapper = ["%import", resultBody]; - return resultWrapper; -} -; -function applyNotAtText(value) { - if (value.text == "\\\@") { - return '@'; +processArgv(argv, helpDesc); +/** + * processing the passed `argv` (arguments) + */ +function processArgv(argv, helpDesc) { + let inputFile = argv['_']; + let outputJSFile = argv['output-js']; + let NoInputFile = (inputFile.length == 0); + let NoOutputJSFile = (outputJSFile === undefined || outputJSFile == true); + let helpTriggered = argv['help']; + if (inputFile.length > 1) { + console.log("Sorry, the input file 
should be only one."); + } + /** output --help */ + if (helpTriggered || NoInputFile || NoOutputJSFile) { + console.log(helpDesc); } else { - return value.text; + fs.readFile(inputFile[0], 'utf8', (err, inputText) => { + if (err) + throw err; + let tree = parser.inputTextToTree(inputText); + let output = parser.treeToJS(tree); + fs.writeFile(outputJSFile, output, (err) => { + if (err) + throw err; + }); + }); } } -; -function applyText(input) { - return ["%text", input]; -} -; -function applyContent(input) { - return ["%content", input]; -} -; -function applySpaceNL(value) { - return value.text; -} -/** - * IMPORTEE: Number, Op, Paren, Id, Str, Comment, - */ -let IMPORTEE = p.alt(p.tok(TokenKind.Number), p.tok(TokenKind.Op), p.tok(TokenKind.Paren), p.tok(TokenKind.Id), p.tok(TokenKind.Str), p.tok(TokenKind.SpaceNL), p.tok(TokenKind.Comment)); -let NOT_AT = p.alt(p.tok(TokenKind.Seperator), p.tok(TokenKind.Semicolon), p.tok(TokenKind.Number), p.tok(TokenKind.ExcapeAt), p.tok(TokenKind.Op), p.tok(TokenKind.Paren), p.tok(TokenKind.SpaceNL), p.tok(TokenKind.Id), p.tok(TokenKind.Str), p.tok(TokenKind.Comment)); -/** - * PROG : IMPORTS '---' CONTENT; - */ -PROG.setPattern(p.lrec_sc(IMPORTS, p.seq(p.str('---'), CONTENT), applyParts)); -/** - * NOT_AT_TEXT : NOT_AT - */ -NOT_AT_TEXT.setPattern(p.apply(NOT_AT, applyNotAtText)); -IMPORTS.setPattern(p.apply(p.seq(IMPORT, p.rep(IMPORT)), applyImports)); -/** - * IMPORT : - * 'import' IMPORTEE* SEMICOLON | - * COMMENT | - */ -IMPORT.setPattern(p.alt(p.apply(p.seq(p.str('import'), p.rep_sc(IMPORTEE), SEMICOLON), applyImport), p.apply(p.tok(TokenKind.Comment), applyComment), p.apply(p.tok(TokenKind.SpaceNL), applySpaceNL))); -/** - * SEMICOLON : ';'; - */ -SEMICOLON.setPattern(p.apply(p.tok(TokenKind.Semicolon), applySemiColon)); -/** - * SEGMENT : '@' NOT_AT* '@' | - * (NOT_AT_TEXT | EXCAPE_AT)* - */ -SEGMENT.setPattern(p.alt(p.apply(p.rep_sc(NOT_AT_TEXT), applyText), p.apply(p.seq(p.str('@'), p.rep(NOT_AT), p.str('@')), applySegment))); -/** - * CONTENT : SEGMENT* - */ -CONTENT.setPattern(p.apply(p.rep(SEGMENT), applyContent)); -let tree = p.expectSingleResult(p.expectEOF(PROG.parse(lexer.parse(inputTxt)))); -console.log("RESULT=" + tkTreeToSExp(tree)); diff --git a/src/index.ts b/src/index.ts index 26a690e..d5be674 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,265 +1,62 @@ var fs = require('fs'); -import jsTokens from "js-tokens"; -import * as util from 'util'; -import * as p from 'typescript-parsec'; -import { Token } from 'typescript-parsec'; -import { TokenType } from "./tokenize"; -/** - * - * # REPRESENTATION - */ -/** - * convert a `tkTree` AST to S-expr string - * @param t the `tkTree` - * @returns S-expr String - */ -export function tkTreeToSExp(t: tkTree): string{ - var str = ""; +var argv : any = require('minimist')(process.argv.slice(2)); - if (Array.isArray(t)){ - let strArray = t.map((x)=>tkTreeToSExp(x)); - str = "(" + strArray.join(" ") + ")"; - }else{ - if (t=== undefined){ - str = "%undefined" - }else{ - str = t; - } - } - - return str; -} +import * as parser from "./parser.js"; -/**inspect the inner of the representation. */ -let repr = (x : any)=>{return util.inspect(x, {depth: null})}; -/** - * - * # TYPES - */ +let helpDesc = +` +clo: clo INPUT_FILE --output-js OUTPUT_JS_FILE -type tkTree = string | tkTree[]; +\ta little typesetter powered by TypeScript/Javascript. 
-enum TokenKind { - Seperator, // --- - Semicolon, // ; - Number, - Op, - ExprMark, // @ - ExcapeAt, // \@ - Paren, - SpaceNL, // \s\t\n\r - Id, - Str, - Comment, // /* ooo */ -} +## Arguments +INPUT_FILE\tan input .clo file -/** - * Parsing - */ -const lexer = p.buildLexer([ - [true, /^\d+(\.\d+)?/g, TokenKind.Number], - [true, /^\\\@/g, TokenKind.ExcapeAt], - [true, /^\/\*([^/]|\/[^*])*\*\//g, TokenKind.Comment], - [true, /^\;/g, TokenKind.Semicolon], - [true, /^[-][-][-]/g, TokenKind.Seperator], - [true, /^[\+\-\*\/\&\|\!\^\<\>\~\=\?]+/g, TokenKind.Op], - [true, /^\@/g, TokenKind.ExprMark], - [true, /^[()\[\]{}]/g, TokenKind.Paren], - [true, /^["]([\"]|[\\].)*["]/g, TokenKind.Str], - [true, /^[']([\']|[\\].)*[']/g, TokenKind.Str], - [true, /^[()\[\]{}]/g, TokenKind.Paren], - [true, /^[^\/\\\@\s\n\t\r;]+/g, TokenKind.Id], - [true, /^(\s|\n|\r|\t)+/g, TokenKind.SpaceNL], +## Parameters +--- +--output-js\tgenerated the output middle JS file +` -]); +processArgv(argv, helpDesc); /** - * - * # TEST + * processing the passed `argv` (arguments) */ -const inputTxt= -`import a as b; /*bacourt*/ -/* ba choir -ipsum lorem*/ - -import you as john; ---- - -臺中市\\\@ - -公園 -@1+2==3; - -console.log("122");@ - -山頂 -`; - - -const PROG = p.rule(); -const SEGMENT = p.rule(); -const IMPORT = p.rule(); -const IMPORTS = p.rule(); -const SEMICOLON = p.rule(); -const EXCAPE_AT = p.rule(); -const NOT_AT_TEXT = p.rule(); -const CONTENT = p.rule(); - -let doubleMinus = { type: 'Punctuator', value: '--' }; -let doubleMinus2 = p.str('--'); -const TERM = p.rule(); - -function applySegment(input: [Token, Token[], - Token]): tkTree[]{ - let unpackedInnerExprs = input[1].map((x)=>{return x.text}); - return ["%exprs", unpackedInnerExprs]; -} - -function applySemiColon(value: Token): tkTree{ - return value.text; -} - -function applyParts(first: tkTree, - second: [Token, tkTree]):tkTree { - return ["%clo", first , second[1]]; -} - - -function applyComment(value: Token): tkTree[]{ - return [value.text]; -} +function processArgv(argv : any, helpDesc : string){ + let inputFile : string[] = argv['_']; + let outputJSFile : string | true = argv['output-js']; -function applyImport(input: [Token,Token[], tkTree]) : tkTree{ - let importTail = input[1].map(x=>x.text); - return ["import"].concat(importTail); -}; + let NoInputFile : boolean = (inputFile.length == 0); + let NoOutputJSFile : boolean = (outputJSFile === undefined || outputJSFile == true); + let helpTriggered : boolean = argv['help']; + if (inputFile.length > 1){ + console.log("Sorry, the input file should be only one."); + } -/* -function applyImportComment(input: [Token,Token[], - tkTree, Token]) : tkTree{ - let importTail = input[1].map(x=>x.text); - let comment = [input[3].text]; - return ["import"].concat(importTail).concat(comment); -};*/ - -function applyImports(input : [tkTree, tkTree[]]): tkTree{ - let resultBody = [input[0]].concat(input[1]); - let resultWrapper = ["%import", resultBody]; - return resultWrapper; -}; + /** output --help */ + if (helpTriggered || NoInputFile || NoOutputJSFile){ + console.log(helpDesc); + }else{ + fs.readFile(inputFile[0], 'utf8', (err : Error, inputText : string) => { + if (err) throw err; + let tree = parser.inputTextToTree(inputText); + let output = parser.treeToJS(tree); + fs.writeFile(outputJSFile, output , (err : Error) => { + if (err) throw err; + }); -function applyNotAtText(value : Token): tkTree{ - if (value.text == "\\\@"){ - return '@'; + }); } - else{return value.text;} -}; - -function applyText (input : tkTree): 
tkTree[]{ - return ["%text", input]; -}; -function applyContent(input : tkTree[]): tkTree[]{ - return ["%content", input]; -}; - -function applySpaceNL(value : Token): tkTree{ - return value.text; } -/** - * IMPORTEE: Number, Op, Paren, Id, Str, Comment, - */ -let IMPORTEE = p.alt(p.tok(TokenKind.Number), - p.tok(TokenKind.Op), - p.tok(TokenKind.Paren), - p.tok(TokenKind.Id), - p.tok(TokenKind.Str), - p.tok(TokenKind.SpaceNL), - p.tok(TokenKind.Comment)); - -let NOT_AT = p.alt(p.tok(TokenKind.Seperator), - p.tok(TokenKind.Semicolon), - p.tok(TokenKind.Number), - p.tok(TokenKind.ExcapeAt), - p.tok(TokenKind.Op), - p.tok(TokenKind.Paren), - p.tok(TokenKind.SpaceNL), - p.tok(TokenKind.Id), - p.tok(TokenKind.Str), - p.tok(TokenKind.Comment), - ); - -/** - * PROG : IMPORTS '---' CONTENT; - */ -PROG.setPattern( - p.lrec_sc(IMPORTS, p.seq(p.str('---'), CONTENT), applyParts) - -) - -/** - * NOT_AT_TEXT : NOT_AT - */ -NOT_AT_TEXT.setPattern( - p.apply(NOT_AT, applyNotAtText) -); - -IMPORTS.setPattern( - p.apply( p.seq(IMPORT, p.rep(IMPORT)), applyImports) -); - -/** - * IMPORT : - * 'import' IMPORTEE* SEMICOLON | - * COMMENT | - */ -IMPORT.setPattern( - p.alt( - p.apply(p.seq(p.str('import'), p.rep_sc(IMPORTEE), SEMICOLON), - applyImport), - p.apply(p.tok(TokenKind.Comment), applyComment), - p.apply(p.tok(TokenKind.SpaceNL), applySpaceNL) - - ) -); - -/** - * SEMICOLON : ';'; - */ -SEMICOLON.setPattern( - p.apply(p.tok(TokenKind.Semicolon), applySemiColon) -); - - - -/** - * SEGMENT : '@' NOT_AT* '@' | - * (NOT_AT_TEXT | EXCAPE_AT)* - */ -SEGMENT.setPattern( - p.alt( - p.apply(p.rep_sc(NOT_AT_TEXT), applyText), - p.apply(p.seq(p.str('@'), p.rep(NOT_AT), p.str('@')), applySegment), - ) -); - -/** - * CONTENT : SEGMENT* - */ -CONTENT.setPattern( - p.apply(p.rep(SEGMENT), applyContent) -); - -let tree = p.expectSingleResult(p.expectEOF(PROG.parse(lexer.parse(inputTxt)))); -console.log("RESULT="+tkTreeToSExp(tree)); diff --git a/src/parser.ts b/src/parser.ts new file mode 100644 index 0000000..269b698 --- /dev/null +++ b/src/parser.ts @@ -0,0 +1,343 @@ +/** + * parser.ts - parser and js generator of clo. 
+ */ +import * as p from 'typescript-parsec'; +import { Token } from 'typescript-parsec'; + +/** + * + * # REPRESENTATION + */ + +/** + * convert a `tkTree` AST to S-expr string + * @param t the `tkTree` + * @returns S-expr String + * +export function tkTreeToSExp(t: tkTree): string{ + var str = ""; + + if (Array.isArray(t)){ + let strArray = t.map((x)=>tkTreeToSExp(x)); + str = "(" + strArray.join("◎") + ")"; + }else{ + if (t=== undefined){ + str = "%undefined" + }else{ + str = t; + } + } + + return str; +}*/ + +type tkTree = string | tkTree[]; + +enum TokenKind { + Seperator, // --- + Semicolon, // ; + Number, + Op, + ExprMark, // @ + ExcapeAt, // \@ + Paren, + SpaceNL, // \s\t\n\r + Id, + Str, + Comment, // /* ooo */ +} + +/** + * Parsing + */ +const lexer = p.buildLexer([ + [true, /^\d+(\.\d+)?/g, TokenKind.Number], + [true, /^[\\][\\]/g, TokenKind.Op], + [true, /^\\\@/g, TokenKind.ExcapeAt], + [true, /^\/\*([^/]|\/[^*])*\*\//g, TokenKind.Comment], + [true, /^\;/g, TokenKind.Semicolon], + [true, /^[-][-][-]/g, TokenKind.Seperator], + [true, /^[\+\-\*\/\&\|\!\^\<\>\~\=\?]+/g, TokenKind.Op], + [true, /^\@/g, TokenKind.ExprMark], + [true, /^[()\[\]{}]/g, TokenKind.Paren], + [true, /^[\"]([^\"]|[\\].)*[\"]/g, TokenKind.Str], + [true, /^[\']([^\']|[\\].)*[\']/g, TokenKind.Str], + [true, /^[()\[\]{}]/g, TokenKind.Paren], + [true, /^[^\/\\\@\s\n\t\r;]+/g, TokenKind.Id], + [true, /^(\s|\n|\r|\t)+/g, TokenKind.SpaceNL], + +]); + +/** + * + * # TEST + */ + + + +const PROG = p.rule(); +const SEGMENT = p.rule(); +const IMPORT = p.rule(); +const IMPORTS = p.rule(); +const SEMICOLON = p.rule(); +const NOT_AT_TEXT = p.rule(); +const CONTENT = p.rule(); + + +function applySegment(input: [Token, Token[], + Token]): tkTree[]{ + let unpackedInnerExprs = input[1].map((x)=>{return x.text}); + return ["%exprs", unpackedInnerExprs]; +} + +function applySemiColon(value: Token): tkTree{ + return value.text; +} + +function applyParts(first: tkTree, + second: [Token, tkTree]):tkTree { + return ["%clo", first , second[1]]; +} + + +function applyComment(value: Token): tkTree[]{ + return [value.text]; +} + + +function applyImport(input: [Token,Token[], tkTree]) : tkTree{ + let importTail = input[1].map(x=>x.text); + return ["import"].concat(importTail); +}; + + +/* +function applyImportComment(input: [Token,Token[], + tkTree, Token]) : tkTree{ + let importTail = input[1].map(x=>x.text); + let comment = [input[3].text]; + return ["import"].concat(importTail).concat(comment); +};*/ + +function applyImports(input : [tkTree, tkTree[]]): tkTree{ + let resultBody = [input[0]].concat(input[1]); + let resultWrapper = ["%import", resultBody]; + return resultWrapper; +}; + + + + +function applyNotAtText(value : Token): tkTree{ + if (value.text == "\\\@"){ + return '@'; + } + else{return value.text;} +}; + +function applyText (input : tkTree): tkTree[]{ + return ["%text", input]; +}; + +function applyContent(input : tkTree[]): tkTree[]{ + return ["%content", input]; +}; + +function applySpaceNL(value : Token): tkTree{ + return value.text; +} + +/** + * IMPORTEE: Number, Op, Paren, Id, Str, Comment, + */ +let IMPORTEE = p.alt(p.tok(TokenKind.Number), + p.tok(TokenKind.Op), + p.tok(TokenKind.Paren), + p.tok(TokenKind.Id), + p.tok(TokenKind.Str), + p.tok(TokenKind.SpaceNL), + p.tok(TokenKind.Comment)); + +let NOT_AT = p.alt(p.tok(TokenKind.Seperator), + p.tok(TokenKind.Semicolon), + p.tok(TokenKind.Number), + p.tok(TokenKind.ExcapeAt), + p.tok(TokenKind.Op), + p.tok(TokenKind.Paren), + p.tok(TokenKind.SpaceNL), + 
p.tok(TokenKind.Id), + p.tok(TokenKind.Str), + p.tok(TokenKind.Comment), + ); + +/** + * PROG : IMPORTS '---' CONTENT; + */ +PROG.setPattern( + p.lrec_sc(IMPORTS, p.seq(p.str('---'), CONTENT), applyParts) + +) + +/** + * NOT_AT_TEXT : NOT_AT + */ +NOT_AT_TEXT.setPattern( + p.apply(NOT_AT, applyNotAtText) +); + +IMPORTS.setPattern( + p.apply( p.seq(IMPORT, p.rep(IMPORT)), applyImports) +); + +/** + * IMPORT : + * 'import' IMPORTEE* SEMICOLON | + * COMMENT | + */ +IMPORT.setPattern( + p.alt( + p.apply(p.seq(p.str('import'), p.rep_sc(IMPORTEE), SEMICOLON), + applyImport), + p.apply(p.tok(TokenKind.Comment), applyComment), + p.apply(p.tok(TokenKind.SpaceNL), applySpaceNL) + + ) +); + +/** + * SEMICOLON : ';'; + */ +SEMICOLON.setPattern( + p.apply(p.tok(TokenKind.Semicolon), applySemiColon) +); + + + +/** + * SEGMENT : '@' NOT_AT* '@' | + * (NOT_AT_TEXT | EXCAPE_AT)* + */ +SEGMENT.setPattern( + p.alt( + p.apply(p.rep_sc(NOT_AT_TEXT), applyText), + p.apply(p.seq(p.str('@'), p.rep(NOT_AT), p.str('@')), applySegment), + ) +); + +/** + * CONTENT : SEGMENT* + */ +CONTENT.setPattern( + p.apply(p.rep(SEGMENT), applyContent) +); + + + +/** + * the head part of the output JS code : before import + */ +let outputHead = ` +/* clo, a typesetting engine, generated JS file*/ +/* CLO: beginning of head*/ +import * as clo from "clo"; + +cl = clo.initClo(); +/* CLO: end of head*/\n` + +/** + * the middle part of the output JS code : between import part and content part + */ +let outputMiddle =` +/* CLO: beginning of middle part*/ +cl.mainText = /* CLO: end of middle part*/ +` +let outputEnd =` +/* CLO: beginning of end part*/ +cl.generatePdf(); +/*CLO : end of end part*/ +` + +/** + * Convert `tree` (ASTTree; `tkTree`) to JS Code. + */ +export function treeToJS(tree : tkTree): string{ + let head = tree[0]; + if (head == "%clo"){ + let totalResult = outputHead + treeToJS(tree[1]) + + outputMiddle + treeToJS(tree[2]) + outputEnd; + return totalResult; + } + if (head == "%import"){ + let imports = tree[1]; + if (Array.isArray(imports)){ + let importsText = imports.map( + (x)=>{ + if (Array.isArray(x)){ + return x.join(''); + } + else{ + return x; + } + }); + let importTextCombined = importsText.join(''); + return importTextCombined; + } + else{ + return imports; + } + } + if (head == "%content"){ + let tail = tree[1]; + if (Array.isArray(tail)){ + if (tail.length == 1){ + return treeToJS(tail); + } + let tailStrings = tail.map((x)=>treeToJS(x)); + return "(" + tailStrings.join(').concat(') + ");"; + }else{ + return tail; + } + } + if (head == "%text"){ + let textContents = tree[1]; + if (Array.isArray(textContents)){ + let decoratedArray = textContents + .flatMap(x=>String(x)) + .map(x=>x.replace("\`","\\\`")); + + return "[`" + decoratedArray.join("\`, \`") + "`]"; + }else{ + let decorated = textContents.replace("\`","\\\`"); + + return "[`" + decorated + "`]"; + } + } + + if (head == "%exprs"){ + let content = tree[1]; + if (Array.isArray(content)){ + let flattenContent = content.flat(); + return flattenContent.join(''); + } + else{ + return content; + } + + } + else{ + if (Array.isArray(tree)){ + return tree.join(''); + }else{ + return tree; + } + } +} + + +/** + * `inputText` to `tkTree` (ASTTree) + */ +export function inputTextToTree(inputText : string){ +return p.expectSingleResult( + p.expectEOF(PROG.parse(lexer.parse(inputText)))); +} \ No newline at end of file diff --git a/src/tokenize.ts b/src/tokenize.ts deleted file mode 100644 index 144dbed..0000000 --- a/src/tokenize.ts +++ /dev/null @@ -1,565 +0,0 
@@ -import * as util from 'util'; - -var fs = require('fs'); - -export type Some = { _tag: "Some"; value: T }; -export type None = { _tag: "None" }; -/** - * part for tokenize the input string - */ - -/** - * wrap a x in a `Some(T)` - * @param x : variable to be wrapped. - * @returns wrapped `x`. - */ -export function toSome(x: T): Some { - return { _tag: "Some", value: x }; -} -/** - * @description Like the `Some(a)` and `None` in Rust. - * - * @example - * ```ts - * let exam1 : Maybe = { _tag: "Some", value: 12 }; - * let exam2 : Maybe = None; - * ``` - */ -export type Maybe = Some | None; - - -/** - * @description - * the pair of the string to be matched later and the string that have been matched - * @var matched : have been matched - * @var remained : will be tested whether it'll be matched. - * @var matched_type (optional): the type of the matched string -*/ -export interface MatcheePair { - matched: string - remained: string - matched_type?: TokenType -} - -/** - * The types of Token - * NL, // newline - * - * SP, // half-width space and tab - * - * ID, // identifier - * - * STR, // string - * - * OP, // operator or something like it - * - * FLO, // float num - * - * INT, // integer - * - * I_* // integer manipulation - * - * F_* // float manipulation - * - * SEMI_C// semi-colon - */ -export enum TokenType { - NL, // newline - SP, // half-width space and tab - ID, // identifier - STR, // string - FLO, // float num - INT, // integer - F_ADD, - F_SUB, - F_MUL, - F_DIV, - I_ADD, - I_SUB, - I_MUL, - I_DIV, - L_PAREN, // ( - R_PAREN, // ) - L_BRACK, // [ - R_BRACK, // ] - L_BRACE, // { - R_BRACE, // } - COMMA, // , - DOT, // . - COLON, // : - SEMI_C, // ; - AT, // @ - HASH, // # - EQ, // == - SET, // = - GT, // > greater than - LT, // = - LE, // <= - NE, // <> - APOS, // ' - R_ARROW, // -> - TRUE, // true - FALSE, // false - IF, // if -} - -/** - * tokenized token. - * @var text : the content text - * @var type (optional): the type of the token - * @var col : the column number - * @var ln : the line number - */ -export interface Token { - text: string, - type?: TokenType, - col: number, - ln: number, -} - -/** - * @description - * it returns a function which test if the first char of the `remained` part of - * the argument of the function is `c`, if it's true, update the `MatchedPair` wrapped - * in `Some`. Otherwise, it returns `None`. - * * @param c : the char to be test. - * @returns the updated `MatchedPair` wrapped in `Some(x)` or `None`. - */ -export function match1Char(c: string): (m: MatcheePair) => Maybe { - return (m: MatcheePair) => { - if (m.remained.length == 0) { - return { _tag: "None" }; - } - const charToBeMatched = m.remained[0]; - if (charToBeMatched === c) { - return { - _tag: "Some", value: { - matched: m.matched + charToBeMatched, - remained: m.remained.substring(1) - } - }; - } - else { - return { _tag: "None" }; - } - } -}; - -/** - * - * @param m : the `MatcheePair` to be consumed. - * @returns if the length of `m.remained` >= 1; consumes the matchee by 1 char and wraps it in `Some`, - * otherwise, returns `None`. 
- */ -export function matchAny(m: MatcheePair): Maybe { - if (m.remained.length >= 1) { - return { - _tag: "Some", value: { - matched: m.matched + m.remained[0], - remained: m.remained.substring(1) - } - }; - } else { - return { _tag: "None" }; - } -} - -/** - * @description - * it returns a function which test if the first char of the `remained` part of - * the argument of the function is between `l` and `u`, if it's true, update the `MatchedPair` wrapped - * in `Some`. Otherwise, it returns `None`. - * * @param l : lower bound char, 1-char string - * * @param u : upper bound char, 1-char string - * @returns the updated `MatchedPair` wrapped in `Some(x)` or `None`. - */ -export function matchRange(l: string, u: string): (m: MatcheePair) => Maybe { - let lCodepoint = charToCodepoint(l); - let uCodepoint = charToCodepoint(u); - if (l > u) { - throw new Error("Error: the codepoint of `" + l + "` is not smaller than `" + u + "`)"); - } - return (m: MatcheePair) => { - if (m.remained.length < 1) { - return { _tag: "None" }; - } - const charToBeMatched = m.remained[0]; - const codePointToBeMatched = charToCodepoint(charToBeMatched); - if (codePointToBeMatched >= lCodepoint && codePointToBeMatched <= uCodepoint) { - return { - _tag: "Some", value: { - matched: m.matched + charToBeMatched, - remained: m.remained.substring(1) - } - }; - } - else { - return { _tag: "None" }; - } - } -}; - - -/** - * check if a matcheePair `m` matches a stringv `s`. - * @param s the checker string. - * @returns `None` or matched pair wrapped in `Some` - */ -export function matchWord(s: string, ): (m: MatcheePair) => Maybe { - return (m)=>{ - if (s.length==0){ - return { _tag: "None" }; - } - var someM : Maybe = toSome(m); - for (var idx : number=0; idx 1` return error; otherwise, return the codepoint of `s`. - */ -export function charToCodepoint(s: string): number { - if (s.length > 1) { - throw new Error("Error: the length of input string for " + s + "is " + s.length + `, - however, it should be 1.`); - } else { - return s.charCodeAt(0); - } -} - -/** - * @description thendo(input, f, ...) like - * a ==> f - * @param input: the wrapped input. - * @param f: the function to be applied. - * - * @returns:the applied wrapped result `MatcheePair`. - */ -export function thenDo(input: Maybe, f: Function): Maybe { - if (input._tag == "None") { - return input; - } - else { - let inner = input.value; - return f(inner); - } -} - -/** - * @description "or", like the regex `( f1 | f2 )` . - * It returns a function `f` of which the argument is`x`. - * if `f1(x)` is None, then `f` returns `f2(x)`. Otherwise, - * `F` returns `f1(x)`. - * @param f1 : 1st function to be compared - * @param f2 : 2nd function to be compared - * @returns:the combined function - */ -export function orDo(f1: Function, f2: Function): (x: T) => Maybe { - return (x) => { - let f1x: Maybe = (f1(x)); - { - if (f1x._tag == "None") { - return f2(x); - } - else { - return f1x; - } - } - }; -} - - -/** -* @description repeating matching function `f` -* zero or more times, like the asterisk `*` in regex `f*` . -* @param f : the function to be repeated 0+ times. -* @returns:the combined function -*/ -export function zeroOrMoreDo(f: Function): (x: T) => Maybe { - return (x) => { - var wrapped_old_x: Maybe = { _tag: "Some", value: x }; - var wrapped_new_x: Maybe = wrapped_old_x; - - while (wrapped_new_x._tag != "None") { - wrapped_old_x = wrapped_new_x; - wrapped_new_x = thenDo(wrapped_old_x, f); - }; - - return wrapped_old_x; - }; -} - -/** -* @description Not. 
like the `^` inside regex of [^f]. -* returns a function `F(x)` such that if `f(x)` is `None`, -* returns the x consuming a char; if `f(x)` is not None, F(x) -* returns `None`. -* @param f: the function forbidden to be matched. -* @returns: combined function `F`. -*/ -export function notDo(f: Function): (x: T) => Maybe { - return (x) => { - let wrapped_x: Maybe = { - _tag: "Some", - value: x - }; - let f_x = thenDo(wrapped_x, f); - - if (f_x._tag != "None") { - return { _tag: "None" }; - } else { - return thenDo(wrapped_x, matchAny); - } - }; -} - -/** - * if `x` is matched by `f` once, returns `f(x)`. Otherwise, - * returns x - * similar to `?` in regex `f?`. - * @param f : the function to be matched - * @returns return wrapped f(x) - */ -export function zeroOrOnceDo(f: Function): (x: T) => Maybe { - return (x) => { - var wrapped_old_x: Maybe = { _tag: "Some", value: x }; - var wrapped_new_x = thenDo(wrapped_old_x, f); - - if (wrapped_new_x._tag != "None") { - return wrapped_new_x; - } else { - return wrapped_old_x; - } - }; -} - - -export function tokenize(input: string): Array { - var input_matchee_pair: Maybe = toSome( - { - matched: "", - remained: input - }); - - /** - * generate a parser of a basic term (b_term) - * @param pattern : the pattern parser - * @param token_type : the returning token type - * @returns a wrapped parser. - */ - function bTerm(pattern: Function, token_type: TokenType) { - return (x: MatcheePair) => { - let wrapped_x = toSome(x); - let result = pattern(wrapped_x); - if (result._tag == "Some") { - result.value.matched_type = token_type; - } - return result; - } - } - - let d = matchRange('0', '9'); // \d - // [+-] - let plusMinus = orDo(match1Char('+'), match1Char('-')); - let s_aux = orDo(match1Char(' '), match1Char('\t')); // (" " | "\t") - - // integer = ([+]|[-])?\d\d* - let integer = bTerm((x: Maybe) => - thenDo(thenDo(thenDo(x, - zeroOrOnceDo(plusMinus)), d), - zeroOrMoreDo(d)), - TokenType.INT); - // space = [ \t]+ - let space = bTerm((x: Maybe) => - thenDo(thenDo(x, s_aux), zeroOrMoreDo(s_aux)), - TokenType.SP); - - // newline = \r?\n - let newline = bTerm((x: Maybe) => - thenDo(thenDo(x, - zeroOrOnceDo(match1Char('\r'))), - match1Char('\n')), - TokenType.NL); - - // [_A-Za-z] - let idHead = orDo(orDo(matchRange('a', 'z'), matchRange('A', 'Z')), match1Char('_')); - let idRemained = orDo(idHead, matchRange('0', '9')); // [_A-Za-z0-9] - - // id = [_A-Za-z][_A-Za-z0-9]* - let id = bTerm((x: Maybe) => - thenDo(thenDo(x, - idHead), - zeroOrMoreDo(idRemained)), - TokenType.ID); - let doublequote = match1Char("\""); - // [\\][\"] - let escapeReverseSlash = (x: MatcheePair) => - thenDo(thenDo(toSome(x), match1Char("\\")), doublequote); - // ([\\]["]|[^\"])* - let stringInnerPattern = zeroOrMoreDo( - orDo(escapeReverseSlash, notDo(match1Char("\"")))); - - // str = ["]([\\]["]|[^"])*["] - let str = bTerm((x: Maybe) => - thenDo(thenDo(thenDo(x, doublequote), - stringInnerPattern), doublequote), - TokenType.STR); - - // float = [+-]?\d+[.]\d+ - function floatPattern(x: Maybe) { - return thenDo(thenDo(thenDo(thenDo(thenDo(thenDo(x, - zeroOrOnceDo(plusMinus)), d), - zeroOrMoreDo(d)), - match1Char(".")), d), - zeroOrMoreDo(d)) - }; - let float = bTerm(floatPattern, TokenType.FLO); - - // operators - // +. - let floatAdd = bTerm((x: Maybe) => - thenDo(thenDo(x, match1Char("+")), match1Char(".")), - TokenType.F_ADD); - // +. - let floatSub = bTerm((x: Maybe) => - thenDo(thenDo(x, match1Char("-")), match1Char(".")), - TokenType.F_SUB); - - // *. 
- let floatMul = bTerm((x: Maybe) => - thenDo(thenDo(x, match1Char("*")), match1Char(".")), - TokenType.F_MUL); - - // /. - let floatDiv = bTerm((x: Maybe) => - thenDo(thenDo(x, match1Char("/")), match1Char(".")), - TokenType.F_DIV); - - // == - let eq = bTerm((x: Maybe) => - thenDo(thenDo(x, match1Char("=")), match1Char("=")), - TokenType.EQ); - - // >= - let ge = bTerm((x: Maybe) => - thenDo(thenDo(x, match1Char(">")), match1Char("=")), - TokenType.GE); - - // <= - let le = bTerm((x: Maybe) => - thenDo(thenDo(x, match1Char("<")), match1Char("=")), - TokenType.LE); - - // != - let ne = bTerm((x: Maybe) => - thenDo(thenDo(x, match1Char("!")), match1Char("=")), - TokenType.NE); - - // -> - let rightArrow = bTerm((x: Maybe) => - thenDo(thenDo(x, match1Char("-")), match1Char(">")), - TokenType.R_ARROW); - - - /** - * unary operator : generating the pattern of basic unary operator - * @param char : uniry char for the operator - * @param token_type : the corresponding token_type - */ - function unaryOp(char: string, token_type: TokenType) { - return bTerm((x: Maybe) => thenDo(x, match1Char(char)), - token_type); - }; - - let intAdd = unaryOp('+', TokenType.I_ADD); - let intSub = unaryOp('-', TokenType.I_SUB); - let intMul = unaryOp('*', TokenType.I_MUL); - let intDiv = unaryOp('/', TokenType.I_DIV); - let lParen = unaryOp('(', TokenType.L_PAREN); - let rParen = unaryOp(')', TokenType.R_PAREN); - let lBracket = unaryOp('[', TokenType.L_BRACK); - let rBracket = unaryOp(']', TokenType.R_BRACK); - let lBrace = unaryOp('{', TokenType.L_BRACE); - let rBrace = unaryOp('}', TokenType.R_BRACE); - let comma = unaryOp(',', TokenType.COMMA); - let dot = unaryOp('.', TokenType.DOT); - let colon = unaryOp(':', TokenType.COLON); - let semicolon = unaryOp(';', TokenType.SEMI_C); - let at = unaryOp('@', TokenType.AT); - let hash = unaryOp('#', TokenType.HASH); - let set = unaryOp('=', TokenType.SET); - let greaterthan = unaryOp('>', TokenType.GT); - let lessthan = unaryOp('<', TokenType.LE); - let apos = unaryOp('\'', TokenType.APOS); - - - - let term = (token_list: Array, x: Some) => { - var ln = 1; - var col = 0; - var old_x = x; - let term_list = [ - floatAdd, floatSub, floatMul, floatDiv, - intAdd, intSub, intMul, intDiv, - eq, ge, le, ne, rightArrow, - lParen, rParen, lBracket, rBracket, lBrace, rBrace, - comma, dot, colon, semicolon, at, hash, - set, greaterthan, lessthan, apos, - float, newline, space, id, integer, str]; - let term_aux = term_list.reduce((x, y) => orDo(x, y)); - - var new_x: Maybe = thenDo(old_x, term_aux); - while (new_x._tag != "None") { - if (new_x.value.matched_type != TokenType.NL) { - col += new_x.value.matched.length; - token_list.push({ - text: new_x.value.matched, - type: new_x.value.matched_type, - ln: ln, - col: col - }); - - } - else { - col = 0; - ln += 1; - - token_list.push({ - text: new_x.value.matched, - type: new_x.value.matched_type, - ln: ln, - col: col - }); - - } - - - old_x = toSome({ - matched: "", - remained: new_x.value.remained - }); - new_x = thenDo(old_x, term_aux); - } - - if (old_x.value.remained.length) { - console.log(token_list); - throw new Error("the code can't be tokenized is near Ln. " + ln + ", Col." + col - + ", starting with " + old_x.value.remained.substring(0, 10)); - } - - return token_list; - } - - return term([], input_matchee_pair); - - // TODO: id, string, space, basic operator, 3 marks: @, {, }. - -} - - -- 2.39.2
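
Usage sketch for the CLI this patch introduces (the build/run commands below are an assumption; the diff does not include a build step). After compiling the TypeScript sources with tsc so that a parser.js sits next to src/index.js, the converter described by the new help text ("clo: clo INPUT_FILE --output-js OUTPUT_JS_FILE") could be invoked on the bundled sample roughly like this:

    # convert the sample a.clo into the intermediate JS file a.js
    node src/index.js a.clo --output-js a.js

Passing --help, omitting the input file, or omitting the --output-js parameter makes the program print the help text instead of writing the output file.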