From: Tan Kian-ting Date: Thu, 14 Sep 2023 16:34:20 +0000 (+0800) Subject: add some tokenizer function X-Git-Url: https://git.kianting.info/?a=commitdiff_plain;h=ec563ca30f683cf8dcfd825c697913128f2a5445;p=uann add some tokenizer function --- diff --git a/src/index.js b/src/index.js index e18ebd5..b73463e 100644 --- a/src/index.js +++ b/src/index.js @@ -48,13 +48,14 @@ function m1TType(typ) { * */ const ttbm = m.remained[0]; if (ttbm.type == typ) { - m.matched.push(ttbm); - return { + let new_matched = m.matched.concat(ttbm); + let result = { _tag: "Some", value: { - matched: m.matched, + matched: new_matched, remained: m.remained.slice(1) } }; + return result; } else { return { _tag: "None" }; @@ -65,21 +66,29 @@ exports.m1TType = m1TType; ; let toSome = tk.toSome; let thenDo = tk.thenDo; +let zeroOrOnceDo = tk.zeroOrOnceDo; let orDo = tk.orDo; +let zeroOrMoreDo = tk.zeroOrMoreDo; node_process_1.argv.forEach((val, index) => { console.log(`${index}=${val}`); }); -let commandInput = node_process_1.argv[2]; +let commandInput = "int a str b"; //argv[2]; let commandInputTokenized = tk.tokenize(commandInput); -console.log(commandInputTokenized); +let commandInputTokenizedFiltered = commandInputTokenized.filter((x) => { + return x.type != tk.TokenType.SP && + x.type != tk.TokenType.NL; +}); +console.log("aaa: " + util.inspect(commandInputTokenizedFiltered, { showHidden: true, depth: null })); /** * matchee pair of commandInputTokenized */ let commandTPair = { matched: [], - remained: commandInputTokenized }; + remained: commandInputTokenizedFiltered }; let tInt = m1TType(tk.TokenType.INT); let tFlo = m1TType(tk.TokenType.FLO); let tStr = m1TType(tk.TokenType.STR); +let tId = m1TType(tk.TokenType.ID); +let tApos = m1TType(tk.TokenType.APOS); function tBool(x) { let text = x.remained[0].text; if (text == "true" || text == "false") { @@ -99,21 +108,89 @@ function tBool(x) { */ function gramRHS(process, arrange) { return (m) => { - let result = process(m); - console.log(`result ${result}`); - if (result._tag == "None") { - return result; + let middle = process(m); + console.log("Middle" + util.inspect(middle, { showHidden: true, depth: null })); + if (middle._tag == "None") { + return middle; } else { - let matched = result.value.matched; - let return_array = Array(arrange.length); + let matched = middle.value.matched; + let arrLength = arrange.length; + let returnRrray = Array(arrange.length); arrange.forEach((val, index) => { - return_array[arrange[index]] = matched[index]; + returnRrray[arrange[index]] = matched[index]; }); - return return_array; + let matchedTmp1Length = matched.length - arrLength; + console.log(matchedTmp1Length); + var matchedTmp1 = matched + .slice(0, matchedTmp1Length); + console.log("matchedTmp1" + util.inspect(matchedTmp1, { showHidden: true, depth: null })); + console.log("returnRrray" + util.inspect(returnRrray, { showHidden: true, depth: null })); + matchedTmp1.push(returnRrray); + let result = { _tag: "Some", + value: { matched: matchedTmp1, + remained: middle.value.remained } }; + return result; } }; } +/** + * typeABS ::= "'" ID + */ +var typeABS = (x) => { + var result = thenDo(thenDo(toSome(x), tApos), tId); + if (result._tag == "Some" && "text" in result.value.matched[1]) { + var realToken = result.value.matched[1]; + realToken.text = "'" + realToken.text; + result.value.matched = [realToken]; + } + return result; +}; +/** + * TypeId ::= typeABS | ID + */ +var typeName = (x) => { + return thenDo(toSome(x), orDo(typeABS, tId)); +}; +/** + * CONST ::= INT | STR | FLO | BOOL + */ +/** + * TODO: 要用 debugger 檢查分析問題 + */ var constParser = gramRHS((x) => { return thenDo(toSome(x), orDo(orDo(orDo(tInt, tFlo), tStr), tBool)); }, [0]); -let tree = constParser(commandTPair); -console.log(util.inspect(tree, { showHidden: true, depth: null })); +/** + * storing the tree + */ +var astTree = []; +/** + * TYPE_PAIR ::= TYP_ID ID + */ +var typePair = (x) => { + let a = thenDo(thenDo(x.maybeTokens, typeName), tId); + if (a._tag == "Some") { + let matched = a.value.matched; + let slice = matched.slice(matched.length - 2); + console.log("slice" + slice); + let b = { maybeTokens: a, ast: slice }; + return b; + } + else { + let b = { maybeTokens: a, ast: [] }; + return b; + } +}; +/** + * function's arguments + * FN_ARGS = TYPE_PAIR ("," TYPE_PAIR)+ + */ +var fnArgs = (x) => { + let wrapper = { maybeTokens: toSome(x), ast: [] }; + let a = typePair(wrapper); + console.log("AAAAA" + util.inspect(a, { showHidden: true, depth: null })); + let abanibi = typePair(a); + console.log("ABNB" + util.inspect(abanibi, { showHidden: true, depth: null })); + return { maybeTokens: abanibi.maybeTokens, ast: [a.ast, abanibi.ast] }; +}; +let tree = fnArgs(commandTPair); +console.log("CHRANN" + util.inspect(tree, { showHidden: true, depth: null })); diff --git a/src/index.ts b/src/index.ts index a8d103b..48d2c86 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,18 +1,24 @@ var fs = require('fs'); -import { argv } from 'node:process'; +import { argv, resourceUsage } from 'node:process'; import * as tk from './tokenize.js'; import * as util from 'util'; +import { reduceRotation } from 'pdf-lib'; /** * token tree type. */ -type tkTree = tk.Token[] | tk.Token +type tkTree = tkTree[] | tk.Token export interface TokenMatcheePair { matched: tkTree[] remained: tk.Token[] } +export interface MaybeTokensAST{ + maybeTokens: tk.Maybe; + ast: tkTree; +} + /** * @description * match one token type. @@ -35,13 +41,14 @@ export function m1TType(typ: tk.TokenType): const ttbm = m.remained[0]; if (ttbm.type == typ) { - m.matched.push(ttbm); - return { + let new_matched = m.matched.concat(ttbm); + let result : tk.Some = { _tag: "Some", value: { - matched: m.matched, + matched: new_matched, remained: m.remained.slice(1) } }; + return result; } else { return { _tag: "None" }; @@ -51,27 +58,36 @@ export function m1TType(typ: tk.TokenType): let toSome = tk.toSome; let thenDo = tk.thenDo; +let zeroOrOnceDo = tk.zeroOrOnceDo; let orDo = tk.orDo; +let zeroOrMoreDo = tk.zeroOrMoreDo; argv.forEach((val, index) => { console.log(`${index}=${val}`); }); -let commandInput = argv[2]; +let commandInput = "int a str b"//argv[2]; let commandInputTokenized = tk.tokenize(commandInput); -console.log(commandInputTokenized); +let commandInputTokenizedFiltered = commandInputTokenized.filter( + (x : tk.Token)=>{return x.type != tk.TokenType.SP && + x.type != tk.TokenType.NL}); +console.log("aaa: "+util.inspect(commandInputTokenizedFiltered, { showHidden: true, depth: null })); /** * matchee pair of commandInputTokenized */ let commandTPair : TokenMatcheePair = {matched:[], - remained: commandInputTokenized}; + remained: commandInputTokenizedFiltered}; let tInt = m1TType(tk.TokenType.INT); let tFlo = m1TType(tk.TokenType.FLO); let tStr = m1TType(tk.TokenType.STR); +let tId = m1TType(tk.TokenType.ID); +let tApos = m1TType(tk.TokenType.APOS); + + function tBool (x : TokenMatcheePair) :tk.Maybe { let text = x.remained[0].text if (text == "true" || text == "false"){ @@ -92,29 +108,116 @@ function tBool (x : TokenMatcheePair) :tk.Maybe { function gramRHS (process: Function, arrange : number[]){ return (m : TokenMatcheePair)=>{ - let result : tk.Maybe = process(m); - console.log(`result ${result}`) - if (result._tag == "None"){ - return result; + let middle : tk.Maybe = process(m); + + console.log("Middle"+util.inspect(middle, { showHidden: true, depth: null })); + + if (middle._tag == "None"){ + return middle; } else{ - let matched = result.value.matched; - let return_array : tkTree[] = Array(arrange.length); + let matched = middle.value.matched; + let arrLength = arrange.length; + let returnRrray : tkTree[] = Array(arrange.length); arrange.forEach((val, index) => { - return_array[arrange[index]] = matched[index]; + returnRrray[arrange[index]] = matched[index]; }); - return return_array; + let matchedTmp1Length = matched.length-arrLength; + console.log(matchedTmp1Length); + var matchedTmp1 : tkTree[] = matched + .slice(0,matchedTmp1Length); + + console.log("matchedTmp1"+util.inspect(matchedTmp1, { showHidden: true, depth: null })); + console.log("returnRrray"+util.inspect(returnRrray, { showHidden: true, depth: null })); + matchedTmp1.push(returnRrray); + + + let result : tk.Some = {_tag:"Some", + value : {matched : matchedTmp1, + remained : middle.value.remained}}; + return result; } } } +/** + * typeABS ::= "'" ID + */ +var typeABS = (x : TokenMatcheePair)=> +{ + var result = thenDo(thenDo(toSome(x),tApos),tId); + if (result._tag == "Some" && "text" in result.value.matched[1]){ + var realToken : tk.Token = result.value.matched[1]; + realToken.text = "'"+realToken.text; + result.value.matched = [realToken]; + } + return result; +} + +/** + * TypeId ::= typeABS | ID + */ +var typeName = (x : TokenMatcheePair)=> +{ + return thenDo(toSome(x), orDo(typeABS, tId)); +} + /** * CONST ::= INT | STR | FLO | BOOL */ + +/** + * TODO: 要用 debugger 檢查分析問題 + */ var constParser = gramRHS((x : TokenMatcheePair)=> {return thenDo(toSome(x),orDo(orDo(orDo(tInt,tFlo),tStr),tBool))}, [0]); -let tree = constParser(commandTPair); -console.log(util.inspect(tree, { showHidden: true, depth: null })); +/** + * storing the tree + */ +var astTree : tkTree = []; + +/** + * TYPE_PAIR ::= TYP_ID ID + */ +var typePair = (x : MaybeTokensAST)=> +{ + + + let a = thenDo(thenDo(x.maybeTokens, typeName), tId); + if (a._tag == "Some"){ + let matched = a.value.matched; + let slice = matched.slice(matched.length-2); + console.log("slice"+slice); + + let b : MaybeTokensAST = {maybeTokens : a, ast : slice}; + return b; + } + else{ + let b : MaybeTokensAST= {maybeTokens : a, ast : []}; + return b; + } +} + +/** + * function's arguments + * FN_ARGS = TYPE_PAIR ("," TYPE_PAIR)+ + */ + +var fnArgs = (x : TokenMatcheePair)=> + { + let wrapper : MaybeTokensAST = {maybeTokens : toSome(x), ast : []}; + let a = typePair(wrapper); + console.log("AAAAA"+util.inspect(a, { showHidden: true, depth: null })); + let abanibi = typePair(a); + console.log("ABNB"+util.inspect(abanibi, { showHidden: true, depth: null })); + + + return {maybeTokens : abanibi.maybeTokens, ast : [a.ast, abanibi.ast]}; + + }; + +let tree = fnArgs(commandTPair); +console.log("CHRANN"+util.inspect(tree, { showHidden: true, depth: null })); diff --git a/src/tokenize.ts b/src/tokenize.ts index e597a9e..6fa22f4 100644 --- a/src/tokenize.ts +++ b/src/tokenize.ts @@ -1,3 +1,4 @@ +import * as util from 'util'; var fs = require('fs'); @@ -205,7 +206,7 @@ export function matchRange(l: string, u: string): (m: MatcheePair) => Maybe Maybe { +export function matchWord(s: string, ): (m: MatcheePair) => Maybe { return (m)=>{ if (s.length==0){ return { _tag: "None" }; @@ -377,7 +378,7 @@ export function tokenize(input: string): Array { // space = [ \t]+ let space = bTerm((x: Maybe) => thenDo(thenDo(x, s_aux), zeroOrMoreDo(s_aux)), - TokenType.INT); + TokenType.SP); // newline = \r?\n let newline = bTerm((x: Maybe) =>