]> git.kianting.info Git - clo/blob - src/index.ts
add tokenize's function, add interface `Token`
[clo] / src / index.ts
1 var fs = require('fs');
2
/** The populated variant of `Maybe<T>`: carries a payload in `value`. */
export type Some<T> = {
    _tag: "Some";
    value: T;
};

/** The empty variant of `Maybe<T>`: carries nothing besides its tag. */
export type None = {
    _tag: "None";
};
5
6
/**
 * Box a value into the `Some` variant.
 * @param x the value to box.
 * @returns an object tagged `"Some"` whose `value` is `x`.
 */
function toSome<T>(x: T): Some<T> {
    const boxed: Some<T> = { _tag: "Some", value: x };
    return boxed;
}
/**
 * @description An optional value, in the spirit of Rust's `Some(a)` / `None`:
 * either a `Some<T>` holding a payload or a `None` holding nothing.
 *
 * @example
 * ```ts
 * let exam1 : Maybe<number> = { _tag: "Some", value: 12 };
 * let exam2 : Maybe<number> = { _tag: "None" };
 * ```
 */
export type Maybe<T> = None | Some<T>;
25
26
/**
 * @description
 * The state threaded through the matching functions: the text consumed so
 * far together with the text still waiting to be examined.
 */
export interface MatcheePair {
    /** the part that has already been matched. */
    matched: string;
    /** the part that will be tested on the next match attempt. */
    remained: string;
    /** (optional) the token type assigned to the matched part. */
    matched_type?: TokenType;
}
39
/**
 * The kinds of token.
 *
 * The members are numbered consecutively from 0 in declaration order.
 */
export enum TokenType {
    /** newline */
    NL = 0,
    /** half-width space and tab */
    SP = 1,
    /** identifier */
    ID = 2,
    /** string */
    STR = 3,
    /** operator or something like it */
    OP = 4,
    /** float number */
    FLO = 5,
    /** integer */
    INT = 6,
}
65
/**
 * A token produced by the tokenizer.
 */
export interface Token {
    /** the content text of the token. */
    text: string;
    /** (optional) the type of the token. */
    type?: TokenType;
    /** the column number. */
    col: number;
    /** the line number. */
    ln: number;
}
79
/**
 * @description
 * Builds a matcher for one specific character. The returned function looks
 * at the first char of `m.remained`: when it equals `c`, that char is moved
 * from `remained` to the end of `matched` and the new pair is wrapped in
 * `Some`; when `remained` is empty or starts with anything else, the result
 * is `None`.
 *
 * The pair returned on success contains only `matched` and `remained`.
 *
 * @param c : the char to be tested.
 * @returns a function yielding the updated `MatcheePair` in `Some(x)`, or `None`.
 */
export function match1Char(c: string): (m: MatcheePair) => Maybe<MatcheePair> {
    return (m: MatcheePair) => {
        // Nothing left to look at.
        if (m.remained.length === 0) {
            return { _tag: "None" };
        }

        const head = m.remained.charAt(0);
        if (head !== c) {
            return { _tag: "None" };
        }

        return {
            _tag: "Some",
            value: {
                matched: m.matched + head,
                remained: m.remained.substring(1),
            },
        };
    };
}
104
/**
 * Consumes one arbitrary character.
 *
 * @param m : the `MatcheePair` to be consumed.
 * @returns `None` when `m.remained` is empty; otherwise `Some` of a new pair
 * in which the first char of `remained` has been appended to `matched`.
 */
export function matchAny(m: MatcheePair): Maybe<MatcheePair> {
    if (m.remained.length < 1) {
        return { _tag: "None" };
    }

    const head = m.remained.charAt(0);
    const rest = m.remained.substring(1);
    return { _tag: "Some", value: { matched: m.matched + head, remained: rest } };
}
120
/**
 * @description
 * Builds a matcher for a character range. The returned function looks at the
 * first char of `m.remained`: when its code (as given by `charToCodepoint`)
 * lies between those of `l` and `u`, both ends included, the char is moved
 * from `remained` to the end of `matched` and the new pair is wrapped in
 * `Some`. Otherwise the result is `None`.
 *
 * @param l : lower bound char, 1-char string
 * @param u : upper bound char, 1-char string
 * @returns a function yielding the updated `MatcheePair` in `Some(x)`, or `None`.
 * @throws Error when `l > u`, or when `charToCodepoint` rejects `l` or `u`.
 */
export function matchRange(l: string, u: string): (m: MatcheePair) => Maybe<MatcheePair> {
    // Both bounds are converted up front so an invalid bound is reported
    // while the matcher is being built, not when it is first used.
    const lowerCode = charToCodepoint(l);
    const upperCode = charToCodepoint(u);
    if (l > u) {
        throw new Error("Error: the codepoint of `"+l+"` is not smaller than `"+u+"`)");
    }

    return (m: MatcheePair) => {
        if (m.remained.length < 1) {
            return { _tag: "None" };
        }

        const head = m.remained.charAt(0);
        const headCode = charToCodepoint(head);
        const inRange = lowerCode <= headCode && headCode <= upperCode;
        if (!inRange) {
            return { _tag: "None" };
        }

        return {
            _tag: "Some",
            value: {
                matched: m.matched + head,
                remained: m.remained.substring(1),
            },
        };
    };
}
152
/**
 * Convert a one-char string to its character code.
 *
 * The value is obtained with `charCodeAt(0)`, i.e. it is the UTF-16 code
 * unit of the single char in `s`.
 *
 * @param s : the string to convert; its length must be exactly 1.
 * @returns the character code of `s`.
 * @throws Error when `s.length` is not 1. This includes the empty string,
 * for which `charCodeAt(0)` would otherwise yield `NaN`.
 */
export function charToCodepoint(s: string): number {
    if (s.length !== 1) {
        throw new Error("Error: the length of input string for `" + s + "` is "
            + s.length + ", however, it should be 1.");
    }
    return s.charCodeAt(0);
}
166
/**
 * @description Chains a step onto an optional value, like
 * `input ==> f`.
 *
 * A `None` input is passed through untouched and `f` is not called; for a
 * `Some` input, `f` is called with the unwrapped value and whatever it
 * returns becomes the result.
 *
 * @param input: the wrapped input.
 * @param f: the function to be applied to the unwrapped value.
 *
 * @returns: `input` itself when it is `None`, otherwise the result of `f`.
 */
export function thenDo<T>(input: Maybe<T>, f: Function): Maybe<T> {
    return input._tag === "None" ? input : f(input.value);
}
184
/**
 * @description Alternation, like the regex `( f1 | f2 )`.
 *
 * Produces a function `F`. `F(x)` first evaluates `f1(x)`; if that is not
 * `None` it is the result. Only when `f1(x)` is `None` is `f2(x)` evaluated
 * and returned.
 *
 * @param f1 : the alternative tried first
 * @param f2 : the fallback alternative
 * @returns: the combined function
 */
export function orDo<T>(f1: Function, f2: Function): (x: T) => Maybe<T> {
    return (x) => {
        const firstTry: Maybe<T> = f1(x);
        return firstTry._tag === "None" ? f2(x) : firstTry;
    };
}
207
208
/**
 * @description Repetition, like the asterisk in the regex `f*`: applies the
 * matching function `f` again and again until it yields `None`.
 *
 * The combined function never returns `None`: if `f` fails on the very first
 * attempt, the input comes back wrapped in `Some` unchanged. The loop stops
 * only once `f` yields `None`.
 *
 * @param f : the function to be repeated 0+ times.
 * @returns: the combined function
 */
export function zeroOrMoreDo<T>(f: Function): (x: T) => Maybe<T> {
    return (x) => {
        // Most recent successful state; starts out as the untouched input.
        let lastGood: Maybe<T> = { _tag: "Some", value: x };

        for (;;) {
            const attempt = thenDo(lastGood, f);
            if (attempt._tag === "None") {
                return lastGood;
            }
            lastGood = attempt;
        }
    };
}
228
/**
 * @description Negation, like the `^` inside the regex `[^f]`.
 *
 * Produces a function `F`. If `f(x)` is anything other than `None`, `F(x)`
 * is `None`. If `f(x)` is `None`, `F(x)` is the result of applying
 * `matchAny` to `x`, i.e. `x` with one char consumed.
 *
 * @param f: the function forbidden to be matched.
 * @returns: combined function `F`.
 */
export function notDo<T>(f: Function): (x: T) => Maybe<T> {
    return (x) => {
        const boxed: Maybe<T> = { _tag: "Some", value: x };
        const forbidden = thenDo(boxed, f);

        // `f` failing is the success case here: consume one char instead.
        return forbidden._tag === "None"
            ? thenDo(boxed, matchAny)
            : { _tag: "None" };
    };
}
252
/**
 * Optional match, similar to `?` in the regex `f?`.
 *
 * The combined function tries `f` on `x` once. When `f` succeeds its result
 * is returned; when `f` yields `None`, `x` is returned wrapped in `Some`
 * unchanged. The combined function therefore never returns `None`.
 *
 * @param f : the function to be matched
 * @returns the combined function
 */
export function zeroOrOnceDo<T>(f: Function): (x: T) => Maybe<T> {
    return (x) => {
        const untouched: Maybe<T> = { _tag: "Some", value: x };
        const attempt = thenDo(untouched, f);
        return attempt._tag === "None" ? untouched : attempt;
    };
}
272
273
/**
 * Splits `input` into tokens, prints the resulting list with `console.log`
 * and returns it.
 *
 * Three kinds of token are currently recognised, tried in this order at
 * every position:
 *  - newline : `\r?\n`            (`TokenType.NL`)
 *  - space   : `( |\t)( |\t)*`    (`TokenType.SP`)
 *  - integer : `([+]|[-])?\d\d*`  (`TokenType.INT`)
 *
 * Position bookkeeping: `ln` starts at 1 and `col` at 0. Both are updated
 * BEFORE a token is pushed, so the `ln` / `col` stored in a token describe
 * the position just after it: `col` grows by the token length for non-newline
 * tokens, and a newline token resets `col` to 0 and increments `ln`.
 * NOTE(review): confirm whether start positions were intended instead.
 *
 * @param input : the source text to be tokenized.
 * @returns the list of tokens, in input order.
 * @throws Error when a non-empty rest of `input` matches none of the token
 * kinds; the tokens collected so far are logged first.
 */
export function tokenize(input: string): Array<Token> {
    const input_matchee_pair: Some<MatcheePair> = toSome<MatcheePair>({
        matched: "",
        remained: input,
    });

    // Stamps a successful match with its token type; `None` passes through.
    const tagWith = (result: Maybe<MatcheePair>, type: TokenType): Maybe<MatcheePair> => {
        if (result._tag === "Some") {
            result.value.matched_type = type;
        }
        return result;
    };

    // The elementary matchers do not depend on the input, so they are built
    // once here instead of on every match attempt.
    const plusMinus = orDo(match1Char('+'), match1Char('-')); // ([+]|[-])
    const d = matchRange('0', '9'); // \d
    const s_aux = orDo(match1Char(' '), match1Char('\t')); // (" " | "\t")
    const optionalSign = zeroOrOnceDo(plusMinus);
    const moreDigits = zeroOrMoreDo(d);
    const moreSpaces = zeroOrMoreDo(s_aux);
    const optionalCr = zeroOrOnceDo(match1Char('\r'));
    const lf = match1Char('\n');

    // integer = ([+]|[-])?\d\d*
    const integer = (x: MatcheePair): Maybe<MatcheePair> =>
        tagWith(
            thenDo(thenDo(thenDo(toSome(x), optionalSign), d), moreDigits),
            TokenType.INT);

    // space = (" " | "\t")(" " | "\t")*
    const space = (x: MatcheePair): Maybe<MatcheePair> =>
        tagWith(thenDo(thenDo(toSome(x), s_aux), moreSpaces), TokenType.SP);

    // nl = \r?\n
    const newline = (x: MatcheePair): Maybe<MatcheePair> =>
        tagWith(thenDo(thenDo(toSome(x), optionalCr), lf), TokenType.NL);

    // Repeatedly matches one term at the front of the remaining text and
    // appends it to `token_list` until nothing matches any more.
    const term = (token_list: Array<Token>, x: Some<MatcheePair>): Array<Token> => {
        let ln = 1;
        let col = 0;
        let old_x: Some<MatcheePair> = x;
        const term_aux = [newline, space, integer].reduce((f, g) => orDo(f, g));

        let new_x: Maybe<MatcheePair> = thenDo(old_x, term_aux);
        while (new_x._tag !== "None") {
            if (new_x.value.matched_type != TokenType.NL) {
                col += new_x.value.matched.length;
            } else {
                col = 0;
                ln += 1;
            }

            token_list.push({text : new_x.value.matched,
                type: new_x.value.matched_type,
                ln : ln,
                col : col});

            // Start the next attempt with an empty `matched` so every token
            // holds only its own text.
            old_x = toSome<MatcheePair>({
                matched: "",
                remained: new_x.value.remained,
            });
            new_x = thenDo(old_x, term_aux);
        }

        // Leftover text means no token kind matched at this position.
        if (old_x.value.remained.length) {
            console.log(token_list);
            throw new Error("the code can't be tokenized is near Ln. "+ln+", Col."+col
                +", starting with "+ old_x.value.remained.substring(0,10));
        }

        return token_list;
    };

    const tokens = term([], input_matchee_pair);
    console.log(tokens);

    // TODO: id, string, space, basic operator, 3 marks: @, {, }.

    return tokens;
}
361
362