From a4f79a3761539f45ac7d86fe919bdc32cf290db0 Mon Sep 17 00:00:00 2001 From: Tan Kian-ting Date: Fri, 10 Nov 2023 01:09:44 +0800 Subject: [PATCH] add line-breaking algorithm initially --- README.md | 2 +- b.clo | 12 +---- src/libclo/breakLines.js | 95 ++++++++++++++++++++++++++++++++++ src/libclo/breakLines.ts | 109 +++++++++++++++++++++++++++++++++++++++ src/libclo/index.js | 50 ++++++++++++------ src/libclo/index.ts | 60 ++++++++++++++++----- 6 files changed, 289 insertions(+), 39 deletions(-) create mode 100644 src/libclo/breakLines.js create mode 100644 src/libclo/breakLines.ts diff --git a/README.md b/README.md index c87f530..5f70399 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ add cjk-english splitter, etc. - 20231029-30: hyphenating for english. - 20231105-06: 利用fontkit算文字的對應px寬度 initially pushed. - + - 20231109-10: Keng-kái oa̍h-chōa ián-sǹg-hoat. ## 之後的做法 - 先做一個前處理註冊器,註冊下列的前處理 diff --git a/b.clo b/b.clo index dc50abc..41a9b70 100644 --- a/b.clo +++ b/b.clo @@ -1,12 +1,2 @@ --- -The book of the generation of Jesus Christ, the son of David, the son of Abraham. - -Abraham begat Isaac; and Isaac begat Jacob; and Jacob begat Judas and his brethren; And Judas begat Phares and Zara of Thamar; and Phares begat Esrom; and Esrom begat Aram; And Aram begat Aminadab; and Aminadab begat Naasson; and Naasson begat Salmon; And Salmon begat Booz of Rachab; and Booz begat Obed of Ruth; and Obed begat Jesse; And Jesse begat David the king; - -and David the king begat Solomon of her that had been the wife of Urias; And Solomon begat Roboam; and Roboam begat Abia; and Abia begat Asa; And Asa begat Josaphat; and Josaphat begat Joram; and Joram begat Ozias; And Ozias begat Joatham; and Joatham begat Achaz; and Achaz begat Ezekias; And Ezekias begat Manasses; and Manasses begat Amon; and Amon begat Josias; And Josias begat Jechonias and his brethren, about the time they were carried away to Babylon: And after they were brought to Babylon, Jechonias begat Salathiel; and Salathiel begat Zorobabel; And Zorobabel begat Abiud; and Abiud begat Eliakim; and Eliakim begat Azor; And Azor begat Sadoc; and Sadoc begat Achim; and Achim begat Eliud; And Eliud begat Eleazar; and Eleazar begat Matthan; and Matthan begat Jacob; And Jacob begat Joseph the husband of Mary, of whom was born Jesus, who is called Christ. - -So all the generations from Abraham to David are fourteen generations; and from David until the carrying away into Babylon are fourteen generations; and from the carrying away into Babylon unto Christ are fourteen generations. - -Now the birth of Jesus Christ was on this wise: When as his mother Mary was espoused to Joseph, before they came together, she was found with child of the Holy Ghost. Then Joseph her husband, being a just man, and not willing to make her a publick example, was minded to put her away privily. But while he thought on these things, behold, the angel of the Lord appeared unto him in a dream, saying, Joseph, thou son of David, fear not to take unto thee Mary thy wife: for that which is conceived in her is of the Holy Ghost. And she shall bring forth a son, and thou shalt call his name JESUS: for he shall save his people from their sins. Now all this was done, that it might be fulfilled which was spoken of the Lord by the prophet, saying, Behold, a virgin shall be with child, and shall bring forth a son, and they shall call his name Emmanuel, which being interpreted is, God with us. - -Then Joseph being raised from sleep did as the angel of the Lord had bidden him, and took unto him his wife: And knew her not till she had brought forth her firstborn son: and he called his name JESUS. Now when Jesus was born in Bethlehem of Judaea in the days of Herod the king, behold, there came wise men from the east to Jerusalem, Saying, Where is he that is born King of the Jews? \ No newline at end of file +The book of the generation of Jesus Christ, the son of David, \ No newline at end of file diff --git a/src/libclo/breakLines.js b/src/libclo/breakLines.js new file mode 100644 index 0000000..27513f3 --- /dev/null +++ b/src/libclo/breakLines.js @@ -0,0 +1,95 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.totalCost = void 0; +/** + * Algorithms in LATEX language +TotalCost(i) = min_{j}~TotalCost(j) + LineCost(j, i)~~~~j=0, 1, ..., i-1 + +LineCost(j, i)= \begin{cases} +\infty ~~~ if~~LineWidth - \sum_{k=j+1}^{i-1} OrigWidth(item[k]) - newLineWidth(item[i]) < 0 \\ +\infty~~if~~NOT~~breakable(item[i]) \\ +(LineWidth - \sum_{k=j+1}^{i-1} OrigWidth(item[k]) - newLineWidth(item[i]))^3 ~~elsewhere +\end{cases} */ +/**check if a boeitem is BreakPoint Type */ +function isBreakPoint(item) { + return item.newLined !== undefined; +} +/**check if a boeitem is BreakPoint Type */ +function isHGlue(item) { + return item.stretchFactor !== undefined; +} +/** measuring original advance width */ +function origWidth(item) { + if (isBreakPoint(item)) { + console.log(item); + return origWidth(item.original); + } + else if (Array.isArray(item)) { + return item.map((x) => origWidth(x)) + .reduce((acc, current) => acc + current, 0.0); + } + else if (isHGlue(item)) { + return 0.0; + } + else { + return item.width; + } +} +/** measuring new-line triggered advance width */ +function newLineWidth(item) { + if (isBreakPoint(item)) { + return origWidth(item.newLined); + } + else { + // impossible to make a new line + return Infinity; + } +} +let lineCostStorage = new Object(); +/** + * check the total cost item[0..j]. + * @param items + * @param i + * @param lineWidth + */ +function totalCost(items, j, lineWidth) { + if (j in lineCostStorage) { + return lineCostStorage[j]; + } + var returnCost = Infinity; + for (var i = -1; i <= j; i++) { + // lineCost + let lCost = lineCost(items, i, j, lineWidth); + if (returnCost > lCost) { + returnCost = lCost; + } + } + lineCostStorage[j] = returnCost; + return returnCost; +} +exports.totalCost = totalCost; +/** + * check the line cost of a line containing items[i+1..j] + * @param items items of box + * @param i beginning (excluded) + * @param j end of the line + * @param lineWidth line width + */ +function lineCost(items, i, j, lineWidth) { + if (!isBreakPoint(items[j])) { + return Infinity; + } + else { + var tmpItemWidth = 0; + for (var k = i + 1; k < j; k++) { + tmpItemWidth += origWidth(items[k]); + } + tmpItemWidth += newLineWidth(items[j]); + if (tmpItemWidth > lineWidth) { + return Infinity; + } + else { + return (lineWidth - tmpItemWidth) ** 3.0; + } + } +} diff --git a/src/libclo/breakLines.ts b/src/libclo/breakLines.ts new file mode 100644 index 0000000..989ba93 --- /dev/null +++ b/src/libclo/breakLines.ts @@ -0,0 +1,109 @@ +/** + * Algorithms and functions for LineBreaking + */ +import { join } from "path"; +import {BreakPoint, BoxesItem, HGlue} from "./index.js"; +/** + * Algorithms in LATEX language +TotalCost(i) = min_{j}~TotalCost(j) + LineCost(j, i)~~~~j=0, 1, ..., i-1 + +LineCost(j, i)= \begin{cases} +\infty ~~~ if~~LineWidth - \sum_{k=j+1}^{i-1} OrigWidth(item[k]) - newLineWidth(item[i]) < 0 \\ +\infty~~if~~NOT~~breakable(item[i]) \\ +(LineWidth - \sum_{k=j+1}^{i-1} OrigWidth(item[k]) - newLineWidth(item[i]))^3 ~~elsewhere +\end{cases} */ + +/**check if a boeitem is BreakPoint Type */ +function isBreakPoint (item : any) : item is BreakPoint{ + return (item as BreakPoint).newLined !== undefined; +} + +/**check if a boeitem is BreakPoint Type */ +function isHGlue (item : any) : item is HGlue{ + return (item as HGlue).stretchFactor !== undefined; +} + + +/** measuring original advance width */ +function origWidth(item : BoxesItem) : number{ + if (isBreakPoint(item)){ + console.log(item); + return origWidth(item.original); + }else if(Array.isArray(item)){ + return item.map((x)=>origWidth(x)) + .reduce((acc, current) => acc + current, + 0.0,) + }else if(isHGlue(item)){ + return 0.0; + } + else{ + return item.width; + } +} + +/** measuring new-line triggered advance width */ +function newLineWidth(item : BoxesItem) : number{ + if (isBreakPoint(item)){ + return origWidth(item.newLined); + }else{ + // impossible to make a new line + return Infinity; + } +} + +let lineCostStorage : any = new Object(); + +/** + * check the total cost item[0..j]. + * @param items + * @param i + * @param lineWidth + */ +export function totalCost(items : BoxesItem[], j : number, lineWidth: number){ + if (j in lineCostStorage){ + return lineCostStorage[j]; + } + var returnCost = Infinity; + + for(var i=-1; i<=j; i++){ + // lineCost + let lCost = lineCost(items, i, j, lineWidth); + + if (returnCost > lCost){ + returnCost = lCost; + } + } + + lineCostStorage[j] = returnCost; + return returnCost; + +} + + + +/** + * check the line cost of a line containing items[i+1..j] + * @param items items of box + * @param i beginning (excluded) + * @param j end of the line + * @param lineWidth line width + */ +function lineCost(items : BoxesItem[], i : number, j : number, lineWidth: number){ + if (!isBreakPoint(items[j])){ + return Infinity; + }else{ + var tmpItemWidth = 0; + for (var k = i+1; k lineWidth){ + return Infinity; + }else{ + return (lineWidth - tmpItemWidth)**3.0; + } + } + +} \ No newline at end of file diff --git a/src/libclo/index.js b/src/libclo/index.js index 5cfaabe..927c0af 100644 --- a/src/libclo/index.js +++ b/src/libclo/index.js @@ -35,6 +35,8 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.Clo = exports.calculateTextWidthHeightAux = exports.calculateTextWidthHeight = exports.hyphenTkTree = exports.filterEmptyString = exports.spacesToBreakpoint = exports.hyphenForClo = exports.splitCJKV = exports.twoReturnsToNewline = exports.ptToPx = exports.cjkvRegexPattern = exports.cjkvBlocksInRegex = exports.defaultFrameStyle = exports.defaultTextStyle = exports.A4_IN_PX = exports.Direction = void 0; const canva_1 = require("../canva"); const fontkit = __importStar(require("fontkit")); +const util = __importStar(require("node:util")); +const breakLines = __importStar(require("./breakLines")); /** * TYPES */ @@ -177,7 +179,8 @@ function spacesToBreakpoint(arr, clo) { for (let i = 0; i < arr.length; i++) { var item = arr[i]; if (!Array.isArray(item) && item.match(spacePattern)) { - result.push(['bp', item, ""]); // push a newline command to the result `tkTree` + // push a breakpoint command to the result `tkTree` + result.push(['bp', [["hglue", "0.1"], item], ""]); } else { result.push(item); @@ -244,7 +247,7 @@ function calculateTextWidthHeight(element, style) { for (var i = 0; i < element.length; i++) { res.push(yield calculateTextWidthHeightAux(element[i], style)); } - console.log(res); + res = res.flat(); return res; }); } @@ -286,12 +289,26 @@ function calculateTextWidthHeightAux(element, style) { return result; } else if (element[0] == "bp") { - let beforeNewLine = yield calculateTextWidthHeightAux(element[1], style); + var beforeNewLine = yield calculateTextWidthHeightAux(element[1], style); + if (Array.isArray(beforeNewLine)) { + beforeNewLine = beforeNewLine.flat(); + } let afterNewLine = yield calculateTextWidthHeightAux(element[2], style); - return ["bp", beforeNewLine, afterNewLine]; + if (Array.isArray(afterNewLine)) { + afterNewLine = afterNewLine.flat(); + } + let breakPointNode = { + original: beforeNewLine, + newLined: afterNewLine, + }; + return breakPointNode; + } + else if (element[0] == "hglue" && !Array.isArray(element[1])) { + let hGlue = { stretchFactor: parseFloat(element[1]) }; + return hGlue; } else { - return calculateTextWidthHeight(element[1], style); + return calculateTextWidthHeight(element, style); } }); } @@ -334,16 +351,19 @@ class Clo { this.preprocessors.push(f); } generatePdf() { - // preprocessed - var preprocessed = this.mainStream; - for (var i = 0; i < this.preprocessors.length; i++) { - preprocessed = this.preprocessors[i](preprocessed, this); - } - // generate the width and height of the stream - let defaultFontStyle = this.attrs["defaultFrameStyle"].textStyle; - calculateTextWidthHeight(preprocessed, defaultFontStyle); - // TODO - console.log(preprocessed); + return __awaiter(this, void 0, void 0, function* () { + // preprocessed + var preprocessed = this.mainStream; + for (var i = 0; i < this.preprocessors.length; i++) { + preprocessed = this.preprocessors[i](preprocessed, this); + } + // generate the width and height of the stream + let defaultFontStyle = this.attrs["defaultFrameStyle"].textStyle; + let a = yield calculateTextWidthHeight(preprocessed, defaultFontStyle); + // TODO + console.log(util.inspect(a, true, 100)); + console.log(breakLines.totalCost(a, 3, 100)); + }); } } exports.Clo = Clo; diff --git a/src/libclo/index.ts b/src/libclo/index.ts index bb884fa..53b837c 100644 --- a/src/libclo/index.ts +++ b/src/libclo/index.ts @@ -3,6 +3,8 @@ import {tkTree} from "../parser"; import {FontStyle, TextStyle, TextWeight, fontStyleTofont} from "../canva"; import { JSDOM } from "jsdom"; import * as fontkit from "fontkit"; +import * as util from "node:util"; +import * as breakLines from "./breakLines"; /** * TYPES @@ -21,6 +23,21 @@ export enum Direction{ BTT, } +/** + * Horizonal glue. + * - stretchFactor : the stretch factor in float + */ +export interface HGlue{ + stretchFactor: number +} + +export interface BreakPoint{ + original : BoxesItem, + newLined : BoxesItem +} + +export type BoxesItem = HGlue | Box | BreakPoint | BoxesItem[] ; + /** * frame box is a subclass of box * - directionInsideLine : text direction inside a line @@ -197,7 +214,8 @@ export function spacesToBreakpoint(arr : tkTree, clo : Clo) : tkTree{ for (let i = 0; i < arr.length; i++){ var item = arr[i]; if (!Array.isArray(item) && item.match(spacePattern)){ - result.push([ 'bp', item, "" ]); // push a newline command to the result `tkTree` + // push a breakpoint command to the result `tkTree` + result.push([ 'bp', [["hglue", "0.1"], item] , "" ]); } else{ result.push(item); @@ -266,14 +284,14 @@ export function hyphenTkTree(arr : tkTree, lang: string) : tkTree{ * @param preprocessed * @param defaultFontStyle */ -export async function calculateTextWidthHeight(element : tkTree, style : TextStyle): Promise { +export async function calculateTextWidthHeight(element : tkTree, style : TextStyle): Promise { var res = []; for (var i=0; i { - var result : any = []; +export async function calculateTextWidthHeightAux(element : tkTree, style : TextStyle): Promise { + var result : BoxesItem = []; @@ -328,12 +346,29 @@ export async function calculateTextWidthHeightAux(element : tkTree, style : Text }else if(element[0] == "bp"){ - let beforeNewLine = await calculateTextWidthHeightAux(element[1], style); + + var beforeNewLine = await calculateTextWidthHeightAux(element[1], style); + if (Array.isArray(beforeNewLine)){ + beforeNewLine = beforeNewLine.flat(); + } + let afterNewLine = await calculateTextWidthHeightAux(element[2], style); + if (Array.isArray(afterNewLine)){ + afterNewLine = afterNewLine.flat(); + } - return ["bp", beforeNewLine, afterNewLine]; - }else{ - return calculateTextWidthHeight(element[1], style); + let breakPointNode : BreakPoint = { + original : beforeNewLine, + newLined : afterNewLine, + } + + return breakPointNode; + }else if(element[0] == "hglue" && !Array.isArray(element[1])){ + let hGlue : HGlue = {stretchFactor : parseFloat(element[1])} + return hGlue; + } + else{ + return calculateTextWidthHeight(element, style); } } @@ -392,7 +427,7 @@ export class Clo{ this.preprocessors.push(f); } - public generatePdf(){ + public async generatePdf(){ // preprocessed var preprocessed = this.mainStream; for (var i = 0; i