diff --git a/README.md b/README.md index 0208a8a..a623494 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,12 @@ Read, manipulate and write spreadsheet data and styles to XLSX and JSON. Reverse engineered from Excel spreadsheet files as a project. +# Special thanks + +This project is forked from exceljs, based on its last commit as of 2023-5-5 ([commitid](https://github.com/exceljs/exceljs/commit/ec92cb3b898bdf7f806ff9d7b8370c955ee8ba20)). The latest released version of exceljs is v4.3.0. Sincere thanks to all the developers of the exceljs project. + +@zurmokeeper/exceljs is compatible with exceljs v4.3.0 and earlier versions, so feel free to switch to it. + # Translations * [中文文档](README_zh.md) @@ -2254,6 +2260,16 @@ faster or more resilient. #### Reading XLSX[⬆](#contents) +The following options are supported when reading XLSX files. + +| Field | Required | Type | Description | +| ---------------- | ----------- | ----------- | ----------- | +| ignoreNodes | N | array | A list of node names to ignore while loading the XLSX document. Improves performance in some situations.
Available: `sheetPr`, `dimension`, `sheetViews`, `sheetFormatPr`, `cols`, `sheetData`, `autoFilter`, `mergeCells`, `rowBreaks`, `hyperlinks`, `pageMargins`, `dataValidations`, `pageSetup`, `headerFooter`, `printOptions`, `picture`, `drawing`, `sheetProtection`, `tableParts`, `conditionalFormatting`, `extLst`. | +| password | N | string | Password used to decrypt the file; maximum length is 255 characters. | +| base64 | N | boolean | Indicates that the input is a base64-encoded buffer. Only valid for the `load` method. | +| maxRows | N | number | TODO: description pending. | +| maxCols | N | number | TODO: description pending. | + ```javascript // read from a file const workbook = new Excel.Workbook(); @@ -2262,6 +2278,9 @@ await workbook.xlsx.readFile(filename); // read from a file, decrypt excel files encrypted with password const workbook = new Excel.Workbook(); await workbook.xlsx.readFile(filename, {password:'123456'}); +await workbook.xlsx.readFile(filename, { + ignoreNodes:['dataValidations'] // ignores the workbook's Data Validations +}); // ... use workbook @@ -2272,6 +2291,9 @@ await workbook.xlsx.read(stream); // read from a stream, decrypt excel files encrypted with password const workbook = new Excel.Workbook(); await workbook.xlsx.read(stream, {password:'123456'}); +await workbook.xlsx.read(stream, { + ignoreNodes:['dataValidations'] // ignores the workbook's Data Validations +}); // ... use workbook @@ -2282,6 +2304,9 @@ await workbook.xlsx.load(data); // load from buffer, decrypt excel files encrypted with password const workbook = new Excel.Workbook(); await workbook.xlsx.load(data, {password:'123456'}); +await workbook.xlsx.load(data, { + ignoreNodes:['dataValidations'] // ignores the workbook's Data Validations +}); // ... 
use workbook ``` diff --git a/README_zh.md b/README_zh.md index 27517de..689ccc8 100644 --- a/README_zh.md +++ b/README_zh.md @@ -8,6 +8,13 @@ 一个 Excel 电子表格文件逆向工程项目。 +# 特别鸣谢 + +此项目代码基于 exceljs 2023-5-5 的最后一次提交, +[commitid](https://github.com/exceljs/exceljs/commit/ec92cb3b898bdf7f806ff9d7b8370c955ee8ba20), exceljs的最新版本是V4.3.0,感谢exceljs项目所有的开发者。 + +@zurmokeeper/exceljs 兼容 exceljs V4.3.0和之前的版本,可以放心切换使用。 + # 安装 ```shell @@ -2144,6 +2151,16 @@ worksheet.unprotect(); #### 读 XLSX[⬆](#目录) +读取xlsx文件的可选参数Option如下: + +| Field | Required | Type | Description | +| ---------------- | ----------- | ----------- | ----------- | +| ignoreNodes | N | array | 读取xlsx文件时要忽略的xml节点数组,用于提高性能.
节点名有这些: `sheetPr`, `dimension`, `sheetViews`, `sheetFormatPr`, `cols`, `sheetData`, `autoFilter`, `mergeCells`, `rowBreaks`, `hyperlinks`, `pageMargins`, `dataValidations`, `pageSetup`, `headerFooter`, `printOptions`, `picture`, `drawing`, `sheetProtection`, `tableParts`, `conditionalFormatting`, `extLst` | +| password | N | string | 解密用的密码,最大长度是255个字符. | +| base64 | N | boolean | 传入的是否是base64编码的内容,只在load方法时有这个参数. | +| maxRows | N | number | TODO:. | +| maxCols | N | number | TODO:. | + ```javascript // 从文件读取 const workbook = new Excel.Workbook(); @@ -2152,6 +2169,9 @@ await workbook.xlsx.readFile(filename); // 从文件读取, 解密使用密码加密的excel文件 const workbook = new Excel.Workbook(); await workbook.xlsx.readFile(filename, {password:'123456'}); +await workbook.xlsx.readFile(filename, { + ignoreNodes:['dataValidations'] // 忽略工作簿的数据有效性验证节点 +}); // ... 使用 workbook @@ -2162,6 +2182,9 @@ await workbook.xlsx.read(stream); // 从流读取, 解密使用密码加密的excel文件 const workbook = new Excel.Workbook(); await workbook.xlsx.read(stream, {password:'123456'}); +await workbook.xlsx.read(stream, { + ignoreNodes:['dataValidations'] // 忽略工作簿的数据有效性验证节点 +}); // ... 使用 workbook @@ -2172,6 +2195,9 @@ await workbook.xlsx.load(data); // 从 buffer 加载, 解密使用密码加密的excel文件 const workbook = new Excel.Workbook(); await workbook.xlsx.load(data, {password:'123456'}); +await workbook.xlsx.load(data, { + ignoreNodes:['dataValidations'] // 忽略工作簿的数据有效性验证节点 +}); // ... 
使用 workbook ``` diff --git a/index.d.ts b/index.d.ts index 19ed627..04200ca 100644 --- a/index.d.ts +++ b/index.d.ts @@ -1614,6 +1614,23 @@ export interface XlsxReadOptions { * optional */ maxCols: number; + + /** + * @desc The list of XML node names to ignore while parsing an XLSX file + * optional + * + * Example: + * + * ignoreNodes: [ + * 'dataValidations' // ignores the workbook's Data Validations + * ], + * + * Available: `sheetPr`, `dimension`, `sheetViews `, `sheetFormatPr`, `cols `, + * `sheetData`, `autoFilter `, `mergeCells `, `rowBreaks`, `hyperlinks `, `pageMargins`, + * `dataValidations`, `pageSetup`, `headerFooter `, `printOptions `, `picture`, + * `drawing`, `sheetProtection`, `tableParts `, `conditionalFormatting`, `extLst` + */ + ignoreNodes: string[]; } export interface Xlsx { diff --git a/lib/xlsx/xform/sheet/worksheet-xform.js b/lib/xlsx/xform/sheet/worksheet-xform.js index 4195f6d..490f384 100644 --- a/lib/xlsx/xform/sheet/worksheet-xform.js +++ b/lib/xlsx/xform/sheet/worksheet-xform.js @@ -93,7 +93,8 @@ class WorkSheetXform extends BaseXform { constructor(options) { super(); - const {maxRows, maxCols} = options || {}; + const {maxRows, maxCols, ignoreNodes} = options || {}; + this.ignoreNodes = ignoreNodes || []; this.map = { sheetPr: new SheetPropertiesXform(), dimension: new DimensionXform(), @@ -377,8 +378,8 @@ class WorkSheetXform extends BaseXform { return true; } - this.parser = this.map[node.name]; - if (this.parser) { + if (this.map[node.name] && !this.ignoreNodes.includes(node.name)) { + this.parser = this.map[node.name]; this.parser.parseOpen(node); } return true; diff --git a/package.json b/package.json index e688837..86ebe1b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@zurmokeeper/exceljs", - "version": "4.4.5", + "version": "4.4.6", "description": "Excel Workbook Manager - Read and Write xlsx and csv Files.", "private": false, "license": "MIT", diff --git a/spec/integration/data/test-new-issue-22.xlsx 
b/spec/integration/data/test-new-issue-22.xlsx new file mode 100644 index 0000000..0136397 Binary files /dev/null and b/spec/integration/data/test-new-issue-22.xlsx differ diff --git a/spec/integration/issues/new/new-issue-22-add-ignore-nodes-func.spec.js b/spec/integration/issues/new/new-issue-22-add-ignore-nodes-func.spec.js new file mode 100644 index 0000000..fb6739c --- /dev/null +++ b/spec/integration/issues/new/new-issue-22-add-ignore-nodes-func.spec.js @@ -0,0 +1,39 @@ +const path = require('path'); +const fs = require('fs'); + +const ExcelJS = verquire('exceljs'); +const fileName = './spec/integration/data/test-new-issue-22.xlsx'; + +describe('github issues', () => { + describe('new issue 22 - Memory overload when unnecessary dataValidations apply', () => { + it('when using readFile', async () => { + const wb = new ExcelJS.Workbook(); + await wb.xlsx.readFile(fileName, { + ignoreNodes: ['dataValidations'], + }); + + expect(true).to.equal(true); + }); + + it('when loading an in memory buffer', async () => { + const filePath = path.join(process.cwd(), fileName); + const buffer = fs.readFileSync(filePath); + const wb = new ExcelJS.Workbook(); + await wb.xlsx.load(buffer, { + ignoreNodes: ['dataValidations'], + }); + + expect(true).to.equal(true); + }); + + it('when using read', async () => { + const wb = new ExcelJS.Workbook(); + const input = fs.createReadStream(fileName); + await wb.xlsx.read(input, { + ignoreNodes: ['dataValidations'], + }); + + expect(true).to.equal(true); + }); + }); +});