Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- const { createReadStream } = require("fs");
- const { Parser } = require("htmlparser2");
- const { pipeline, Transform } = require("stream");
- const { isArray, last } = require("lodash");
- let i = 0; // Number of objects emitted by the XML stream so far; bumped by the "data" listener in foo() and printed once foo() settles.
/**
 * Transform stream that feeds incoming text to an htmlparser2 `Parser` and
 * emits one plain object for every element named `tagName` that opens at
 * nesting depth `tagDepth` (the outermost element is at depth 0).
 *
 * Shape of an emitted object (and, recursively, of each descendant):
 *   - `_attributes`: the attribute map received from the parser;
 *   - `_text`: every piece of text reported directly inside the element,
 *     concatenated in arrival order (absent when no text was reported);
 *   - one own property per child tag name, holding the child's object, or an
 *     array of objects when that name occurs more than once.
 *
 * A child tag literally named `_attributes` or `_text` shares its key with
 * the metadata above; that collision is not resolved here.
 */
class XML extends Transform {
  /**
   * @constructor
   * @param {string} tagName Target tag name
   * @param {number} tagDepth Target tag depth
   * @param {object} options All transform stream options. `objectMode` is
   *   always forced to `true`. An optional `parserOptions` object is merged
   *   over the default `{ decodeEntities: true }` and handed to the parser
   *   instead of the stream.
   */
  constructor(tagName, tagDepth, options = {}) {
    const { parserOptions, ...streamOptions } = options;
    super({
      ...streamOptions,
      objectMode: true
    });

    // Streaming decoder: keeps the bytes of a character that is split across
    // two Buffer chunks until the rest arrives. `ignoreBOM: true` leaves a
    // leading BOM in the text, as the previous per-chunk `toString` did.
    this._decoder = new TextDecoder("utf-8", { ignoreBOM: true });

    let depth = 0;
    // Open elements from the matched target down to the current element;
    // undefined while the parser is outside a target element.
    let stack;

    // Registers `child` on `parent` under `name`. Only OWN properties count
    // as earlier siblings, and the value is written with defineProperty, so
    // tag names such as "constructor" or "__proto__" behave like any other.
    const attachChild = (parent, name, child) => {
      const hasSibling = Object.prototype.hasOwnProperty.call(parent, name);
      const existing = hasSibling ? parent[name] : undefined;
      if (Array.isArray(existing)) {
        existing.push(child);
        return;
      }
      Object.defineProperty(parent, name, {
        value: hasSibling ? [existing, child] : child,
        writable: true,
        enumerable: true,
        configurable: true
      });
    };

    // NOTE(review): with the default options the parser is not told to use
    // XML rules; for real XML input callers probably want
    // `parserOptions: { xmlMode: true }` — confirm against htmlparser2 docs.
    this._parser = new Parser(
      {
        onopentag: (name, attrs) => {
          if (!stack && name === tagName && depth === tagDepth) {
            stack = [];
          }
          if (stack) {
            const parent = stack[stack.length - 1];
            const child = { _attributes: attrs };
            if (parent) {
              attachChild(parent, name, child);
            }
            stack.push(child);
          }
          depth++;
        },
        ontext: text => {
          if (stack) {
            // Text can arrive in several pieces; append rather than overwrite
            // so earlier pieces are not lost.
            const node = stack[stack.length - 1];
            node._text = node._text === undefined ? text : node._text + text;
          }
        },
        onclosetag: () => {
          if (stack) {
            const node = stack.pop();
            if (stack.length <= 0) {
              // The target element itself just closed: emit it.
              stack = undefined;
              this.push(node);
            }
          }
          depth--;
        },
        onend: () => {
          // `_close` is the callback stored by `_final`.
          this._close();
        },
        onerror: err => {
          // destroy(err) is the supported way to fail a stream; it still
          // results in an "error" event for listeners.
          this.destroy(err);
        }
      },
      { decodeEntities: true, ...parserOptions }
    );
  }

  /**
   * Decodes one chunk to text and feeds it to the parser.
   * Anything thrown while parsing is reported through `callback`.
   */
  _transform(chunk, encoding, callback) {
    try {
      let text;
      if (typeof chunk === "string") {
        text = chunk;
      } else if (ArrayBuffer.isView(chunk)) {
        text = this._decoder.decode(chunk, { stream: true });
      } else {
        text = chunk.toString("utf8");
      }
      this._parser.write(text);
    } catch (err) {
      callback(err);
      return;
    }
    callback();
  }

  /**
   * Flushes the decoder, then ends the parser. The parser's `onend` handler
   * invokes `callback` (stored as `_close`) to complete the stream.
   */
  _final(callback) {
    this._close = callback;
    // Bytes of an unfinished trailing character, if any, are flushed here.
    const tail = this._decoder.decode();
    if (tail) {
      this._parser.write(tail);
    }
    this._parser.end();
  }
}
- const file = "big.xml"; // Path of the document that foo() opens with createReadStream().
- const encoding = "utf8"; // Encoding option handed to createReadStream() in foo().
/**
 * Streams `file` through an `XML` transform that targets "page" elements at
 * depth 1, incrementing the module-level counter `i` once per emitted object.
 *
 * @returns {Promise<void>} Resolves when the pipeline completes without an
 *   error; rejects with the error reported by the pipeline otherwise.
 */
async function foo() {
  await new Promise((resolve, reject) => {
    const source = createReadStream(file, { encoding });
    const pages = new XML("page", 1);

    const onFinished = err => {
      if (err) {
        reject(err);
      } else {
        resolve();
      }
    };

    // pipeline() hands back the last stream; listening for "data" on it
    // consumes the emitted objects so they can be counted.
    const output = pipeline(source, pages, onFinished);
    output.on("data", () => i++);
  });
}
// Driver: print the start time, run foo(), then print the number of emitted
// objects and the end time. A failure is logged and the summary still prints.
console.log(new Date());
(async () => {
  try {
    await foo();
  } catch (err) {
    console.error(err);
  }
  console.log(i);
  console.log(new Date());
})();
Add Comment
Please, Sign In to add comment