Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- const { workerData, parentPort } = require('worker_threads');
- const fs = require('fs');
- const iconv = require('iconv-lite');
- const util = require('util');
- const md5 = require('md5');
- const async = require('async');
- const parser = require('xml2js').parseString;
// Shared knex instance for the PostgreSQL database this worker writes to.
// The previous literals remain the defaults, but each value can now be
// overridden through the standard PostgreSQL environment variables so that
// real credentials do not have to be committed with the source.
const knex = require('knex')({
  client: 'pg',
  connection: {
    host: process.env.PGHOST || '127.0.0.1',
    user: process.env.PGUSER || 'homestead',
    password: process.env.PGPASSWORD || 'secret',
    database: process.env.PGDATABASE || 'egr',
  },
});
- const MAPPING = require('../../assets/mapping/egrip_4.04');
/** Concurrency limit consulted by asyncParseFiles before it starts another file. */
const MAX_THREADS = 200;

/** Field names importObject looks for when preparing a record for hashing. */
const DUPE_FIELDS = [
  'ogrnip', 'ogrnip_data',
  'egrnip', 'egrnip_data',
  'egrnip_correct', 'egrnip_data_correct'
];

/** Mapping keys of the child records that are built and imported for each person. */
const PERSON_OBJECTS = ['card', 'registration', 'location', 'email'];

/** Mapping keys of the single-valued child records built and imported for each organization. */
const ORGANIZATION_OBJECTS = [
  'organization_register',
  'info_farm_status', 'info_termination_activities',
  'info_tax_organ', 'info_pension_fund',
  'social_security_fund', 'info_economic_activity'
];
// Worker entry point: process the list of file names handed over through
// workerData. The per-file callback currently does nothing.
asyncParseFiles(workerData, () => {});
/**
 * Reads, decodes (cp1251), parses and imports every file in `filesList`,
 * keeping at most MAX_THREADS files in flight at any time.
 *
 * A file counts as finished only after its database import has settled; the
 * parent thread is notified through `parentPort.postMessage(fileName)` at that
 * point. A file that cannot be read, parsed or converted is logged and
 * skipped (no message is posted for it) and processing continues.
 *
 * @param {string[]} filesList - Paths of the files to process.
 * @param {function(string, ?Error): void} cb - Called once per file when that
 *   file is finished, with the file name and the error (or null on success).
 */
function asyncParseFiles(filesList, cb) {
  let active = 0;
  let nextIndex = 0;

  // Start only as many pipelines as may run at once; each one pulls the next
  // pending file when its current file is finished.
  const starters = Math.min(filesList.length, MAX_THREADS);
  for (let i = 0; i < starters; i++) {
    setTimeout(fetchNextFile, 0);
  }

  function fetchNextFile() {
    if (nextIndex >= filesList.length) return;
    if (active >= MAX_THREADS) return;

    // Capture the name now: nextIndex keeps moving while this file is in flight.
    const fileName = filesList[nextIndex];
    nextIndex += 1;
    active += 1;

    handleFile(fileName, (err) => {
      active -= 1;
      try {
        cb(fileName, err);
      } finally {
        // Keep the pipeline alive even if the caller's callback throws.
        setTimeout(fetchNextFile, 0);
      }
    });
  }

  function handleFile(fileName, readcb) {
    // Guards against the completion callback being invoked more than once.
    let settled = false;
    const finish = (err) => {
      if (settled) return;
      settled = true;
      readcb(err);
    };
    const fail = (err) => {
      console.error(`Failed to process ${fileName}:`, err);
      finish(err);
    };

    fs.readFile(fileName, null, (readErr, content) => {
      if (readErr) {
        fail(readErr);
        return;
      }

      let xml;
      try {
        xml = iconv.decode(content, 'cp1251');
      } catch (decodeErr) {
        fail(decodeErr);
        return;
      }

      parser(xml, (parseErr, object) => {
        if (parseErr) {
          fail(parseErr);
          return;
        }

        let objects;
        try {
          objects = getObjects(object);
        } catch (structureErr) {
          fail(structureErr);
          return;
        }

        // Wait for the import so the slot is released (and the parent is
        // told) only once the database work for this file is done.
        importObjects(objects).then(() => {
          parentPort.postMessage(fileName);
          finish(null);
        }, fail);
      });
    });
  }
}
/**
 * Writes the tree produced by getObjects to the database, one record at a
 * time and strictly in parent-before-child order so that every child can be
 * linked to the id returned for its parent.
 *
 * A failure while importing one top-level entry is logged and stops the rest
 * of that entry; it is not rethrown, so the returned promise never rejects
 * because of a failed insert.
 *
 * @param {Object} objects - Tree keyed by mapping name; each entry has `data`
 *   and `child.documents`.
 * @returns {Promise<void>}
 */
async function importObjects(objects) {
  for (const [objectKey, object] of Object.entries(objects)) {
    try {
      const fileID = await importObject(object['data'], objectKey, MAPPING[objectKey]['tableName']);

      for (const document of object['child']['documents']) {
        await importDocument(document, fileID);
      }
    } catch (e) {
      console.error(`Failed to import "${objectKey}" records:`, e);
    }
  }
}

/**
 * Imports one document: the person, the person's child records, the
 * organization, its child records, its licenses and its notes (each note
 * followed by that note's documents).
 *
 * @param {Object} document - One element of `child.documents`.
 * @param {*} fileID - Id of the already imported parent file record.
 * @returns {Promise<void>}
 */
async function importDocument(document, fileID) {
  if (!Object.prototype.hasOwnProperty.call(document, 'person')) {
    return;
  }

  const person = document['person'];
  const organization = document['organization'];

  const personID = await importObject(person['data'], 'person', MAPPING['person']['tableName'], 'file_id', fileID);
  for (const personObject of PERSON_OBJECTS) {
    await importObject(person['child'][personObject], personObject, MAPPING[personObject]['tableName'], 'person_id', personID);
  }

  const organizationID = await importObject(organization['data'], 'organization', MAPPING['organization']['tableName'], 'person_id', personID);
  for (const organizationObject of ORGANIZATION_OBJECTS) {
    await importObject(organization['child'][organizationObject], organizationObject, MAPPING[organizationObject]['tableName'], 'organization_id', organizationID);
  }

  for (const licenseObject of organization['child']['licenses']) {
    await importObject(licenseObject, 'licenses', MAPPING['licenses']['tableName'], 'organization_id', organizationID);
  }

  for (const noteObject of organization['child']['egrip_notes']) {
    const noteID = await importObject(noteObject['data'], 'egrip_notes', MAPPING['egrip_notes']['tableName'], 'organization_id', organizationID);

    for (const noteDocumentObject of noteObject['child']) {
      await importObject(noteDocumentObject, 'egrip_notes_documents', MAPPING['egrip_notes_documents']['tableName'], 'organization_note', noteID);
    }
  }
}
/**
 * Inserts one record into `table` unless a row with the same content hash is
 * already there, and returns the id of the row either way.
 *
 * The hash is the md5 of the record's JSON with the DUPE_FIELDS and the
 * foreign column named in MAPPING[objectKey].foreign (if any) left out. Those
 * fields are removed from a copy only: the inserted row still contains them,
 * and the caller's `object` is never modified.
 *
 * NOTE(review): the lookup and the insert are two separate statements, so two
 * concurrent calls with the same content may both insert — confirm whether a
 * unique index on `hash` exists.
 *
 * @param {Object} object - Column/value pairs of the record.
 * @param {string} objectKey - Key of the record's entry in MAPPING.
 * @param {string} table - Target table name.
 * @param {?string} [foreignColumn=null] - Column that links to the parent row.
 * @param {*} [foreignID=null] - Id of the parent row.
 * @returns {Promise<*>} Scalar id of the existing or newly inserted row.
 */
async function importObject(object, objectKey, table, foreignColumn = null, foreignID = null) {
  // Work on a copy: plain assignment would alias the record, and every field
  // dropped for hashing would silently disappear from the inserted row too.
  const hashable = Object.assign({}, object);

  // for...of yields the field names themselves (for...in would yield indices).
  for (const dupeField of DUPE_FIELDS) {
    delete hashable[dupeField];
  }

  if (Object.prototype.hasOwnProperty.call(MAPPING[objectKey], 'foreign')) {
    delete hashable[MAPPING[objectKey]['foreign']['column']];
  }

  const hash = md5(JSON.stringify(hashable));

  const existing = await knex(table).where({ hash: hash }).select('id');
  if (existing.length > 0) {
    return existing[0].id;
  }

  const row = Object.assign({}, object, { hash: hash });
  if (foreignColumn != null && foreignID != null) {
    row[foreignColumn] = foreignID;
  }

  // insert(..., 'id') resolves to an array; depending on the knex version its
  // element is either the id itself or an object of the form { id }. Unwrap
  // it so both branches of this function return the same scalar shape.
  const [inserted] = await knex(table).insert(row, 'id');
  return inserted !== null && typeof inserted === 'object' ? inserted.id : inserted;
}
/**
 * Converts one parsed XML file into the tree consumed by importObjects:
 *
 *   { file: { data, child: { documents: [ { person, organization } ] } } }
 *
 * Each `data` value and each single-valued child is produced by getObject
 * with the matching MAPPING entry; `licenses`, `egrip_notes` and a note's
 * `child` are arrays. Containers that are only ever addressed by name are
 * plain objects. Optional nodes (`Документ`, `СвИП`, `СвЛицензия`,
 * `СвЗапЕГРИП`, `СведПредДок`) may be absent and then yield empty lists.
 *
 * @param {Object} object - Parser output for a whole file.
 * @returns {Object} Tree keyed by mapping name.
 * @throws {TypeError} When the root element `Файл` is missing.
 */
function getObjects(object) {
  const root = object === null || object === undefined ? undefined : object['Файл'];
  if (root === null || root === undefined) {
    throw new TypeError('Unexpected XML structure: root element "Файл" is missing');
  }

  const file = {
    data: getObject(object, MAPPING['file']),
    child: { documents: [] },
  };

  for (const documentNode of listNodes(root['Документ'])) {
    file.child.documents.push(buildDocument(documentNode));
  }

  return { file: file };
}

/** Returns the values held by a (possibly missing) repeated node as an array. */
function listNodes(node) {
  return node === null || node === undefined ? [] : Object.values(node);
}

/** Builds the person/organization pair for one `Документ` node. */
function buildDocument(documentNode) {
  const person = {
    data: getObject(documentNode, MAPPING['person']),
    child: {},
  };
  for (const personObject of PERSON_OBJECTS) {
    person.child[personObject] = getObject(documentNode, MAPPING[personObject]);
  }

  const organization = {
    data: getObject(documentNode, MAPPING['organization']),
    child: { licenses: [], egrip_notes: [] },
  };
  for (const organizationObject of ORGANIZATION_OBJECTS) {
    organization.child[organizationObject] = getObject(documentNode, MAPPING[organizationObject]);
  }

  // Licenses and notes live under the first `СвИП` element; a document
  // without one simply has neither.
  const details = listNodes(documentNode === null || documentNode === undefined ? undefined : documentNode['СвИП'])[0];
  if (details !== null && details !== undefined) {
    for (const licenseNode of listNodes(details['СвЛицензия'])) {
      organization.child.licenses.push(getObject(licenseNode, MAPPING['licenses']));
    }
    for (const noteNode of listNodes(details['СвЗапЕГРИП'])) {
      organization.child.egrip_notes.push(buildNote(noteNode));
    }
  }

  return { person: person, organization: organization };
}

/** Builds one note record together with the documents listed under it. */
function buildNote(noteNode) {
  const documentNodes = listNodes(noteNode === null || noteNode === undefined ? undefined : noteNode['СведПредДок']);
  return {
    data: getObject(noteNode, MAPPING['egrip_notes']),
    child: documentNodes.map((node) => getObject(node, MAPPING['egrip_notes_documents'])),
  };
}
/**
 * Extracts the fields described by `mapping.fields` from a parsed XML node.
 *
 * Every field has an `xpath` of '/'-separated segments. A segment is used as
 * a property name ('0' therefore selects the first element of an array); a
 * segment starting with '@' names an attribute and is looked up under the
 * node's '$' property. Fields whose path cannot be followed to the end, or
 * whose value is undefined, are omitted from the result.
 *
 * The path is followed with ordinary property access; no source text is
 * generated or evaluated.
 *
 * @param {*} object - Node to read from.
 * @param {{fields: Object<string, {xpath: string}>}} mapping - Field definitions.
 * @returns {Object} Found values keyed by field name, in mapping order.
 */
function getObject(object, mapping) {
  const record = {};

  for (const [field, definition] of Object.entries(mapping['fields'])) {
    const value = resolvePath(object, definition['xpath'].split('/'));
    if (value !== undefined) {
      record[field] = value;
    }
  }

  return record;
}

/** Follows `segments` from `root`; returns undefined as soon as a step is missing. */
function resolvePath(root, segments) {
  let node = root;

  for (const segment of segments) {
    const keys = segment.charAt(0) === '@' ? ['$', segment.slice(1)] : [segment];
    for (const key of keys) {
      if (node === null || node === undefined) {
        return undefined;
      }
      node = node[key];
    }
  }

  return node;
}
/**
 * Turns a list of path segments into a JavaScript property-accessor suffix,
 * e.g. ['Файл', '@ИдФайл'] becomes "['Файл']['$']['ИдФайл']".
 *
 * A segment starting with '@' is looked up under the '$' property, the
 * segment '0' becomes the numeric index [0], and every other segment becomes
 * a quoted property name. Backslashes, single quotes and line breaks inside
 * a segment are escaped so the generated text always stays a single valid
 * string literal.
 *
 * @param {string[]} list - Path segments.
 * @returns {string} Accessor suffix, or '' for an empty list.
 */
function getPath(list) {
  const escapeKey = (key) => key
    .replace(/[\\']/g, '\\$&')
    .replace(/\n/g, '\\n')
    .replace(/\r/g, '\\r');

  let result = '';

  // for...of visits the elements only; for...in would also visit enumerable
  // properties added to the array or to Array.prototype.
  for (const key of list) {
    if (key.charAt(0) === '@') {
      result += `['$']['${escapeKey(key.slice(1))}']`;
    } else {
      result += key === '0' ? '[0]' : `['${escapeKey(key)}']`;
    }
  }

  return result;
}
/**
 * Returns the current Unix time in whole seconds or, when `timeStart` is
 * given, the number of seconds between `timeStart` and now.
 *
 * @param {?number} [timeStart=null] - Earlier value returned by getTime().
 * @returns {number}
 */
function getTime(timeStart = null) {
  const nowSeconds = Math.floor(Date.now() / 1000);

  if (timeStart === null) {
    return nowSeconds;
  }

  return nowSeconds - timeStart;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement