Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/**
 * Loads a page through jsdom with the bundled jQuery source injected and
 * passes the result to a callback.
 *
 * On failure the error is logged and the very same load is attempted again;
 * there is no retry limit and no delay between attempts.
 *
 * @param {string} url - Address of the page to load.
 * @param {function} OnLoaded - Called as OnLoaded(window.$, window) on success.
 */
function LoadDocument(url, OnLoaded) {
    const handleResult = (err, window) => {
        if (!err) {
            OnLoaded(window.$, window);
            return;
        }
        ws.error("Got error in LoadDocument('%s'): %s, retrying...".Error, url, err.message);
        LoadDocument(url, OnLoaded);
    };

    jsdom.env({
        url: url,
        src: [jQSrc],
        done: handleResult
    });
}
/**
 * Streams a remote image into a local file; does nothing unless the global
 * `fetchImages` flag is truthy.
 *
 * When the request stream reports an error, the write stream belonging to the
 * failed attempt is destroyed before the download is started again, so a retry
 * never leaves an abandoned open stream pointing at the same path.
 * Retries are unbounded and immediate.
 *
 * @param {string} url - Image address.
 * @param {string} localPath - Destination file path.
 */
function DownloadImage(url, localPath) {
    if (!fetchImages) {
        return;
    }

    // Reset the file to 0 bytes up front; this also fails synchronously when
    // the destination cannot be written.
    fs.writeFileSync(localPath, "");

    const remStream = request(url);
    const locStream = fs.createWriteStream(localPath);

    remStream.on("error", (err) => {
        // Release the writer of the failed attempt before a new one is opened.
        locStream.destroy();
        ws.error("Got error while downloading image: %s, retrying...".Error, err.message);
        DownloadImage(url, localPath);
    });

    remStream.pipe(locStream);
}
/**
 * Collects the product gallery images of a loaded product page.
 *
 * For every gallery item carrying a `data-orig-url` attribute a random
 * ".jpg" file name under `ocPathPrefix` is generated, the download into
 * `ocInstallDir + "image/"` is started via DownloadImage, and the relative
 * path is recorded.
 *
 * @param {function} $ - jQuery function bound to the product page.
 * @returns {string[]} Relative image paths, in gallery order.
 */
function GetProductImages($) {
    const images = [];
    ws.info("Fetching product images".Info);
    $(".gallery-body > ul > li").each((index, elem) => {
        // Declared locally: the previous undeclared assignment leaked a global
        // and throws in strict / module code.
        const imgUrl = $(elem).attr("data-orig-url");
        if (!imgUrl) {
            return;
        }
        const ocImgPath = ocPathPrefix + rs.generate() + ".jpg";
        const finPath = ocInstallDir + "image/" + ocImgPath;
        ws.info("Saving %s into %s".Info, imgUrl, finPath);
        DownloadImage(imgUrl, finPath);
        images.push(ocImgPath);
    });
    return images;
}
/**
 * Reads the "key: value" list items of the product characteristics block.
 *
 * Each item text is split at its FIRST ": " only, so a value that itself
 * contains ": " is kept whole. Keys are trimmed so that surrounding whitespace
 * in the markup cannot break lookups by exact name. Values are sanitised with
 * Ruwr(); an item without a separator yields an empty value.
 *
 * @param {function} $ - jQuery function bound to the product page.
 * @returns {Object<string, string>} Parameter map; "Бренд" and "Артикул" are
 *          always present (empty string when the page does not list them).
 */
function GetProductParams($) {
    const params = {
        "Бренд": "",
        "Артикул": ""
    };
    $(".character-block > ul > li").each((index, elem) => {
        const text = $(elem).text();
        const sepAt = text.indexOf(": ");
        const key = (sepAt === -1 ? text : text.slice(0, sepAt)).trim();
        const val = sepAt === -1 ? "" : text.slice(sepAt + 2);
        params[key] = val ? val.Ruwr() : "";
    });
    return params;
}
/**
 * Loads a product page, extracts its data and hands the resulting product
 * record to PushProduct.
 *
 * The description is the cleaned ".description" text followed by the
 * sanitised ".character-block" HTML when that block exists. (The previous
 * expression `text + html ? … : ""` evaluated as `(text + html) ? … : ""`,
 * which dropped the text and threw when the block was missing.)
 *
 * @param {string} pageUrl - Product page address.
 * @param {number} assignedCategoryId - Category the product is filed under.
 */
function ParseProduct(pageUrl, assignedCategoryId) {
    ws.info("Parsing product of category #%d at %s".Info, assignedCategoryId, pageUrl);
    LoadDocument(pageUrl, ($) => {
        const params = GetProductParams($);
        const materialFid = filterToFid[params["Материал"]];
        const characterHtml = $(".character-block").html();

        if (!materialFid) ws.error("Warning: couldn't get MaterialFID of %s, '%s'", pageUrl, params["Материал"]);
        if (!characterHtml) ws.error("Warning: looks like there is no .character-block in %s", pageUrl);

        const descriptionText = $(".description").text().Ruwr().replace("Описание :", "").replace("Описание:", "");

        const product = {
            CategoryId: assignedCategoryId,
            Model: params["Артикул"],
            VendorId: vendorToVid[params["Бренд"]],
            // Falls back to filter id 1 when the material is unknown.
            FilterId: materialFid || 1,
            Title: $("#item-card > div.colon-3-double-thin > div.pull-left.left30.product-box-js > div > h1").text().Ruwr(),
            CurrentPrice: $(".product-price-js").text().Ruwr() || 0,
            OldPrice: $("span.linethrough.color999.size24.left20").text().Ruwr() || 0,
            Description: descriptionText + (characterHtml ? characterHtml.Ruwr() : ""),
            Meta: GetPageMetadata($),
            Images: GetProductImages($)
        };
        PushProduct(product);
    });
}
/**
 * Stores one parsed product in the database.
 *
 * Takes the next product id from the global `currPid` counter, inserts the
 * `oc_product` row and, once that insert has reported success, issues the
 * dependent inserts (category link, store link, description, filter and one
 * `oc_product_image` row per image). When the first insert fails the error is
 * logged and nothing else is written.
 *
 * @param {Object} product - Record as assembled by ParseProduct.
 */
function PushProduct(product) {
    const localPid = currPid;
    currPid++;
    ws.info("Pushing product %s (CategoryID#%d):".Info, product.Title, product.CategoryId);

    // Builds the SQL text of a single-row insert.
    const buildInsert = (table, values) => sql.build({ type: "insert", table: table, values: values }).query;
    // Logs a statement right before it is sent.
    const announce = (text) => ws.info("Running a query: ".Info + "\n%s".Query, text);
    // Build + log + send, without a completion callback.
    const insertRow = (table, values) => {
        const text = buildInsert(table, values);
        announce(text);
        db.query(text);
    };

    const productQuery = buildInsert("oc_product", {
        product_id: localPid,
        model: product.Model,
        sku: "",
        upc: "",
        ean: "",
        jan: "",
        isbn: "",
        mpn: "",
        location: "",
        quantity: 1,
        stock_status_id: 6,
        image: product.Images[0],
        manufacturer_id: product.VendorId || 0,
        shipping: 1,
        price: product.CurrentPrice,
        tax_class_id: 9,
        status: 1,
        date_available: dfDate,
        date_added: dfDate,
        date_modified: dfDate
    });
    announce(productQuery);

    db.query(productQuery, (err) => {
        ws.info("Total products %s".Info, totalProducts);
        if (err) {
            ws.error("Got MySQL Error in PushProduct() (querying `oc_product`): %s, query was: %s".Error, err, productQuery);
            return;
        }

        insertRow("oc_product_to_category", {
            product_id: localPid,
            category_id: product.CategoryId
        });
        insertRow("oc_product_to_store", {
            product_id: localPid,
            store_id: 0
        });
        insertRow("oc_product_description", {
            product_id: localPid,
            language_id: 1,
            name: product.Title,
            description: product.Description,
            tag: "",
            meta_title: product.Meta.Title,
            meta_description: product.Meta.Description,
            meta_keyword: product.Meta.Keywords
        });
        insertRow("oc_product_filter", {
            product_id: localPid,
            filter_id: product.FilterId
        });
        product.Images.forEach((val) => {
            insertRow("oc_product_image", {
                product_id: localPid,
                image: val,
                sort_order: 0
            });
        });
    });
}
/**
 * Stores one category in the database by inserting a row into each of
 * `oc_category`, `oc_category_description`, `oc_category_path`,
 * `oc_category_to_store` and `oc_category_to_layout`, in that order.
 * Every statement is logged before it is sent; no completion callbacks are
 * attached.
 *
 * @param {Object} cat - Category record with CategoryId, ParentId, Name,
 *        Description and Meta ({Title, Description, Keywords}).
 */
function PushCategory(cat) {
    // Build + log + send a single-row insert.
    const insertRow = (table, values) => {
        const query = sql.build({ type: "insert", table: table, values: values }).query;
        ws.info("Running a query: ".Info + "\n%s".Query, query);
        db.query(query);
    };

    ws.info("Pushing category %s (ID#%d):".Info, cat.Name, cat.CategoryId);

    insertRow("oc_category", {
        category_id: cat.CategoryId,
        image: "",
        parent_id: cat.ParentId,
        column: "1",
        // Only root categories (parent 0) are flagged "top"; loose equality is
        // kept so that 0 and "0" are both treated as root.
        top: cat.ParentId == 0 ? "1" : "0",
        sort_order: "0",
        status: "1",
        date_added: dfDate,
        date_modified: dfDate
    });
    insertRow("oc_category_description", {
        category_id: cat.CategoryId,
        language_id: 1,
        name: cat.Name,
        description: cat.Description,
        meta_title: cat.Meta.Title,
        meta_description: cat.Meta.Description,
        meta_keyword: cat.Meta.Keywords
    });
    // NOTE(review): only a self-referencing path row at level 0 is written,
    // also for sub-categories — confirm this is what the shop schema expects.
    insertRow("oc_category_path", {
        category_id: cat.CategoryId,
        path_id: cat.CategoryId,
        level: 0
    });
    insertRow("oc_category_to_store", {
        category_id: cat.CategoryId,
        store_id: 0
    });
    insertRow("oc_category_to_layout", {
        category_id: cat.CategoryId,
        store_id: 0,
        layout_id: 0
    });
}
- function InitDBConnection()
- {
- var mysql = require("mysql");
- var db = mysql.createConnection(
- {
- host : 'localhost',
- user : 'root',
- password : '1',
- database : 'oc_Sumkini'
- });
- db.connect((err) =>
- {
- if(err) throw err;
- });
- db.on("error", (err) => ws.error("Got MySQL error: %s".Error, err.message));
- return db;
- }
/**
 * Registers a vendor (manufacturer): assigns it the next id from the global
 * `currVid` counter, remembers the name → id mapping in `vendorToVid`, and
 * inserts the `oc_manufacturer` and `oc_manufacturer_to_store` rows.
 *
 * @param {string} name - Vendor name.
 */
function PushVendor(name) {
    ws.info("Pushing vendor %s", name);

    const locVid = currVid;
    currVid++;
    vendorToVid[name] = locVid;

    const inserts = [
        ["oc_manufacturer", { manufacturer_id: locVid, name: name, sort_order: locVid }],
        ["oc_manufacturer_to_store", { manufacturer_id: locVid, store_id: 0 }]
    ];

    for (const [table, values] of inserts) {
        const query = sql.build({ type: "insert", table: table, values: values }).query;
        ws.info("Running a query: \n%s".Info, query);
        db.query(query);
    }
}
/**
 * Extracts the title, keywords and description metadata of a loaded page.
 *
 * Each of the three values is sanitised with Ruwr() and falls back to an
 * empty string when the corresponding element is absent; the title is now
 * guarded the same way as the two meta tags.
 *
 * @param {function} $ - jQuery function bound to the page.
 * @returns {{Title: string, Keywords: string, Description: string}}
 */
function GetPageMetadata($) {
    const titleElem = $('title')[0];
    const kw = $('meta[name="keywords"]')[0];
    const ds = $('meta[name="description"]')[0];

    return {
        Title: titleElem ? titleElem.innerHTML.Ruwr() : "",
        Keywords: kw ? kw.content.Ruwr() : "",
        Description: ds ? ds.content.Ruwr() : ""
    };
}
/**
 * Registers a material as a filter: records name → fid in `filterToFid` and
 * inserts the matching `oc_filter` and `oc_filter_description` rows (both in
 * filter group 1). The statements themselves are not logged.
 *
 * @param {string} name - Material name.
 * @param {number} fid - Filter id to use for this material.
 */
function PushMaterial(name, fid) {
    filterToFid[name] = fid;
    ws.info("Pushing material %s", name);

    const rows = [
        ["oc_filter", { filter_id: fid, filter_group_id: 1, sort_order: fid }],
        ["oc_filter_description", { filter_id: fid, filter_group_id: 1, name: name, language_id: 1 }]
    ];

    for (const [table, values] of rows) {
        db.query(sql.build({ type: "insert", table: table, values: values }).query);
    }
}
/**
 * Registers every vendor listed in the brands section of the left menu by
 * passing its sanitised name to PushVendor.
 *
 * @param {function} $ - jQuery function bound to the catalogue page.
 */
function ParseVendors($) {
    const vendorItems = $(".left-menu > form > li:has(.brands) > ol > .item");
    vendorItems.each((index, elem) => {
        const vendorName = $(elem).text().Ruwr();
        PushVendor(vendorName);
    });
}
/**
 * Registers every material listed in the ".compositions" block via
 * PushMaterial. Filter ids are the element's position plus 3.
 *
 * @param {function} $ - jQuery function bound to the catalogue page.
 */
function ParseMaterials($) {
    const materialLabels = $(".compositions > li > label > span");
    materialLabels.each((index, elem) => {
        const materialName = elem.innerHTML.Ruwr().trim();
        const filterId = index + 3;
        PushMaterial(materialName, filterId);
    });
}
/**
 * Sends a TRUNCATE TABLE statement for every table name in the list, logging
 * each statement first. Returns nothing and does not wait for completion.
 *
 * @param {string[]} list - Table names.
 */
function TruncateTables(list) {
    for (const tableName of list) {
        const statement = `TRUNCATE TABLE \`${tableName}\`;`;
        ws.info("Truncating table %s, running query %s".Info, tableName, statement);
        db.query(statement);
    }
}
/**
 * String helper used throughout the script: replaces every backtick, single
 * quote and double quote with "-" and trims surrounding whitespace.
 *
 * @returns {string} The sanitised copy of the string.
 */
String.prototype.Ruwr = function () {
    const withoutQuotes = this.replace(/[`'"]/g, "-");
    return withoutQuotes.trim();
};
- async function Initialize()
- {
- global.ws = require("winston");
- global.rs = require("randomstring");
- global.jsdom = require("jsdom");
- global.request = require("request");
- global.fs = require("fs");
- global.cp = require("child_process");
- global.sql = require("json-sql")();
- global.colors = require("colors");
- global.jQSrc = fs.readFileSync("./JQuery.js", "utf-8");
- global.db = await InitDBConnection();
- global.dfDate = "2009-01-31 01:04:25";
- global.ocPathPrefix = "catalog/demo/product/";
- global.ocInstallDir = "/var/www/wc/";
- global.fetchImages = true;
- global.vendorToVid = [];
- global.filterToFid = {};
- global.currPid = 1;
- global.currCid = 1;
- global.currVid = 1;
- global.totalProducts = 0;
- process.on("uncaughtException", (err) => ws.error("Got uncaught exception: %s".Error, err.message));
- sql.configure({separatedValues: false});
- sql.setDialect("mysql");
- colors.setTheme(
- {
- Error: "red",
- Info: "cyan",
- Query: "white"
- });
- var usedTables =
- [
- 'oc_category',
- 'oc_category_description',
- 'oc_category_path',
- 'oc_category_to_store',
- 'oc_category_to_layout',
- 'oc_filter',
- 'oc_filter_description',
- 'oc_product',
- 'oc_product_description',
- 'oc_product_image',
- 'oc_product_to_category',
- 'oc_product_to_store',
- 'oc_product_filter',
- 'oc_manufacturer',
- 'oc_manufacturer_to_store'
- ];
- await TruncateTables(usedTables);
- }
/**
 * Determines how many pages a category listing has.
 *
 * The count is read from the text of the second-to-last pagination link.
 * When there are fewer than two pagination links, or that text is not a
 * positive integer, 1 is returned, so callers never receive NaN and the
 * lookup never indexes a missing element.
 *
 * @param {function} $ - jQuery function bound to the category page.
 * @returns {number} Page count, at least 1.
 */
function GetCategoryPageCount($) {
    const paginationBtns = $("div.pagination > div > a");
    if (paginationBtns.length < 2) {
        return 1;
    }
    const pageCount = Number(paginationBtns[paginationBtns.length - 2].innerHTML);
    return Number.isInteger(pageCount) && pageCount > 0 ? pageCount : 1;
}
/**
 * Loads one page of a category listing and starts ParseProduct for every
 * product link found on it. The number of links found is added to the global
 * `totalProducts` counter.
 *
 * @param {string} categoryUrl - Category address (without query string).
 * @param {number} pageIndex - Value used for the `page` query parameter.
 * @param {number} assignedCid - Category id the products are filed under.
 */
function ParseCategoryPage(categoryUrl, pageIndex, assignedCid) {
    LoadDocument(categoryUrl + "/?page=" + pageIndex, ($) => {
        const items = $(".item-box > a");
        // The format string now has a placeholder for the category id.
        ws.info("Found %d products in category#%s".Info, items.length, assignedCid);
        totalProducts += items.length;
        items.each((index, elem) => {
            ParseProduct(elem.href, assignedCid);
        });
    });
}
/**
 * Finds the sub-category links in the left menu of a category page.
 *
 * A link counts as a sub-category when its text starts with "> ". Each match
 * gets a fresh id by incrementing the global `currCid` counter and is
 * returned as a lightweight stand-in for the link element.
 *
 * @param {function} $ - jQuery function bound to the category page.
 * @param {number} parentCid - Not used by this function.
 * @returns {Array<{HrefElemEmu: {href: string, innerHTML: string}, AssignedCid: number}>}
 */
function FindSubcategories($, parentCid) {
    const marker = "> ";
    const found = [];

    $(".left-menu > form > li > a").each((index, elem) => {
        const label = $(elem).text();
        if (!label.startsWith(marker)) {
            return;
        }
        const linkStandIn = {
            href: elem.href,
            innerHTML: label.replace(marker, "")
        };
        currCid += 1;
        found.push({
            HrefElemEmu: linkStandIn,
            AssignedCid: currCid
        });
    });

    return found;
}
/**
 * Loads a category page, stores the category and starts parsing its listing
 * pages. For a root category (parentId 0) every sub-category found on the
 * page is parsed recursively with this category as its parent.
 *
 * @param {{href: string, innerHTML: string}} hrefElem - Link element (or a
 *        stand-in object) giving the category address and name.
 * @param {number} assignedCid - Id assigned to this category.
 * @param {number} [parentId=0] - Id of the parent category, 0 for a root.
 */
function ParseCategory(hrefElem, assignedCid, parentId = 0) {
    const catUrl = hrefElem.href;
    const catName = hrefElem.innerHTML;
    ws.info('Parsing category "%s" (ID#%s) (%s)'.Info, catName, assignedCid, catUrl);

    const category = {
        ParentId: parentId,
        CategoryId: assignedCid,
        Name: catName
    };

    LoadDocument(catUrl, ($) => {
        const subcats = FindSubcategories($, assignedCid);
        // Only one level of nesting is followed; loose equality is kept so
        // that 0 and "0" are both treated as root.
        if (subcats.length != 0 && parentId == 0) {
            subcats.forEach((val) => {
                ParseCategory(val.HrefElemEmu, val.AssignedCid, assignedCid);
            });
        }

        const desc = $(".seo-text-block").html();
        const pageCount = GetCategoryPageCount($);
        ws.info("Got number of pages in category#%s: %s".Info, assignedCid, pageCount);

        category.Meta = GetPageMetadata($);
        category.Description = desc ? desc.Ruwr() : "";
        PushCategory(category);

        // "<=" instead of "!=": the loop terminates even when pageCount is
        // NaN or fractional.
        // NOTE(review): pages 0..pageCount are requested, i.e. pageCount + 1
        // pages — confirm whether the site numbers pages from 0 or from 1.
        for (let pageIndex = 0; pageIndex <= pageCount; pageIndex++) {
            ParseCategoryPage(catUrl, pageIndex, assignedCid);
        }
    });
}
/**
 * Starts ParseCategory for every category link in the left menu, giving each
 * one the next id from the global `currCid` counter.
 *
 * @param {function} $ - jQuery function bound to the catalogue page.
 */
function ParseCategoryList($) {
    const categoryLinks = $(".left-menu > form > li > a");
    categoryLinks.each((index, clHRefElem) => {
        currCid += 1;
        ParseCategory(clHRefElem, currCid);
    });
}
/**
 * Script entry point: waits for Initialize() to finish, then loads the
 * catalogue index and starts parsing vendors, materials and categories.
 *
 * Initialize() is asynchronous; awaiting it guarantees that the globals it
 * sets exist, and that the tables have been truncated, before any page is
 * loaded or any row is written. A failure during start-up is reported on
 * stderr and marks the process as failed, so the returned promise never
 * rejects.
 *
 * @returns {Promise<void>}
 */
async function Main() {
    try {
        await Initialize();
        LoadDocument("http://sumkini.ru/catalogue", ($) => {
            ws.info("Index document has been loaded".Info);
            ParseVendors($);
            ParseMaterials($);
            ParseCategoryList($);
        });
    } catch (err) {
        // The logger may not exist yet if Initialize() itself failed.
        console.error("Startup failed: %s", err && err.message);
        process.exitCode = 1;
    }
}
- Main();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement