Advertisement
Guest User

Untitled

a guest
Apr 10th, 2020
219
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. const fetch = require('node-fetch');
  2. const cheerio = require('cheerio');
  3. const fs = require('fs');
  4. const url = require('url');
  5.  
  6. const FILENAME = 'items.json';
  7. const SITE_URL = 'https://www.dns-shop.ru';
  8. const PARSE_URL = 'https://www.dns-shop.ru/catalog/17a8a0f116404e77/elektronnye-knigi/';
  9. const STEP_SIZE = 3;
  10.  
  11. const Selectors = {
  12.     product: '.n-catalog-product',
  13.     productName: '.price-item-title',
  14.     price: '.current-price-value',
  15.     description: '.price-item-description > p',
  16.     link: '.product-info__title-link > a',
  17.     productCode: '.price-item-code > span',
  18.     paramsRows: '.table-params tbody tr',
  19.     tablePart: '.table-part',
  20.     paramName: '.dots > span',
  21.     paramValue: '> td:last-child'
  22. };
  23.  
  24. async function parseItem(pathname) {
  25.     try {
  26.         const response = await fetch(url.resolve(SITE_URL, pathname));
  27.         const body = await response.text();
  28.         const $ = cheerio.load(body);
  29.         const price = parseFloat($(Selectors.price).eq(0).text().replace(/\s/g, ''));
  30.         const code = parseInt($(Selectors.productCode).text());
  31.         const name = $(Selectors.productName).text();
  32.         const description = $(Selectors.description).text();
  33.         const rows = $(Selectors.paramsRows);
  34.  
  35.         const properties = {};
  36.         let currentPart = 'none';
  37.         rows.each((i, row) => {
  38.             const part = $(row).find(Selectors.tablePart).eq(0);
  39.  
  40.             if (part.length > 0) {
  41.                 currentPart = part.text();
  42.                 properties[currentPart] = {};
  43.             } else {
  44.                 const property = $(row).find(Selectors.paramName).text().trim();
  45.                 const value = $(row).find(Selectors.paramValue).text().trim();
  46.                 properties[currentPart][property] = value;
  47.             }
  48.         });
  49.  
  50.         return { name, price, code, description, properties };
  51.     } catch (error) {
  52.         console.error(error);
  53.         throw error;
  54.     }
  55. }
  56.  
  57. async function parsePage(page = 1, items = []) {
  58.     try {
  59.         const response = await fetch(`${PARSE_URL}?p=${page}`);
  60.         const body = await response.text();
  61.         const $ = cheerio.load(body);
  62.         const products = $(Selectors.product).toArray();
  63.  
  64.         if (!products.length) {
  65.             return items;
  66.         }
  67.  
  68.         console.log('Обрабатываю страницу:', page);
  69.  
  70.         const parseItemQueries = products.map(item => {
  71.             const link = $(item).find(Selectors.link).attr('href');
  72.             return parseItem(link);
  73.         });
  74.  
  75.         const parsedItems = await Promise.all(parseItemQueries);
  76.         const nextItems = items.concat(parsedItems);
  77.  
  78.         return parsePage(page + STEP_SIZE, nextItems);
  79.     } catch (error) {
  80.         console.error('Парсинг не удался.', error);
  81.     }
  82. }
  83.  
  84. async function main() {
  85.     try {
  86.         const parsers = new Array(STEP_SIZE).fill(null).map((_, i) => parsePage(i + 1));
  87.         const results = await Promise.all(parsers);
  88.         const items = results.flat();
  89.         console.log('Кол-во собранных товаров:', items.length);
  90.         fs.writeFileSync(FILENAME, JSON.stringify(items));
  91.         console.log(`Успешно записали результат в файл ${FILENAME}!`);
  92.     } catch (error) {
  93.         console.error(error);
  94.         process.exit(1);
  95.     }
  96. }
  97.  
  98. main();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement