Namokonov

parserWB2

Apr 14th, 2021 (edited)
625
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  /*
  Our Telegram channel about Sheets and scripts: https://t.me/google_sheets
  Channel table of contents (in a Sheet, of course): https://docs.google.com/spreadsheets/d/1u0aRlwb0rQHbjE9avbgCqIJSQ3EvPfTxT-h54jTQh8I/edit#gid=0&range=A1:B1
  Chat: https://t.me/google_spreadsheets_chat
  Donations: https://t.me/google_spreadsheets_chat/160663
  Contact: https://t.me/namokonov
  */
  8.  
  /**
   * Patterns used to pull product fields out of a fetched page.
   *
   * Every pattern has exactly one capture group that holds the value.
   * `responseParse` walks these keys in declaration order, so the key order
   * here is the column order of each parsed row.
   */
  const regExps = Object.freeze({
    name: /<meta itemprop="name" content="(.+?)">/,
    price: /<meta itemprop="price" content="(.+?)">/,
    curr: /<meta itemprop="priceCurrency" content="(.+?)">/,
    orders: /ordersCount":(\d+)/,
    q: /quantity":(\d+)/,
    brand: /<meta itemprop="brand" content="(.+?)">/,
    rating: /<meta itemprop="ratingValue" content="(.+?)">/,
    reviewCount: /<meta itemprop="reviewCount" content="(.+?)">/,
    image: /<meta itemprop="image" content="(.+?)"/,
  });
  20.  
  /**
   * Names of the spreadsheet tabs the script works with:
   * `shMain` is read for URLs, `shPaste` receives the parsed rows and
   * `shLog` receives one log row per fetched batch.
   */
  const sheetNames = Object.freeze({
    shMain: 'URLS',
    shPaste: 'RESULT',
    shLog: 'LOG',
  });
  26.  
  // Maximum number of requests handed to a single UrlFetchApp.fetchAll call in parserWb2.
  const step = 1000;
  28.  
  /**
   * Adds the custom menu to the spreadsheet UI; its single item runs `parserWb2`.
   * Named `onOpen` so that Apps Script can invoke it when the spreadsheet is opened.
   *
   * @param {Object} e - Event object supplied by the caller (not used here).
   */
  function onOpen(e) {
    SpreadsheetApp.getUi()
      .createMenu(">–]> В ЭТОМ МЕНЮ СКРИПТЫ")
      .addItem("Парсим!", "parserWb2")
      .addToUi();
  }
  35.  
  /**
   * Fetches every URL listed in the first column of the URLS sheet (header row
   * skipped, empty cells ignored), parses each response with `responseParse`
   * and appends the rows to the RESULT sheet.
   *
   * Requests are sent in batches of at most `step`. After each batch one row is
   * appended to the LOG sheet: timestamp, batch size, number of rows whose first
   * parsed field is truthy, and the seconds the batch took.
   *
   * Each written row is `[fetch timestamp, url, ...parsed fields]`.
   *
   * @throws {Error} If any of the URLS / RESULT / LOG sheets does not exist.
   */
  function parserWb2() {
    const ss = SpreadsheetApp.getActive();
    const sh = ss.getSheetByName(sheetNames.shMain);
    const shPaste = ss.getSheetByName(sheetNames.shPaste);
    const shLog = ss.getSheetByName(sheetNames.shLog);

    // Fail with a readable message instead of a TypeError on a null sheet.
    const missing = [
      [sheetNames.shMain, sh],
      [sheetNames.shPaste, shPaste],
      [sheetNames.shLog, shLog],
    ]
      .filter(([, sheet]) => !sheet)
      .map(([sheetName]) => sheetName);
    if (missing.length > 0) {
      throw new Error(`Missing sheet(s): ${missing.join(', ')}`);
    }

    // Skip the header row and keep only the non-empty cells of the first column.
    const urls = sh
      .getDataRange()
      .getValues()
      .slice(1)
      .map((cells) => cells[0])
      .filter(Boolean);
    const requests = requestPrepare(urls);

    shPaste.getRange("A2:L").clearContent();

    for (let offset = 0; offset < requests.length; offset += step) {
      const startTime = new Date();
      const partRequests = requests.slice(offset, offset + step);
      const responses = UrlFetchApp.fetchAll(partRequests);

      const rows = responses.map((response, i) => {
        const row = responseParse(response.getContentText());
        // `i` is the position inside this batch; add the batch offset so the row
        // is labelled with the URL that was actually fetched, not with the URL
        // at the same position in the first batch.
        row.unshift(new Date(), urls[offset + i]);
        return row;
      });

      shLog.appendRow([
        new Date(),
        partRequests.length,
        rows.filter((row) => row[2]).length,
        secondsBetweenDates(startTime),
      ]);
      shPaste
        .getRange(shPaste.getLastRow() + 1, 1, rows.length, rows[0].length)
        .setValues(rows);
      // Push each batch to the sheet before starting the next one.
      SpreadsheetApp.flush();
    }
  }
  67.  
  /**
   * Extracts one value per entry of `regExps` from a response body.
   *
   * @param {string} r - Response text to search.
   * @returns {Array} Values in the key order of `regExps`; each is whatever
   *   `m` returns for that pattern, except `price`, which is converted to a number.
   */
  function responseParse(r) {
    return Object.entries(regExps).map(([field, regExp]) => {
      const value = m(r, regExp);
      // Only the price column is stored as a number; the rest stay as matched.
      return field === 'price' ? Number(value) : value;
    });
  }
  78.  
  79.  
  /**
   * Turns a list of URLs into request objects for `UrlFetchApp.fetchAll`.
   * Every request sets `muteHttpExceptions` to true.
   *
   * @param {Array} urls - URLs to fetch.
   * @returns {Array<{url: *, muteHttpExceptions: boolean}>} One request per URL, same order.
   */
  function requestPrepare(urls) {
    return urls.map((url) => ({ url, muteHttpExceptions: true }));
  }
  83.  
  /**
   * Builds an image URL on img1.wbstatic.net from a numeric id.
   * The folder segment is the id rounded down to a multiple of 10000.
   *
   * @param {number|string} y - Numeric id used in the path.
   * @returns {string|number} The image URL, or 0 when `y` is falsy.
   */
  function createUrlImage(y) {
    if (!y) {
      return 0;
    }
    const folder = Math.floor(y / 10000) * 10000;
    return `https://img1.wbstatic.net/big/new/${folder}/${y}-1.jpg`;
  }
  87.  
  /**
   * Returns the first capture group of `regExp` in `response`.
   *
   * @param {string} response - Text to search.
   * @param {RegExp|string} regExp - Pattern with one capture group.
   * @returns {string|number} The captured text, or 0 when there is no match
   *   or `response` is not a string.
   */
  function m(response, regExp) {
    if (typeof response !== 'string') {
      return 0;
    }
    // Run the match once and reuse the result.
    const found = response.match(regExp);
    return found ? found[1] : 0;
  }
  92.  
  /**
   * Absolute difference between two moments, in seconds.
   *
   * @param {Date} st - Start time.
   * @param {Date} [et] - End time; defaults to the moment of the call.
   * @returns {number} Non-negative number of seconds (may be fractional).
   */
  function secondsBetweenDates(st, et = new Date()) {
    const diffMs = st.getTime() - et.getTime();
    return Math.abs(diffMs / 1000);
  }
  99.  
  100.  
  101.  
  102.  
  103.  
  104.  
  105.  
  106.  
  107.  
  108.  
  109.  
  110.  
Add Comment
Please, Sign In to add comment