Advertisement
Namokonov

Untitled

Apr 12th, 2021
257
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /*
  2. Наш канал про Таблицы и скрипты в Telegram: https://t.me/google_sheets
  3. Оглавление канала (конечно, в Таблице): https://docs.google.com/spreadsheets/d/1u0aRlwb0rQHbjE9avbgCqIJSQ3EvPfTxT-h54jTQh8I/edit#gid=0&range=A1:B1
  4. Чат: https://t.me/google_spreadsheets_chat
  5. Донаты: https://t.me/google_spreadsheets_chat/160663
  6. Написать: https://t.me/namokonov
  7. */
  8.  
  /**
   * Adds a custom menu to the spreadsheet UI with one item that runs `parserWb2`.
   *
   * NOTE(review): presumably invoked by the Apps Script `onOpen` simple trigger
   * when the spreadsheet is opened; confirm against the project's triggers.
   *
   * @param {Object} e - Event object passed by the caller; not used here.
   */
  function onOpen(e) {
    const ui = SpreadsheetApp.getUi();
    const menuBuilder = ui.createMenu(">–]> В ЭТОМ МЕНЮ СКРИПТЫ");
    // The second argument is the name of the function the menu item calls.
    const menuWithItem = menuBuilder.addItem("Парсим!", "parserWb2");
    menuWithItem.addToUi();
  }
  15.  
  /**
   * Fetches every URL listed in the first column of the main sheet (header row
   * skipped, empty cells ignored), parses each response with `responseParse`,
   * and writes the results to the paste sheet starting at row 2.
   *
   * Requests are sent in batches of `step` via `UrlFetchApp.fetchAll`. Each
   * output row is `[fetch timestamp, url, ...parsed fields]`. After every batch
   * a row is appended to the log sheet:
   * `[timestamp, batch size, rows whose first parsed field is truthy, seconds]`.
   *
   * Fixes relative to the previous version:
   *  - each batch is written below the previous one instead of always at row 2
   *    (later batches used to overwrite earlier ones);
   *  - the URL stored in a row is the one that was actually requested (the
   *    batch-local index used to be applied to the full URL list);
   *  - no implicit global loop variable.
   */
  function parserWb2() {
    const ss = SpreadsheetApp.getActive();
    const sh = ss.getSheetByName(sheetNames.shMain);
    const shPaste = ss.getSheetByName(sheetNames.shPaste);
    const shLog = ss.getSheetByName(sheetNames.shLog);
    const d = sh.getDataRange().getValues();
    const urls = d.slice(1).filter(h => h[0]).map(h => h[0]);
    const requests = requestPrepare(urls);

    shPaste.getRange("A2:L").clearContent();

    const firstOutputRow = 2;
    // Number of URLs already handled; doubles as the offset into `urls`
    // and as the row offset on the paste sheet.
    let processed = 0;

    while (requests.length) {
      const startTime = new Date();
      const partRequests = requests.splice(0, step);
      const responses = UrlFetchApp.fetchAll(partRequests);
      const batchOffset = processed;

      const rows = responses.map((response, i) => {
        const row = responseParse(response.getContentText());
        row.unshift(new Date(), urls[batchOffset + i]);
        return row;
      });

      shLog.appendRow([
        new Date(),
        partRequests.length,
        // row[2] is the first parsed field (the cells before it are date and url).
        rows.filter(row => row[2]).length,
        secondsBetweenDates(startTime)
      ]);
      shPaste
        .getRange(firstOutputRow + batchOffset, 1, rows.length, rows[0].length)
        .setValues(rows);

      processed += partRequests.length;
    }
  }
  46.  
  /**
   * Extracts one value per entry of `regExps` from a response body.
   *
   * Each pattern is applied through `m`, so a field that does not match yields
   * whatever `m` returns for "no match". The value for the `price` key is
   * converted to a number; all other values are passed through unchanged.
   *
   * @param {string} r - Response text to search.
   * @returns {Array} Extracted values, in the key order of `regExps`.
   */
  function responseParse(r) {
    // Object.keys keeps the loop variable local; the former `for (y in ...)`
    // created an implicit global `y`.
    return Object.keys(regExps).map((key) => {
      const value = m(r, regExps[key]);
      return key === 'price' ? Number(value) : value;
    });
  }
  57.  
  58.  
  /**
   * Wraps each URL in a request descriptor object with HTTP exceptions muted.
   *
   * @param {Array} urls - URLs to wrap.
   * @returns {Array<Object>} One `{ url, muteHttpExceptions: true }` object per
   *   input URL, in the same order.
   */
  function requestPrepare(urls) {
    function toRequest(url) {
      const request = {};
      request['url'] = url;
      request['muteHttpExceptions'] = true;
      return request;
    }

    return urls.map((url) => toRequest(url));
  }
  62.  
  /**
   * Builds an image URL on img1.wbstatic.net from the given value.
   *
   * The path contains the value rounded down to a multiple of 10000, followed
   * by the value itself with a `-1.jpg` suffix.
   *
   * @param {number|string} y - Value to build the URL from.
   *   NOTE(review): presumably a product id; confirm with callers.
   * @returns {string|number} The image URL, or 0 when `y` is falsy.
   */
  function createUrlImage(y) {
    if (!y) {
      return 0;
    }

    const folder = Math.floor(y / 10000) * 10000;
    return `https://img1.wbstatic.net/big/new/${folder}/${y}-1.jpg`;
  }
  66.  
  /**
   * Returns the first capture group of `regExp` in `response`.
   *
   * The pattern is matched exactly once. The previous version matched twice,
   * which doubled the work and, for a sticky (`y`) pattern, made the second
   * match start at the advanced `lastIndex` and throw on a `null` result.
   *
   * @param {string} response - Text to search.
   * @param {RegExp|string} regExp - Pattern with at least one capture group.
   * @returns {string|number|undefined} The text of capture group 1, or 0 when
   *   the pattern does not match (undefined if group 1 did not participate).
   */
  function m(response, regExp) {
    // Copy the pattern so the caller's RegExp object (and its lastIndex) is
    // never touched; this also accepts a string pattern.
    const pattern = new RegExp(regExp);
    const found = response.match(pattern);
    return found ? found[1] : 0;
  }
  71.  
  /**
   * Returns the absolute number of seconds between `st` and the current time.
   *
   * @param {Date} st - Reference moment (may be in the past or the future).
   * @returns {number} Non-negative difference in seconds, fractional.
   */
  function secondsBetweenDates(st) {
    const nowMs = new Date().getTime();
    const elapsedMs = st.getTime() - nowMs;
    return Math.abs(elapsedMs / 1000);
  }
  78.  
  /**
   * Patterns used to pull individual fields out of a fetched page; each has
   * exactly one capture group holding the value.
   *
   * The attribute captures use `[^"]+` rather than a greedy `.+`, so a value
   * stops at its own closing quote even when several tags share one line
   * (the greedy form ran on to the last `">` of the line).
   */
  const regExps = {
    name: /<meta itemprop="name" content="([^"]+)">/,
    price: /<meta itemprop="price" content="([^"]+)">/,
    curr: /<meta itemprop="priceCurrency" content="([^"]+)">/,
    orders: /ordersCount":(\d+)/,
    q: /quantity":(\d+)/,
    brand: /<meta itemprop="brand" content="([^"]+)">/,
    rating: /<meta itemprop="ratingValue" content="([^"]+)">/,
    reviewCount: /<meta itemprop="reviewCount" content="([^"]+)">/,
    // No closing `>` here: the tag may carry further attributes after content.
    image: /<meta itemprop="image" content="([^"]+)"/
  };
  90.  
  // Names of the spreadsheet tabs looked up by parserWb2.
  const sheetNames = {
    shMain: "URLS", // source tab: URLs are read from its first column
    shPaste: "RESULT", // output tab: parsed rows are written here
    shLog: "LOG", // log tab: one row is appended per fetched batch
  };

  // Maximum number of requests passed to a single UrlFetchApp.fetchAll call.
  const step = 1000;
  98.  
  99.  
  100.  
  101.  
  102.  
  103.  
  104.  
  105.  
  106.  
  107.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement