Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- Наш канал про Таблицы и скрипты в Telegram: https://t.me/google_sheets
- Оглавление канала (конечно, в Таблице): https://docs.google.com/spreadsheets/d/1u0aRlwb0rQHbjE9avbgCqIJSQ3EvPfTxT-h54jTQh8I/edit#gid=0&range=A1:B1
- Чат: https://t.me/google_spreadsheets_chat
- Донаты: https://t.me/google_spreadsheets_chat/160663
- Написать: https://t.me/namokonov
- */
/**
 * Patterns used to pull individual fields out of a fetched page's text.
 * Each pattern has exactly one capture group holding the value of interest.
 * The key order matters: responseParse emits one column per key, in this order.
 */
const regExps = {
  // <meta itemprop="..."> tags: the captured text is the `content` attribute.
  name: /<meta itemprop="name" content="(.+?)">/,
  price: /<meta itemprop="price" content="(.+?)">/,
  curr: /<meta itemprop="priceCurrency" content="(.+?)">/,

  // Digits that follow `ordersCount":` and `quantity":` in the page text.
  orders: /ordersCount":(\d+)/,
  q: /quantity":(\d+)/,

  brand: /<meta itemprop="brand" content="(.+?)">/,
  rating: /<meta itemprop="ratingValue" content="(.+?)">/,
  reviewCount: /<meta itemprop="reviewCount" content="(.+?)">/,

  // Unlike the other meta patterns, this one does not require a closing `>` after the attribute.
  image: /<meta itemprop="image" content="(.+?)"/,
};
/** Names of the spreadsheet tabs the script works with. */
const sheetNames = {
  shMain: 'URLS', // source list of URLs
  shPaste: 'RESULT', // parsed rows are written here
  shLog: 'LOG', // one summary row per fetched batch
};

/** Maximum number of requests taken from the queue for a single batch. */
const step = 1000;
/**
 * Adds the script's custom menu to the spreadsheet UI.
 * The menu contains a single item that runs `parserWb2`.
 * Presumably invoked by the Apps Script `onOpen` trigger when the spreadsheet is opened.
 *
 * @param {Object} e - Event argument; not used by this function.
 */
function onOpen(e) {
  const ui = SpreadsheetApp.getUi();
  const menu = ui.createMenu(">–]> В ЭТОМ МЕНЮ СКРИПТЫ");
  const menuWithItem = menu.addItem("Парсим!", "parserWb2");
  menuWithItem.addToUi();
}
/**
 * Fetches every URL listed in the first column of the URLS sheet (header row skipped),
 * parses each response with `responseParse`, and writes the results to the RESULT sheet.
 *
 * The previous contents of RESULT!A2:L are cleared first. Requests are then sent in
 * batches of at most `step` via `UrlFetchApp.fetchAll`. After each batch:
 *   - a summary row is appended to LOG:
 *     [timestamp, requests sent, rows with a truthy first parsed field, elapsed seconds];
 *   - the batch rows ([timestamp, url, ...parsed fields]) are written below the last
 *     used row of RESULT.
 *
 * @throws {Error} If any sheet named in `sheetNames` does not exist.
 */
function parserWb2() {
  const ss = SpreadsheetApp.getActive();
  const sh = ss.getSheetByName(sheetNames.shMain);
  const shPaste = ss.getSheetByName(sheetNames.shPaste);
  const shLog = ss.getSheetByName(sheetNames.shLog);
  if (!sh || !shPaste || !shLog) {
    const expected = [sheetNames.shMain, sheetNames.shPaste, sheetNames.shLog].join(', ');
    throw new Error(`Missing required sheet; expected sheets: ${expected}`);
  }

  // Column A below the header row, blank cells skipped.
  const urls = sh.getDataRange().getValues()
    .slice(1)
    .map((cells) => cells[0])
    .filter((url) => url);
  const requests = requestPrepare(urls);

  shPaste.getRange("A2:L").clearContent();

  while (requests.length) {
    const startTime = new Date();
    // splice() removes the batch from the queue, so the loop ends once it is drained.
    const partRequests = requests.splice(0, step);
    const responses = UrlFetchApp.fetchAll(partRequests);

    const rows = responses.map((response, i) => {
      const row = responseParse(response.getContentText());
      // Take the URL from the request of this batch: the index is batch-local, so
      // indexing the full URL list with it would mislabel every batch after the first.
      row.unshift(new Date(), partRequests[i].url);
      return row;
    });

    // Index 2 is the first parsed field (the two leading columns are timestamp and URL).
    const parsedCount = rows.filter((row) => row[2]).length;
    shLog.appendRow([new Date(), partRequests.length, parsedCount, secondsBetweenDates(startTime)]);

    shPaste
      .getRange(shPaste.getLastRow() + 1, 1, rows.length, rows[0].length)
      .setValues(rows);
    // Push pending writes so results become visible batch by batch.
    SpreadsheetApp.flush();
  }
}
/**
 * Extracts one value per entry of `regExps` from the given text.
 *
 * @param {string} r - Text to search (the body of a fetched page).
 * @returns {Array<string|number>} One value per `regExps` key, in key order. Each value
 *   is whatever `m` returns for that pattern (captured text, or 0 when nothing matched);
 *   the `price` value is additionally converted to a number.
 */
function responseParse(r) {
  // Object.entries keeps the loop variables local; the former `for (y in ...)`
  // leaked `y` into the global scope.
  return Object.entries(regExps).map(([field, regExp]) => {
    const result = m(r, regExp);
    return field === 'price' ? Number(result) : result;
  });
}
/**
 * Builds one request descriptor per URL for `UrlFetchApp.fetchAll`.
 *
 * @param {Array} urls - URLs to request.
 * @returns {Array<{url: *, muteHttpExceptions: boolean}>} Descriptors in the same order
 *   as `urls`, each with `muteHttpExceptions` set to true.
 */
function requestPrepare(urls) {
  const toRequest = function (address) {
    const request = {};
    request.url = address;
    request.muteHttpExceptions = true;
    return request;
  };
  return urls.map(toRequest);
}
/**
 * Builds an image URL on img1.wbstatic.net for the given identifier.
 * The path contains the identifier rounded down to a multiple of 10000,
 * followed by `<identifier>-1.jpg`.
 *
 * @param {number|string} y - Identifier to build the URL for.
 * @returns {string|number} The URL, or 0 when `y` is falsy.
 */
function createUrlImage(y) {
  if (!y) {
    return 0;
  }
  const bucket = Math.floor(y / 10000) * 10000;
  return `https://img1.wbstatic.net/big/new/${bucket}/${y}-1.jpg`;
}
/**
 * Returns the first capture group of the first match of `regExp` in `response`.
 *
 * @param {string} response - Text to search.
 * @param {RegExp|string} regExp - Pattern with at least one capture group.
 * @returns {string|number} The text of capture group 1, or 0 when the pattern does not match.
 */
function m(response, regExp) {
  // Match against a fresh copy, leaving the parameter and the caller's regex object alone.
  const pattern = new RegExp(regExp);
  // Run the search once and reuse the result instead of scanning the text twice.
  const found = response.match(pattern);
  return found ? found[1] : 0;
}
/**
 * Returns the absolute number of seconds between `st` and the current time.
 *
 * @param {Date} st - Reference moment.
 * @returns {number} Non-negative distance in seconds (fractional, millisecond resolution).
 */
function secondsBetweenDates(st) {
  const nowMs = new Date().getTime();
  const elapsedMs = Math.abs(st.getTime() - nowMs);
  return elapsedMs / 1000;
}
Add Comment
Please, Sign In to add comment