ZinsBR

facebook crawler

Jul 8th, 2018
1,037
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  const puppeteer = require('puppeteer');
  const fs = require('fs');
  const path = require('path');

  // Page whose reviews are scraped, and the CSS selector matching one review.
  const REVIEWS_URL = 'https://www.facebook.com/parkshoppingcg.oficial/reviews/';
  const REVIEW_SELECTOR = '.userContentWrapper';

  // Plain-text output: one review per write, each followed by a newline.
  const OUTPUT_FILE = path.join(__dirname, 'myFile2.txt');

  // Scroll-loop tuning: the loop "accounts" for this many pixels per iteration
  // and pauses before/after each scroll so lazily loaded reviews can arrive.
  const VIEWPORT_HEIGHT = 800;
  const PRE_SCROLL_DELAY_MS = 1200;
  const POST_SCROLL_DELAY_MS = 2000;

  /**
   * Resolve after `ms` milliseconds.
   * Used instead of `page.waitFor(ms)`, which is deprecated/removed in newer
   * Puppeteer releases.
   *
   * @param {number} ms - Delay in milliseconds.
   * @returns {Promise<void>}
   */
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  /**
   * Read the current scroll height of the document body inside the page.
   *
   * @param {object} page - Puppeteer page.
   * @returns {Promise<number>} `document.body.scrollHeight`.
   */
  const readScrollHeight = (page) =>
    page.evaluate(() => document.body.scrollHeight);

  /**
   * Measure the initial height of `<body>`.
   * `boundingBox()` resolves to `null` when the element is not visible, and
   * `page.$` resolves to `null` when nothing matches; in both cases fall back
   * to the body's scroll height instead of throwing on destructuring.
   *
   * @param {object} page - Puppeteer page.
   * @returns {Promise<number>} Height in CSS pixels.
   */
  async function measureBodyHeight(page) {
    const bodyHandle = await page.$('body');
    if (bodyHandle === null) {
      return readScrollHeight(page);
    }
    try {
      const box = await bodyHandle.boundingBox();
      return box === null ? await readScrollHeight(page) : box.height;
    } finally {
      // Always release the handle, even if measuring failed.
      await bodyHandle.dispose();
    }
  }

  /**
   * Repeatedly scroll to the bottom of the page so more reviews get loaded.
   * The loop advances a counter by VIEWPORT_HEIGHT per iteration and stops once
   * that counter (plus one viewport) reaches the page's current scroll height,
   * which is re-read after every scroll.
   *
   * @param {object} page - Puppeteer page.
   * @returns {Promise<void>}
   */
  async function scrollThroughPage(page) {
    let height = await measureBodyHeight(page);
    let scrolled = 0;

    while (scrolled + VIEWPORT_HEIGHT < height) {
      await sleep(PRE_SCROLL_DELAY_MS);
      await page.evaluate(() => window.scrollBy(0, document.body.scrollHeight));
      scrolled += VIEWPORT_HEIGHT;
      height = await readScrollHeight(page);
      await sleep(POST_SCROLL_DELAY_MS);
    }
  }

  /**
   * Collect the text of every element matching REVIEW_SELECTOR.
   * Reads `textContent` in the page rather than stripping tags from `innerHTML`
   * with a regex, so HTML entities are decoded and attribute values containing
   * `>` cannot corrupt the result.
   *
   * @param {object} page - Puppeteer page.
   * @returns {Promise<string[]>} One string per matched element.
   */
  function collectReviews(page) {
    return page.evaluate(
      (selector) =>
        Array.from(document.querySelectorAll(selector), (el) => el.textContent),
      REVIEW_SELECTOR
    );
  }

  /**
   * Launch a visible browser, load the reviews page, scroll through it, and
   * write the text of each review to OUTPUT_FILE.
   *
   * @returns {Promise<void>}
   */
  async function main() {
    const browser = await puppeteer.launch({ headless: false });
    let reviews;

    try {
      const page = await browser.newPage();
      // `goto` takes a single options object; list both events in one array
      // (a second options object would be silently ignored).
      await page.goto(REVIEWS_URL, { waitUntil: ['domcontentloaded', 'load'] });
      await scrollThroughPage(page);
      reviews = await collectReviews(page);
    } finally {
      // Close the browser even when navigation or scraping throws.
      await browser.close();
    }

    fs.writeFileSync(OUTPUT_FILE, reviews.map((review) => `${review}\n`).join(''));
  }

  main().catch((err) => {
    // Surface failures instead of leaving an unhandled rejection.
    console.error(err);
    process.exitCode = 1;
  });
Advertisement
Add Comment
Please, Sign In to add comment