Advertisement
dereksir

Untitled

Mar 7th, 2024
111
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. // import the required libraries
  2. const puppeteer = require('puppeteer');
  3. const cheerio = require('cheerio');
  4.  
  /**
   * Script entry point.
   *
   * Launches a headless browser with Puppeteer, opens the scrapingclub
   * "list_infinite_scroll" exercise page, scrolls it via autoScroll(),
   * then parses the resulting HTML with Cheerio and prints the title (h4)
   * and price (h5) text of every `.post` element.
   *
   * Any failure is logged and reflected in the process exit code; the
   * browser is always closed once it has been launched.
   */
  (async () => {
    const browser = await puppeteer.launch();

    try {
      // Open the page inside the try block so that a failure here still
      // reaches the finally clause and the browser process is not leaked.
      const page = await browser.newPage();

      // navigate to the target web page
      await page.goto('https://scrapingclub.com/exercise/list_infinite_scroll/');

      // scroll to the bottom of the page before taking the HTML snapshot
      // (presumably so that items appended while scrolling are included)
      await autoScroll(page);

      // retrieve the rendered HTML and hand it to Cheerio for parsing
      const htmlContent = await page.content();
      const $ = cheerio.load(htmlContent);

      // extract the desired data using Cheerio selectors
      $('.post').each((index, element) => {
        const post = $(element);
        const title = post.find('h4').text().trim();
        const price = post.find('h5').text().trim();
        console.log(`item ${index + 1}: ${title}, ${price}`);
      });
    } catch (error) {
      console.error('Error:', error);
      // Signal the failure to the caller (shell, CI, ...) without cutting
      // the finally clause short the way process.exit() would.
      process.exitCode = 1;
    } finally {
      await browser.close();
    }
  })();
  35.  
  36.  
  /**
   * Scrolls the given page downwards in fixed steps until the accumulated
   * scroll distance reaches the document body's scroll height.
   *
   * The scroll height is re-read on every tick, so content that makes the
   * page taller while scrolling extends the run.
   *
   * @param {object} page - Object exposing `evaluate(fn, ...args)`; called
   *   from this file with a Puppeteer page.
   * @param {object} [options]
   * @param {number} [options.distance=100] - Pixels to scroll per tick; must
   *   be a positive finite number.
   * @param {number} [options.delay=100] - Milliseconds between ticks.
   * @param {number} [options.timeout=0] - Maximum total scrolling time in
   *   milliseconds; 0 (the default) means no limit, matching the previous
   *   behaviour.
   * @returns {Promise<void>} Resolves when scrolling has stopped.
   * @throws {RangeError} If `distance` is not a positive finite number.
   */
  async function autoScroll(page, { distance = 100, delay = 100, timeout = 0 } = {}) {
    // A zero, negative or non-finite step would never reach the stop condition.
    if (!Number.isFinite(distance) || distance <= 0) {
      throw new RangeError('autoScroll: distance must be a positive finite number');
    }

    // The callback is executed through page.evaluate, so it cannot rely on
    // variables from this scope; the settings are passed in as arguments.
    await page.evaluate(
      (step, intervalMs, timeoutMs) =>
        new Promise((resolve) => {
          const startedAt = Date.now();
          let totalHeight = 0; // total distance scrolled so far

          const scrollInterval = setInterval(() => {
            // read the height before scrolling, as it may change between ticks
            const scrollHeight = document.body.scrollHeight;
            window.scrollBy(0, step);
            totalHeight += step;

            const reachedBottom = totalHeight >= scrollHeight;
            const timedOut = timeoutMs > 0 && Date.now() - startedAt >= timeoutMs;

            if (reachedBottom || timedOut) {
              clearInterval(scrollInterval);
              resolve();
            }
          }, intervalMs);
        }),
      distance,
      delay,
      timeout,
    );
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement