Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// import the required libraries
const puppeteer = require('puppeteer');
const cheerio = require('cheerio');

/**
 * Script entry point.
 *
 * Launches a Puppeteer browser, opens the target page, scrolls it with
 * autoScroll(), then parses the resulting HTML with Cheerio and logs the
 * <h4> and <h5> text of every `.post` element.
 *
 * Any failure (including a failed browser launch or page creation) is logged
 * via console.error; the browser, if it was launched, is always closed.
 */
(async () => {
  // Declared outside the try so the finally block can see it; stays undefined
  // when launch() itself fails.
  let browser;
  try {
    // launch Puppeteer instance and open new page
    browser = await puppeteer.launch();
    const page = await browser.newPage();

    // navigate to target web page
    await page.goto('https://scrapingclub.com/exercise/list_infinite_scroll/');

    // scroll to the bottom of the page
    await autoScroll(page);

    // retrieve the HTML of the page as it stands after scrolling
    const htmlContent = await page.content();

    // parse HTML using Cheerio
    const $ = cheerio.load(htmlContent);

    // extract desired data using Cheerio selectors
    $('.post').each((index, element) => {
      const title = $(element).find('h4').text().trim();
      const price = $(element).find('h5').text().trim();
      console.log(`item ${index + 1}: ${title}, ${price}`);
    });
  } catch (error) {
    console.error('Error:', error);
  } finally {
    // Only close what was actually opened; launch() may have thrown.
    if (browser) {
      await browser.close();
    }
  }
})();
/**
 * Automatically scrolls a page downward in fixed steps.
 *
 * Inside the page context, a timer fires every `delay` milliseconds. On each
 * tick it reads `document.body.scrollHeight`, scrolls the window down by
 * `distance` pixels and adds `distance` to a running total. Scrolling stops
 * once the running total is equal to or exceeds the scroll height read on
 * that tick.
 *
 * @param {object} page - Object exposing `evaluate(fn, ...args)` (a Puppeteer page).
 * @param {object} [options]
 * @param {number} [options.distance=100] - Pixels to scroll on each tick; must be > 0.
 * @param {number} [options.delay=100] - Milliseconds between ticks; must be >= 0.
 * @returns {Promise<void>} Resolves when scrolling has finished.
 * @throws {RangeError} If `distance` or `delay` is not a usable number.
 */
async function autoScroll(page, { distance = 100, delay = 100 } = {}) {
  // A zero or negative step would never reach the scroll height, so the
  // in-page timer would run forever; reject such values up front.
  if (!Number.isFinite(distance) || distance <= 0) {
    throw new RangeError('distance must be a positive finite number');
  }
  if (!Number.isFinite(delay) || delay < 0) {
    throw new RangeError('delay must be a non-negative finite number');
  }

  // The callback runs via page.evaluate and cannot close over local
  // variables, so the settings are passed in as explicit arguments.
  await page.evaluate(
    (step, intervalMs) =>
      new Promise((resolve) => {
        let totalHeight = 0; // total distance scrolled so far

        const scrollInterval = setInterval(() => {
          // Re-read on every tick: the page may grow as content is appended.
          const scrollHeight = document.body.scrollHeight;

          window.scrollBy(0, step);
          totalHeight += step;

          if (totalHeight >= scrollHeight) {
            clearInterval(scrollInterval);
            resolve();
          }
        }, intervalMs);
      }),
    distance,
    delay,
  );
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement