SHARE
TWEET

Untitled

a guest Aug 17th, 2019 61 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. const cheerio = require('cheerio')
  2. const cleaner = require('clean-html')
  3. const axios = require('axios')
  4.  
  5. module.exports = async function getCleanDOM (url) {  
  6.   // Wrap in a recusive retry func
  7.   async function retry() {
  8.     try {
  9.       // AJAX to get HTML
  10.       this.siteRequest = await axios.get(url)
  11.       this.siteURLHTML = this.siteRequest.data
  12.  
  13.       // Clean it for sanity
  14.       cleaner.clean(this.siteURLHTML, html => {
  15.         this.siteURLHTMLClean = html
  16.       })
  17.  
  18.       // Send back page DOM
  19.       this.siteURLDOM = cheerio.load(this.siteURLHTMLClean)
  20.       return this.siteURLDOM
  21.     } catch (err) {
  22.       console.log(`ERROR - TRYING ${url} AGAIN`)
  23.       if(!err.response.status === 404) {
  24.         retry()
  25.       } else {
  26.         return '404'
  27.       }
  28.     }
  29.   }  
  30.   return retry()
  31. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top