Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
const crypto = require('crypto')
const fs = require('fs')
const path = require('path')

const bodyParser = require('body-parser')
const express = require('express')
const puppeteer = require('puppeteer')
// TCP port the HTTP server binds to.
const port = 3000

// The Express application instance that routes and middleware are registered on.
const app = express()

// Parse JSON request bodies so handlers can read their fields from req.body.
app.use(bodyParser.json())
/**
 * Render a URL in headless Chromium and archive the result under `pages/`.
 *
 * Two files are written, both named after the MD5 hex digest of the URL:
 * `<digest>.png` (a screenshot taken with a 1280x1024 viewport) and
 * `<digest>.html` (the page content as reported by the browser).
 *
 * This is a best-effort operation: any error raised while launching the
 * browser, loading the page or saving the files is logged and swallowed, and
 * whatever was collected up to that point is returned.
 *
 * @param {string} url - Address of the page to load.
 * @returns {Promise<[string, string]>} `[filename, html]`. `filename` is the
 *   MD5 digest (no directory, no extension) and `html` the page content; each
 *   is `''` if the job failed before that value was produced.
 */
const runJob = async (url) => {
  const outputDir = 'pages'
  let browser
  let filename = ''
  let html = ''
  try {
    browser = await puppeteer.launch({ headless: true })
    const page = await browser.newPage()
    // setViewport returns a promise; await it so the size is applied before navigating
    await page.setViewport({ width: 1280, height: 1024 })
    // consider navigation to be finished when there are no more than 2 network connections for at least 500 ms
    await page.goto(url, { waitUntil: 'networkidle2' })
    filename = crypto.createHash('md5').update(url).digest('hex')
    // make sure the output directory exists, otherwise both writes below fail
    await fs.promises.mkdir(outputDir, { recursive: true })
    await page.screenshot({ path: path.join(outputDir, filename + '.png') })
    html = await page.content()
    // asynchronous write: a failure rejects and lands in the catch below,
    // and the server's event loop is not blocked while the file is saved
    await fs.promises.writeFile(path.join(outputDir, filename + '.html'), html)
  } catch (err) {
    console.log('There was an error: ' + err.message)
  } finally {
    // always release the Chromium process, even when the job failed
    if (browser) {
      await browser.close()
    }
  }
  return [filename, html]
}
/**
 * Tell whether a value is an absolute http:// or https:// URL string.
 *
 * @param {*} value - Candidate taken from the request.
 * @returns {boolean} `true` only for strings that parse as a URL whose
 *   protocol is `http:` or `https:`.
 */
function isHttpUrl(value) {
  if (typeof value !== 'string') return false
  try {
    const protocol = new URL(value).protocol
    return protocol === 'http:' || protocol === 'https:'
  } catch (err) {
    // new URL() throws on anything that is not an absolute URL
    return false
  }
}

/**
 * GET /get_page: render the page named by `url` and reply with its HTML.
 *
 * `url` is read from the JSON body first and from the query string second.
 * Replies 400 when it is missing or not an http(s) URL, 502 when the render
 * job produced no HTML, and forwards unexpected errors to the error middleware.
 */
app.get('/get_page', async (req, res, next) => {
  try {
    // req.body may be undefined when no body was parsed, so guard the lookup
    const url = (req.body && req.body.url) || req.query.url
    console.log('Got URL: ' + url)

    // The URL is caller-controlled: only let http(s) through so that schemes
    // such as file:// cannot be used to read files from this machine.
    // NOTE(review): http(s) URLs pointing at internal hosts are still fetched;
    // add an allow/deny list here if this service is reachable by untrusted clients.
    if (!isHttpUrl(url)) {
      res.status(400).send('400: "url" must be an absolute http(s) URL')
      return
    }

    const [id, body] = await runJob(url)
    console.log('Processed ID: ' + id + ' for URL: ' + url)

    // runJob reports failure by returning an empty HTML string
    if (!body) {
      res.status(502).send('502: Could not render the requested page')
      return
    }
    res.send(body)
  } catch (err) {
    // a rejection inside an async handler is not picked up by Express on its own
    next(err)
  }
})
/**
 * Error-handling middleware: log the error and reply with a plain-text 500.
 *
 * It is registered with four parameters (err, req, res, next), the signature
 * Express uses for error handlers.
 */
app.use((err, req, res, next) => {
  if (!err) return next()
  // passing the error as its own argument keeps the stack trace in the log
  console.log('Request failed:', err)
  // once the response has started its status can no longer be changed;
  // hand the error to Express' default handler instead
  if (res.headersSent) return next(err)
  res.status(500)
  res.send('500: Internal server error')
})
// Remove the cap on listeners per event on `process` (0 means unlimited), which
// also silences the "possible EventEmitter memory leak" warning.
process.setMaxListeners(0)

// Log errors that nothing else caught instead of letting them end the process.
// The error is passed as a separate argument rather than concatenated, so the
// stack trace is printed and non-Error values (objects, Symbols) are handled.
process.on('uncaughtException', (error) => {
  console.log('uncaughtException Error:', error)
})

// Same treatment for promise rejections that no handler was attached to.
process.on('unhandledRejection', (error) => {
  console.log('unhandledRejection Error:', error)
})
// Start the HTTP server. The message is printed from the listen callback so
// that it only appears once the port has actually been bound.
app.listen(port, () => {
  console.log('Server is listening on port ' + port)
})
Add Comment
Please, Sign In to add comment