Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Test switch: set to 1 for a dry run (no DB writes, no uploads to the static website host).
var testVar = 0;
if (testVar == 1) {
  const notices = [
    ['Test mode: not writing to DB, not uploading to static website host'],
    ['__dirname: ', __dirname],
  ];
  notices.forEach((parts) => console.log(...parts));
}
- // Require Modules
- const { MongoClient, ObjectID } = require('mongodb');
- const Xray = require('x-ray');
- const puppeteer = require('puppeteer');
- const _ = require('lodash');
- const nodemailer = require('nodemailer');
- const fs = require('fs')
- const express = require('express')
- const hbs = require('hbs')
- const app = express()
- const js2xmlparser = require('js2xmlparser')
- const scp = require('scp');
- const moment = require('moment')
// Holder for the rates table; reassigned inside processResults().
var interest = {};

// Ignore invalid certificates on the scraped websites.
// NOTE(review): this environment switch turns off certificate verification for
// TLS connections made by the whole process, not only the scrapers — confirm
// that this is intended.
process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0';

// Date string in moment's 'LL' format, English locale.
moment.locale('en');
var dateVar = String(moment().format('LL'));

// Email configuration (host and credentials redacted).
let smtpConfig = {
  host: '', // Redacted
  port: 465,
  secure: true,
  auth: {
    user: '', // Redacted
    pass: '', // Redacted
  },
  // The mail server's certificate is not verified either.
  tls: { rejectUnauthorized: false },
};
let transporter = nodemailer.createTransport(smtpConfig);
// Non-JS Scraper Configuration

/**
 * Return the first substring of `value` that matches `pattern`.
 *
 * @param {*} value - scraped value; anything that is not a string yields undefined.
 * @param {RegExp} pattern - pattern to look for.
 * @returns {string|undefined} the first match, or undefined when there is none.
 */
function firstMatch(value, pattern) {
  if (typeof value !== 'string') {
    return undefined;
  }
  const found = value.match(pattern);
  return found === null ? undefined : found[0];
}

// x-ray instance with the filters used by the selectors of the static sources.
// Every filter tolerates non-string input and text without a rate, so a single
// odd page cannot throw from inside a filter.
const x = Xray({
  filters: {
    // Strip surrounding whitespace; non-strings pass through untouched.
    trim: function(value) {
      return typeof value === 'string' ? value.trim() : value;
    },
    // Keep the last five characters.
    slice: function(value) {
      return typeof value === 'string' ? value.slice(-5) : value;
    },
    // First "<char>,<char><char>%" occurrence, or undefined when there is none.
    match: function(value) {
      return firstMatch(value, /.,..%/g);
    },
    // First "<char>,<char><char>" occurrence with a '%' appended, or undefined.
    matchNoPerc: function(value) {
      const rate = firstMatch(value, /.,../g);
      return rate === undefined ? undefined : rate + '%';
    },
    // Keep the first five characters, trimmed.
    sliceBeg: function(value) {
      return typeof value === 'string' ? value.slice(0, 5).trim() : value;
    }
  }
});
// Today and Yesterday strings, both formatted as DD-MM-YYYY in local time

/** Format a Date as zero-padded DD-MM-YYYY using its local-time fields. */
function toDayMonthYear(when) {
  const day = String(when.getDate()).padStart(2, '0');
  const month = String(when.getMonth() + 1).padStart(2, '0');
  return `${day}-${month}-${when.getFullYear()}`;
}

var d = new Date();
var datestring = toDayMonthYear(d);
// Step the same Date object back one day; from here on `d` holds yesterday.
d.setDate(d.getDate() - 1);
var yDatestring = toDayMonthYear(d);

// DB lookup by date: today's date in long English form
var date = new Date();
var options = { day: 'numeric', month: 'long', year: 'numeric' };
var iDag = date.toLocaleString('en-EN', options);
// Define scraping sources (non-JS)
// Each entry carries the page URL, the x-ray scope and the selectors to extract.
var src = [
  (function firstSource() {
    // The three rates of this source sit in consecutive <li> items of one list.
    var list = 'div div div div.col-sm-10.col-sm-offset-1 div:nth-child(1) div:nth-child(2) ul';
    return {
      name: '', // Redacted
      url: '', // Redacted
      scope: '', // Redacted
      sel: {
        oFa1: `${list} li:nth-child(1) h4`,
        vFa1: `${list} li:nth-child(2) h4`,
        vBr1: `${list} li:nth-child(3) h4`,
      },
    };
  })(),
  // ...et cetera (total of 14 sources)
];
// Execute scrape
// Result document of this run: scrape() fills doc.interest and processResults()
// reads it. Declared explicitly instead of leaking an implicit global.
const doc = {
  interest: {},
  date: datestring,
};
// Both scrapers are started before either is awaited, so they run concurrently.
var s1 = scrape();
var s2 = scrapeJS();
Promise.all([s1, s2])
  .then(() => {
    console.log('Scraping complete, processing results');
    processResults();
  })
  .catch((err) => {
    // Without this handler a failed scraper only shows up as an unhandled
    // promise rejection; the results are not processed in that case.
    console.log('Scrape run failed:', err);
  });
- /* Functions below */
// Non-JS scraper
/**
 * Scrape every static (non-JS) source listed in `src` with x-ray and store each
 * result under `doc.interest[<source name>]`.
 *
 * The x-ray calls are callback based, so each one is wrapped in a promise; the
 * returned promise therefore settles only after every callback has fired.
 * A source that fails is logged and skipped, so the remaining sources are
 * still collected.
 *
 * @returns {Promise<void>} resolves once every source has been stored or has failed.
 */
async function scrape() {
  const pending = src.map(item => new Promise(resolve => {
    x(item.url, item.scope, item.sel)(function(err, res) {
      if (err) {
        console.log(err);
      } else {
        doc.interest[item.name] = res;
      }
      resolve();
    });
  }));
  await Promise.all(pending);
  console.log('Non-JS scraping complete')
}
// JS scraper
/**
 * Scrape the JavaScript-rendered source with a headless browser.
 *
 * Opens the (redacted) page, waits for '#interest1' to appear and reads the
 * trimmed text of '#interest1', '#interest2', '#interest5' and '#interest6'.
 * The browser is always closed, including when navigation or extraction throws.
 *
 * @returns {Promise<void>} resolves after the browser has been closed; rejects
 *   with the underlying error when any puppeteer step fails.
 */
async function scrapeJS() {
  const browser = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox'] });
  try {
    const page = await browser.newPage();
    await page.goto(''); // Redacted
    await page.waitForSelector('#interest1');
    // One round trip to the page for all four values. The callback runs inside
    // the page, so the selector list has to be defined within it.
    const [int1, int2, int3, int4] = await page.evaluate(() => {
      return ['#interest1', '#interest2', '#interest5', '#interest6']
        .map(selector => document.querySelector(selector).textContent.trim());
    });
    //Put into pre-defined object to use later on
  } finally {
    await browser.close();
  }
  console.log('JS scraping complete');
}
- // Process scrape results
- function processResults() {
- right = doc.interest;
- interest = right;
- //Database
- MongoClient.connect('mongodb://...', function(err, database) { // URL redacted
- if (err) {
- return console.log('Unable to connect to MongoDB server');
- }
- console.log('Connected to MongoDB server');
- const db = database.db('...')
- // Retrieve yesterday's results
- db.collection('interest').find({ date: yDatestring }).toArray()
- .then(res => {
- left = res[0].interest;
- //Compare the two objects
- if (_.isEqual(left, right)) {
- console.log('No changes since yesterday');
- } else {
- console.log('Changes detected, sending email')
- // Send email notification
- var changes = JSON.stringify(changes(left, right), undefined, 2);
- var html = `<p>Interest rates have changed. These are the changes:</p><p>${changes}</p>`;
- var message = {
- from: 'interest@...',
- to: '...',
- subject: 'Interests have changed',
- text: 'interests have changed!',
- html: html
- };
- transporter.sendMail(message);
- }
- })
- .then(() => {
- // Insert today's results
- if (testVar != 1) {
- db.collection('interest').insertOne(doc);
- }
- })
- .then(() => {
- function number(string) {
- return parseFloat(string.replace(',', '.')) / 100
- }
- function percent(num) {
- return (num * 100).toFixed(2).toString().replace('.', ',') + '%'
- }
- function percentWhole(num) {
- return (num * 100).toFixed(0).toString().replace('.', ',') + '%'
- }
- function comma(a) {
- return a.replace('.', ',')
- }
- function fix(a) {
- return percent(number(a))
- }
- function weighting(v1, v2, h1, h2, weights) {
- return percent(
- (number(v1) * h1 * weights + number(v2) * (h1 + h2 - h1 * weights)) / (h1 + h2)
- )
- }
- // Forsendur
- var weights = 0.8
- var ratios = {
- ...: { // Redacted
- h1: 0.7,
- h2: 0.1
- },
- ... // Et cetera
- }
- interest = {
- _meta: {
- updated: dateVar
- },
- lender1: {
- party: {
- name: '',
- img: '',
- url: ''
- },
- maximum: {
- val: `${interest['lender1'].int1}`,
- desc: '' // Some description
- },
- // Many more properties, removed for brevity
- }
- }
- // Insert today's results
- if (testVar != 1) {
- db.collection('table').insertOne(interest);
- }
- })
- .then(() => {
- delete interest['_id']
- // Test that the interest rates are of the correct form for the HTML table
- var testing = []
- _.forEach(interest, function(value, key) {
- testing.push(_.get(interest, [key, 'vBr', 'val'], '-'))
- })
- _.forEach(interest, function(value, key) {
- testing.push(_.get(interest, [key, 'vFa', 'val'], '-'))
- })
- _.forEach(interest, function(value, key) {
- testing.push(_.get(interest, [key, 'oBr', 'val'], '-'))
- })
- _.forEach(interest, function(value, key) {
- testing.push(_.get(interest, [key, 'oFa', 'val'], '-'))
- })
- for (var i = 0; i < testing.length; i++) {
- if (/d,dd%/.test(testing[i]) || testing[i] == '-') {} else {
- console.log('Error: interest rates are not of the correct form')
- break
- }
- }
- // Update XML feed
- fs.writeFile(__dirname + "/table.xml", js2xmlparser.parse('rows', interest), function(err) {
- if (err) {
- return console.log(err);
- }
- console.log('XML saved to table.xml')
- var optionsXml = {
- file: __dirname + '/table.xml',
- user: '...', // Redacted
- host: '', // Redacted
- port: '', // Redacted
- path: '' // Redacted
- }
- if (testVar != 1) {
- scp.send(optionsXml, function(err) {
- if (err) console.log(err);
- else console.log('XML uploaded to static website host');
- });
- }
- })
- // Generate HTML using HBS template
- app.set('view engine', 'hbs')
- app.render(require.resolve('./views/index'), interest, function(err, html) {
- if (err) {
- console.log(err);
- } else {
- fs.writeFile(__dirname + "/index.html", html, function(err) {
- console.log("HTML file saved");
- var optionsHtml = {
- file: __dirname + '/index.html',
- user: '...', // Redacted
- host: '', // Redacted
- port: '', // Redacted
- path: '' // Redacted
- }
- if (testVar != 1) {
- scp.send(optionsHtml, function(err) {
- if (err) console.log(err);
- else console.log('HTML uploaded to static website host');
- });
- }
- });
- }
- })
- })
- .then(() => {
- database.close()
- console.log('Database connection closed')
- })
- })
- }
/**
 * Deep diff of `b` against `a`.
 *
 * Arrays are compared index by index and objects key by key, recursively.
 * Only the parts that differ appear in the result. An entry that exists in `a`
 * but no longer in `b` is reported as `null`, so that a removal stays visible
 * once the diff is serialised to JSON.
 *
 * @param {*} a - previous value.
 * @param {*} b - current value.
 * @returns {*} undefined when `a` and `b` are deeply equal; otherwise an array
 *   or object holding only the changed entries, or `b` itself when the two
 *   values are not both arrays or both objects.
 */
function changes(a, b) {
  if (_.isEqual(a, b)) {
    return undefined;
  }
  if (_.isArray(a) && _.isArray(b)) {
    const diff = [];
    const length = Math.max(a.length, b.length);
    for (let index = 0; index < length; index++) {
      if (index >= b.length) {
        // Element dropped from the end of the array.
        diff[index] = null;
        continue;
      }
      const value = changes(a[index], b[index]);
      if (value !== undefined) {
        diff[index] = value;
      }
    }
    return diff;
  }
  if (_.isObject(a) && _.isObject(b)) {
    const diff = {};
    Object.keys(b).forEach(key => {
      const value = changes(a[key], b[key]);
      if (value !== undefined) {
        diff[key] = value;
      }
    });
    Object.keys(a).forEach(key => {
      // Key present before but gone now.
      if (!Object.prototype.hasOwnProperty.call(b, key)) {
        diff[key] = null;
      }
    });
    return diff;
  }
  return b;
}
Add Comment
Please, Sign In to add comment