Guest User

Untitled

a guest
Jan 28th, 2018
125
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.89 KB | None | 0 0
  // Test-mode switch: set to 1 to skip database writes and uploads to the
  // static website host (both are guarded by this flag further down the file).
  const testVar = 0;
  if (testVar === 1) {
    console.log('Test mode: not writing to DB, not uploading to static website host');
    console.log('__dirname: ', __dirname);
  }
  7.  
  8. // Require Modules
  9. const { MongoClient, ObjectID } = require('mongodb');
  10. const Xray = require('x-ray');
  11. const puppeteer = require('puppeteer');
  12. const _ = require('lodash');
  13. const nodemailer = require('nodemailer');
  14. const fs = require('fs')
  15. const express = require('express')
  16. const hbs = require('hbs')
  17. const app = express()
  18. const js2xmlparser = require('js2xmlparser')
  19. const scp = require('scp');
  20. const moment = require('moment')
  21.  
  // Module-level holder for the interest data. processResults() first points
  // it at the scraped rates and later replaces it with the table object.
  var interest = {};

  // Accept invalid TLS certificates on the scraped websites.
  // NOTE(review): this is a process-wide environment setting, so it is not
  // limited to the scrapers -- confirm that is intended.
  process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0';

  // Date string in moment's 'LL' format, using the 'en' locale.
  moment.locale('en');
  var dateVar = String(moment().format('LL'));
  30.  
  // Outgoing mail settings. Host and credentials are redacted in this paste.
  const smtpConfig = {
    host: '', // Redacted
    port: 465,
    secure: true,
    auth: {
      user: '', // Redacted
      pass: '', // Redacted
    },
    // NOTE(review): certificate verification is switched off for the mail
    // server connection as well.
    tls: { rejectUnauthorized: false },
  };

  // Transport used by processResults() to send the change notification.
  const transporter = nodemailer.createTransport(smtpConfig);
  45.  
  // Non-JS scraper configuration.
  // The filters below can be applied to scraped values. Every filter passes
  // non-string input (and, for the match filters, text without a rate in it)
  // through unchanged instead of throwing, so one odd page cannot abort the
  // scrape.
  const x = Xray({
    filters: {
      // Strip surrounding whitespace.
      trim: function (value) {
        return typeof value === 'string' ? value.trim() : value;
      },
      // Keep the last five characters.
      slice: function (value) {
        return typeof value === 'string' ? value.slice(-5) : value;
      },
      // Extract the first "d,dd%"-shaped fragment (any char, comma, two chars, %).
      match: function (value) {
        const found = typeof value === 'string' ? value.match(/.,..%/) : null;
        return found ? found[0] : value;
      },
      // Extract the first "d,dd"-shaped fragment and append the percent sign.
      matchNoPerc: function (value) {
        const found = typeof value === 'string' ? value.match(/.,../) : null;
        return found ? found[0] + '%' : value;
      },
      // Keep the first five characters, trimmed.
      sliceBeg: function (value) {
        return typeof value === 'string' ? value.slice(0, 5).trim() : value;
      },
    },
  });
  68.  
  /**
   * Format a date as zero-padded DD-MM-YYYY using its local-time fields.
   * @param {Date} when - date to format
   * @returns {string} e.g. "05-01-2018"
   */
  function formatDayMonthYear(when) {
    const day = String(when.getDate()).padStart(2, '0');
    const month = String(when.getMonth() + 1).padStart(2, '0'); // getMonth() is 0-based
    return `${day}-${month}-${when.getFullYear()}`;
  }

  // Today and yesterday strings
  const d = new Date();
  const datestring = formatDayMonthYear(d);
  // Step `d` back one day; from here on `d` holds yesterday's date.
  d.setDate(d.getDate() - 1);
  const yDatestring = formatDayMonthYear(d);


  // DB lookup by date
  // NOTE(review): `iDag` is not read anywhere in the visible code -- confirm it is still needed.
  const date = new Date(Date.now());
  const options = { year: 'numeric', month: 'long', day: 'numeric' };
  const iDag = date.toLocaleString('en-EN', options);
  82.  
  // Define scraping sources (non-JS).
  // Each entry gives the page URL, a scope selector and the named selectors to
  // extract; `name` is the key the result is stored under in doc.interest.
  const src = [
    {
      name: '', // Redacted
      url: '', // Redacted
      scope: '', // Redacted
      sel: {
        oFa1: 'div div div div.col-sm-10.col-sm-offset-1 div:nth-child(1) div:nth-child(2) ul li:nth-child(1) h4',
        vFa1: 'div div div div.col-sm-10.col-sm-offset-1 div:nth-child(1) div:nth-child(2) ul li:nth-child(2) h4',
        vBr1: 'div div div div.col-sm-10.col-sm-offset-1 div:nth-child(1) div:nth-child(2) ul li:nth-child(3) h4',
      },
    },
    // ...et cetera (total of 14 sources)
  ];

  // Execute scrape
  // `doc` is today's result document: scrape() fills doc.interest and
  // processResults() stores it. It is declared here rather than created as an
  // implicit global.
  const doc = {};
  doc.interest = {};
  doc.date = datestring;
  var s1 = scrape();
  var s2 = scrapeJS();
  Promise.all([s1, s2])
    .then(() => {
      console.log('Scraping complete, processing results');
      processResults();
    })
    .catch((err) => {
      // Without this handler a failed scraper (or a throw from
      // processResults) would surface as an unhandled promise rejection.
      console.log('Scraping failed, results not processed');
      console.log(err);
    });
  107.  
  108. /* Functions below */
  109.  
  // Non-JS scraper
  /**
   * Scrape every entry of `src` with x-ray and store each result in
   * `doc.interest` under the entry's name.
   *
   * Each callback-style x-ray call is wrapped in a promise so that the
   * returned promise settles only after every source has reported back.
   * A failing source is logged and skipped; it does not reject the whole
   * scrape.
   *
   * @returns {Promise<void>} resolves once all sources have finished
   */
  async function scrape() {
    const outcomes = await Promise.all(
      src.map(
        (item) =>
          new Promise((resolve) => {
            x(item.url, item.scope, item.sel)(function (err, res) {
              if (err) {
                console.log(err);
                resolve(false);
                return;
              }
              doc.interest[item.name] = res;
              resolve(true);
            });
          })
      )
    );

    const failed = outcomes.filter((ok) => !ok).length;
    if (failed === 0) {
      console.log('Non-JS scraping complete');
    } else {
      console.log(`Non-JS scraping finished with ${failed} failed source(s)`);
    }
  }
  128.  
  // JS scraper
  /**
   * Open the (redacted) page in a puppeteer browser, wait for `#interest1`
   * to appear and read the trimmed text of four elements.
   *
   * The browser is closed in a `finally` block, so it is released even when
   * navigation or the selector wait fails; the error then propagates to the
   * caller as a rejection.
   *
   * @returns {Promise<void>}
   */
  async function scrapeJS() {
    const browser = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox'] });
    try {
      const page = await browser.newPage();
      await page.goto(''); // Redacted
      await page.waitForSelector('#interest1');

      // The callback runs inside the page; the selector is passed in as an argument.
      const readText = (selector) =>
        page.evaluate((sel) => document.querySelector(sel).textContent.trim(), selector);

      const int1 = await readText('#interest1');
      const int2 = await readText('#interest2');
      const int3 = await readText('#interest5');
      const int4 = await readText('#interest6');
      //Put into pre-defined object to use later on
      // NOTE(review): the step that stores int1..int4 is not part of this
      // paste, so the values are currently unused here.
    } finally {
      await browser.close();
    }
    console.log('JS scraping complete');
  }
  153.  
  // Process scrape results
  /**
   * Compare today's scraped rates with yesterday's stored document, email a
   * diff when they differ, store today's data, then regenerate and upload the
   * XML feed and the HTML page.
   *
   * Reads the module-level `doc`, `yDatestring`, `dateVar` and `testVar`, and
   * reassigns the module-level `interest` (first to the scraped rates, then to
   * the table object). Errors are logged rather than thrown, and the MongoDB
   * connection is closed whether or not a step fails.
   */
  function processResults() {
    const right = doc.interest;
    interest = right;
    //Database
    MongoClient.connect('mongodb://...', function (err, database) { // URL redacted
      if (err) {
        return console.log('Unable to connect to MongoDB server');
      }
      console.log('Connected to MongoDB server');
      const db = database.db('...');

      storeAndPublish(db, right)
        .catch(function (error) {
          console.log(error);
        })
        .then(function () {
          // Runs after success and after a logged failure alike.
          database.close();
          console.log('Database connection closed');
        })
        .catch(function (error) {
          console.log(error);
        });
    });
  }

  // Runs the database steps in order (each one awaited), then kicks off the
  // XML and HTML publishing.
  async function storeAndPublish(db, right) {
    // Number() keeps the loose matching of the original `testVar != 1` check.
    const isTestMode = Number(testVar) === 1;

    await emailIfChanged(db, right);

    // Insert today's results
    if (!isTestMode) {
      await db.collection('interest').insertOne(doc);
    }

    interest = buildTable(interest);

    // Insert today's table
    if (!isTestMode) {
      await db.collection('table').insertOne(interest);
    }
    // Presumably insertOne() adds an `_id` to the object; keep it out of the
    // generated XML/HTML.
    delete interest['_id'];

    checkRateFormat(interest);
    publishXml(interest, isTestMode);
    publishHtml(interest, isTestMode);
  }

  // Loads yesterday's document and emails a diff when the rates differ.
  // A missing document or a failed email is logged and does not stop the run.
  async function emailIfChanged(db, right) {
    // Retrieve yesterday's results
    const res = await db.collection('interest').find({ date: yDatestring }).toArray();
    if (res.length === 0) {
      console.log('No results stored for yesterday, skipping comparison');
      return;
    }
    const left = res[0].interest;

    //Compare the two objects
    if (_.isEqual(left, right)) {
      console.log('No changes since yesterday');
      return;
    }

    console.log('Changes detected, sending email');
    // Send email notification. The local is deliberately not called `changes`:
    // a `var changes` here would shadow the changes() function being called.
    const diff = JSON.stringify(changes(left, right), undefined, 2);
    const html = `<p>Interest rates have changed. These are the changes:</p><p>${diff}</p>`;
    const message = {
      from: 'interest@...',
      to: '...',
      subject: 'Interests have changed',
      text: 'interests have changed!',
      html: html
    };

    try {
      await transporter.sendMail(message);
    } catch (mailError) {
      console.log(mailError);
    }
  }

  // Builds the table object from the scraped rates. The parameter is named
  // `interest` on purpose so that the property expressions elided from this
  // paste keep referring to the scraped data.
  function buildTable(interest) {
    // The helpers and assumptions below are unused in the visible part of the
    // literal; presumably the elided properties use them.

    // "4,25" or "4,25%" -> 0.0425
    function number(string) {
      return parseFloat(string.replace(',', '.')) / 100;
    }

    // 0.0425 -> "4,25%"
    function percent(num) {
      return (num * 100).toFixed(2).replace('.', ',') + '%';
    }

    // 0.0425 -> "4%"
    function percentWhole(num) {
      return (num * 100).toFixed(0).replace('.', ',') + '%';
    }

    // Replaces the first "." with ","
    function comma(a) {
      return a.replace('.', ',');
    }

    // Normalises a rate string to two decimals followed by "%"
    function fix(a) {
      return percent(number(a));
    }

    // Blend of rates v1 and v2:
    // (v1 * h1 * weights + v2 * (h1 + h2 - h1 * weights)) / (h1 + h2)
    function weighting(v1, v2, h1, h2, weights) {
      return percent(
        (number(v1) * h1 * weights + number(v2) * (h1 + h2 - h1 * weights)) / (h1 + h2)
      );
    }

    // Assumptions
    const weights = 0.8;
    const ratios = {
      '...': { // Redacted
        h1: 0.7,
        h2: 0.1
      },
      // ... et cetera
    };

    return {
      _meta: {
        updated: dateVar
      },
      lender1: {
        party: {
          name: '',
          img: '',
          url: ''
        },
        maximum: {
          val: `${interest['lender1'].int1}`,
          desc: '' // Some description
        },
        // Many more properties, removed for brevity
      }
    };
  }

  // Test that the interest rates are of the correct form for the HTML table:
  // for every top-level key the `val` of vBr, vFa, oBr and oFa must be '-'
  // (also used when the entry is missing) or contain a "d,dd%" pattern.
  // Logs a single error line otherwise.
  function checkRateFormat(table) {
    const values = [];
    ['vBr', 'vFa', 'oBr', 'oFa'].forEach(function (field) {
      _.forEach(table, function (value, key) {
        values.push(_.get(table, [key, field, 'val'], '-'));
      });
    });

    const allValid = values.every(function (value) {
      return value === '-' || /\d,\d\d%/.test(value);
    });
    if (!allValid) {
      console.log('Error: interest rates are not of the correct form');
    }
  }

  // Update XML feed: write table.xml next to this script and upload it unless
  // running in test mode.
  function publishXml(table, isTestMode) {
    fs.writeFile(__dirname + '/table.xml', js2xmlparser.parse('rows', table), function (err) {
      if (err) {
        return console.log(err);
      }
      console.log('XML saved to table.xml');

      const optionsXml = {
        file: __dirname + '/table.xml',
        user: '...', // Redacted
        host: '', // Redacted
        port: '', // Redacted
        path: '' // Redacted
      };
      if (!isTestMode) {
        scp.send(optionsXml, function (err) {
          if (err) console.log(err);
          else console.log('XML uploaded to static website host');
        });
      }
    });
  }

  // Generate HTML using HBS template: render ./views/index with the table,
  // write index.html next to this script and upload it unless running in
  // test mode.
  function publishHtml(table, isTestMode) {
    app.set('view engine', 'hbs');

    app.render(require.resolve('./views/index'), table, function (err, html) {
      if (err) {
        console.log(err);
        return;
      }
      fs.writeFile(__dirname + '/index.html', html, function (writeErr) {
        if (writeErr) {
          return console.log(writeErr);
        }
        console.log('HTML file saved');

        const optionsHtml = {
          file: __dirname + '/index.html',
          user: '...', // Redacted
          host: '', // Redacted
          port: '', // Redacted
          path: '' // Redacted
        };
        if (!isTestMode) {
          scp.send(optionsHtml, function (err) {
            if (err) console.log(err);
            else console.log('HTML uploaded to static website host');
          });
        }
      });
    });
  }
  339.  
  /**
   * Recursively collect the parts of `b` that differ from `a`.
   *
   * @param {*} a - previous value
   * @param {*} b - current value
   * @returns {*} undefined when the two are deeply equal; `b` itself when they
   *   differ and are not both arrays or both objects; otherwise an array or
   *   object holding, per index/key of `b`, the nested difference (entries
   *   without a difference are left out).
   */
  function changes(a, b) {
    if (_.isEqual(a, b)) {
      return undefined;
    }

    const bothArrays = _.isArray(a) && _.isArray(b);
    const bothObjects = _.isObject(a) && _.isObject(b);
    if (!bothArrays && !bothObjects) {
      return b;
    }

    // Arrays accumulate into an array, everything else into a plain object.
    const seed = bothArrays ? [] : {};
    return _.reduce(b, function (collected, current, slot) {
      const delta = changes(a[slot], current);
      if (!_.isUndefined(delta)) {
        collected[slot] = delta;
      }
      return collected;
    }, seed);
  }
Add Comment
Please, Sign In to add comment