Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- 'use strict';
- const request = require('request');
- const cheerio = require('cheerio');
- const _ = require('lodash');
- const mysql = require('mysql');
- const logger = require('winston');
// Base address of a school page; scrap() appends the school key to it.
var urlPattern = 'http://www.mejoratuescuela.org/escuelas/index/';

// MySQL connection settings, all taken from environment variables.
var connectionSettings = {
  host: process.env.HOST,
  user: process.env.USER,
  port: process.env.PORT,
  password: process.env.PASSWORD,
  database: process.env.DATABASE
};

// Single shared connection, opened as soon as the script starts.
var connection = mysql.createConnection(connectionSettings);
connection.connect();
/**
 * Reads the general information fields of a school from a list of nodes.
 *
 * Items 0, 1, 2 and 4 are treated as "label: value" text; the value is
 * everything after the FIRST colon, trimmed, so values that themselves
 * contain a colon (for example a time such as "8:00") are kept whole.
 * Item 3 (category) is read as its whole text, trimmed.
 *
 * @param {Object} data - Indexable collection of nodes (data[0]..data[4]),
 *   each of which is wrapped with cheerio() to read its text.
 * @returns {{clave: (string|undefined), level: (string|undefined),
 *   schedule: (string|undefined), category: string,
 *   phones: (string|undefined)}} A labelled field is undefined when its
 *   text contains no colon.
 */
function getGeneralData(data) {
  // Text after the first colon, trimmed; undefined when there is no colon.
  const valueAfterLabel = function (node) {
    const text = cheerio(node).text();
    const separatorAt = text.indexOf(':');
    return separatorAt === -1 ? undefined : text.slice(separatorAt + 1).trim();
  };

  return {
    clave: valueAfterLabel(data[0]),
    level: valueAfterLabel(data[1]),
    schedule: valueAfterLabel(data[2]),
    // The category item is not split on a colon, only trimmed.
    category: cheerio(data[3]).text().trim(),
    phones: valueAfterLabel(data[4])
  };
}
/**
 * Reads the address fields of a school from a list of nodes.
 *
 * Every item is treated as "label: value" text; the value is everything
 * after the FIRST colon, trimmed, so an address that itself contains a
 * colon is not cut short.
 *
 * @param {Object} data - Indexable collection of nodes (data[0]..data[3]),
 *   each of which is wrapped with cheerio() to read its text.
 * @returns {{address: (string|undefined), city: (string|undefined),
 *   town: (string|undefined), state: (string|undefined)}} A field is
 *   undefined when its text contains no colon.
 */
function getAddressData(data) {
  // Text after the first colon, trimmed; undefined when there is no colon.
  const valueAfterLabel = function (node) {
    const text = cheerio(node).text();
    const separatorAt = text.indexOf(':');
    return separatorAt === -1 ? undefined : text.slice(separatorAt + 1).trim();
  };

  return {
    address: valueAfterLabel(data[0]),
    city: valueAfterLabel(data[1]),
    town: valueAfterLabel(data[2]),
    state: valueAfterLabel(data[3])
  };
}
/**
 * Reads the three headline counters of a school as numbers.
 *
 * Commas and whitespace are removed before conversion so that grouped
 * figures such as "1,234" or "1 234" convert to 1234 instead of NaN.
 * Empty text still converts to 0, because Number('') is 0; text that is
 * not numeric after cleaning converts to NaN.
 *
 * @param {Object} data - Object exposing eq(index), whose result exposes
 *   text(); positions 0, 1 and 2 are read.
 * @returns {{studentsCount: number, employeesCount: number,
 *   groupsCount: number}}
 */
function getStatsData(data) {
  // Numeric value of the text at the given position, ignoring grouping.
  const countAt = function (index) {
    const digits = data.eq(index).text().replace(/[\s,]/g, '');
    return Number(digits);
  };

  return {
    studentsCount: countAt(0),
    employeesCount: countAt(1),
    groupsCount: countAt(2)
  };
}
// Upper bound on HTTP requests in flight at the same time. Starting one
// request per school with no limit opens a socket for each of them at once
// and fails with "connect EMFILE" once file descriptors run out.
const MAX_CONCURRENT_REQUESTS = 10;

// Number of requests started by scrap() whose callback has not run yet.
let activeRequestCount = 0;

// School keys waiting for a free request slot, oldest first.
const waitingClaves = [];

/**
 * Handles the outcome of one school page request: logs failures and
 * non-200 responses, otherwise parses the page and prints its data.
 *
 * @param {string} clave - School key the request was made for.
 * @param {?Error} err - Request error, if any.
 * @param {Object} response - Response object (statusCode, body, headers).
 * @param {string} html - Response body to parse.
 */
function handleSchoolPage(clave, err, response, html) {
  if (err) {
    logger.error(err);
    return;
  }
  if (response.statusCode !== 200) {
    // A 404 is logged without extra detail; other statuses include the
    // body and headers for debugging.
    const debug = response.statusCode === 404
      ? {}
      : {body: response.body, headers: response.headers};
    logger.warn(`Got status code ${response.statusCode} while searching for school ${clave}`, debug);
    return;
  }
  const $ = cheerio.load(html);
  console.log(getGeneralData($('.info .block li')));
  console.log(getAddressData($('.address div[flex] li')));
  console.log(getStatsData($('.h3-num-datos')));
}

/**
 * Fetches the page of one school and prints the data extracted from it.
 *
 * At most MAX_CONCURRENT_REQUESTS requests run at once. A call made while
 * that many are in flight only queues the key; it is fetched, in arrival
 * order, as soon as an earlier request completes.
 *
 * @param {string} clave - School key appended to urlPattern.
 */
function scrap(clave) {
  if (activeRequestCount >= MAX_CONCURRENT_REQUESTS) {
    waitingClaves.push(clave);
    return;
  }

  activeRequestCount += 1;
  request(`${urlPattern}${clave}`, function (err, response, html) {
    try {
      handleSchoolPage(clave, err, response, html);
    } finally {
      // Free the slot even if handling threw, then start the next queued key.
      activeRequestCount -= 1;
      if (waitingClaves.length > 0) {
        scrap(waitingClaves.shift());
      }
    }
  });
}
// Stream the school rows instead of buffering them: scrap() is called for
// each row's CLAVE_CT as the row arrives, and the connection is closed once
// the stream has ended.
var q = connection.query('SELECT id, CLAVE_CT, NOMBRECT FROM escuelasAll');
q
  .on('error', function (err) {
    // Report query failures through the logger, like the other errors in
    // this file, rather than as plain console output.
    logger.error(err);
  })
  .on('result', function (row) {
    scrap(row.CLAVE_CT);
  })
  .on('end', function () {
    console.log('Stream ended');
    connection.end();
  });
- error: Error: connect EMFILE 192.237.193.131:80 - Local (undefined:undefined)
- at Object.exports._errnoException (util.js:874:11)
- at exports._exceptionWithHostPort (util.js:897:20)
- at connect (net.js:841:14)
- at net.js:984:7
- at GetAddrInfoReqWrap.asyncCallback [as callback] (dns.js:63:16)
- at GetAddrInfoReqWrap.onlookup [as oncomplete] (dns.js:82:10)
- error: Error: connect EMFILE 192.237.193.131:80 - Local (undefined:undefined)
- at Object.exports._errnoException (util.js:874:11)
- at exports._exceptionWithHostPort (util.js:897:20)
- at connect (net.js:841:14)
- at net.js:984:7
- at GetAddrInfoReqWrap.asyncCallback [as callback] (dns.js:63:16)
- at GetAddrInfoReqWrap.onlookup [as oncomplete] (dns.js:82:10)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement