Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// NOTE: despite its name, `http` is bound to Node's `https` module.
var http = require('https');
// HTML parser exposing a jQuery-like API.
var cheerio = require('cheerio');
// Bluebird replaces the built-in Promise for this file.
var Promise = require('bluebird');

// Index page of the documentation site being crawled. Relative module links
// are resolved by appending them to this string, so it ends with '/'.
var url = 'https://developer.teamwork.com/';
/**
 * Extract the data of one module page.
 *
 * Currently only the module heading (text of every `h2` under `.api--main`)
 * is extracted. It is logged between `|` markers and returned so the caller
 * can store it; previously the function returned nothing.
 *
 * TODO: the planned result shape also carries an `APIs` array whose entries
 * hold `apiTitle`, `type`, `apiUrl`, `description`, and `req` / `res` objects
 * (each with `description` and `content`). None of that is extracted yet.
 *
 * @param {string} html - Raw HTML of a module page.
 * @returns {{title: string}} The data extracted from the page.
 */
function filterData(html) {
  var $ = cheerio.load(html);
  var title = $('.api--main').find('h2').text();

  console.log('|' + title + '|');

  return {
    title: title
  };
}
/**
 * Collect the module links listed on the index page.
 *
 * Every `.lev1` element under `.api--main` contributes one entry built from
 * the anchor(s) it contains.
 *
 * @param {string} html - Raw HTML of the index page.
 * @returns {Array<{moduleTitle: string, moduleUrl: string}>} One entry per
 *   linked module, in document order.
 */
function filterModules(html) {
  var $ = cheerio.load(html);
  var modulesData = [];

  $('.api--main').find('.lev1').each(function () {
    var link = $(this).find('a');
    var href = link.attr('href');
    var moduleUrl;

    // An entry without an href cannot be crawled; skip it instead of
    // throwing on `undefined.match`.
    if (typeof href !== 'string') {
      return;
    }

    if (href.indexOf('//') !== -1) {
      // Already absolute (or protocol-relative): use as-is.
      moduleUrl = href;
    } else {
      // The base `url` ends with '/', so drop leading slashes from the href
      // to avoid producing '//' in the path.
      moduleUrl = url + href.replace(/^\/+/, '');
    }

    modulesData.push({
      moduleTitle: link.text(),
      moduleUrl: moduleUrl
    });
  });

  return modulesData;
}
/**
 * Render module entries as human-readable text, two lines per module:
 *
 *   <Title>
 *    URL: https://...
 *
 * @param {Array<{moduleTitle: string, moduleUrl: string}>} data - Module entries.
 * @returns {string} The formatted listing; an empty string for an empty list.
 */
function printModuleInfo(data) {
  return data
    .map(function (item) {
      // '\n' is a real line break; the original literals had lost their
      // backslash and emitted a literal 'n'.
      return '<' + item.moduleTitle + '>\n' + ' URL: ' + item.moduleUrl + '\n';
    })
    .join('');
}
/**
 * Download a page and pair its HTML with the given title.
 *
 * @param {string} url - Address of the page to fetch.
 * @param {string} title - Label carried through to the result unchanged.
 * @returns {Promise<{title: string, html: string}>} Resolves once the whole
 *   response body has been received; rejects on a request or response
 *   stream error. The HTTP status code is not inspected.
 */
function getContents(url, title) {
  return new Promise(function (resolve, reject) {
    http.get(url, function (res) {
      console.log('crawling:' + url);
      var html = '';

      // Decode as a stream so a multi-byte UTF-8 character split across two
      // chunks is not corrupted by per-chunk Buffer-to-string conversion.
      res.setEncoding('utf8');

      res.on('data', function (chunk) {
        html += chunk;
      });
      res.on('end', function () {
        resolve({
          title: title,
          html: html
        });
      });
      res.on('error', function (e) {
        reject(e);
      });
    }).on('error', function (e) {
      reject(e);
    });
  });
}
// Pending downloads, one promise per module page.
var allOriContents = [];

// Crawl entry point: fetch the index page, queue a download for every module
// except 'Introduction', then parse each downloaded page.
http.get(url, function (res) {
  var html = '';

  // Decode as a stream so multi-byte characters split across chunks survive.
  res.setEncoding('utf8');

  res.on('data', function (chunk) {
    html += chunk;
  });

  res.on('end', function () {
    filterModules(html).forEach(function (item) {
      if (item.moduleTitle !== 'Introduction') {
        allOriContents.push(getContents(item.moduleUrl, item.moduleTitle));
      }
    });

    // Wait for the downloads only now that the list has been filled. Calling
    // Promise.all at the top level ran before this handler and therefore
    // always saw an empty array.
    Promise
      .all(allOriContents)
      .then(function (pages) {
        console.log(pages.length);

        var modulesData = pages.map(function (page) {
          console.log(page.title);
          // Store the title/data pair, not just filterData's return value.
          return {
            title: page.title,
            data: filterData(page.html)
          };
        });

        // A comparator must return a number, not a boolean. This keeps the
        // order the original `a.title < b.title` implied (descending by
        // title). NOTE(review): confirm descending is the intended order.
        modulesData.sort(function (a, b) {
          if (a.title < b.title) {
            return 1;
          }
          if (a.title > b.title) {
            return -1;
          }
          return 0;
        });
      })
      .catch(function (err) {
        console.log(err);
      });
  });
}).on('error', function (err) {
  console.log('There are errors when getting urls.');
  console.log(err);
});
Add Comment
Please, Sign In to add comment