Guest User

Untitled

a guest
Oct 23rd, 2017
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.65 KB | None | 0 0
  1. var http = require('https');
  2. var cheerio = require('cheerio');
  3. var Promise = require('bluebird');
  4. var url = 'https://developer.teamwork.com/';
  5.  
  6.  
  /**
   * Parse one module page and print the text of its headings.
   *
   * Loads the markup with cheerio, collects the text of every `h2` found
   * under `.api--main`, and logs it wrapped in `|` characters. Nothing is
   * returned yet.
   *
   * Planned result shape (from the author's notes, not implemented here):
   *   { title, APIs: [{ apiTitle, type, apiUrl, description,
   *                     req: { description, content: {} },
   *                     res: { description, content: {} } }] }
   *
   * @param {string} html - Markup of a module page.
   * @returns {undefined}
   */
  function filterData(html) {
    var dom = cheerio.load(html);
    var headings = dom('.api--main').find('h2');
    var headingText = headings.text();

    console.log('|' + headingText + '|');
  }
  31.  
  32.  
  /**
   * Extract the list of modules (title + URL) from the index page.
   *
   * Every `.lev1` element under `.api--main` is expected to contain an
   * anchor; its text becomes the module title and its `href` the module URL.
   * An href that does not contain `//` is treated as relative and prefixed
   * with the module-level `url` base; anything else is used unchanged.
   * Entries whose anchor has no `href` are skipped, since there is nothing
   * to crawl for them.
   *
   * @param {string} html - Markup of the index page.
   * @returns {Array<{moduleTitle: string, moduleUrl: string}>} One record per
   *   module link, in the order cheerio yields them.
   */
  function filterModules(html) {
    var $ = cheerio.load(html);
    var modulesData = [];

    $('.api--main').find('.lev1').each(function () {
      var link = $(this).find('a');
      var href = link.attr('href');

      // Without an href there is no page to fetch; calling string methods on
      // `undefined` would also throw and abort the whole crawl.
      if (!href) {
        return;
      }

      // Declared locally: the previous version assigned an undeclared
      // `moduleUrl`, leaking a global (and throwing in strict mode).
      var moduleUrl = href.indexOf('//') === -1 ? url + href : href;

      modulesData.push({
        moduleTitle: link.text(),
        moduleUrl: moduleUrl
      });
    });

    return modulesData;
  }
  59.  
  /**
   * Render module records as a human-readable, multi-line string.
   *
   * Each record produces two lines: `<title>` followed by ` URL: <url>`.
   *
   * @param {Array<{moduleTitle: string, moduleUrl: string}>} data - Records
   *   as produced by `filterModules`.
   * @returns {string} The concatenated listing; empty string for no records.
   */
  function printModuleInfo(data) {
    var printResults = '';

    data.forEach(function (item) {
      // '\n' (a line break) rather than the literal letter 'n', which glued
      // every entry onto one line with stray "n" characters.
      printResults += '<' + item.moduleTitle + '>\n' + ' URL: ' + item.moduleUrl + '\n';
    });

    return printResults;
  }
  71.  
  /**
   * Download one page and resolve with its markup, tagged with a title.
   *
   * @param {string} url - Address passed to `http.get`.
   * @param {string} title - Label carried through unchanged to the result.
   * @returns {Promise<{title: string, html: string}>} Resolves when the
   *   response ends; rejects if the request or the response stream errors.
   */
  function getContents(url, title) {
    return new Promise(function (resolve, reject) {
      http.get(url, function (res) {
        console.log('crawling:' + url);
        var html = '';

        // Decode on the stream so a multi-byte UTF-8 character split across
        // two chunks is reassembled instead of being corrupted by
        // per-chunk Buffer-to-string conversion.
        res.setEncoding('utf8');

        res.on('data', function (chunk) {
          html += chunk;
        });

        // A failure while reading the body would otherwise leave the promise
        // pending forever.
        res.on('error', reject);

        res.on('end', function () {
          resolve({
            title: title,
            html: html
          });
        });
      }).on('error', function (e) {
        reject(e);
      });
    });
  }
  94.  
  95.  
  96.  
  // Promises for every module page being crawled. Filled only after the index
  // page has been downloaded and parsed.
  var allOriContents = [];

  // Entry point: fetch the index page, discover the modules, crawl each module
  // page, then parse the downloaded pages.
  http.get(url, function (res) {
    var html = '';

    // Decode on the stream so multi-byte characters split across chunks
    // survive concatenation.
    res.setEncoding('utf8');

    res.on('data', function (data) {
      html += data;
    });

    res.on('end', function () {
      filterModules(html).forEach(function (item) {
        // The "Introduction" entry is deliberately not crawled.
        if (item.moduleTitle !== 'Introduction') {
          allOriContents.push(getContents(item.moduleUrl, item.moduleTitle));
        }
      });

      // Must run here, after the list has been populated. Waiting on the
      // array at module load time saw it empty and resolved immediately.
      Promise
        .all(allOriContents)
        .then(function (obj) {
          console.log(obj.length);
          var modulesData = [];

          obj.forEach(function (item) {
            console.log(item.title);
            // Store the titled record, not the bare filterData result.
            modulesData.push({
              title: item.title,
              data: filterData(item.html)
            });
          });

          // Three-way comparator; a boolean result is not a valid sort
          // comparator. Direction follows the original expression
          // (a.title < b.title => a sorts after b).
          modulesData.sort(function (a, b) {
            if (a.title < b.title) {
              return 1;
            }
            if (a.title > b.title) {
              return -1;
            }
            return 0;
          });
        })
        .catch(function (err) {
          console.log(err);
        });
    });
  }).on('error', function () {
    console.log('There are errors when getting urls.');
  });
Add Comment
Please, Sign In to add comment