Guest User

Untitled

a guest
Aug 13th, 2018
252
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.52 KB | None | 0 0
  1. (node:53135) DeprecationWarning: current URL string parser is deprecated,
  2. and will be removed in a future version. To use the new parser, pass option
  3. { useNewUrlParser: true } to MongoClient.connect.
  4.  
  5. (node:53135) UnhandledPromiseRejectionWarning: TypeError:
  6. User.findOneAndUpdate is not a function**
  7. at upsertUser (/home/oceanm/thal/index.js:111:14)
  8. at run (/home/oceanm/thal/index.js:69:4)
  9. at <anonymous>
  10. at process._tickCallback (internal/process/next_tick.js:188:7)
  11.  
  12. (node:53135) UnhandledPromiseRejectionWarning: Unhandled promise rejection.
  13. This error originated either by throwing inside of an async function without
  14. a catch block, or by rejecting a promise which was not handled with
  15. .catch(). (rejection id: 1)
  16.  
  17. (node:53135) [DEP0018] DeprecationWarning: Unhandled promise rejections are
  18. deprecated. In the future, promise rejections that are not handled will
  19. terminate the Node.js process with a non-zero exit code.
  20.  
  21. <models/user.js>
  22.  
  23.  
  24. const mongoose = require('mongoose');
  25.  
  26. let userSchema = new mongoose.Schema({
  27. username: String,
  28. email: String,
  29. dateCrawled: Date
  30. });
  31.  
  32. <creds.js>
  33.  
  34.  
  35. module.exports = {
  36. username: 'myid',
  37. password: 'mypassword'
  38. }
  39.  
  40. <index.js>
  41.  
  42. const puppeteer = require('puppeteer');
  43. const CREDS = require('./creds');
  44. const mongoose = require('mongoose');
  45. const User = require('./models/user');
  46.  
  47. async function run() {
  48. const browser = await puppeteer.launch();
  49. const page = await browser.newPage();
  50.  
  51. await page.goto('https://github.com/login');
  52.  
  53. const USERNAME_SELECTOR = '#login_field';
  54. const PASSWORD_SELECTOR = '#password';
  55. const BUTTON_SELECTOR = '#login > form > div.auth-form-body.mt-3 > input.btn.btn-primary.btn-block';
  56.  
  57. await page.click(USERNAME_SELECTOR);
  58. await page.keyboard.type(CREDS.username);
  59.  
  60. await page.click(PASSWORD_SELECTOR);
  61. await page.keyboard.type(CREDS.password);
  62.  
  63. await page.click(BUTTON_SELECTOR);
  64.  
  65. await page.waitForNavigation();
  66.  
  67. const userToSearch = 'miyamoto';
  68. const searchUrl = `https://github.com/search?q=${userToSearch}&type=Users`;
  69. await page.goto(searchUrl);
  70. await page.waitFor(2*1000);
  71.  
  72. const LIST_USERNAME_SELECTOR = '#user_search_results > div.user-list > div:nth-child(INDEX) > div.d-flex.flex-auto > div > a';
  73. const LIST_EMAIL_SELECTOR = '#user_search_results > div.user-list > div:nth-child(INDEX) > div.d-flex.flex-auto > div > ul > li:nth-child(2) > a';
  74. const LENGTH_SELECTOR_CLASS = 'user-list-item';
  75.  
  76. let numPages = await getNumPages(page);
  77.  
  78. console.log('Numpages: ', numPages);
  79.  
  80. for (let h = 1; h <= numPages; h++) {
  81.  
  82. let pageUrl = searchUrl + '&p=' + h;
  83. await page.goto(pageUrl);
  84.  
  85. let listLength = await page.evaluate((sel) => {
  86. return document.getElementsByClassName(sel).length;
  87. }, LENGTH_SELECTOR_CLASS);
  88.  
  89. for (let i = 1; i <= listLength; i++) {
  90. // change the index to the next child
  91. let usernameSelector = LIST_USERNAME_SELECTOR.replace("INDEX", i);
  92. let emailSelector = LIST_EMAIL_SELECTOR.replace("INDEX", i);
  93.  
  94. let username = await page.evaluate((sel) => {
  95. return document.querySelector(sel).getAttribute('href').replace('/', '');
  96. }, usernameSelector);
  97.  
  98. let email = await page.evaluate((sel) => {
  99. let element = document.querySelector(sel);
  100. return element? element.innerHTML: null;
  101. }, emailSelector);
  102.  
  103. // not all users have emails visible
  104. if (!email)
  105. continue;
  106.  
  107. console.log(username, ' -> ', email);
  108.  
  109. // TODO save this user
  110. upsertUser({
  111. username: username,
  112. email: email,
  113. dateCrawled: new Date()
  114. });
  115.  
  116. }
  117. }
  118. browser.close();
  119. }
  120.  
  121. async function getNumPages(page) {
  122. const NUM_USER_SELECTOR = '#js-pjax-container > div > div.col-12.col-md-9.float-left.px-2.pt-3.pt-md-0.codesearch-results > div > div.d-flex.flex-column.flex-md-row.flex-justify-between.border-bottom.pb-3.$
  123.  
  124. let inner = await page.evaluate((sel) => {
  125. let html = document.querySelector(sel).innerHTML;
  126.  
  127. return html.replace(',', '').replace('users', '').trim();
  128. }, NUM_USER_SELECTOR);
  129.  
  130. let numUsers = parseInt(inner);
  131.  
  132. console.log('numUsers: ', numUsers);
  133.  
  134. let numPages = Math.ceil(numUsers / 10);
  135. return numPages;
  136. }
  137.  
  138.  
  139.  
  140. async function upsertUser(userObj) {
  141.  
  142. const DB_URL = 'mongodb://localhost/thal';
  143.  
  144. if (mongoose.connection.readyState == 0) { mongoose.connect(DB_URL); }
  145.  
  146. // if this email exists, update the entry, don't insert
  147. let conditions = { email: userObj.email };
  148. let options = { upsert: true, new: true, setDefaultsOnInsert: true };
  149.  
  150. User.findOneAndUpdate(conditions, userObj, options, (err, result) => {
  151. if (err) throw err;
  152. });
  153. }
  154.  
  155.  
  156.  
  157. run();
Add Comment
Please, Sign In to add comment