Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- (node:53135) DeprecationWarning: current URL string parser is deprecated,
- and will be removed in a future version. To use the new parser, pass option
- { useNewUrlParser: true } to MongoClient.connect.
- (node:53135) UnhandledPromiseRejectionWarning: TypeError:
- User.findOneAndUpdate is not a function
- at upsertUser (/home/oceanm/thal/index.js:111:14)
- at run (/home/oceanm/thal/index.js:69:4)
- at <anonymous>
- at process._tickCallback (internal/process/next_tick.js:188:7)
- (node:53135) UnhandledPromiseRejectionWarning: Unhandled promise rejection.
- This error originated either by throwing inside of an async function without
- a catch block, or by rejecting a promise which was not handled with
- .catch(). (rejection id: 1)
- (node:53135) [DEP0018] DeprecationWarning: Unhandled promise rejections are
- deprecated. In the future, promise rejections that are not handled will
- terminate the Node.js process with a non-zero exit code.
- <models/user.js>
- const mongoose = require('mongoose');
- let userSchema = new mongoose.Schema({
- username: String,
- email: String,
- dateCrawled: Date
- });
- <creds.js>
- module.exports = {
- username: 'myid',
- password: 'mypassword'
- }
- <index.js>
- const puppeteer = require('puppeteer');
- const CREDS = require('./creds');
- const mongoose = require('mongoose');
- const User = require('./models/user');
- async function run() {
- const browser = await puppeteer.launch();
- const page = await browser.newPage();
- await page.goto('https://github.com/login');
- const USERNAME_SELECTOR = '#login_field';
- const PASSWORD_SELECTOR = '#password';
- const BUTTON_SELECTOR = '#login > form > div.auth-form-body.mt-3 > input.btn.btn-primary.btn-block';
- await page.click(USERNAME_SELECTOR);
- await page.keyboard.type(CREDS.username);
- await page.click(PASSWORD_SELECTOR);
- await page.keyboard.type(CREDS.password);
- await page.click(BUTTON_SELECTOR);
- await page.waitForNavigation();
- const userToSearch = 'miyamoto';
- const searchUrl = `https://github.com/search?q=${userToSearch}&type=Users`;
- await page.goto(searchUrl);
- await page.waitFor(2*1000);
- const LIST_USERNAME_SELECTOR = '#user_search_results > div.user-list > div:nth-child(INDEX) > div.d-flex.flex-auto > div > a';
- const LIST_EMAIL_SELECTOR = '#user_search_results > div.user-list > div:nth-child(INDEX) > div.d-flex.flex-auto > div > ul > li:nth-child(2) > a';
- const LENGTH_SELECTOR_CLASS = 'user-list-item';
- let numPages = await getNumPages(page);
- console.log('Numpages: ', numPages);
- for (let h = 1; h <= numPages; h++) {
- let pageUrl = searchUrl + '&p=' + h;
- await page.goto(pageUrl);
- let listLength = await page.evaluate((sel) => {
- return document.getElementsByClassName(sel).length;
- }, LENGTH_SELECTOR_CLASS);
- for (let i = 1; i <= listLength; i++) {
- // change the index to the next child
- let usernameSelector = LIST_USERNAME_SELECTOR.replace("INDEX", i);
- let emailSelector = LIST_EMAIL_SELECTOR.replace("INDEX", i);
- let username = await page.evaluate((sel) => {
- return document.querySelector(sel).getAttribute('href').replace('/', '');
- }, usernameSelector);
- let email = await page.evaluate((sel) => {
- let element = document.querySelector(sel);
- return element? element.innerHTML: null;
- }, emailSelector);
- // not all users have emails visible
- if (!email)
- continue;
- console.log(username, ' -> ', email);
- // TODO save this user
- upsertUser({
- username: username,
- email: email,
- dateCrawled: new Date()
- });
- }
- }
- browser.close();
- }
- async function getNumPages(page) { // Derives the number of result pages to crawl from the user count text found on `page`; returns that page count.
- const NUM_USER_SELECTOR = '#js-pjax-container > div > div.col-12.col-md-9.float-left.px-2.pt-3.pt-md-0.codesearch-results > div > div.d-flex.flex-column.flex-md-row.flex-justify-between.border-bottom.pb-3.$
- let inner = await page.evaluate((sel) => { // NOTE(review): the selector literal on the previous line is cut off in this listing (ends in `$`, no closing quote) — restore it from the original file
- let html = document.querySelector(sel).innerHTML; // executed inside the page; throws if nothing matches `sel`
- return html.replace(',', '').replace('users', '').trim(); // string-pattern replace() strips only the first ',' and the first 'users'
- }, NUM_USER_SELECTOR);
- let numUsers = parseInt(inner); // NOTE(review): no radix argument; yields NaN when `inner` does not start with digits
- console.log('numUsers: ', numUsers);
- let numPages = Math.ceil(numUsers / 10); // presumably 10 results are listed per page — confirm against the site
- return numPages;
- }
- async function upsertUser(userObj) {
- const DB_URL = 'mongodb://localhost/thal';
- if (mongoose.connection.readyState == 0) { mongoose.connect(DB_URL); }
- // if this email exists, update the entry, don't insert
- let conditions = { email: userObj.email };
- let options = { upsert: true, new: true, setDefaultsOnInsert: true };
- User.findOneAndUpdate(conditions, userObj, options, (err, result) => {
- if (err) throw err;
- });
- }
- run();
Add Comment
Please, Sign In to add comment