Advertisement
Guest User

Untitled

a guest
Jun 9th, 2021
184
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.22 KB | None | 0 0
  1. const puppeteer = require('puppeteer');
  2. const util = require('util');
  3. const exec = util.promisify(require('child_process').exec);
  4.  
  /**
   * Runs `nc64 localhost 9151 <tor-change.txt`, i.e. sends the contents of
   * tor-change.txt to whatever listens on local port 9151 (presumably the Tor
   * control port - confirm against the local Tor setup).
   *
   * The function is async so callers can `await` it and be sure the command
   * has finished before opening the next connection. Callers that ignore the
   * returned promise behave as before.
   *
   * @returns {Promise<void>} Settles when the command has exited; rejects if
   *   the command could not be run or exited with an error.
   */
  const changeProxy = async () => {
    const { stdout, stderr } = await exec("nc64 localhost 9151 <tor-change.txt");
    console.log('stdout:', stdout);
    // Surface diagnostics instead of silently dropping them.
    if (stderr) {
      console.error('stderr:', stderr);
    }
  };
  16.  
  17.  
  /**
   * Masks common automation fingerprints on a page before it navigates.
   *
   * Sets a desktop Chrome user agent and registers five scripts through
   * `page.evaluateOnNewDocument` that override `navigator.webdriver`,
   * `navigator.chrome`, `navigator.permissions.query`, `navigator.plugins`
   * and `navigator.languages`.
   *
   * @param {object} page - Object exposing async `setUserAgent` and
   *   `evaluateOnNewDocument` methods (a Puppeteer page in this file).
   * @returns {Promise<void>} Settles once every override has been registered.
   */
  const preparePageForTests = async (page) => {
    // Pass the User-Agent Test. The space before "AppleWebKit" matters:
    // without it the two tokens fuse into a malformed user-agent string.
    const userAgent = 'Mozilla/5.0 (X11; Linux x86_64) ' +
      'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.39 Safari/537.36';
    await page.setUserAgent(userAgent);

    // Pass the Webdriver Test.
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', {
        get: () => false,
      });
    });

    // Pass the Chrome Test.
    await page.evaluateOnNewDocument(() => {
      // We can mock this in as much depth as we need for the test.
      window.navigator.chrome = {
        runtime: {},
        // etc.
      };
    });

    // Pass the Permissions Test.
    await page.evaluateOnNewDocument(() => {
      const permissions = window.navigator.permissions;
      const originalQuery = permissions.query;
      permissions.query = (parameters) => (
        parameters.name === 'notifications' ?
          Promise.resolve({ state: Notification.permission }) :
          // Call with the original receiver; a detached native method
          // throws "Illegal invocation".
          originalQuery.call(permissions, parameters)
      );
    });

    // Pass the Plugins Length Test.
    await page.evaluateOnNewDocument(() => {
      // Overwrite the `plugins` property to use a custom getter.
      Object.defineProperty(navigator, 'plugins', {
        // This just needs to have `length > 0` for the current test,
        // but we could mock the plugins too if necessary.
        get: () => [1, 2, 3, 4, 5],
      });
    });

    // Pass the Languages Test.
    await page.evaluateOnNewDocument(() => {
      // Overwrite the `languages` property to use a custom getter.
      Object.defineProperty(navigator, 'languages', {
        get: () => ['en-US', 'en'],
      });
    });
  };
  68.  
  69.  
  /**
   * Turns on request interception for the page and cancels every request
   * whose resource type is `image`; all other requests are let through.
   *
   * @param {object} page - Object exposing `setRequestInterception` and `on`
   *   (a Puppeteer page in this file).
   * @returns {Promise<void>} Settles once the request listener is attached.
   */
  const blockImages = async (page) => {
    // Decides the fate of a single intercepted request.
    function handleRequest(interceptedRequest) {
      if (interceptedRequest.resourceType() !== 'image') {
        interceptedRequest.continue();
        return;
      }
      interceptedRequest.abort();
    }

    await page.setRequestInterception(true);
    page.on('request', handleRequest);
  };
  81.  
  82.  
  /**
   * Opens `pageURL` in a fresh, visible browser routed through the SOCKS5
   * proxy at localhost:9150 and extracts the price shown in the first
   * `.form-type-radio` element.
   *
   * The browser is always closed, including when navigation, the selector
   * wait or the extraction throws, so no browser process is left behind.
   *
   * @param {string} pageURL - Address of the page to scrape.
   * @param {Function} [resolve] - Optional completion callback. It is called
   *   with the matched price string (e.g. "$39.95"), or with `null` when the
   *   element text does not end in a price. It is not called when scraping
   *   throws.
   * @returns {Promise<string|null>} The matched price, or `null` if the text
   *   did not match.
   * @throws Rejects with the underlying error when launching, navigating or
   *   waiting for the selector (3 s timeout) fails.
   */
  const scrape = async (pageURL, resolve) => {
    const resolutions = [[1366, 768], [1920, 1080], [1536, 864]];
    const priceRegEx = /(\$)(\d{1,4}\.\d{1,2})$/;
    const priceStringSelector = ".form-type-radio";

    const browser = await puppeteer.launch({ headless: false, args: ['--proxy-server=socks5://localhost:9150'] });
    let price = null;

    try {
      const page = await browser.newPage();
      await preparePageForTests(page);

      // Pick one of the predefined viewport sizes at random.
      const [width, height] = resolutions[Math.floor(Math.random() * resolutions.length)];
      await page.setViewport({ width, height });

      await page.goto(pageURL, { waitUntil: 'domcontentloaded', referer: "" });
      await page.waitForSelector(priceStringSelector, { timeout: 3000 });

      const priceString = await page.evaluate(
        (selector) => document.querySelector(selector).innerText,
        priceStringSelector
      );

      const matchedPrice = priceString.match(priceRegEx);
      if (matchedPrice) {
        price = matchedPrice[0];
      } else {
        console.log(`price matching went wrong, alarm! The string was ${priceString}`);
      }
    } finally {
      // Runs on success and on failure alike.
      await browser.close();
    }

    // Report completion even on a mismatch so a waiting caller is not stalled.
    if (typeof resolve === 'function') {
      resolve(price);
    }
    return price;
  };
  133.  
  // Queue of pages still to be scraped. scrapeThemAll() removes entries from
  // the front; the binding itself is never reassigned, hence `const`.
  const pageURLs = ["https://nostarch.com/algorithmic-thinking", "https://nostarch.com/writegreatcode1_2e"];
  135.  
  136.  
  137.  
  /**
   * Scrapes every URL queued in `pageURLs`, one at a time, calling
   * `changeProxy()` after each page.
   *
   * Each URL is removed from the front of `pageURLs` once it has been
   * processed. A failure while scraping one page or while changing the proxy
   * is logged and does not stop the remaining pages. An empty queue is a
   * no-op.
   *
   * @returns {Promise<void>} Settles when the queue has been drained.
   */
  async function scrapeThemAll() {
    while (pageURLs.length) {
      const pageURL = pageURLs[0];

      try {
        // Await scrape's own promise; the no-op stands in for the completion
        // callback that scrape expects as its second argument.
        await scrape(pageURL, () => {});
      } catch (err) {
        console.error(`scraping ${pageURL} failed:`, err);
      }

      try {
        await changeProxy();
      } catch (err) {
        console.error('changing the proxy failed:', err);
      }

      pageURLs.shift();
    }
  }
  153.  
  154.  
  // Entry point: start working through the queued URLs as soon as the script is loaded.
  155. scrapeThemAll();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement