Advertisement
jayankaghosh

adobe_partners_scraper.js

May 16th, 2024
446
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
JavaScript 4.65 KB | Source Code | 0 0
  1. const fetch = require('node-fetch');
  2. const fs = require('fs');
  3. const json = require("./partners.json");
  4. const createCsvWriter = require('csv-writer').createObjectCsvWriter;
  5.  
  6. const LISTING_URL = 'https://partner-directory.adobe.io/v1/spp/listings';
  7. const DETAILS_URL = 'https://partner-directory.adobe.io/v1/spp/listing/';
  8. const JSON_FILE_NAME = __dirname + '/partners.json';
  9. const CSV_FILE_NAME = __dirname + '/partners.csv';
  10. const TOTAL_THREADS = 10;
  11.  
  12. const fetchListingPage = pageNumber => {
  13.     return new Promise(async (resolve, reject) => {
  14.         console.log(`FETCHING PAGE ${ pageNumber }`);
  15.         let url = LISTING_URL;
  16.         if (pageNumber) {
  17.             url += '?page=' + pageNumber
  18.         }
  19.         const rawResponse = await fetch(url, {
  20.             headers: {
  21.                 'X-Api-Key': 'partner_directory'
  22.             }
  23.         });
  24.         const response = await rawResponse.json();
  25.         resolve(response);
  26.     });
  27. }
  28.  
  29. let numberOfTimesDetailsLogged = 0;
  30. const fetchDetails = id => {
  31.     return new Promise(async (resolve, reject) => {
  32.         console.log(`${++numberOfTimesDetailsLogged}. FETCHING DETAILS OF ${ id }`);
  33.         const url = DETAILS_URL + id;
  34.         const rawResponse = await fetch(url, {
  35.             headers: {
  36.                 'X-Api-Key': 'partner_directory'
  37.             }
  38.         });
  39.         const response = await rawResponse.json();
  40.         resolve(response);
  41.     });
  42. }
  43.  
  44. const writeCSVFromJSON = async () => {
  45.     console.log('Writing to CSV');
  46.     const json = require(JSON_FILE_NAME);
  47.     const csvData = [];
  48.     const csvWriter = createCsvWriter({
  49.         path: CSV_FILE_NAME,
  50.         header: [
  51.             {id: 'name', title: 'Name'},
  52.             {id: 'level', title: 'Level'},
  53.             {id: 'certifiedEmployees', title: 'Certified Employees'},
  54.             {id: 'phone', title: 'Phone'},
  55.             {id: 'website', title: 'Website'},
  56.             {id: 'city', title: 'City'},
  57.             {id: 'country', title: 'Country'},
  58.             {id: 'countryId', title: 'Country ID'},
  59.             {id: 'headquarter', title: 'Headquarter'},
  60.             {id: 'address_name', title: 'Address Name'},
  61.             {id: 'postalCode', title: 'Postal Code'},
  62.             {id: 'regionId', title: 'Region ID'},
  63.             {id: 'state', title: 'State'},
  64.             {id: 'address1', title: 'Address 1'},
  65.             {id: 'address2', title: 'Address 2'}
  66.         ]
  67.     });
  68.     Object.values(json).forEach(company => {
  69.         company.addresses.forEach(address => {
  70.             csvData.push({
  71.                 'name': company.companyInfo.name,
  72.                 'level': company.companyInfo.level,
  73.                 'certifiedEmployees': company.companyInfo.certifiedEmployees,
  74.                 'phone': company.companyInfo.phone,
  75.                 'website': company.companyInfo.website,
  76.                 'city': address.city,
  77.                 'country': address.country,
  78.                 'countryId': address.countryId,
  79.                 'headquarter': (address.isHeadquarter ? 'Yes': 'No'),
  80.                 'address_name': address.name,
  81.                 'postalCode': address.postalCode,
  82.                 'regionId': address.regionId,
  83.                 'state': address.state,
  84.                 'address1': address.address1,
  85.                 'address2': address.address2,
  86.             })
  87.         });
  88.     });
  89.     await csvWriter.writeRecords(csvData);
  90.     console.log('DONE');
  91. }
  92.  
  93. (async () => {
  94.     let list = [];
  95.     let totalItems;
  96.     const finalList = {};
  97.     let totalPages = null;
  98.     let currentPage = 0;
  99.     while (true) {
  100.         const { listings = [], totalPages: pages = null, currentPage: current = null } = await fetchListingPage(currentPage);
  101.         list = [...list, ...listings];
  102.         if (pages) {
  103.             totalPages = pages;
  104.         }
  105.         if (current) {
  106.             currentPage = parseInt(current);
  107.         }
  108.         currentPage++;
  109.         if (totalPages < currentPage) {
  110.             break;
  111.         }
  112.     }
  113.     totalItems = list.length;
  114.  
  115.     const runner = async () => {
  116.         if (list.length) {
  117.             const {id} = list.shift();
  118.             finalList[id] = await fetchDetails(id);
  119.             await runner();
  120.         }
  121.     }
  122.  
  123.     for (let i = 0; i < TOTAL_THREADS; i++) {
  124.         runner();
  125.     }
  126.  
  127.     const processId = setInterval(() => {
  128.         if (Object.keys(finalList).length === totalItems) {
  129.             clearInterval(processId);
  130.             fs.writeFileSync(JSON_FILE_NAME, JSON.stringify(finalList, null, 2));
  131.             console.log('DONE');
  132.             console.log('JSON written to file ' + JSON_FILE_NAME);
  133.             writeCSVFromJSON();
  134.         }
  135.     }, 1000);
  136. })();
  137.  
  138.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement