SHARE
TWEET

Untitled

a guest Dec 16th, 2018 63 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import geocoder
  2. import json
  3. import urllib.request as req
  4. import sys, os
  5. from bs4 import BeautifulSoup
  6.  
  7. # for user-defined input
  8. '''urls = [
  9.     'https://twitter.com/AVATARmetal',
  10.     'https://twitter.com/kniazevsv',
  11.     'https://twitter.com/82d920f979824a6',
  12.     'https://twitter.com/resmiyunusemre',
  13.     'https://twitter.com/Paroxysmic1',
  14.     'https://twitter.com/KevinKoolxHalo',
  15.     'https://twitter.com/spearbins_',
  16.     'https://twitter.com/Vistari',
  17.     'https://twitter.com/AaronS0396',
  18.     'https://twitter.com/LAJediNeil',
  19.     'https://twitter.com/harryhmi3',
  20.     'https://twitter.com/KamikazeChino',
  21.     'https://twitter.com/NappierMichael',
  22.     'https://twitter.com/Ace_Neski',
  23.     'https://twitter.com/BrackBarrett',
  24.     'https://twitter.com/corgiion',
  25.     'https://twitter.com/iamjeromela',
  26.     'https://twitter.com/karinakervador',
  27.     'https://twitter.com/allerievi',
  28.     'https://twitter.com/rhithomass',
  29.     'https://twitter.com/FaultlessIan',
  30.     'https://twitter.com/Syzlak_',
  31.     'https://twitter.com/sladespiritdrum',
  32.     'https://twitter.com/JamesBarkshire',
  33.     'https://twitter.com/pbeech277',
  34.     'https://twitter.com/MedicinalChickn',
  35.     'https://twitter.com/jazzociraptor',
  36.     'https://twitter.com/JustCharl_',
  37.     'https://twitter.com/AndrewH92389697',
  38.     'https://twitter.com/Acrowcombe_',
  39.     'https://twitter.com/JimParcel',
  40.     'https://twitter.com/ohboy_itsrocio',
  41.     'https://twitter.com/wortinspektor',
  42.     'https://twitter.com/metalguy2010',
  43.     'https://twitter.com/BluenoseGhost',
  44.     'https://twitter.com/exotic_swiss',
  45.     'https://twitter.com/bootysouffle',
  46.     'https://twitter.com/LTGENSPARTAN1',
  47.     'https://twitter.com/queen_tiph',
  48.     'https://twitter.com/RxBlackHeart',
  49.     'https://twitter.com/Txuso33',
  50.     'https://twitter.com/polowk',
  51.     'https://twitter.com/SwerdNerd',
  52.     'https://twitter.com/SatanzOutlaw',
  53.     'https://twitter.com/CrownedxLoki',
  54.     'https://twitter.com/spAde_dessineux',
  55.     'https://twitter.com/effrayantperson',
  56.     'https://twitter.com/RykKov',
  57.     'https://twitter.com/dcarocasas',
  58.     'https://twitter.com/wiccathe',
  59.     'https://twitter.com/elcapitanclod',
  60.     'https://twitter.com/Dixietheprayin1',
  61.     'https://twitter.com/chrisb9297',
  62.     'https://twitter.com/kbetivas',
  63.     'https://twitter.com/Hey__Andrei',
  64.     'https://twitter.com/DungeonLewd',
  65.     'https://twitter.com/MentallyDelish',
  66.     'https://twitter.com/80sRockForever',
  67.     'https://twitter.com/johnny2098209',
  68.     'https://twitter.com/SilentJester86',
  69.     'https://twitter.com/VictoriaTasy',
  70.     'https://twitter.com/Bigrufus1986',
  71.     'https://twitter.com/raul_rosas94',
  72.     'https://twitter.com/AlpacaWAPG',
  73.     'https://twitter.com/BardownBabe',
  74.     'https://twitter.com/SrDLargePineda',
  75.     'https://twitter.com/SKubecova',
  76.     'https://twitter.com/jeroonimo47',
  77.     'https://twitter.com/JakubSmelcer',
  78.     'https://twitter.com/FelixSemtex',
  79.     'https://twitter.com/GallipeauRobby',
  80.     'https://twitter.com/ZoomBinkOreo',
  81.     'https://twitter.com/BarbatoAudrey',
  82.     'https://twitter.com/anxicucumber',
  83.     'https://twitter.com/SatansTangent',
  84.     'https://twitter.com/nrcollins',
  85.     'https://twitter.com/CorbinReid',
  86.     'https://twitter.com/Iilachowell',
  87.     'https://twitter.com/konodioda66',
  88.     'https://twitter.com/alecasa29',
  89.     'https://twitter.com/Reszman1',
  90.     'https://twitter.com/MarkRadlund',
  91.     'https://twitter.com/jsrs129',
  92.     'https://twitter.com/sixpao96',
  93.     'https://twitter.com/AmadChima',
  94.     'https://twitter.com/MotokoDevil',
  95.     'https://twitter.com/friendly_poo',
  96.     'https://twitter.com/curly_curlyk',
  97.     'https://twitter.com/AndresenErick',
  98.     'https://twitter.com/BobbyMcIntyre2',
  99.     'https://twitter.com/MatthewSengul',
  100.     'https://twitter.com/GuilhermeScors2',
  101.     'https://twitter.com/jerjacques',
  102.     'https://twitter.com/fletchtheguy',
  103.     'https://twitter.com/c_kenly',
  104.     'https://twitter.com/mc92jw',
  105.     'https://twitter.com/TumorsAndPoppy',
  106.     'https://twitter.com/FourthApparel',
  107.     'https://twitter.com/jezebelslade',
  108.     'https://twitter.com/lxnbfmv',
  109.     'https://twitter.com/HodiosoHodiado',
  110.     'https://twitter.com/CeronnHS',
  111.     'https://twitter.com/charlyemorgan',
  112.     'https://twitter.com/RecevskiL',
  113.     'https://twitter.com/voidandarcane',
  114.     'https://twitter.com/MattJ580',
  115.     'https://twitter.com/DiasKamishiro',
  116.     'https://twitter.com/meyer_shorty83',
  117.     'https://twitter.com/NicholasMurra27',
  118. ]'''
  119. urls = []
  120.  
  121. data = []
  122. country_count = {'other': 0, 'unset': 0}
  123.  
  124. filename = sys.argv[1]
  125. with open(filename, 'a') as file:
  126.     file.write('\n')
  127. with open(filename, 'r') as file:
  128.     for line in file:
  129.         urls.append(f'https://twitter.com/{line[0:-2]}')
  130.  
  131. for url in urls:
  132.     try:
  133.         html = BeautifulSoup(req.urlopen(url), features="html.parser")
  134.         tag = html.body.find('span', attrs={'class': 'ProfileHeaderCard-locationText'})
  135.         if tag.text.strip().__len__() != 0:
  136.             data.append(tag.text.strip())
  137.             print(data[data.__len__() - 1])
  138.         else:
  139.             country_count['unset'] += 1
  140.             print("No location specified")
  141.     except:
  142.         print("Error parsing user")
  143.  
  144.  
  145. for loc in data:
  146.     g = geocoder.yandex(loc, lang='en-US')
  147.     if not g.ok:
  148.         country_count['other'] += 1
  149.         print(loc + " was unable to be identified.")
  150.         continue
  151.     ccode = g.geojson['features'][0]['properties']['country_code'].lower()
  152.     if not country_count.__contains__(ccode):
  153.         country_count[ccode] = 1
  154.     else:
  155.         country_count[ccode] += 1
  156.  
  157. with open('data.json', 'w') as file:
  158.     json.dump(country_count, file)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top