Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import geocoder
- import json
- import urllib.request as req
- import sys, os
- from bs4 import BeautifulSoup
- # for user-defined input
- # The triple-quoted string below is a bare expression statement: it is never
- # assigned or read, so this hard-coded list of Twitter profile URLs is
- # effectively commented out. It is kept as a reference for supplying the
- # profile URLs directly; the script itself builds its URL list from the
- # file named on the command line (sys.argv[1]).
- '''urls = [
- 'https://twitter.com/AVATARmetal',
- 'https://twitter.com/kniazevsv',
- 'https://twitter.com/82d920f979824a6',
- 'https://twitter.com/resmiyunusemre',
- 'https://twitter.com/Paroxysmic1',
- 'https://twitter.com/KevinKoolxHalo',
- 'https://twitter.com/spearbins_',
- 'https://twitter.com/Vistari',
- 'https://twitter.com/AaronS0396',
- 'https://twitter.com/LAJediNeil',
- 'https://twitter.com/harryhmi3',
- 'https://twitter.com/KamikazeChino',
- 'https://twitter.com/NappierMichael',
- 'https://twitter.com/Ace_Neski',
- 'https://twitter.com/BrackBarrett',
- 'https://twitter.com/corgiion',
- 'https://twitter.com/iamjeromela',
- 'https://twitter.com/karinakervador',
- 'https://twitter.com/allerievi',
- 'https://twitter.com/rhithomass',
- 'https://twitter.com/FaultlessIan',
- 'https://twitter.com/Syzlak_',
- 'https://twitter.com/sladespiritdrum',
- 'https://twitter.com/JamesBarkshire',
- 'https://twitter.com/pbeech277',
- 'https://twitter.com/MedicinalChickn',
- 'https://twitter.com/jazzociraptor',
- 'https://twitter.com/JustCharl_',
- 'https://twitter.com/AndrewH92389697',
- 'https://twitter.com/Acrowcombe_',
- 'https://twitter.com/JimParcel',
- 'https://twitter.com/ohboy_itsrocio',
- 'https://twitter.com/wortinspektor',
- 'https://twitter.com/metalguy2010',
- 'https://twitter.com/BluenoseGhost',
- 'https://twitter.com/exotic_swiss',
- 'https://twitter.com/bootysouffle',
- 'https://twitter.com/LTGENSPARTAN1',
- 'https://twitter.com/queen_tiph',
- 'https://twitter.com/RxBlackHeart',
- 'https://twitter.com/Txuso33',
- 'https://twitter.com/polowk',
- 'https://twitter.com/SwerdNerd',
- 'https://twitter.com/SatanzOutlaw',
- 'https://twitter.com/CrownedxLoki',
- 'https://twitter.com/spAde_dessineux',
- 'https://twitter.com/effrayantperson',
- 'https://twitter.com/RykKov',
- 'https://twitter.com/dcarocasas',
- 'https://twitter.com/wiccathe',
- 'https://twitter.com/elcapitanclod',
- 'https://twitter.com/Dixietheprayin1',
- 'https://twitter.com/chrisb9297',
- 'https://twitter.com/kbetivas',
- 'https://twitter.com/Hey__Andrei',
- 'https://twitter.com/DungeonLewd',
- 'https://twitter.com/MentallyDelish',
- 'https://twitter.com/80sRockForever',
- 'https://twitter.com/johnny2098209',
- 'https://twitter.com/SilentJester86',
- 'https://twitter.com/VictoriaTasy',
- 'https://twitter.com/Bigrufus1986',
- 'https://twitter.com/raul_rosas94',
- 'https://twitter.com/AlpacaWAPG',
- 'https://twitter.com/BardownBabe',
- 'https://twitter.com/SrDLargePineda',
- 'https://twitter.com/SKubecova',
- 'https://twitter.com/jeroonimo47',
- 'https://twitter.com/JakubSmelcer',
- 'https://twitter.com/FelixSemtex',
- 'https://twitter.com/GallipeauRobby',
- 'https://twitter.com/ZoomBinkOreo',
- 'https://twitter.com/BarbatoAudrey',
- 'https://twitter.com/anxicucumber',
- 'https://twitter.com/SatansTangent',
- 'https://twitter.com/nrcollins',
- 'https://twitter.com/CorbinReid',
- 'https://twitter.com/Iilachowell',
- 'https://twitter.com/konodioda66',
- 'https://twitter.com/alecasa29',
- 'https://twitter.com/Reszman1',
- 'https://twitter.com/MarkRadlund',
- 'https://twitter.com/jsrs129',
- 'https://twitter.com/sixpao96',
- 'https://twitter.com/AmadChima',
- 'https://twitter.com/MotokoDevil',
- 'https://twitter.com/friendly_poo',
- 'https://twitter.com/curly_curlyk',
- 'https://twitter.com/AndresenErick',
- 'https://twitter.com/BobbyMcIntyre2',
- 'https://twitter.com/MatthewSengul',
- 'https://twitter.com/GuilhermeScors2',
- 'https://twitter.com/jerjacques',
- 'https://twitter.com/fletchtheguy',
- 'https://twitter.com/c_kenly',
- 'https://twitter.com/mc92jw',
- 'https://twitter.com/TumorsAndPoppy',
- 'https://twitter.com/FourthApparel',
- 'https://twitter.com/jezebelslade',
- 'https://twitter.com/lxnbfmv',
- 'https://twitter.com/HodiosoHodiado',
- 'https://twitter.com/CeronnHS',
- 'https://twitter.com/charlyemorgan',
- 'https://twitter.com/RecevskiL',
- 'https://twitter.com/voidandarcane',
- 'https://twitter.com/MattJ580',
- 'https://twitter.com/DiasKamishiro',
- 'https://twitter.com/meyer_shorty83',
- 'https://twitter.com/NicholasMurra27',
- ]'''
# Number of characters dropped from the end of every input line to obtain the
# Twitter handle (mirrors the original `line[0:-2]` slice).
# NOTE(review): this presumes each line ends with one extra character (e.g. a
# delimiter or '\r') in addition to the newline -- confirm against the real
# input file; if lines are plain "handle\n" this trims one character too many.
_TRAILING_CHARS = 2

# CSS class of the <span> holding the location on a Twitter profile page.
_LOCATION_CLASS = 'ProfileHeaderCard-locationText'


def _read_profile_urls(path):
    """Return the Twitter profile URL for every handle listed in *path*.

    The file is only read, never modified. A final line without a newline is
    padded in memory so it loses the same number of trailing characters as
    every other line. Lines that yield an empty handle are skipped.
    """
    urls = []
    with open(path, 'r') as handle_file:
        for line in handle_file:
            if not line.endswith('\n'):
                line += '\n'
            handle = line[:-_TRAILING_CHARS]
            if handle:
                urls.append(f'https://twitter.com/{handle}')
    return urls


def _fetch_location(url):
    """Download the profile at *url* and return its stripped location text.

    Raises whatever ``urlopen`` / ``BeautifulSoup`` raise, and
    ``AttributeError`` when the page has no body or no location element.
    """
    # The context manager closes the HTTP response once it has been parsed.
    with req.urlopen(url) as response:
        html = BeautifulSoup(response, features="html.parser")
    tag = html.body.find('span', attrs={'class': _LOCATION_CLASS})
    return tag.text.strip()


def _collect_locations(urls, country_count):
    """Scrape each profile and return the non-empty location strings.

    Profiles with an empty location increment ``country_count['unset']``.
    Profiles that cannot be fetched or parsed are reported and skipped.
    """
    locations = []
    for url in urls:
        try:
            location = _fetch_location(url)
        except Exception:
            # Deliberately best-effort: one bad profile must not stop the run.
            # `Exception` (not a bare except) lets Ctrl-C / SystemExit through.
            print("Error parsing user")
            continue
        if location:
            locations.append(location)
            print(location)
        else:
            country_count['unset'] += 1
            print("No location specified")
    return locations


def _country_code(location):
    """Return the lower-cased country code for *location*, or None.

    None is returned when the geocoder reports failure or when its GeoJSON
    result carries no usable ``country_code`` property.
    """
    result = geocoder.yandex(location, lang='en-US')
    if not result.ok:
        return None
    try:
        properties = result.geojson['features'][0]['properties']
        return properties['country_code'].lower()
    except (KeyError, IndexError, TypeError, AttributeError):
        # A hit without a country code is treated like an unidentified place
        # instead of aborting the run before the results are written.
        return None


def _tally_countries(locations, country_count):
    """Geocode every location and update *country_count* in place.

    Unidentifiable locations are counted under ``'other'``.
    """
    for location in locations:
        code = _country_code(location)
        if code is None:
            country_count['other'] += 1
            print(location + " was unable to be identified.")
            continue
        country_count[code] = country_count.get(code, 0) + 1


def main(argv=None):
    """Count Twitter users per country and write the tally to ``data.json``.

    *argv* defaults to ``sys.argv``; ``argv[1]`` is the path of a text file
    with one Twitter handle per line. The resulting JSON object maps
    lower-case country codes to counts, plus ``'other'`` (location could not
    be geocoded) and ``'unset'`` (profile has no location).
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit(f"usage: {argv[0] if argv else 'script'} HANDLE_FILE")

    country_count = {'other': 0, 'unset': 0}
    urls = _read_profile_urls(argv[1])
    locations = _collect_locations(urls, country_count)
    _tally_countries(locations, country_count)

    with open('data.json', 'w') as out_file:
        json.dump(country_count, out_file)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement