Advertisement
Guest User

Untitled

a guest
Dec 16th, 2018
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.62 KB | None | 0 0
  1. import geocoder
  2. import json
  3. import urllib.request as req
  4. import sys, os
  5. from bs4 import BeautifulSoup
  6.  
  7. # for user-defined input
  8. '''urls = [
  9. 'https://twitter.com/AVATARmetal',
  10. 'https://twitter.com/kniazevsv',
  11. 'https://twitter.com/82d920f979824a6',
  12. 'https://twitter.com/resmiyunusemre',
  13. 'https://twitter.com/Paroxysmic1',
  14. 'https://twitter.com/KevinKoolxHalo',
  15. 'https://twitter.com/spearbins_',
  16. 'https://twitter.com/Vistari',
  17. 'https://twitter.com/AaronS0396',
  18. 'https://twitter.com/LAJediNeil',
  19. 'https://twitter.com/harryhmi3',
  20. 'https://twitter.com/KamikazeChino',
  21. 'https://twitter.com/NappierMichael',
  22. 'https://twitter.com/Ace_Neski',
  23. 'https://twitter.com/BrackBarrett',
  24. 'https://twitter.com/corgiion',
  25. 'https://twitter.com/iamjeromela',
  26. 'https://twitter.com/karinakervador',
  27. 'https://twitter.com/allerievi',
  28. 'https://twitter.com/rhithomass',
  29. 'https://twitter.com/FaultlessIan',
  30. 'https://twitter.com/Syzlak_',
  31. 'https://twitter.com/sladespiritdrum',
  32. 'https://twitter.com/JamesBarkshire',
  33. 'https://twitter.com/pbeech277',
  34. 'https://twitter.com/MedicinalChickn',
  35. 'https://twitter.com/jazzociraptor',
  36. 'https://twitter.com/JustCharl_',
  37. 'https://twitter.com/AndrewH92389697',
  38. 'https://twitter.com/Acrowcombe_',
  39. 'https://twitter.com/JimParcel',
  40. 'https://twitter.com/ohboy_itsrocio',
  41. 'https://twitter.com/wortinspektor',
  42. 'https://twitter.com/metalguy2010',
  43. 'https://twitter.com/BluenoseGhost',
  44. 'https://twitter.com/exotic_swiss',
  45. 'https://twitter.com/bootysouffle',
  46. 'https://twitter.com/LTGENSPARTAN1',
  47. 'https://twitter.com/queen_tiph',
  48. 'https://twitter.com/RxBlackHeart',
  49. 'https://twitter.com/Txuso33',
  50. 'https://twitter.com/polowk',
  51. 'https://twitter.com/SwerdNerd',
  52. 'https://twitter.com/SatanzOutlaw',
  53. 'https://twitter.com/CrownedxLoki',
  54. 'https://twitter.com/spAde_dessineux',
  55. 'https://twitter.com/effrayantperson',
  56. 'https://twitter.com/RykKov',
  57. 'https://twitter.com/dcarocasas',
  58. 'https://twitter.com/wiccathe',
  59. 'https://twitter.com/elcapitanclod',
  60. 'https://twitter.com/Dixietheprayin1',
  61. 'https://twitter.com/chrisb9297',
  62. 'https://twitter.com/kbetivas',
  63. 'https://twitter.com/Hey__Andrei',
  64. 'https://twitter.com/DungeonLewd',
  65. 'https://twitter.com/MentallyDelish',
  66. 'https://twitter.com/80sRockForever',
  67. 'https://twitter.com/johnny2098209',
  68. 'https://twitter.com/SilentJester86',
  69. 'https://twitter.com/VictoriaTasy',
  70. 'https://twitter.com/Bigrufus1986',
  71. 'https://twitter.com/raul_rosas94',
  72. 'https://twitter.com/AlpacaWAPG',
  73. 'https://twitter.com/BardownBabe',
  74. 'https://twitter.com/SrDLargePineda',
  75. 'https://twitter.com/SKubecova',
  76. 'https://twitter.com/jeroonimo47',
  77. 'https://twitter.com/JakubSmelcer',
  78. 'https://twitter.com/FelixSemtex',
  79. 'https://twitter.com/GallipeauRobby',
  80. 'https://twitter.com/ZoomBinkOreo',
  81. 'https://twitter.com/BarbatoAudrey',
  82. 'https://twitter.com/anxicucumber',
  83. 'https://twitter.com/SatansTangent',
  84. 'https://twitter.com/nrcollins',
  85. 'https://twitter.com/CorbinReid',
  86. 'https://twitter.com/Iilachowell',
  87. 'https://twitter.com/konodioda66',
  88. 'https://twitter.com/alecasa29',
  89. 'https://twitter.com/Reszman1',
  90. 'https://twitter.com/MarkRadlund',
  91. 'https://twitter.com/jsrs129',
  92. 'https://twitter.com/sixpao96',
  93. 'https://twitter.com/AmadChima',
  94. 'https://twitter.com/MotokoDevil',
  95. 'https://twitter.com/friendly_poo',
  96. 'https://twitter.com/curly_curlyk',
  97. 'https://twitter.com/AndresenErick',
  98. 'https://twitter.com/BobbyMcIntyre2',
  99. 'https://twitter.com/MatthewSengul',
  100. 'https://twitter.com/GuilhermeScors2',
  101. 'https://twitter.com/jerjacques',
  102. 'https://twitter.com/fletchtheguy',
  103. 'https://twitter.com/c_kenly',
  104. 'https://twitter.com/mc92jw',
  105. 'https://twitter.com/TumorsAndPoppy',
  106. 'https://twitter.com/FourthApparel',
  107. 'https://twitter.com/jezebelslade',
  108. 'https://twitter.com/lxnbfmv',
  109. 'https://twitter.com/HodiosoHodiado',
  110. 'https://twitter.com/CeronnHS',
  111. 'https://twitter.com/charlyemorgan',
  112. 'https://twitter.com/RecevskiL',
  113. 'https://twitter.com/voidandarcane',
  114. 'https://twitter.com/MattJ580',
  115. 'https://twitter.com/DiasKamishiro',
  116. 'https://twitter.com/meyer_shorty83',
  117. 'https://twitter.com/NicholasMurra27',
  118. ]'''
  119. urls = []
  120.  
  121. data = []
  122. country_count = {'other': 0, 'unset': 0}
  123.  
  124. filename = sys.argv[1]
  125. with open(filename, 'a') as file:
  126. file.write('\n')
  127. with open(filename, 'r') as file:
  128. for line in file:
  129. urls.append(f'https://twitter.com/{line[0:-2]}')
  130.  
  131. for url in urls:
  132. try:
  133. html = BeautifulSoup(req.urlopen(url), features="html.parser")
  134. tag = html.body.find('span', attrs={'class': 'ProfileHeaderCard-locationText'})
  135. if tag.text.strip().__len__() != 0:
  136. data.append(tag.text.strip())
  137. print(data[data.__len__() - 1])
  138. else:
  139. country_count['unset'] += 1
  140. print("No location specified")
  141. except:
  142. print("Error parsing user")
  143.  
  144.  
  145. for loc in data:
  146. g = geocoder.yandex(loc, lang='en-US')
  147. if not g.ok:
  148. country_count['other'] += 1
  149. print(loc + " was unable to be identified.")
  150. continue
  151. ccode = g.geojson['features'][0]['properties']['country_code'].lower()
  152. if not country_count.__contains__(ccode):
  153. country_count[ccode] = 1
  154. else:
  155. country_count[ccode] += 1
  156.  
  157. with open('data.json', 'w') as file:
  158. json.dump(country_count, file)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement