Advertisement
Guest User

Untitled

a guest
Sep 2nd, 2015
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.05 KB | None | 0 0
  1. from urlparse import urlparse
  2. from threading import Thread
  3. import httplib, sys
  4. from Queue import Queue
  5. import json
  6. import urllib2
  7. import pycurl
  8.  
  9. desc_topic = []
  10.  
  11. concurrent = 20
  12.  
  13. def do_rest_call(url):
  14. try:
  15. response = urllib2.urlopen(url).read()
  16. return response
  17. except urllib2.HTTPError as ex:
  18. if ex.code == 404 or ex.code == 400:
  19. raise Exception
  20.  
  21. def get_all_countries():
  22. url = "http://api.worldbank.org/countries?format=json"
  23. response = do_rest_call(url)
  24. data = json.loads(response)
  25. data = data[1]
  26. info = []
  27.  
  28. for element in data:
  29. info.append(element["iso2Code"])
  30.  
  31. return info
  32.  
  33. def get_string(list_topic):
  34. info = ""
  35. for element in list_topic:
  36. info += str(element)
  37.  
  38. return info
  39.  
  40. country = get_all_countries()
  41.  
  42. def get_all_urls():
  43. urls = []
  44. search_url ="http://api.worldbank.org/indicators?format=json&per_page=20000"
  45. response = do_rest_call(search_url)
  46. data = json.loads(response)
  47. data = data[1]
  48.  
  49. for element in data:
  50. urls.append("http://api.worldbank.org/countries/all/indicators/"+str(element["id"]))
  51. global desc_topic
  52. desc_topic.append((element["id"], element["sourceNote"], element["topics"]))
  53.  
  54. return urls
  55.  
  56. def doWork():
  57. while True:
  58. url = q.get()
  59. body = getBody(url)
  60. field_description = ""
  61. field_topic = ""
  62. info = []
  63. try:
  64. data = json.loads(body)
  65. data = data[1]
  66. key = data[0]["indicator"]["id"]
  67. name = data[0]["indicator"]["value"]
  68. for element in data:
  69. if element["country"]["id"] in country:
  70. info.append("worldbank/"+element["country"]["id"]+"/"+key)
  71. except:
  72. key = ""
  73. name = ""
  74. if key != "" and name != "" and info != []:
  75. for element in info:
  76. put_element = {"id": element,
  77. "name": name,
  78. "description": field_description,
  79. "topic": field_topic,
  80. "source": "WORLDBANK",
  81. "country": ""
  82. }
  83. elastic_url = "localhost:9200/timeseries/external/"+key.replace("/", ".")+"?pretty"
  84. c = pycurl.Curl()
  85. c.setopt(pycurl.URL, elastic_url)
  86. c.setopt(pycurl.POSTFIELDS, json.dumps(put_element))
  87. c.perform()
  88. q.task_done()
  89.  
  90. def getBody(ourl):
  91. url = urlparse(ourl)
  92. conn = httplib.HTTPConnection(url.netloc)
  93. conn.request("GET", url.path+"?format=json&per_page=20000")
  94. res = conn.getresponse()
  95. return res.read()
  96.  
  97. global q
  98. q = Queue(concurrent * 2)
  99. for i in range(concurrent):
  100. t = Thread(target=doWork)
  101. t.daemon = True
  102. t.start()
  103. try:
  104. for url in get_all_urls():
  105. q.put(url.strip())
  106. q.join()
  107. except KeyboardInterrupt:
  108. sys.exit(1)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement