Guest User

Untitled

a guest
Oct 19th, 2015
316
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.93 KB | None | 0 0
  1. #!/usr/bin/env python2
  2. # coding: utf-8
  3. #
  4. # Copyright 2013 Alexandre Fiori
  5. #
  6. # Licensed under the Apache License, Version 2.0 (the "License"); you may
  7. # not use this file except in compliance with the License. You may obtain
  8. # a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  14. # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  15. # License for the specific language governing permissions and limitations
  16. # under the License.
  17.  
  18. import csv
  19. import urllib2
  20. import os
  21. import sqlite3
  22. import sys
  23. import unicodedata
  24. import zipfile
  25.  
  26. dbname = "ipdb.sqlite"
  27.  
  28.  
  29. def download(url, filename=None):
  30. # print "Downloading" + url
  31.  
  32. req = urllib2.Request(url, None, {"User-Agent": "Mozilla/5.0"})
  33. req = urllib2.urlopen(req)
  34. data = req.read()
  35. req.close()
  36.  
  37. if filename:
  38. fd = open(filename, "wb")
  39. fd.write(data)
  40. fd.close()
  41.  
  42. return data
  43.  
  44.  
  45. def extract(zipfd, zipname, filename=None):
  46. if filename is None:
  47. filename = zipname
  48.  
  49. # print "Extracting " + filename
  50.  
  51. try:
  52. data = zipfd.read(zipname)
  53. except KeyError:
  54. # print "Could not extract %s from zip archive." % zipname
  55. sys.exit(1)
  56. else:
  57. fd = open(filename, "w")
  58. fd.write(data)
  59. fd.close()
  60.  
  61.  
  62. def import_csv(cursor, csvfile, table, skip_lines=0):
  63. sys.stdout.write("Importing %s: " % csvfile)
  64. sys.stdout.flush()
  65.  
  66. fd = open(csvfile)
  67. for n in range(skip_lines):
  68. fd.next()
  69.  
  70. q = None
  71. rows = []
  72. for n, row in enumerate(csv.reader(fd), 1):
  73. if q is None:
  74. question_marks = ",".join(["?"] * len(row))
  75. q = "insert into %s values (%s)" % (table, question_marks)
  76.  
  77. rows.append(map(lambda s: s.decode("latin-1"), row))
  78. if not n % 100000:
  79. sys.stderr.write(".")
  80. cursor.executemany(q, rows)
  81. rows = []
  82.  
  83. if rows:
  84. cursor.executemany(q, rows)
  85.  
  86. fd.close()
  87. # print "%d records!" % n
  88. os.unlink(csvfile)
  89.  
  90.  
  91. class world_regions(dict):
  92. """Imports a csv and only store rows that contains accented characters,
  93. indexing them by their non-accented version::
  94.  
  95. country,region (no accents) -> region with accents
  96.  
  97. Expected csv columns: country,region,city
  98. """
  99. def __init__(self, filename=None, autorm=True):
  100. self.filename = filename
  101. if filename:
  102. fd = open(filename)
  103. for row in csv.reader(fd):
  104. v = map(lambda s: s.decode("utf-8"), row[:2])
  105. k = self.strip_accents(",".join(v))
  106.  
  107. if k != v:
  108. self[k] = v[1]
  109. fd.close()
  110. if autorm is True:
  111. os.unlink(filename)
  112.  
  113. def strip_accents(self, s):
  114. return ''.join((c for c in unicodedata.normalize('NFD', s)
  115. if unicodedata.category(c) != 'Mn'))
  116.  
  117.  
  118. class world_countries(dict):
  119. def __init__(self, conn):
  120. curs = conn.cursor()
  121. curs.execute("SELECT country_code, country_name from country_blocks")
  122. for (code, name) in curs:
  123. self[code] = name
  124. curs.close()
  125.  
  126.  
  127. if __name__ == "__main__":
  128. region_url = "http://dev.maxmind.com/static/csv/codes/maxmind/region.csv"
  129. region_csv = os.path.basename(region_url)
  130. if not os.path.exists(region_csv):
  131. download(region_url, region_csv)
  132.  
  133. wr_url = "http://blog.freegeoip.net/files/all_cities_in_the_world.csv.zip"
  134. wr_zip = os.path.basename(wr_url)
  135. wr_csv = wr_zip[:-4]
  136. if not os.path.exists(wr_csv):
  137. if not os.path.exists(wr_zip):
  138. download(wr_url, wr_zip)
  139. zipfd = zipfile.ZipFile(wr_zip)
  140. extract(zipfd, wr_csv, wr_csv)
  141. zipfd.close()
  142. os.unlink(wr_zip)
  143.  
  144. geolite = "http://geolite.maxmind.com/download/geoip/database/"
  145.  
  146. city_url = geolite + "GeoLiteCity_CSV/GeoLiteCity-latest.zip"
  147. city_files = ["GeoLiteCity-Blocks.csv", "GeoLiteCity-Location.csv"]
  148. if not all(map(os.path.exists, city_files)):
  149. # print "Checking " + city_url
  150. # Fetch the most recent city database
  151. city_zip = os.path.basename(city_url)
  152. if not os.path.exists(city_zip):
  153. download(city_url, city_zip)
  154.  
  155. # Extract city csv files
  156. zipfd = zipfile.ZipFile(city_zip)
  157. for filename in zipfd.namelist():
  158. n = os.path.basename(filename)
  159. if n in city_files and not os.path.exists(n):
  160. extract(zipfd, filename, n)
  161. zipfd.close()
  162. os.unlink(city_zip)
  163.  
  164. # Fetch the country database
  165. country_url = geolite + "GeoIPCountryCSV.zip"
  166. country_zip = os.path.basename(country_url)
  167. if not os.path.exists(country_zip):
  168. download(country_url, country_zip)
  169.  
  170. country_csv = "GeoIPCountryWhois.csv"
  171. if not os.path.exists(country_csv):
  172. zipfd = zipfile.ZipFile(country_zip)
  173. extract(zipfd, country_csv)
  174. zipfd.close()
  175. os.unlink(country_zip)
  176.  
  177. # Create the IP database
  178. tmpdb = "_" + dbname + ".temp"
  179. if os.path.exists(tmpdb):
  180. os.unlink(tmpdb)
  181.  
  182. conn = sqlite3.connect(tmpdb)
  183. curs = conn.cursor()
  184.  
  185. curs.execute("""\
  186. create table country_blocks (
  187. ip_start_str text,
  188. ip_end_str text,
  189. ip_start text,
  190. ip_end text,
  191. country_code text,
  192. country_name text,
  193. primary key(ip_start))""")
  194. import_csv(curs, country_csv, "country_blocks")
  195. curs.execute("CREATE INDEX cc_idx ON country_blocks(country_code);")
  196.  
  197. curs.execute("""\
  198. create table region_names (
  199. country_code text,
  200. region_code text,
  201. region_name text,
  202. unique (country_code, region_code))""")
  203. import_csv(curs, region_csv, "region_names")
  204.  
  205. curs.execute("""\
  206. create table city_blocks (
  207. ip_start int,
  208. ip_end int,
  209. loc_id int,
  210. primary key(ip_start))""")
  211. import_csv(curs, city_files[0], "city_blocks", skip_lines=2)
  212.  
  213. curs.execute("""\
  214. create table city_location (
  215. loc_id int,
  216. country_code text,
  217. region_code text,
  218. city_name text,
  219. postal_code text,
  220. latitude real,
  221. longitude real,
  222. metro_code text,
  223. area_code text,
  224. primary key(loc_id))""")
  225. import_csv(curs, city_files[1], "city_location", skip_lines=2)
  226.  
  227. curs.close()
  228. conn.commit()
  229.  
  230. # Fix region names
  231. sys.stdout.write("Updating region names: ")
  232. sys.stdout.flush()
  233.  
  234. world_regions = world_regions("all_cities_in_the_world.csv")
  235. world_countries = world_countries(conn)
  236.  
  237. count = 0
  238. regions = conn.cursor()
  239. regions.execute("SELECT rowid, * FROM region_names")
  240.  
  241. for region in regions:
  242. region_name = region[-1] # rowid,country_code,region_code,region_name
  243. country_name = world_countries.get(region[1])
  244.  
  245. if country_name:
  246. k = country_name + "," + region_name
  247. if k in world_regions:
  248. new_name = world_regions[k]
  249. if region_name != new_name:
  250. update = conn.cursor()
  251. update.execute("UPDATE region_names SET region_name=? "
  252. "WHERE rowid=?", (new_name, region[0]))
  253. update.close()
  254. count += 1
  255.  
  256. # print "%d names updated." % count
  257.  
  258. # Fix db consistency. (patch from Alfredo Terrone)
  259. regions.execute("INSERT INTO region_names "
  260. "SELECT DISTINCT country_code,'','' FROM region_names")
  261. regions.close()
  262. conn.commit()
  263. conn.close()
  264.  
  265. # Replace any existing db with the new one
  266. if os.path.exists(dbname):
  267. os.unlink(dbname)
  268. os.rename(tmpdb, dbname)
Advertisement
Add Comment
Please, Sign In to add comment