DeaD_EyE

geoip + python3 - scan nginx access logs, including gzipped ones

Jun 23rd, 2018
145
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.38 KB | None | 0 0
  1. #!/usr/bin/env python3
  2.  
  3. # in venv
  4. # pip install python-geoip-python3
  5. # pip install python-geoip-geolite2
  6.  
  7. import glob
  8. import gzip
  9. from geoip import geolite2
  10. import collections
  11.  
  12.  
  13. def get_country(ip):
  14.     """
  15.    Returns the country as ISO 3166-1 alpha-2
  16.    If the lookup fails, it returns None
  17.    """
  18.     result = geolite2.lookup(ip)
  19.     if result:
  20.         return result.country
  21.  
  22.  
  23. def scan_ips(nginx_access_log):
  24.     filter_ips = lambda fd: (line.split()[0] for line in fd)
  25.     if nginx_access_log.endswith('.gz'):
  26.         with gzip.open(nginx_access_log, 'rt') as fd:
  27.             yield from filter_ips(fd)
  28.     else:
  29.         with open(nginx_access_log) as fd:
  30.             for line in fd:
  31.                 yield from filter_ips(fd)
  32.  
  33.  
  34. def scan_all_logs(pattern):
  35.     for file in glob.glob(pattern):
  36.         yield from scan_ips(file)
  37.  
  38. ips = tuple(scan_all_logs('/var/log/nginx/archiv.vv.ist-im-web.de_access.log*'))
  39.  
  40. unique_ips = set(ips)
  41. unique_countries = {get_country(ip) for ip in unique_ips}
  42. countries_filtered = {c for c in unique_countries if c}
  43.  
  44. countries_all = [get_country(ip) for ip in ips]
  45. countries_all = [c for c in countries_all if c]
  46.  
  47. top10 = collections.Counter(countries_all).most_common(10)
  48.  
  49. #[('US', 7407),
  50. # ('DE', 1194),
  51. # ('CA', 578),
  52. # ('GR', 264),
  53. # ('FR', 102),
  54. # ('RU', 78),
  55. # ('HK', 60),
  56. # ('CH', 56),
  57. # ('GB', 50),
  58. # ('RO', 36)]
Add Comment
Please, Sign In to add comment