Advertisement
Guest User

browscap.py

a guest
Dec 10th, 2011
285
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 9.49 KB | None | 0 0
  1. from ConfigParser import SafeConfigParser as ConfigParser
  2. import re
  3. import os
  4. import sys
  5.  
  6.  
# Public API: a star-import of this module exposes only BrowserCapabilities.
__all__ = [
   "BrowserCapabilities"
]
  10.  
  11. MOBILE_UA=re.compile(".*(320x240|240x320|windows\sce|up.browser|up.link|mmp|symbian|smartphone|midp|wap|phone|vodafone|o2|pocket|mobile|pda|psp).*",re.IGNORECASE)
  12. MOBILE_UA_START=('acs-','alav','alca','amoi','audi','aste','avan','benq','bird','blac','blaz','brew','cell','cldc','cmd-','dang','doco','eric','hipt','htc','inno','ipaq','java','jigs','kddi','keji','leno','lg-c','lg-d','lg-g','lge-','maui','maxo','midp','mits','mmef','mobi','mot-','moto','mwbp','nec-','newt','noki','oper','opwv','palm','pana','pant','pdxg','phil','play','pluc','port','prox','qtek','qwap','sage','sams','sany','sch-','sec-','send','seri','sgh-','shar','sie-','siem','smal','smar','sony','sph-','symb','t-mo','teli','tim-','tosh','tsm-','upg1','upsi','vk-v','voda','wap-','wapa','wapi','wapp','wapr','webc','winw','winw','xda','xda-')
  13. BOT_UA=re.compile('.*(spyder|spider|googlebot|robot|bing|crawler).*',re.IGNORECASE)
  14.  
  15. def find_on_path(filename):
  16.    """Given a search path, find file
  17.   """
  18.    file_found = 0
  19.    paths = sys.path
  20.    for path in paths:
  21.        if os.path.exists(os.path.join(path, filename)):
  22.           return path
  23.    return None
  24.  
# Directory on sys.path that holds browscap.ini, resolved once at import time.
# None when the file is not found on sys.path.
BC_PATH=find_on_path('browscap.ini')
  26.  
  27. def mobile_useragent_check(useragent):
  28.     if MOBILE_UA.match(useragent) or useragent[:4] in MOBILE_UA_START:
  29.         return True
  30.     return False
  31.  
  32. def bot_useragent_check(useragent):
  33.     if BOT_UA.match(useragent):
  34.         return True
  35.     return False
  36.  
  37.  
  38. class Browser(object):
  39.     def __init__(self, capabilities):
  40.         self.lazy_flag = True
  41.         self.cap = capabilities
  42.  
  43.  
  44.     def parse(self):
  45.         for name, value in self.cap.items():
  46.             if name in ["tables", "aol", "javaapplets",
  47.                        "activexcontrols", "backgroundsounds",
  48.                        "vbscript", "win16", "javascript", "cdf",
  49.                        "wap", "crawler", "netclr", "beta",
  50.                         "iframes", "frames", "stripper", "wap"]:
  51.                 self.cap[name] = (value.strip().lower() == "true")
  52.             elif name in ["ecmascriptversion", "w3cdomversion"]:
  53.                 self.cap[name] = float(value)
  54.             elif name in ["css"]:
  55.                 self.cap[name] = int(value)
  56.             else:
  57.                 self.cap[name] = value
  58.         self.lazy_flag = False
  59.  
  60.  
  61.     def __repr__(self):
  62.         if self.lazy_flag: self.parse()
  63.         return repr(self.cap)
  64.  
  65.  
  66.     def get(self, name, default=None):
  67.         if self.lazy_flag: self.parse()
  68.         try:
  69.             return self[name]
  70.         except KeyError:
  71.             return default
  72.  
  73.  
  74.     def __getitem__(self, name):
  75.         if self.lazy_flag: self.parse()
  76.         return self.cap[name.lower()]
  77.  
  78.  
  79.     def keys(self):
  80.         return self.cap.keys()
  81.  
  82.  
  83.     def items(self):
  84.         if self.lazy_flag: self.parse()
  85.         return self.cap.items()
  86.  
  87.  
  88.     def values(self):
  89.         if self.lazy_flag: self.parse()
  90.         return self.cap.values()
  91.    
  92.  
  93.     def __len__(self):
  94.         return len(self.cap)
  95.  
  96.  
  97.     def supports(self, feature):
  98.         value = self.cap.get(feature)
  99.         if value == None:
  100.             return False
  101.         return value
  102.  
  103.  
  104.     def features(self):
  105.         l = []
  106.         for f in ["tables", "frames", "iframes", "javascript",
  107.                   "cookies", "w3cdomversion", "wap"]:
  108.             if self.supports(f):
  109.                 l.append(f)
  110.         if self.supports_java():
  111.             l.append("java")
  112.         if self.supports_activex():
  113.             l.append("activex")
  114.         css = self.css_version()
  115.         if css > 0:
  116.             l.append("css1")
  117.         if css > 1:
  118.             l.append("css2")
  119.         return l
  120.  
  121.  
  122.     def supports_tables(self):
  123.         return self.supports("frames")
  124.  
  125.     def supports_iframes(self):
  126.         return self.supports("iframes")
  127.  
  128.  
  129.     def supports_frames(self):
  130.         return self.supports("frames")
  131.  
  132.  
  133.     def supports_java(self):
  134.         return self.supports("javaapplets")
  135.  
  136.  
  137.     def supports_javascript(self):
  138.         return self.supports("javascript")
  139.  
  140.  
  141.     def supports_vbscript(self):
  142.         return self.supports("vbscript")
  143.  
  144.  
  145.     def supports_activex(self):
  146.         return self.supports("activexcontrols")
  147.  
  148.  
  149.     def supports_cookies(self):
  150.         return self.supports("cookies")
  151.  
  152.  
  153.     def supports_wap(self):
  154.         return self.supports("wap")
  155.  
  156.  
  157.     def css_version(self):
  158.         return self.get("css", 0)
  159.  
  160.  
  161.     def version(self):
  162.         major = self.get("majorver")
  163.         minor = self.get("minorver")
  164.         if major and minor:
  165.             return (major, minor)
  166.         elif major:
  167.             return (major, None)
  168.         elif minor:
  169.             return (None, minor)
  170.         else:
  171.             ver = self.get("version")
  172.             if ver and "." in ver:
  173.                 return tuple(ver.split(".", 1))
  174.             elif ver:
  175.                 return (ver, None)
  176.             else:
  177.                 return (None, None)
  178.  
  179.  
  180.     def dom_version(self):
  181.         return self.get("w3cdomversion", 0)
  182.  
  183.  
  184.     def is_bot(self):
  185.         ca=getattr(self,'_bot_cache',None)
  186.         if ca is None:
  187.             ca = bot_useragent_check(self.name()) or (self.get("crawler") == True)
  188.             self._bot_cache=ca
  189.         return ca
  190.  
  191.  
  192.     def is_mobile(self):
  193.         ca=getattr(self,'_mobile_cache',None)
  194.         if ca is None:
  195.             ca = mobile_useragent_check(self.name()) or (self.get("ismobiledevice") == True)
  196.             self._mobile_cache=ca
  197.         return ca
  198.  
  199.    
  200.     def name(self):
  201.         return self.get("browser")
  202.  
  203.  
  204.  
  205.  
  206. class BrowserCapabilities(object):
  207.  
  208.  
  209.     def __new__(cls, *args, **kwargs):
  210.         # Only create one instance of this clas
  211.         if "instance" not in cls.__dict__:
  212.             cls.instance = object.__new__(cls, *args, **kwargs)
  213.         return cls.instance
  214.  
  215.  
  216.     def __init__(self):
  217.         self.cache = {}
  218.         self.parse()
  219.  
  220.  
  221.     def parse(self):
  222.         cfg = ConfigParser()
  223.         files = ("browscap.ini", "bupdate.ini")
  224.         read_ok = cfg.read([os.path.join(BC_PATH, name) for name in files])
  225.         if len(read_ok) == 0:
  226.             raise IOError, "Could not read browscap.ini, " + \
  227.                   "please get it from http://www.GaryKeith.com"
  228.         self.sections = []
  229.         self.items = {}
  230.         self.browsers = {}
  231.         parents = set()
  232.         for name in cfg.sections():
  233.             qname = name
  234.             for unsafe in list("^$()[].-"):
  235.                 qname = qname.replace(unsafe, "\%s" % unsafe)
  236.             qname = qname.replace("?", ".").replace("*", ".*?")
  237.             qname = "^%s$" % qname
  238.             sec_re = re.compile(qname)
  239.             sec = dict(regex=qname)
  240.             sec.update(cfg.items(name))
  241.             p = sec.get("parent")
  242.             if p: parents.add(p)
  243.             self.browsers[name] = sec
  244.             if name not in parents:
  245.                 self.sections.append(sec_re)
  246.             self.items[sec_re] = sec
  247.  
  248.  
  249.     def query(self, useragent):
  250.         b = self.cache.get(useragent)
  251.         if b: return b
  252.  
  253.         for sec_pat in self.sections:
  254.             if sec_pat.match(useragent):
  255.                 browser = dict(agent=useragent)
  256.                 browser.update(self.items[sec_pat])
  257.                 parent = browser.get("parent")
  258.                 while parent:
  259.                     items = self.browsers[parent]
  260.                     for key, value in items.items():
  261.                         if key not in browser.keys():
  262.                             browser[key] = value
  263.                         elif key == "browser" and value != "DefaultProperties":
  264.                             browser["category"] = value # Wget, Godzilla -> Download Managers
  265.                     parent = items.get("parent")
  266.                 if browser.get("browser") != "Default Browser":
  267.                     b = Browser(browser)
  268.                     self.cache[useragent] = b
  269.                     return b
  270.         self.cache[useragent] = None
  271.  
  272.  
  273.     __call__ = query
  274.  
  275.  
  276.  
  277. def test():
  278.     bc = BrowserCapabilities()
  279.     for agent in [
  280.         "Mozilla/5.0 (compatible; Konqueror/3.5; Linux; X11; de) KHTML/3.5.2 (like Gecko) Kubuntu 6.06 Dapper",
  281.         "Mozilla/5.0 (X11; U; Linux i686; de; rv:1.8.0.5) Gecko/20060731 Ubuntu/dapper-security Firefox/1.5.0.5",
  282.         "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.12) Gecko/20060216 Debian/1.7.12-1.1ubuntu2",
  283.         "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.5) Gecko/20060731 Ubuntu/dapper-security Epiphany/2.14 Firefox/1.5.0.5",
  284.         "Opera/9.00 (X11; Linux i686; U; en)",
  285.         "Wget/1.10.2",
  286.         "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20051128 Kazehakase/0.3.3 Debian/0.3.3-1",
  287.         "Mozilla/5.0 (X11; U; Linux i386) Gecko/20063102 Galeon/1.3test",
  288.         "Mozilla/4.0 (compatible; MSIE 6.0; Windows 98)" # Tested under Wine
  289.       ]:
  290.         b = bc(agent)
  291.         if not b:
  292.             print "!", agent
  293.         else:
  294.             print b.name(), b.version(), b.get("category", ""), b.features()
  295.  
  296.  
  297. def update():
  298.     import urllib
  299.     urllib.urlretrieve("http://browsers.garykeith.com/stream.asp?BrowsCapINI",
  300.                        "browscap.ini")
  301.  
  302.  
  303. if __name__ == "__main__":
  304.     import sys, os
  305.     bc_filename = os.path.join(BC_PATH, "browscap.ini")
  306.     if not os.path.exists(bc_filename) or "-update" in sys.argv[1:]:
  307.         print "Downloading browser database to %r..." % bc_filename,
  308.         update()
  309.         print "done"
  310.     test()
  311.  
  312.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement