Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import glob
- import operator
- import re
- import string
- import sys
- # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Greasemonkey API identifiers whose usage is tallied across all scripts.
apis = (
    'GM_addStyle', 'GM_deleteValue', 'GM_getResourceText',
    'GM_getResourceUrl', 'GM_getValue', 'GM_listValues', 'GM_log',
    'GM_openInTab', 'GM_registerMenuCommand', 'GM_setValue',
    'GM_xmlhttpRequest', 'unsafeWindow',
)

# Per-API script counts, plus three bookkeeping buckets:
#   'all'  - every script examined
#   'none' - scripts using none of the APIs above
#   'eval' - scripts that mention eval
api_counts = {'all': 0, 'none': 0, 'eval': 0}
api_counts.update((name, 0) for name in apis)

# Metadata-block keys ("imperatives") whose presence is tallied.
metas = (
    '@require', '@resource', '@include', '@exclude', '@unwrap', '@version',
    '@name', '@namespace', '@description', '@author', '@homepage', '@date',
    '@license', '@match',
)
meta_counts = dict((name, 0) for name in metas)

# Histograms keyed by number of distinct domains (an int, or a marker string
# such as 'infinity'), valued by how many scripts fall into that bucket.
xhr_hosts = {}
set_hosts = {}
get_hosts = {}
- # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def hostToDomain(host):
    """Reduce a host name to an approximate registrable domain.

    Heuristic: keep the text after the last dot that still has at least
    seven characters following it (so 'www.google.com' -> 'google.com' and
    'www.bbc.co.uk' -> 'bbc.co.uk').  When no such dot exists the host is
    returned unchanged.
    """
    found = re.search(r'.*\.(.+......)$', host)
    return found.group(1) if found else host
def domainsInMetadata(metadata):
    """Collect the domains named by the @include rules of a metadata block.

    Returns an empty set when `metadata` is empty/None, the string
    'infinity' when the script can run anywhere (no @include at all, or an
    @include whose host part is '*'), and otherwise a set of domain strings
    as produced by hostToDomain().
    """
    if not metadata:
        return set([])

    patterns = re.findall(r'@include\s+(.*)', metadata)
    if not patterns:
        # No @include means "@include *" !
        return 'infinity'

    def urlToHost(url):
        # First a reasonable URL, then a stranger pattern like
        # "*.amazon.*/*"; the first expression that matches wins.
        for expr in (r'^\w+.*?://([^/]+)', r'^([^/]+)'):
            found = re.search(expr, url)
            if found:
                return found.group(1)
        return url

    hosts = set(urlToHost(pattern.strip()) for pattern in patterns)
    if '*' in hosts:
        return 'infinity'
    return set(hostToDomain(host) for host in hosts)
def numDomainsInMetadata(metadata):
    """Return how many distinct domains the metadata's @include rules cover.

    Returns the string 'infinity' when domainsInMetadata() reports that the
    script may run on any site; otherwise the size of the domain set as an
    int.
    """
    domains = domainsInMetadata(metadata)
    if domains == 'infinity':
        return domains
    # Reuse the result computed above instead of parsing the metadata again.
    return len(domains)
def domainsInXhr(source):
    """Return the set of domains that GM_xmlhttpRequest calls appear to hit.

    Looks for an http(s) URL that follows 'GM_xmlhttpRequest' and 'url'
    without an intervening '}', and maps each captured host through
    hostToDomain().  An empty set means no literal URL was recognised.
    """
    pattern = re.compile(
        r"""GM_xmlhttpRequest[^}]+url[^}]+https?://([^'"/]+)""", re.S)
    return set(hostToDomain(found) for found in pattern.findall(source))
- # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Compiled once, outside the loop.  DOTALL lets a /* ... */ comment span
# several lines.  (Passing re.S as the 4th positional argument of re.sub sets
# `count`, not `flags`, so it must not be done that way.)
_block_comment = re.compile(r'/\*.*?\*/', re.S)

# One script path per line on stdin; tally API and metadata usage for each.
for filename in sys.stdin:
    filename = filename.strip()
    if not filename:
        # Ignore blank lines rather than trying to open ''.
        continue
    api_counts['all'] += 1

    # Context manager so the handle is closed even if read() fails.
    with open(filename) as handle:
        source = handle.read()

    # Build a copy of the source with string literals and comments removed,
    # so API names that only appear in text are not counted.
    source_clean = source
    # Strip strings (escaped quotes first, then quoted runs on one line).
    source_clean = re.sub("""\\\\['"]""", '', source_clean)
    source_clean = re.sub("""(['"]).*?\\1""", '', source_clean)
    # Strip comments.
    source_clean = _block_comment.sub('', source_clean)
    source_clean = re.sub('//.*\r?\n?', '', source_clean)

    # Record which APIs this script mentions.
    this_apis = {}
    for api in apis:
        used = api in source_clean
        this_apis[api] = used
        if used:
            api_counts[api] += 1
    if not any(this_apis.values()):
        api_counts['none'] += 1
    if re.search(r'\beval\b', source_clean):
        api_counts['eval'] += 1

    # Metadata is read from the raw source because it lives in // comments.
    # Normalise to a plain string ('' when absent or empty) so the helpers
    # below never receive a match object.
    block = re.search(r'// ==UserScript==(.*?)// ==/UserScript==', source, re.S)
    metadata = block.group(1) if block else ''
    for meta in metas:
        # Require the key to end here, so '@name' is not counted merely
        # because '@namespace' is present.
        if re.search(re.escape(meta) + r'(?!\w)', metadata):
            meta_counts[meta] += 1

    # Stored values: bucket by how many domains the script is included on.
    numDomains = numDomainsInMetadata(metadata)
    if this_apis['GM_setValue']:
        set_hosts[numDomains] = set_hosts.get(numDomains, 0) + 1
    if this_apis['GM_getValue']:
        get_hosts[numDomains] = get_hosts.get(numDomains, 0) + 1

    # XHR: bucket by included domains plus the domains requested, or by
    # 'infinity' / 'unknown' when that count cannot be determined.
    if this_apis['GM_xmlhttpRequest']:
        metaDomains = domainsInMetadata(metadata)
        if metaDomains == 'infinity':
            numDomains = 'infinity'
        else:
            xhrDomains = domainsInXhr(source)
            if xhrDomains:
                numDomains = len(metaDomains | xhrDomains)
            else:
                numDomains = 'unknown'
        xhr_hosts[numDomains] = xhr_hosts.get(numDomains, 0) + 1
# Print the collected statistics.  Single-argument print(...) is used so the
# output is the same under the Python 2 print statement and Python 3.

def _print_counts(counts):
    """Print one right-aligned "<count> <label>" row per entry, largest first."""
    for label, count in sorted(counts.items(), key=operator.itemgetter(1), reverse=True):
        print("%10d %s" % (count, label))


print("")
print("%10s %s" % ('Number', 'API'))
_print_counts(api_counts)

print("")
print("%10s %s" % ('Number', 'Imperative'))
_print_counts(meta_counts)

print("")
print("%10s %s" % ('Number', 'Distinct hosts (XHR)'))
print("%10d %s" % (api_counts['GM_xmlhttpRequest'], 'all scripts'))
_print_counts(xhr_hosts)

print("%10s %s" % ('Number', 'Distinct hosts (set)'))
print("%10d %s" % (api_counts['GM_setValue'], 'all scripts'))
_print_counts(set_hosts)

print("%10s %s" % ('Number', 'Distinct hosts (get)'))
# The total for this table is the number of scripts using GM_getValue
# (not GM_setValue, which belongs to the table above).
print("%10d %s" % (api_counts['GM_getValue'], 'all scripts'))
_print_counts(get_hosts)
Add Comment
Please, Sign In to add comment