Guest User

Untitled

a guest
Apr 25th, 2018
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.63 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. import glob
  4. import operator
  5. import re
  6. import string
  7. import sys
  8.  
  9. # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
  10.  
  # Greasemonkey API identifiers whose usage is tallied per script.
  apis = ('GM_addStyle', 'GM_deleteValue', 'GM_getResourceText',
          'GM_getResourceUrl', 'GM_getValue', 'GM_listValues', 'GM_log',
          'GM_openInTab', 'GM_registerMenuCommand', 'GM_setValue',
          'GM_xmlhttpRequest', 'unsafeWindow')

  # One zeroed counter per API, plus three extra buckets: 'all' (every
  # script processed), 'none' (scripts using none of the APIs) and 'eval'
  # (scripts whose cleaned source mentions eval).
  api_counts = dict.fromkeys(('all', 'none', 'eval') + apis, 0)

  # Metadata-block keys whose presence is tallied per script.
  metas = ('@require', '@resource', '@include', '@exclude', '@unwrap',
           '@version', '@name', '@namespace', '@description', '@author',
           '@homepage', '@date', '@license', '@match')
  meta_counts = dict.fromkeys(metas, 0)

  # Histograms: domain count (an int, 'infinity' or 'unknown') -> number of
  # scripts, kept separately for GM_xmlhttpRequest, GM_setValue and
  # GM_getValue users.
  xhr_hosts, set_hosts, get_hosts = {}, {}, {}
  30.  
  31. # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
  32.  
  def hostToDomain(host):
      """Reduce a host name to a shorter, domain-like suffix.

      The pattern looks for the right-most dot that is still followed by at
      least seven characters and returns everything after that dot.  When no
      such dot exists, the host is returned unchanged.

      Examples: 'www.google.com' -> 'google.com', while 'google.com' and
      'mail.x.org' come back untouched.
      """
      found = re.search(r'.*\.(.+......)$', host)
      if found is None:
          return host
      return found.group(1)
  39.  
  def domainsInMetadata(metadata):
      """Collect the domains named by the @include lines of a metadata block.

      Returns:
          * an empty set when ``metadata`` is empty or None;
          * the string 'infinity' when there is no @include line at all
            (no @include means "@include *"), or when one of the include
            patterns has the bare host '*';
          * otherwise the set of hostToDomain() results for every include
            host.
      """
      def hostOf(pattern):
          # First try a reasonable "scheme://host/..." URL, then fall back
          # to stranger patterns like "*.amazon.*/*" by taking everything
          # before the first slash.
          for regex in (r'^\w+.*?://([^/]+)', r'^([^/]+)'):
              found = re.search(regex, pattern)
              if found:
                  return found.group(1)
          return pattern

      if not metadata:
          return set([])

      patterns = re.findall(r'@include\s+(.*)', metadata)
      if not patterns:
          return 'infinity'

      hosts = set(hostOf(pattern.strip()) for pattern in patterns)
      if '*' in hosts:
          return 'infinity'

      return set(map(hostToDomain, hosts))
  71.  
  def numDomainsInMetadata(metadata):
      """Return how many distinct domains a metadata block includes.

      Returns the string 'infinity' when domainsInMetadata() reports
      'infinity'; otherwise the size of the set it returns.
      """
      domains = domainsInMetadata(metadata)
      if 'infinity' == domains:
          return 'infinity'
      # Reuse the result computed above rather than parsing the metadata a
      # second time.
      return len(domains)
  77.  
  def domainsInXhr(source):
      """Return the set of domains found in GM_xmlhttpRequest calls.

      Looks for an http(s) URL host following ``GM_xmlhttpRequest`` and
      ``url`` with no closing brace in between, then reduces each host with
      hostToDomain().  The set is empty when nothing matches.
      """
      pattern = re.compile(
          r"""GM_xmlhttpRequest[^}]+url[^}]+https?://([^'"/]+)""", re.S)
      return set(hostToDomain(host) for host in pattern.findall(source))
  83.  
  84. # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
  85.  
  # Patterns applied to every script; compiled once ahead of the loop.
  escapedQuote = re.compile(r"""\\['"]""")
  quotedString = re.compile(r"""(['"]).*?\1""")
  # DOTALL must be a compile flag here: the fourth positional argument of
  # re.sub() is ``count``, so passing re.S there never enabled it and
  # multi-line /* ... */ comments were left in place.
  blockComment = re.compile(r'/\*.*?\*/', re.S)
  lineComment = re.compile('//.*\r?\n?')
  evalWord = re.compile(r'\beval\b')
  metadataBlock = re.compile(
      r'// ==UserScript==(.*?)// ==/UserScript==', re.S)

  # Standard input supplies one script file name per line.
  for filename in sys.stdin:
      filename = filename.strip()
      if not filename:
          # A blank line names no file; skip it instead of failing on open.
          continue
      api_counts['all'] += 1

      # Close the handle explicitly; many files may be processed in one run.
      handle = open(filename)
      try:
          source = handle.read()
      finally:
          handle.close()

      # Strip strings (escaped quotes first), then comments, so that API
      # names appearing only in literals or comments are not counted.
      source_clean = escapedQuote.sub('', source)
      source_clean = quotedString.sub('', source_clean)
      source_clean = blockComment.sub('', source_clean)
      source_clean = lineComment.sub('', source_clean)

      # Record which APIs this script uses and bump the global tallies.
      any_api = False
      this_apis = {}
      for api in apis:
          used = api in source_clean
          this_apis[api] = used
          if used:
              any_api = True
              api_counts[api] += 1

      if evalWord.search(source_clean):
          api_counts['eval'] += 1

      if not any_api:
          api_counts['none'] += 1

      # Always hand the helpers a string or None, never a match object.
      metaMatch = metadataBlock.search(source)
      if metaMatch:
          metadata = metaMatch.group(1)
      else:
          metadata = None
      if metadata:
          for meta in metas:
              if meta in metadata:
                  meta_counts[meta] += 1

      # Histogram of included-domain counts for scripts using stored values.
      numDomains = numDomainsInMetadata(metadata)
      if this_apis['GM_setValue']:
          set_hosts[numDomains] = set_hosts.get(numDomains, 0) + 1
      if this_apis['GM_getValue']:
          get_hosts[numDomains] = get_hosts.get(numDomains, 0) + 1

      # For XHR users, count included domains together with the domains the
      # script requests; 'unknown' when no request URL could be extracted.
      if this_apis['GM_xmlhttpRequest']:
          metaDomains = domainsInMetadata(metadata)
          if 'infinity' == metaDomains:
              numDomains = 'infinity'
          else:
              xhrDomains = domainsInXhr(source)
              if not xhrDomains:
                  numDomains = 'unknown'
              else:
                  numDomains = len(metaDomains | xhrDomains)
          xhr_hosts[numDomains] = xhr_hosts.get(numDomains, 0) + 1
  142.  
  # Print the collected statistics, each table sorted by descending count.
  # print(...) with a single parenthesised string produces the same output
  # as the Python 2 print statement.
  byCount = operator.itemgetter(1)

  print("")
  print("%10s %s" % ('Number', 'API'))
  for api, count in sorted(api_counts.items(), key=byCount, reverse=True):
      print("%10d %s" % (count, api))

  print("")
  print("%10s %s" % ('Number', 'Imperative'))
  for meta, count in sorted(meta_counts.items(), key=byCount, reverse=True):
      print("%10d %s" % (count, meta))

  print("")

  # Each host table starts with the number of scripts using the matching
  # API.  Driving the three tables from one list keeps the title, the API
  # total and the histogram paired; the 'get' table previously reported the
  # GM_setValue total.
  hostTables = (
      ('Distinct hosts (XHR)', 'GM_xmlhttpRequest', xhr_hosts),
      ('Distinct hosts (set)', 'GM_setValue', set_hosts),
      ('Distinct hosts (get)', 'GM_getValue', get_hosts),
  )
  for title, totalKey, histogram in hostTables:
      print("%10s %s" % ('Number', title))
      print("%10d %s" % (api_counts[totalKey], 'all scripts'))
      for hosts, count in sorted(histogram.items(), key=byCount, reverse=True):
          print("%10d %s" % (count, hosts))
Add Comment
Please, Sign In to add comment