Guest User

Untitled

a guest
May 24th, 2018
121
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 14.04 KB | None | 0 0
  1. ################################################################################
  2. # coding: utf8
  3. ################################################################################
  4.  
  5. # Std Libs
  6. from __future__ import with_statement
  7.  
  8. import re
  9. import pprint
  10. import unittest
  11. import threading
  12. import Queue
  13. import os
  14. import functools
  15. import subprocess
  16. import bisect
  17. import time
  18. import mmap
  19.  
  20. from os.path import join, normpath, dirname
  21. from itertools import izip
  22.  
  23. ################################################################################
  24.  
  25. TAGS_RE = re.compile (
  26.  
  27. '(?P<symbol>[^\t]+)\t'
  28. '(?P<filename>[^\t]+)\t'
  29. '(?P<ex_command>.*?);"\t'
  30. '(?P<type>[^\t]+)'
  31. '(?:\t(?P<fields>.*))?'
  32. )
  33.  
  34. SYMBOL = 0
  35. FILENAME = 1
  36.  
  37. ################################################################################
  38.  
  39. def parse_tag_lines(lines, order_by='symbol'):
  40. tags_lookup = {}
  41.  
  42. for search_obj in (t for t in (TAGS_RE.search(l) for l in lines) if t):
  43. tag = post_process_tag(search_obj)
  44. tags_lookup.setdefault(tag[order_by], []).append(tag)
  45.  
  46. return tags_lookup
  47.  
  48.  
  49. def unescape_ex(ex):
  50. return re.sub(r"\\(\$|/|\^|\\)", r'\1', ex)
  51.  
  52. def process_ex_cmd(ex):
  53. return ex if ex.isdigit() else unescape_ex(ex[2:-2])
  54.  
  55. def post_process_tag(search_obj):
  56. tag = search_obj.groupdict()
  57.  
  58. fields = tag.get('fields')
  59. if fields:
  60. tag.update(process_fields(fields))
  61.  
  62. tag['ex_command'] = process_ex_cmd(tag['ex_command'])
  63.  
  64. return tag
  65.  
  66. def process_fields(fields):
  67. fields_dict = {}
  68.  
  69. for f in fields.split('\t'):
  70. f = f.split(':')
  71.  
  72. # These, if existing, are keys with no values... retarded
  73. for key in f[:-2]:
  74. fields_dict[key] = True # Essentially boolean?
  75.  
  76. # The last two are actual key value pairs because separated by \t
  77. key, value = f[-2:]
  78. fields_dict[key] = value
  79.  
  80. return fields_dict
  81.  
  82.  
  83. ################################################################################
  84.  
  85. def resort_ctags(tag_file):
  86. keys = {}
  87.  
  88. with open(tag_file) as fh:
  89. for l in fh:
  90. keys.setdefault(l.split('\t')[FILENAME], []).append(l)
  91.  
  92. with open(tag_file + '_unsorted', 'w') as fw:
  93. for k in sorted(keys):
  94. fw.write(''.join(keys[k]))
  95.  
  96. # def resort_ctags_mmap(tag_file):
  97. # with open(tag_file) as read_in:
  98. # with open(tag_file + '_mmap', 'r+') as fh:
  99. # mapped = mmap.mmap(fh.fileno(), 0)
  100.  
  101. # for l in read_in:
  102. # mapped.write(l)
  103.  
  104. # mapped.flush()
  105. # mapped.close()
  106.  
  107. def build_ctags(ctags_exe, tag_file):
  108. cmd = [ctags_exe, '-R']
  109.  
  110. # cmd = [ctags_exe, '-R']
  111.  
  112. # cmds = [cmd] + [cmd[:]]
  113. # cmds[-1].extend(['--sort=no', '-f', 'tags_unsorted'])
  114. # cmd = ' && '.join(subprocess.list2cmdline(c) for c in cmds)
  115.  
  116. p = subprocess.Popen(cmd, cwd = dirname(tag_file), shell=1)
  117. p.wait()
  118.  
  119. # Faster than ctags.exe again:
  120. resort_ctags(tag_file)
  121.  
  122. return tag_file
  123.  
  124. ################################################################################
  125.  
  126. def log_divides(f):
  127. f.accessed = 0
  128. def wrapped(self, i):
  129. item = f(self, i)
  130. f.accessed += 1
  131. print f.accessed, i, self.fh.tell()
  132. return item
  133. return wrapped
  134.  
  135. class TagFile(object):
  136. def __init__(self, p, column):
  137. self.p = p
  138. self.column = column
  139.  
  140. # @log_divides
  141. def __getitem__(self, index):
  142. self.fh.seek(index)
  143. self.fh.readline()
  144. return self.fh.readline().split('\t')[self.column]
  145.  
  146. def __len__(self):
  147. return os.stat(self.p)[6]
  148.  
  149. def get(self, *tags):
  150. with open(self.p, 'r+') as fh:
  151. self.fh = mmap.mmap(fh.fileno(), 0)
  152.  
  153. for tag in tags:
  154. b4 = bisect.bisect_left(self, tag)
  155. fh.seek(b4)
  156.  
  157. for l in fh:
  158. comp = cmp(l.split('\t')[self.column], tag)
  159.  
  160. if comp == -1: continue
  161. elif comp: break
  162.  
  163. yield l
  164.  
  165. self.fh.close()
  166.  
  167. def get_tags_dict(self, *tags):
  168. return parse_tag_lines(self.get(*tags))
  169.  
  170. ################################################################################
  171.  
  172.  
  173.  
  174. def parse_tag_file(tag_file):
  175. with open(tag_file) as tf:
  176. tags = parse_tag_lines(tf)
  177.  
  178. return tags
  179.  
  180. # def get_tags_for_file(ctags_exe, a_file):
  181. # cmd = [ctags_exe, '-f', '-', a_file]
  182.  
  183. # p = subprocess.Popen (
  184. # cmd, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, shell=1 )
  185.  
  186. # p.wait()
  187.  
  188. # tags = parse_tag_lines(p.stdout)
  189. # p.stdout.close()
  190.  
  191. # return tags
  192.  
  193. # def index_tag_file(tag_file, column=0):
  194. # index = {}
  195.  
  196. # with open(tag_file, 'rb') as tags:
  197. # position = 0
  198.  
  199. # for l in tags:
  200. # field = l.split('\t')[column]
  201. # if field not in index:
  202. # index[field] = position
  203.  
  204. # position += len(l)
  205.  
  206. # return index
  207.  
  208. # def get_tags_for_field(field, tag_file, index, column=0):
  209. # position = index.get(field)
  210. # if not position: return {}
  211.  
  212. # with open(tag_file) as fh:
  213. # fh.seek(position)
  214. # tag_lines = list(takewhile(lambda l:l.split('\t')[column] == field, fh))
  215.  
  216. # return parse_tag_lines(tag_lines)
  217.  
  218. ################################################################################
  219.  
  220. class Tag(object):
  221. "dot.syntatic sugar for tag dicts"
  222. def __init__(self, tag_dict):
  223. self.__dict__ = tag_dict
  224.  
  225. def __repr__(self):
  226. return pprint.pformat(self.__dict__)
  227.  
  228. ################################################################################
  229.  
  230. # class CTagsCache(object):
  231. # cache = {}
  232. # pending = {}
  233.  
  234. # def __init__(self, status=None):
  235. # self.Q = Queue.Queue()
  236. # self.OQ = Queue.Queue()
  237.  
  238. # self.t = threading.Thread(target=self.thread)
  239. # self.t.setDaemon(1)
  240. # self.t.start()
  241.  
  242. # self.status=status
  243.  
  244. # def thread(self):
  245. # while True:
  246. # path = self.Q.get()
  247. # column = 1 if path.endswith('unsorted') else 0
  248.  
  249. # self.OQ.put((path, index_tag_file(path, column)))
  250.  
  251. # self.Q.task_done()
  252. # if self.status: self.status(path)
  253.  
  254. # def get(self, path):
  255. # if path not in self.cache:
  256. # if path not in self.pending:
  257. # self.Q.put(path)
  258. # self.pending[path] = True
  259.  
  260. # while True:
  261. # try:
  262. # tag_path, tag_dict = self.OQ.get_nowait()
  263. # self.cache[tag_path] = tag_dict
  264. # self.pending.pop(tag_path)
  265. # self.OQ.task_done()
  266.  
  267. # except Queue.Empty:
  268. # break
  269.  
  270. # return self.cache.get(path, {})
  271.  
  272. ################################################################################
  273.  
  274. # - Parse an existing CTAGS file, and implement go-to-tag-under-cursor. CTAGS
  275. # files can get quite large, so representing them efficiently should be a goal.
  276. # Ideally, parsing should also be done in another thread, so the editor isn't
  277. # blocked while reading in a multi-megabyte file. Getting this implemented
  278. # nicely is a fair bit of work.
  279.  
  280. # Next step would be to automatically run exuberant ctags in the current
  281. # directory, if there isn't a CTAGS file already, or the one that does exist is
  282. # out of date.
  283.  
  284. # Once we're at a state where symbol definitions are in memory, there's a number
  285. # of other things that can be done, such as listing them in the quick panel, and
  286. # hooking them into auto-complete.
  287.  
  288. ################################################################################
  289.  
  290.  
  291. class CTagsTest(unittest.TestCase):
  292. def test_all_search_strings_work(self):
  293. os.chdir(os.path.dirname(__file__))
  294. tags = parse_tag_file('tags')
  295.  
  296. failures = []
  297.  
  298. for symbol, tag_list in tags.iteritems():
  299. for tag in (Tag(t) for t in tag_list):
  300. if not tag.ex_command.isdigit():
  301. with open(tag.filename, 'r+') as fh:
  302. mapped = mmap.mmap(fh.fileno(), 0)
  303. if not mapped.find(tag.ex_command):
  304. failures += [tag.ex_command]
  305.  
  306. for f in failures:
  307. print f
  308.  
  309. self.assertEqual(len(failures), 0, 'update tag files and try again')
  310.  
  311. def test_tags_files(self):
  312. tags = r"tags"
  313. tag_file = TagFile(tags, SYMBOL)
  314.  
  315. with open(tags, 'r') as fh:
  316. latest = ''
  317. lines = []
  318.  
  319. for l in fh:
  320. symbol = l.split('\t')[SYMBOL]
  321.  
  322. if symbol != latest:
  323.  
  324. if latest:
  325. tags = list(tag_file.get(latest))
  326. self.assertEqual(lines, tags)
  327.  
  328. lines = []
  329.  
  330. latest = symbol
  331.  
  332. lines += [l]
  333.  
  334. # def scribble():
  335. # raw_input('About to use memory')
  336.  
  337. # import time
  338. # tags = 'C://python25//lib//tags'
  339.  
  340. # t1 = time.time()
  341. # index = index_tag_file(tags)
  342. # print time.time() - t1
  343.  
  344. # t1 = time.time()
  345. # print get_tags_for_field("struct_GLUnurbs", tags, index)
  346. # print time.time() - t1
  347.  
  348. # raw_input('Press enter')
  349.  
  350. # print get_tags_for_file('ctags.exe', 'ctags.py')
  351.  
  352. def scribble():
  353. # raw_input('About to use memory')
  354.  
  355. import time
  356. tags = 'C://python25//lib//tags'
  357.  
  358. t1 = time.time()
  359.  
  360. a = list(TagFile(tags, SYMBOL).get_tags_dict('Test','Tests'))
  361.  
  362. print time.time() - t1
  363.  
  364. print len(a)
  365.  
  366. # raw_input('Press enter')
  367.  
  368.  
  369. if __name__ == '__main__':
  370. if 0: scribble()
  371. else: unittest.main()
  372.  
  373. ################################################################################
  374. # TAG FILE FORMAT
  375.  
  376. # When not running in etags mode, each entry in the tag file consists of a
  377. # separate line, each looking like this in the most general case:
  378.  
  379. # tag_name<TAB>file_name<TAB>ex_cmd;"<TAB>extension_fields
  380.  
  381. # The fields and separators of these lines are specified as follows:
  382.  
  383. # 1.
  384.  
  385. # tag name
  386.  
  387. # 2.
  388.  
  389. # single tab character
  390.  
  391. # 3.
  392.  
  393. # name of the file in which the object associated with the tag is located
  394.  
  395. # 4.
  396.  
  397. # single tab character
  398.  
  399. # 5.
  400.  
  401. # EX command used to locate the tag within the file; generally a search
  402. # pattern (either /pattern/ or ?pattern?) or line number (see −−excmd). Tag
  403. # file format 2 (see −−format) extends this EX command under certain
  404. # circumstances to include a set of extension fields (described below)
  405. # embedded in an EX comment immediately appended to the EX command, which
  406. # leaves it backward-compatible with original vi(1) implementations.
  407.  
  408. # A few special tags are written into the tag file for internal purposes. These
  409. # tags are composed in such a way that they always sort to the top of the file.
  410. # Therefore, the first two characters of these tags are used as a magic number to
  411. # detect a tag file for purposes of determining whether a valid tag file is
  412. # being overwritten rather than a source file. Note that the name of each source
  413. # file will be recorded in the tag file exactly as it appears on the command
  414. # line.
  415.  
  416. # Therefore, if the path you specified on the command line was relative to the
  417. # current directory, then it will be recorded in that same manner in the tag
  418. # file. See, however, the −−tag−relative option for how this behavior can be
  419. # modified.
  420.  
  421. # Extension fields are tab-separated key-value pairs appended to the end of the
  422. # EX command as a comment, as described above. These key value pairs appear in
  423. # the general form "key:value". Their presence in the lines of the tag file are
  424. # controlled by the −−fields option. The possible keys and the meaning of their
  425. # values are as follows:
  426.  
  427. # access
  428.  
  429. # Indicates the visibility of this class member, where value is specific to
  430. # the language.
  431.  
  432. # file
  433.  
  434. # Indicates that the tag has file-limited visibility. This key has no
  435. # corresponding value.
  436.  
  437. # kind
  438.  
  439. # Indicates the type, or kind, of tag. Its value is either one of the
  440. # corresponding one-letter flags described under the various −−<LANG>−kinds
  441. # options above, or a full name. It is permitted (and is, in fact, the
  442. # default) for the key portion of this field to be omitted. The optional
  443. # behaviors are controlled with the −−fields option.
  444.  
  445. # implementation
  446.  
  447. # When present, this indicates a limited implementation (abstract vs. concrete)
  448. # of a routine or class, where value is specific to the language ("virtual" or
  449. # "pure virtual" for C++; "abstract" for Java).
  450.  
  451. # inherits
  452.  
  453. # When present, value is a comma-separated list of classes from which this
  454. # class is derived (i.e. inherits from).
  455.  
  456. # signature
  457.  
  458. # When present, value is a language-dependent representation of the
  459. # signature of a routine. A routine signature in its complete form specifies
  460. # the return type of a routine and its formal argument list. This extension
  461. # field is presently supported only for C-based languages and does not
  462. # include the return type.
  463.  
  464. # In addition, information on the scope of the tag definition may be available,
  465. # with the key portion equal to some language-dependent construct name and its
  466. # value the name declared for that construct in the program. This scope entry
  467. # indicates the scope in which the tag was found. For example, a tag generated
  468. # for a C structure member would have a scope looking like "struct:myStruct".
Add Comment
Please, Sign In to add comment