Guest User

Untitled

a guest
Dec 7th, 2016
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.64 KB | None | 0 0
# Script to add back comments removed from source code
# Example use case: comments stripped out of HTC kernel releases
#
# Copyright (C) 2016 Sultan Qasim Khan
#
# Usage:
# 1. Find the closest upstream source release with comments (e.g. CAF)
# 2. After checking out the closest upstream release, create a git
#    commit overlaying the OEM modifications. Do this by replacing
#    all files with the OEM versions and committing the change.
# 3. Run "git diff HEAD~1" and save output to a file. This script will
#    parse the diff, and create a patch file reverting the removal of
#    comments from upstream code, while not reverting any other changes.
# 4. Use "git apply" to apply the generated patch file to restore comments.
#
# Usage Example:
# Assume current commit is a stripped HTC release over a commented CAF base
#   git diff HEAD~1 >vendor_changes
#   python3.5 recommenter.py vendor_changes comment_restoration
#   git apply comment_restoration
#   git commit -a -m "restore stripped comments"
  22.  
  23. import sys
  24.  
  25. def extract_changes(lines):
  26. """
  27. Take in the lines of a git diff, output a list of tuples of filename
  28. and changes for each file.
  29. """
  30. changes = []
  31. cur_filename = None
  32. cur_lines = []
  33. skip_lines = False
  34. deleted = False
  35.  
  36. for line in lines:
  37. if skip_lines:
  38. if line.startswith(b'deleted file'): deleted = True
  39. elif line.startswith(b'@@'): skip_lines = False
  40. elif line.startswith(b'diff --git a/'): skip_lines = False
  41. else: continue
  42.  
  43. if line.startswith(b'diff --git a/'):
  44. if not deleted and cur_filename is not None:
  45. changes.append((cur_filename, cur_lines))
  46. deleted = False
  47. cur_filename = line.split(b' b/')[1].rstrip()
  48. cur_lines = []
  49. skip_lines = True
  50. else:
  51. cur_lines.append(line)
  52.  
  53. if not deleted and cur_filename is not None:
  54. changes.append((cur_filename, cur_lines))
  55.  
  56. return changes
  57.  
  58. def _extract_diff_line_params(line):
  59. """
  60. Takes in a line like:
  61. b'@@ -176,9 +179,6 @@ static const inline bool is_cpu_secure(void)\n'
  62.  
  63. Outputs a tuple like:
  64. (176, 9, 179, 6, b'static const inline bool is_cpu_secure(void)')
  65. """
  66. chunks = line.split(b' ', 4)
  67. oldline_str, oldcnt_str = chunks[1][1:].split(b',')
  68. try:
  69. newline_str, newcnt_str = chunks[2][1:].split(b',')
  70. except ValueError:
  71. newline_str = b'0'
  72. newcnt_str = chunks[2][1:]
  73. oldline, oldcnt, newline, newcnt = [int(x) for x in [
  74. oldline_str, oldcnt_str, newline_str, newcnt_str]]
  75. if len(chunks) > 4:
  76. funcname = chunks[4].rstrip()
  77. else:
  78. funcname = b''
  79. return (oldline, oldcnt, newline, newcnt, funcname)
  80.  
  81. def cluster_diffs(diff):
  82. """
  83. Takes in a diff of a single file generated by extract_changes
  84. This clusters all the changes by line groups.
  85. It will convert each change group into a tuple of the form:
  86. (old_line_num, old_line_count, new_line_num, new_line_count, funcname, diff)
  87. """
  88. clusters = []
  89. cur_params = None
  90. cluster_lines = []
  91.  
  92. for line in diff:
  93. if line.startswith(b'@@ -'):
  94. if cur_params is not None:
  95. clusters.append(cur_params + (cluster_lines,))
  96. cur_params = _extract_diff_line_params(line)
  97. cluster_lines = []
  98. else:
  99. cluster_lines.append(line)
  100.  
  101. if cur_params is not None:
  102. clusters.append(cur_params + (cluster_lines,))
  103.  
  104. return clusters
  105.  
  106. def strip_comments(lines):
  107. """
  108. Takes a set of lines, and strips out all multiline (/* */) and
  109. single line (//) comments.
  110. """
  111. # first take out multiline style comments
  112. # right now, I'm kinda lazy and won't handle multiple
  113. # such comments in one line (it's a rare edge case anyway)
  114. in_comment = False
  115. new_lines = []
  116. for line in lines:
  117. if in_comment:
  118. if b'*/' in line:
  119. new_lines.append(line[line.index(b'*/') + 2:])
  120. in_comment = False
  121. else:
  122. continue
  123. else:
  124. if b'/*' in line:
  125. if b'*/' in line: # single line
  126. new_line = line[:line.index(b'/*')]
  127. new_line += line[line.index(b'*/') + 2:]
  128. new_lines.append(new_line)
  129. else: # multi line
  130. new_lines.append(line[:line.index(b'/*')] + b'\n')
  131. in_comment = True
  132. else:
  133. new_lines.append(line)
  134.  
  135. # annoying edge case: diff chunk ends while in comment
  136. # I don't feel like handling it properly, so lets just leave the
  137. # comments in for this bad case
  138. if in_comment:
  139. new_lines = lines
  140.  
  141. # now take out single line double slash comments
  142. lines = new_lines
  143. new_lines = []
  144. for line in lines:
  145. if b'//' in line:
  146. new_lines.append(line[:line.index(b'//')] + b'\n')
  147. else:
  148. new_lines.append(line)
  149.  
  150. return new_lines
  151.  
  152. def strip_trailing_whitespace(lines):
  153. # also strip empty lines
  154. new_lines = []
  155. for line in lines:
  156. stripped = line.rstrip()
  157. if len(stripped) == 0: continue
  158. new_lines.append(stripped + b'\n')
  159. return new_lines
  160.  
  161. def strip_leading_indicators(lines):
  162. # leading indicator is +, -, or space
  163. return [line[1:] for line in lines]
  164.  
  165. def check_diff_for_comment_removal(old, new):
  166. """
  167. old is the diff listing of lines removed in a change
  168. new is the diff listing of lines added in a change
  169.  
  170. This function examines a change to see if the only effect it has
  171. is to remove a comment. If this is the case, the function will
  172. return true. Else, it will return false.
  173. """
  174. old = strip_leading_indicators(old)
  175. new = strip_leading_indicators(new)
  176.  
  177. stripped_old = strip_trailing_whitespace(old)
  178. stripped_new = strip_trailing_whitespace(new)
  179.  
  180. # if the only change is whitespace removal and not actual comment removal,
  181. # don't do anything
  182. if stripped_old == stripped_new:
  183. return False
  184.  
  185. cstripped_old = strip_trailing_whitespace(strip_comments(old))
  186.  
  187. # if the new one is the same as the old one with comments stripped,
  188. # we have an issue
  189. return cstripped_old == stripped_new
  190.  
  191. def invert_change(old, new):
  192. new_inv = [b'-' + line[1:] for line in new]
  193. old_inv = [b'+' + line[1:] for line in old]
  194. return new_inv + old_inv
  195.  
  196. def neutralize_change(new):
  197. return [b' ' + line[1:] for line in new]
  198.  
  199. def invert_comment_removals(cluster_diff):
  200. """
  201. Takes in the diffs of one cluster (as clustered by cluster_diffs)
  202. It will make changes that are not comment removals into no-ops
  203. It will revert any changes that remove comments without adding or
  204. changing anything else.
  205. """
  206. diff_out = []
  207. in_diff = False
  208. last_old = None
  209. last_new = None
  210.  
  211. for line in cluster_diff:
  212. if in_diff:
  213. if line.startswith(b'-'):
  214. last_old.append(line)
  215. elif line.startswith(b'+'):
  216. last_new.append(line)
  217. else:
  218. if check_diff_for_comment_removal(last_old, last_new):
  219. diff_out.extend(invert_change(last_old, last_new))
  220. else:
  221. diff_out.extend(neutralize_change(last_new))
  222. in_diff = False
  223. diff_out.append(line)
  224. else:
  225. if line.startswith(b'-'):
  226. in_diff = True
  227. last_old = [line]
  228. last_new = []
  229. elif line.startswith(b'+'):
  230. in_diff = True
  231. last_old = []
  232. last_new = [line]
  233. else:
  234. diff_out.append(line)
  235.  
  236. if in_diff:
  237. if check_diff_for_comment_removal(last_old, last_new):
  238. diff_out.extend(invert_change(last_old, last_new))
  239. else:
  240. diff_out.extend(neutralize_change(last_new))
  241.  
  242. return diff_out
  243.  
  244. def count_additions_and_removals(cluster_diff):
  245. additions = 0
  246. removals = 0
  247. for line in cluster_diff:
  248. if line.startswith(b'-'): removals += 1
  249. elif line.startswith(b'+'): additions += 1
  250. return additions, removals
  251.  
  252. def recompute_diff_lines(file_clusters):
  253. """
  254. Takes in a list of all the diff clusters in a file after we have done
  255. comment removal inversion. This will recompute the line numbers after
  256. we have done the diff inversion. This will return the same list of
  257. diff clusters passed in, but with line numbers corrected.
  258. """
  259. lines_added = 0
  260. new_clusters = []
  261.  
  262. for cluster in file_clusters:
  263. additions, removals = count_additions_and_removals(cluster[5])
  264. if additions == 0 and removals == 0:
  265. # get rid of empty diffs
  266. continue
  267. new_clusters.append((
  268. cluster[2],
  269. cluster[3],
  270. cluster[2] + lines_added,
  271. cluster[3] + additions - removals,
  272. cluster[4],
  273. cluster[5]))
  274. lines_added += additions - removals
  275.  
  276. return new_clusters
  277.  
  278. def convert_cluster_diff_to_lines(cluster):
  279. """
  280. Takes in a cluster diff tuple (old_lime, old_count, new_line...)
  281. Converts it back into the form you see in git diff
  282. (ie. it readds the @@ -123,4 +567,8 @@ ... type headers)
  283. """
  284. lines = []
  285. lines.append(b'@@ -%i,%i +%i,%i @@ %s\n' % cluster[:5])
  286. lines.extend(cluster[5])
  287. return lines
  288.  
  289. def main(argv):
  290. infile = open(argv[1], 'rb')
  291. lines = infile.readlines()
  292. infile.close()
  293.  
  294. changed_files = extract_changes(lines)
  295.  
  296. file_diff_clusters = []
  297. for fname, diff in changed_files:
  298. file_diff_clusters.append((fname, cluster_diffs(diff)))
  299.  
  300. # generate the diffs to add back comments
  301. readd_comment_diff_clusters = []
  302. for fname, clusters in file_diff_clusters:
  303. new_clusters = []
  304. for cluster in clusters:
  305. new_diff = invert_comment_removals(cluster[5])
  306. new_clusters.append(cluster[:5] + (new_diff,))
  307. new_clusters = recompute_diff_lines(new_clusters)
  308. readd_comment_diff_clusters.append((fname, new_clusters))
  309.  
  310. # write out the comment readding diffs to a git-readable file format
  311. outfile = open(argv[2], 'wb')
  312. for fname, clusters in readd_comment_diff_clusters:
  313. if len(clusters) == 0: continue
  314. outfile.write(b'diff --git a/%s b/%s\n' % (fname, fname))
  315. outfile.write(b'--- a/%s\n' % fname)
  316. outfile.write(b'+++ b/%s\n' % fname)
  317. for cluster in clusters:
  318. outfile.writelines(convert_cluster_diff_to_lines(cluster))
  319. outfile.write(b'\n')
  320. outfile.close()
  321.  
  322.  
  323. if sys.version_info < (3, 5):
  324. raise Exception("Script requires Python 3.5 or newer")
  325.  
  326. if __name__ == "__main__":
  327. main(sys.argv)
Add Comment
Please, Sign In to add comment