Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Script to add back comments removed from source code
- # Example use case: comments stripped out of HTC kernel releases
- #
- # Copyright (C) 2016 Sultan Qasim Khan
- #
- # Usage:
- # 1. Find the closest upstream source release with comments (eg. CAF)
- # 2. After checking out the closest upstream release, create a git
- # commit overlaying the OEM modifications. Do this by replacing
- # all files with the OEM versions and committing the change.
- # 3. Run "git diff HEAD~1" and save output to a file. This script will
- # parse the diff, and create a patch file reverting the removal of
- # comments from upstream code, while not reverting any other changes.
- # 4. Use "git apply" to apply the generated patch file to restore comments.
- #
- # Usage Example:
- # Assume current commit is a stripped HTC release over a commented CAF base
- # git diff HEAD~1 >vendor_changes
- # python3.5 recommenter.py vendor_changes comment_restoration
- # git apply comment_restoration
- # git commit -a -m "restore stripped comments"
- import sys
def extract_changes(lines):
    """
    Split the raw lines of a git diff into per-file change lists.

    Returns a list of (filename, lines) tuples, where filename is the
    b/ side path taken from the "diff --git" header and lines holds
    everything from the first "@@" hunk header of that file onwards.
    Files marked with a "deleted file" header line are left out.
    """
    collected = []
    name = None
    body = []
    in_header = False
    was_deleted = False
    for raw in lines:
        if raw.startswith(b'diff --git a/'):
            # a new file section begins: store the previous one first
            if name is not None and not was_deleted:
                collected.append((name, body))
            name = raw.split(b' b/')[1].rstrip()
            body = []
            was_deleted = False
            in_header = True
            continue
        if in_header:
            # between the "diff --git" line and the first hunk only the
            # deletion marker and the hunk header are of interest
            if raw.startswith(b'deleted file'):
                was_deleted = True
            elif raw.startswith(b'@@'):
                in_header = False
            else:
                continue
        body.append(raw)
    # store the final file section
    if name is not None and not was_deleted:
        collected.append((name, body))
    return collected
- def _extract_diff_line_params(line):
- """
- Takes in a line like:
- b'@@ -176,9 +179,6 @@ static const inline bool is_cpu_secure(void)\n'
- Outputs a tuple like:
- (176, 9, 179, 6, b'static const inline bool is_cpu_secure(void)')
- """
- chunks = line.split(b' ', 4)
- oldline_str, oldcnt_str = chunks[1][1:].split(b',')
- try:
- newline_str, newcnt_str = chunks[2][1:].split(b',')
- except ValueError:
- newline_str = b'0'
- newcnt_str = chunks[2][1:]
- oldline, oldcnt, newline, newcnt = [int(x) for x in [
- oldline_str, oldcnt_str, newline_str, newcnt_str]]
- if len(chunks) > 4:
- funcname = chunks[4].rstrip()
- else:
- funcname = b''
- return (oldline, oldcnt, newline, newcnt, funcname)
def cluster_diffs(diff):
    """
    Group the diff lines of one file (as produced by extract_changes)
    into hunks.

    Every "@@ -" header opens a new hunk. Each hunk is returned as a tuple:
    (old_line_num, old_line_count, new_line_num, new_line_count, funcname, diff)
    where diff is the list of lines following the header. Lines seen
    before the first header are not part of any hunk.
    """
    hunks = []
    header = None
    body = []
    for entry in diff:
        if not entry.startswith(b'@@ -'):
            body.append(entry)
            continue
        # a new hunk begins: close the one in progress, if any
        if header is not None:
            hunks.append(header + (body,))
        header = _extract_diff_line_params(entry)
        body = []
    # close the trailing hunk
    if header is not None:
        hunks.append(header + (body,))
    return hunks
def strip_comments(lines):
    """
    Takes a list of lines, and strips out all multiline (/* */) and
    single line (//) comments. Returns a new list of lines.

    Any number of /* */ comments on one line is handled, and the closing
    marker is only looked for after the opening one (so "/*/" opens a
    comment rather than opening and closing it).

    A line that lies entirely inside a multiline comment is dropped. A
    line on which a comment opens without closing keeps the text before
    the comment, terminated with a newline.

    Limitations: comment markers inside string literals (eg. "http://")
    are treated as real comments. If the input ends while a multiline
    comment is still open, the multiline comments are left in place and
    only // comments are stripped.
    """
    # first take out multiline style comments
    in_comment = False
    new_lines = []
    for line in lines:
        kept = []
        pos = 0
        while True:
            if in_comment:
                end = line.find(b'*/', pos)
                if end == -1:
                    break
                pos = end + 2
                in_comment = False
            else:
                start = line.find(b'/*', pos)
                if start == -1:
                    kept.append(line[pos:])
                    break
                kept.append(line[pos:start])
                pos = start + 2
                in_comment = True
        if not in_comment:
            new_lines.append(b''.join(kept))
        elif kept:
            # the rest of the line (including its newline) was swallowed
            # by the still-open comment, so terminate the kept text
            new_lines.append(b''.join(kept) + b'\n')
        # else: the whole line is inside a comment, drop it

    # annoying edge case: diff chunk ends while in comment
    # this is not handled properly, the comments are simply left in
    if in_comment:
        new_lines = lines

    # now take out single line double slash comments
    result = []
    for line in new_lines:
        slash_pos = line.find(b'//')
        if slash_pos == -1:
            result.append(line)
        else:
            result.append(line[:slash_pos] + b'\n')
    return result
def strip_trailing_whitespace(lines):
    """
    Return the lines with trailing whitespace removed and a single
    newline appended to each. Lines that are empty after stripping
    are dropped.
    """
    trimmed = (line.rstrip() for line in lines)
    return [text + b'\n' for text in trimmed if text]
def strip_leading_indicators(lines):
    """
    Return the lines with their first character removed. In a diff
    listing that character is the leading indicator: +, -, or space.
    """
    bare = []
    for entry in lines:
        bare.append(entry[1:])
    return bare
def check_diff_for_comment_removal(old, new):
    """
    Decide whether a change does nothing except remove comments.

    old is the diff listing of lines removed in a change
    new is the diff listing of lines added in a change

    Returns True when the removed lines, with comments stripped, are
    the same as the added lines. Trailing whitespace and empty lines
    are ignored in the comparison. Returns False for any other change,
    including one that differs only in trailing whitespace or empty lines.
    """
    removed = strip_leading_indicators(old)
    added = strip_leading_indicators(new)
    added_clean = strip_trailing_whitespace(added)

    # a pure whitespace change is not a comment removal: leave it alone
    if strip_trailing_whitespace(removed) == added_clean:
        return False

    # comment removal: old code minus its comments equals the new code
    return strip_trailing_whitespace(strip_comments(removed)) == added_clean
def invert_change(old, new):
    """
    Build the reverse of a change: the added lines (new) come first,
    re-marked as removals, followed by the removed lines (old),
    re-marked as additions.
    """
    flipped = []
    flipped.extend(b'-' + added[1:] for added in new)
    flipped.extend(b'+' + removed[1:] for removed in old)
    return flipped
def neutralize_change(new):
    """
    Turn added lines into context lines by replacing the leading
    indicator of each line with a space.
    """
    context = []
    for entry in new:
        context.append(b' ' + entry[1:])
    return context
def _rewrite_change_group(removed, added):
    # Reverse a pure comment removal; otherwise keep only the added
    # lines, turned into context.
    if check_diff_for_comment_removal(removed, added):
        return invert_change(removed, added)
    return neutralize_change(added)


def invert_comment_removals(cluster_diff):
    """
    Rewrite the lines of one hunk (as clustered by cluster_diffs).

    Each run of consecutive '-' and '+' lines is treated as one change.
    A change that only removes comments is reverted. Any other change
    becomes a no-op: its added lines are kept as context and its
    removed lines are dropped. All other lines are passed through
    unchanged.
    """
    result = []
    removed = []
    added = []
    for entry in cluster_diff:
        if entry.startswith(b'-'):
            removed.append(entry)
        elif entry.startswith(b'+'):
            added.append(entry)
        else:
            # a non-change line ends the change in progress, if any
            if removed or added:
                result.extend(_rewrite_change_group(removed, added))
                removed = []
                added = []
            result.append(entry)
    # the hunk may end in the middle of a change
    if removed or added:
        result.extend(_rewrite_change_group(removed, added))
    return result
def count_additions_and_removals(cluster_diff):
    """
    Count the lines of a hunk that start with '+' and with '-'.
    Returns the tuple (additions, removals).
    """
    tally = {b'+': 0, b'-': 0}
    for entry in cluster_diff:
        marker = entry[:1]
        if marker in tally:
            tally[marker] += 1
    return tally[b'+'], tally[b'-']
def recompute_diff_lines(file_clusters):
    """
    Recompute the hunk header numbers of one file after comment
    removal inversion.

    Takes the list of hunk tuples of a file. For each hunk, the new-side
    start and count of the input become the old side of the output,
    and the new side of the output is derived from the additions and
    removals left in the hunk, shifted by the net lines added by the
    earlier hunks. Hunks without any additions or removals are dropped.
    Returns the corrected list of hunk tuples.
    """
    shift = 0
    fixed = []
    for _, _, start, count, funcname, body in file_clusters:
        plus, minus = count_additions_and_removals(body)
        if not (plus or minus):
            # get rid of empty diffs
            continue
        delta = plus - minus
        fixed.append(
            (start, count, start + shift, count + delta, funcname, body))
        shift += delta
    return fixed
def convert_cluster_diff_to_lines(cluster):
    """
    Takes in a hunk tuple (old_line, old_count, new_line, new_count,
    funcname, diff) and converts it back into the form you see in
    git diff: a "@@ -123,4 +567,8 @@ ..." header line followed by the
    lines of the hunk.
    """
    header = b'@@ -%i,%i +%i,%i @@ %s\n' % cluster[:5]
    return [header, *cluster[5]]
def main(argv):
    """
    Read a git diff from the file argv[1] and write a patch restoring
    the comments it removed to the file argv[2].

    Both files are handled as bytes. The output only contains files
    that have at least one hunk with a change left in it.
    """
    # context managers make sure the files are closed even on errors
    with open(argv[1], 'rb') as infile:
        lines = infile.readlines()

    changed_files = extract_changes(lines)
    file_diff_clusters = [
        (fname, cluster_diffs(diff)) for fname, diff in changed_files]

    # generate the diffs to add back comments
    readd_comment_diff_clusters = []
    for fname, clusters in file_diff_clusters:
        new_clusters = []
        for cluster in clusters:
            new_diff = invert_comment_removals(cluster[5])
            new_clusters.append(cluster[:5] + (new_diff,))
        new_clusters = recompute_diff_lines(new_clusters)
        readd_comment_diff_clusters.append((fname, new_clusters))

    # write out the comment readding diffs to a git-readable file format
    with open(argv[2], 'wb') as outfile:
        for fname, clusters in readd_comment_diff_clusters:
            if not clusters:
                continue
            outfile.write(b'diff --git a/%s b/%s\n' % (fname, fname))
            outfile.write(b'--- a/%s\n' % fname)
            outfile.write(b'+++ b/%s\n' % fname)
            for cluster in clusters:
                outfile.writelines(convert_cluster_diff_to_lines(cluster))
            outfile.write(b'\n')
# Refuse to run on interpreters older than 3.5 (presumably because the
# script relies on %-formatting of bytes, which older versions lack).
if not sys.version_info >= (3, 5):
    raise Exception("Script requires Python 3.5 or newer")

# Script entry point: the full argument vector is handed to main().
if __name__ == "__main__":
    main(sys.argv)
Add Comment
Please, Sign In to add comment