Guest User

Untitled

a guest
Jan 18th, 2018
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.10 KB | None | 0 0
  1. #!/usr/bin/env python
  2. import os
  3. import argparse
  4. from collections import Counter
  5.  
  6. import ast
  7. from nltk import pos_tag
  8.  
  9.  
  10. DEFAULT_PATH = '.'
  11. DEFAULT_STATS_SIZE = 10
  12. DEFAULT_SCANNING_PROJECTS = (
  13. 'django',
  14. 'flask',
  15. 'pyramid',
  16. 'reddit',
  17. 'requests',
  18. 'sqlalchemy',
  19. )
  20.  
  21.  
  22. def get_console_or_default_args(default_path, default_stats_size):
  23. parser = argparse.ArgumentParser(description='Finds verbs in python code')
  24. parser.add_argument(
  25. '--path',
  26. '-p',
  27. default=default_path,
  28. help='a path of scanning directory'
  29. )
  30. parser.add_argument(
  31. '--projects',
  32. help='a comma separated list of projects we are going to scan'
  33. )
  34. parser.add_argument(
  35. '--stats_limit',
  36. '-l',
  37. default=default_stats_size,
  38. type=int,
  39. help='a limit of verbs in stats'
  40. )
  41. return parser.parse_args()
  42.  
  43.  
  44. def seek_python_files(_path, limit_of_files_amount=None):
  45. paths_to_python_files = []
  46. for dir_name, _dirs, files in os.walk(_path, topdown=True):
  47. python_files = filter(lambda x: x.endswith('.py'), files)
  48. paths_to_python_files += map(lambda x, d=dir_name: os.path.join(d, x),
  49. python_files)
  50. return paths_to_python_files[:limit_of_files_amount]
  51.  
  52.  
  53. def load_file_content(path_to_file):
  54. with open(path_to_file, encoding='utf-8') as file_handler:
  55. return file_handler.read()
  56.  
  57.  
  58. def make_ast_trees_list(files_paths_list):
  59. trees = []
  60. for file_path in files_paths_list:
  61. try:
  62. trees.append(ast.parse(load_file_content(file_path)))
  63. except SyntaxError:
  64. pass
  65. return trees
  66.  
  67.  
  68. def get_nodes_names_from_ast_trees(ast_trees):
  69. names_of_ast_nodes = []
  70. for ast_tree in ast_trees:
  71. names_of_ast_nodes += [node.name.lower() for node in ast.walk(ast_tree)
  72. if isinstance(node, ast.FunctionDef)]
  73. return names_of_ast_nodes
  74.  
  75.  
  76. def is_verb(word):
  77. if not word:
  78. return False
  79. index_of_tuple_tagged_word = 0
  80. tagged_as_verb = 'VB'
  81. word, nltk_tag = pos_tag([word])[index_of_tuple_tagged_word]
  82. return nltk_tag.startswith(tagged_as_verb)
  83.  
  84.  
  85. def get_verbs_from_nodes_names(nodes_names_list):
  86. verbs = []
  87. for node_name in nodes_names_list:
  88. verbs += [word for word in node_name.split('_') if is_verb(word)]
  89. return verbs
  90.  
  91.  
  92. def get_top_verbs(_verbs_list, _stats_size):
  93. return Counter(_verbs_list).most_common(_stats_size)
  94.  
  95.  
  96. def print_project_stats(verbs_list, _project):
  97. if verbs_list:
  98. print('The stats of using verbs in function names of {} project:'.
  99. format(_project))
  100. for verb, count in verbs_list:
  101. print(' \'{}\' used {} times'.format(verb, count))
  102. else:
  103. print('There is no stats for \'{}\' project'.format(_project))
  104. print('\r')
  105.  
  106.  
  107. def update_total_stats(_verbs_list, _verbs_dict):
  108. for verb, count in _verbs_list:
  109. _verbs_dict[verb] = _verbs_dict.get(verb, 0) + count
  110. return _verbs_dict
  111.  
  112.  
  113. def print_total_stats(verb_dict):
  114. if verb_dict:
  115. print('Total stats:')
  116. for verb, count in sorted(verb_dict.items(), key=lambda x: x[1],
  117. reverse=True):
  118. print(' \'{}\' used {} times'.format(verb, count))
  119.  
  120.  
  121. if __name__ == '__main__':
  122. args = get_console_or_default_args(DEFAULT_PATH, DEFAULT_STATS_SIZE)
  123.  
  124. if args.projects:
  125. scanning_projects = args.projects.split(',')
  126. else:
  127. scanning_projects = DEFAULT_SCANNING_PROJECTS
  128.  
  129. total_verbs_dict = {}
  130.  
  131. for project in scanning_projects:
  132. scanning_path = os.path.join(args.path, project)
  133. list_of_python_files = seek_python_files(scanning_path)
  134. ast_trees_list = make_ast_trees_list(list_of_python_files)
  135. list_of_nodes_names = get_nodes_names_from_ast_trees(ast_trees_list)
  136. list_of_verbs = get_verbs_from_nodes_names(list_of_nodes_names)
  137. top_verbs_list = get_top_verbs(list_of_verbs, args.stats_limit)
  138.  
  139. print_project_stats(top_verbs_list, project)
  140. total_verbs_dict = update_total_stats(top_verbs_list, total_verbs_dict)
  141.  
  142. print_total_stats(total_verbs_dict)
Add Comment
Please, Sign In to add comment