Guest User

Untitled

a guest
Jan 19th, 2018
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.09 KB | None | 0 0
  1. from typing import Union, Optional, List, Dict, Set
  2. from enum import Enum
  3. from os import walk
  4. from os.path import realpath, join, isfile, isdir, basename, getsize
  5. from hashlib import md5,sha256
  6. from argparse import ArgumentParser
  7. from json import loads as unpickle, dumps as pickle
  8.  
  9. import sys
  10.  
  11.  
  12. def is_blank(text:Union[str,chr] = None, cast:bool = True) -> bool:
  13. if text is None:
  14. return True
  15.  
  16. if not isinstance(text, (str,chr)) and not cast:
  17. return False
  18.  
  19. _text = str(text)
  20. for c in _text:
  21. if c != ' ' and c != '\t' and c != '\n' and c != '\r' and c != chr(160):
  22. return False
  23.  
  24. return True
  25.  
  26. def is_not_blank(text:Union[str,chr] = None) -> bool:
  27. return not is_blank(text)
  28.  
  29. def trim_or_None_if_empty(text = None, strict:bool = False) -> Optional[str]:
  30. if text is None:
  31. return None
  32.  
  33. if strict and not isinstance(text, (str,chr)):
  34. as_text = None
  35. try:
  36. as_text = str(text)
  37. if as_text:
  38. as_text = ": " + as_text
  39. except:
  40. pass
  41. raise ValueError('A string or unicode character must be provided. A {} was given instead{}'.format(type(text), as_text))
  42.  
  43. trimmed = str(text).strip()
  44. if len(trimmed) < 1:
  45. return None
  46.  
  47. return trimmed
  48.  
  49. class Algorithm(Enum):
  50. SHA256 = 'sha-256'
  51. MD5 = 'md5'
  52.  
  53. @classmethod
  54. def values(cls) -> List['Algorithm']:
  55. return (Algorithm.SHA256, Algorithm.MD5)
  56.  
  57. @classmethod
  58. def from_string(cls, text: str):
  59. trimmed = trim_or_None_if_empty(text)
  60. if trimmed is None:
  61. return None
  62.  
  63. trimmed = trimmed.upper()
  64. for t in cls.values():
  65. if (t.name == trimmed
  66. or t.value.upper() == trimmed
  67. or t.value.replace('-', '_').upper() == trimmed):
  68. return t
  69.  
  70. return None
  71.  
  72.  
  73. class File(object):
  74. def __init__(self, path:str, checksums:Dict[Algorithm,str] = {}):
  75. self.path = path
  76. self.checksums = checksums or {}
  77. self.size = 0
  78.  
  79. @property
  80. def path(self):
  81. return self.__path
  82.  
  83. @path.setter
  84. def path(self, value:str = None) -> 'File':
  85. self.__path = trim_or_None_if_empty(value)
  86. return self
  87.  
  88. @property
  89. def name(self):
  90. if self.path:
  91. return basename(self.path)
  92.  
  93. @property
  94. def size(self):
  95. return self.__size
  96.  
  97. @size.setter
  98. def size(self, bytes:int = None):
  99. self.__size = 0 if bytes is None or bytes < 0 else bytes
  100.  
  101. def __getstate__(self):
  102. data = {'py/object': self.__class__.__name__ }
  103. data['path'] = self.path
  104. return data
  105.  
  106. def __str__(self):
  107. try:
  108. return pickle(self)
  109. except:
  110. return self.path
  111.  
  112.  
  113. class Column(Enum):
  114. PATH = 'Path'
  115. DIRECTORY = 'Directory'
  116. NAME = 'Name'
  117. SHA256 = 'SHA-256'
  118. MD5 = 'MD5'
  119. SIZE = "Size"
  120.  
  121. @classmethod
  122. def values(cls) -> List['Column']:
  123. return (Column.NAME, Column.SHA256, Column.MD5, Column.PATH, Column.DIRECTORY, Column.SIZE)
  124.  
  125. @classmethod
  126. def from_string(cls, text: str):
  127. trimmed = trim_or_None_if_empty(text)
  128. if trimmed is None:
  129. return None
  130.  
  131. trimmed = trimmed.upper()
  132. for t in cls.values():
  133. if (t.name == trimmed
  134. or t.value.upper() == trimmed
  135. or t.value.replace('-', '_').upper() == trimmed):
  136. return t
  137.  
  138. return None
  139.  
  140.  
  141. def calculate_checksum(file:str, algorithm:Algorithm):
  142. hash = None
  143. if algorithm is Algorithm.MD5:
  144. hash = md5()
  145. elif algorithm is Algorithm.SHA256:
  146. hash = sha256()
  147. else:
  148. raise ValueError('Unsuported algorithm: ' + str(algorithm))
  149.  
  150. with open(file, "rb") as f:
  151. for chunk in iter(lambda: f.read(4096), b""):
  152. hash.update(chunk)
  153. return hash.hexdigest()
  154.  
  155. arg_parser = ArgumentParser(description='Calculate md5 or sha256 of file(s)')
  156. arg_parser.add_argument('file')
  157. arg_parser.add_argument('--sha256', action='store_true', default=False)
  158. arg_parser.add_argument('--md5', action='store_true', default=True)
  159.  
  160.  
  161. args = arg_parser.parse_args()
  162.  
  163. def format_files(files:List[File] = [], columns:List[Column] = None, delimiter:chr = '|') -> str:
  164. _columns = columns
  165. if columns is None or len(columns) < 1:
  166. colSet = set([])
  167. for f in files:
  168. if is_not_blank(f.path):
  169. colSet.add(Column.PATH)
  170. elif is_not_blank(f.name):
  171. colSet.add(Column.NAME)
  172.  
  173. if f.checksums is not None and len(f.checksums) > 0:
  174. if f.checksums.get(Algorithm.SHA256,None) is not None:
  175. colSet.add(Column.SHA256)
  176.  
  177. if f.checksums.get(Algorithm.MD5,None) is not None:
  178. colSet.add(Column.MD5)
  179.  
  180. _columns = list(colSet)
  181.  
  182. for f in files:
  183. print(format_file(f, _columns, delimiter), file=sys.stdout)
  184.  
  185.  
  186. def format_file(file:File = None, columns:List[Column] = [Column.PATH, Column.SHA256, Column.MD5, Column.SIZE], delimiter:chr = '|') -> str:
  187. if file is None:
  188. return ''
  189.  
  190. if columns is None or len(columns) < 1:
  191. return ''
  192.  
  193. result = ''
  194. for col in columns:
  195. if result:
  196. result = result + delimiter
  197. if col is Column.PATH:
  198. result = result + ('' if file.path is None else file.path)
  199. elif col is Column.DIRECTORY:
  200. result = result + ('' if file.directory is None else file.directory)
  201. elif col is Column.NAME:
  202. result = result + ('' if file.name is None else file.name)
  203. elif col is Column.SHA256:
  204. sha256_checksum = None if file.checksums is None else file.checksums.get(Algorithm.SHA256, None)
  205. result = result + ('' if sha256_checksum is None else sha256_checksum)
  206. elif col is Column.MD5:
  207. md5_checksum = None if file.checksums is None else file.checksums.get(Algorithm.MD5, None)
  208. result = result + ('' if md5_checksum is None else md5_checksum)
  209. elif col is Column.SIZE:
  210. result = result + '0' if file.size is None else str(file.size)
  211. else:
  212. raise ValueError('Unsupported column for formatting')
  213. return result
  214.  
  215. file_paths = [] # List which will store all of the full filepaths.
  216. if isfile(args.file):
  217. f = File(realpath(args.file))
  218. if args.md5:
  219. f.checksums[Algorithm.MD5] = calculate_checksum(f.path, Algorithm.MD5)
  220. if args.sha256:
  221. f.checksums[Algorithm.SHA256] = calculate_checksum(f.path, Algorithm.SHA256)
  222.  
  223. file_paths.append(f)
  224. else:
  225. # Walk the tree.
  226. for root, directories, files in walk(args.file):
  227. for filename in files:
  228. # Join the two strings in order to form the full filepath.
  229. filepath = join(root, filename)
  230. f = File(realpath(filepath))
  231. if args.md5:
  232. f.checksums[Algorithm.MD5] = calculate_checksum(f.path, Algorithm.MD5)
  233. if args.sha256:
  234. f.checksums[Algorithm.SHA256] = calculate_checksum(f.path, Algorithm.SHA256)
  235. f.size = getsize(filepath)
  236. file_paths.append(f)
  237.  
  238. format_files(file_paths)
Add Comment
Please, Sign In to add comment