Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from typing import Union, Optional, List, Dict, Set
- from enum import Enum
- from os import walk
- from os.path import realpath, join, isfile, isdir, basename, getsize
- from hashlib import md5,sha256
- from argparse import ArgumentParser
- from json import loads as unpickle, dumps as pickle
- import sys
def is_blank(text: Optional[str] = None, cast: bool = True) -> bool:
    """Report whether ``text`` is missing or made up only of blank characters.

    The characters treated as blank are space, tab, line feed, carriage
    return and the non-breaking space (``chr(160)``).

    Args:
        text: Value to inspect. ``None`` counts as blank.
        cast: When true (the default) a non-string value is converted with
            ``str()`` before being inspected. When false a non-string value
            is never considered blank.

    Returns:
        ``True`` if ``text`` is ``None``, empty, or contains only blank
        characters; ``False`` otherwise.
    """
    if text is None:
        return True
    # ``chr`` is a builtin function, not a type, so it must not be handed to
    # isinstance(); a single character is a plain ``str`` anyway.
    if not isinstance(text, str) and not cast:
        return False
    blanks = ' \t\n\r' + chr(160)
    return all(char in blanks for char in str(text))
def is_not_blank(text: Union[str, chr] = None) -> bool:
    """Report whether ``text`` holds at least one non-blank character.

    This is the logical negation of ``is_blank(text)`` called with its
    default settings.
    """
    blank = is_blank(text)
    return False if blank else True
def trim_or_None_if_empty(text=None, strict: bool = False) -> Optional[str]:
    """Strip surrounding whitespace from ``text``; return ``None`` if nothing is left.

    Args:
        text: Value to trim. Non-string values are converted with ``str()``
            unless ``strict`` is set.
        strict: When true, reject any value that is not a ``str``.

    Returns:
        The stripped text, or ``None`` when ``text`` is ``None`` or strips
        down to the empty string.

    Raises:
        ValueError: If ``strict`` is true and ``text`` is not a string.
    """
    if text is None:
        return None
    # ``chr`` is a function, not a type; checking against ``str`` alone is
    # what lets the intended ValueError (rather than a TypeError) surface.
    if strict and not isinstance(text, str):
        try:
            rendered = str(text)
        except Exception:
            # The value cannot even be rendered; report its type only.
            rendered = ''
        suffix = ': ' + rendered if rendered else ''
        raise ValueError(
            'A string or unicode character must be provided. A {} was given instead{}'.format(type(text), suffix)
        )
    trimmed = str(text).strip()
    return trimmed or None
class Algorithm(Enum):
    """Checksum algorithms this script knows about."""

    SHA256 = 'sha-256'
    MD5 = 'md5'

    @classmethod
    def values(cls) -> List['Algorithm']:
        """Return every algorithm, SHA-256 first."""
        return (cls.SHA256, cls.MD5)

    @classmethod
    def from_string(cls, text: str):
        """Look up an algorithm by name or value, ignoring case.

        The trimmed, upper-cased ``text`` is compared against each member's
        name, its value, and its value with ``-`` replaced by ``_``.
        Returns ``None`` when ``text`` is empty or matches no member.
        """
        wanted = trim_or_None_if_empty(text)
        if wanted is not None:
            wanted = wanted.upper()
            for member in cls.values():
                spellings = (
                    member.name,
                    member.value.upper(),
                    member.value.replace('-', '_').upper(),
                )
                if wanted in spellings:
                    return member
        return None
class File(object):
    """A file path together with its checksums and size in bytes."""

    def __init__(self, path: str, checksums: Optional[Dict['Algorithm', str]] = None):
        """Create a record for ``path``.

        Args:
            path: Location of the file; surrounding whitespace is trimmed.
            checksums: Optional mapping of algorithm to hex digest. A fresh
                empty dict is used when omitted or empty.
        """
        self.path = path
        self.checksums = checksums or {}
        self.size = 0

    @property
    def path(self):
        """Trimmed path of the file, or ``None`` when no path was given."""
        return self.__path

    @path.setter
    def path(self, value: str = None) -> None:
        self.__path = trim_or_None_if_empty(value)

    @property
    def name(self):
        """Final component of ``path``, or ``None`` when there is no path."""
        if self.path:
            return basename(self.path)
        return None

    @property
    def directory(self):
        """Directory part of ``path``, or ``None`` when there is no path."""
        # Imported locally because the module only imports other os.path names.
        from os.path import dirname
        if self.path:
            return dirname(self.path)
        return None

    @property
    def size(self):
        """Size in bytes; never negative."""
        return self.__size

    @size.setter
    def size(self, value: int = None):
        # Missing or negative sizes are normalised to zero.
        self.__size = 0 if value is None or value < 0 else value

    def __getstate__(self):
        """Return the serialisable state: a type tag plus the path."""
        data = {'py/object': self.__class__.__name__}
        data['path'] = self.path
        return data

    def __str__(self):
        """Return the state as JSON text, falling back to the bare path."""
        try:
            # ``pickle`` is this module's alias for json.dumps; it cannot
            # encode the instance itself, only its state dictionary.
            return pickle(self.__getstate__())
        except (TypeError, ValueError):
            # __str__ must return a string even when there is no path.
            return self.path or ''
class Column(Enum):
    """Columns that can appear in the formatted output."""

    PATH = 'Path'
    DIRECTORY = 'Directory'
    NAME = 'Name'
    SHA256 = 'SHA-256'
    MD5 = 'MD5'
    SIZE = "Size"

    @classmethod
    def values(cls) -> List['Column']:
        """Return every column, starting with NAME and ending with SIZE."""
        return (cls.NAME, cls.SHA256, cls.MD5, cls.PATH, cls.DIRECTORY, cls.SIZE)

    @classmethod
    def from_string(cls, text: str):
        """Look up a column by name or value, ignoring case.

        The trimmed, upper-cased ``text`` is compared against each member's
        name, its value, and its value with ``-`` replaced by ``_``.
        Returns ``None`` when ``text`` is empty or matches no member.
        """
        wanted = trim_or_None_if_empty(text)
        if wanted is not None:
            wanted = wanted.upper()
            for member in cls.values():
                spellings = (
                    member.name,
                    member.value.upper(),
                    member.value.replace('-', '_').upper(),
                )
                if wanted in spellings:
                    return member
        return None
def calculate_checksum(file: str, algorithm: 'Algorithm', chunk_size: int = 65536) -> str:
    """Compute the hex digest of a file's contents.

    Args:
        file: Path of the file to read (opened in binary mode).
        algorithm: ``Algorithm.MD5`` or ``Algorithm.SHA256``.
        chunk_size: Number of bytes read per iteration; must be positive.
            It affects memory use only, never the resulting digest.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        ValueError: If ``algorithm`` is not supported or ``chunk_size`` is
            smaller than one.
    """
    if algorithm is Algorithm.MD5:
        digest = md5()
    elif algorithm is Algorithm.SHA256:
        digest = sha256()
    else:
        raise ValueError('Unsupported algorithm: ' + str(algorithm))
    if chunk_size < 1:
        # A zero-byte read returns b"" immediately and would silently hash
        # nothing, so reject it up front.
        raise ValueError('chunk_size must be a positive number of bytes')
    with open(file, "rb") as stream:
        # Read in chunks so large files are never held in memory whole.
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
# Command-line interface: one positional path plus one flag per algorithm.
arg_parser = ArgumentParser(description='Calculate md5 or sha256 of file(s)')
arg_parser.add_argument('file', help='file to hash, or directory to walk recursively')
arg_parser.add_argument('--sha256', action='store_true', default=False,
                        help='include the SHA-256 checksum')
# A store_true flag whose default is already True can never change anything,
# so the flag defaults to False and the MD5 fallback is applied below.
arg_parser.add_argument('--md5', action='store_true', default=False,
                        help='include the MD5 checksum (used when no algorithm is given)')
args = arg_parser.parse_args()
if not (args.md5 or args.sha256):
    # No algorithm requested: keep MD5 as the default.
    args.md5 = True
def format_files(files: List['File'] = None, columns: List['Column'] = None, delimiter: str = '|') -> str:
    """Print one delimited line per file and return the lines joined by newlines.

    Args:
        files: Files to render. ``None`` is treated as an empty list.
        columns: Columns to emit, in order. When omitted or empty the columns
            are detected from the data: PATH (or NAME for a file without a
            path) plus every checksum column that at least one file has.
        delimiter: Text placed between columns.

    Returns:
        The printed lines joined with ``'\\n'`` (empty string for no files).
    """
    entries = [] if files is None else list(files)
    selected = columns
    if columns is None or len(columns) < 1:
        detected = set()
        for entry in entries:
            if is_not_blank(entry.path):
                detected.add(Column.PATH)
            elif is_not_blank(entry.name):
                detected.add(Column.NAME)
            if entry.checksums:
                if entry.checksums.get(Algorithm.SHA256) is not None:
                    detected.add(Column.SHA256)
                if entry.checksums.get(Algorithm.MD5) is not None:
                    detected.add(Column.MD5)
        # Set iteration order is not stable between runs, so emit the
        # detected columns in a fixed order instead.
        preferred = (Column.PATH, Column.NAME, Column.SHA256, Column.MD5)
        selected = [column for column in preferred if column in detected]
    lines = []
    for entry in entries:
        line = format_file(entry, selected, delimiter)
        print(line, file=sys.stdout)
        lines.append(line)
    return '\n'.join(lines)
def _column_text(file, column) -> str:
    """Render one column of ``file`` as text; a missing value becomes ''."""
    if column is Column.PATH:
        value = file.path
    elif column is Column.DIRECTORY:
        value = getattr(file, 'directory', None)
        if value is None and file.path:
            # The object exposes no directory; derive it from the path.
            from os.path import dirname
            value = dirname(file.path)
    elif column is Column.NAME:
        value = file.name
    elif column is Column.SHA256:
        value = None if file.checksums is None else file.checksums.get(Algorithm.SHA256, None)
    elif column is Column.MD5:
        value = None if file.checksums is None else file.checksums.get(Algorithm.MD5, None)
    elif column is Column.SIZE:
        value = 0 if file.size is None else file.size
    else:
        raise ValueError('Unsupported column for formatting')
    return '' if value is None else str(value)


def format_file(file: 'File' = None, columns: List['Column'] = (Column.PATH, Column.SHA256, Column.MD5, Column.SIZE), delimiter: str = '|') -> str:
    """Render ``file`` as a single delimited line.

    Args:
        file: The file to render; ``None`` yields an empty string.
        columns: Columns to emit, in order; ``None`` or an empty sequence
            yields an empty string.
        delimiter: Text placed between columns.

    Returns:
        The column values joined by ``delimiter``. Every requested column
        occupies a position even when its value is empty, so fields never
        shift to the left.

    Raises:
        ValueError: If ``columns`` contains something that is not a known
            ``Column`` member.
    """
    if file is None:
        return ''
    if columns is None or len(columns) < 1:
        return ''
    # Joining the per-column texts keeps earlier columns intact when SIZE is
    # emitted and writes the delimiter even after an empty leading column.
    return delimiter.join(_column_text(file, column) for column in columns)
def _describe(path):
    """Build a File for ``path`` with the requested checksums and its size."""
    entry = File(realpath(path))
    if args.md5:
        entry.checksums[Algorithm.MD5] = calculate_checksum(entry.path, Algorithm.MD5)
    if args.sha256:
        entry.checksums[Algorithm.SHA256] = calculate_checksum(entry.path, Algorithm.SHA256)
    # Record the size for single files as well as for walked ones.
    entry.size = getsize(path)
    return entry


file_paths = []  # Every File found at, or below, the requested path.
if isfile(args.file):
    file_paths.append(_describe(args.file))
else:
    # Walk the tree and describe every regular entry that walk() reports.
    for root, directories, files in walk(args.file):
        for filename in files:
            # Join the two strings in order to form the full filepath.
            file_paths.append(_describe(join(root, filename)))
format_files(file_paths)
Add Comment
Please, Sign In to add comment