Guest User

Text replacement script

a guest
Feb 22nd, 2019
131
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.79 KB | None | 0 0
#!/usr/bin/python

'''
   This script runs on Python 3.6+.

   It replaces text in files, scanning directories recursively;
   see -h for help.
'''
  9.  
  10.  
import os
import sys
from argparse import ArgumentParser
  13.  
  14.  
  15. def format_link(filepath):
  16.     if os.path.islink(filepath):
  17.         return f'{filepath} -> {os.path.realpath(filepath)}'
  18.     else:
  19.         return filepath
  20.  
  21. def open_and_read_file(filepath, encodings):
  22.     for e in encodings.split(','):
  23.         try:
  24.             file = open(filepath, 'r+', encoding=e)
  25.             text = file.read()
  26.             return file, text
  27.         except UnicodeDecodeError:
  28.             file.close()
  29.         except Exception as e:
  30.             print(format_link(filepath), e, sep=': ')
  31.             break
  32.     return None, None
  33.  
  34. def replace_in_place(filepath, args):
  35.     file, text = open_and_read_file(filepath, args.e)
  36.     if file and text:
  37.         if args.old in text:
  38.             print(format_link(filepath))
  39.             if args.c > 0:
  40.                 r = text.replace(args.old, args.new, args.c)
  41.                 file.seek(0)
  42.                 file.write(r)
  43.                 file.truncate()
  44.         file.close()
  45.  
  46. def resolve_file_path(fp, followlinks):
  47.     if os.path.islink(fp) and not followlinks:
  48.         return False
  49.     if os.path.exists(fp) and os.path.isfile(fp):
  50.         return fp
  51.     else:
  52.         return False
  53.        
  54. def filter_files(filenames, dirpath, args):
  55.     if args.ext:
  56.         extensions = args.ext.split(',')
  57.         filt_func = lambda fn: any(fn.lower().endswith(e) for e in extensions)
  58.         filenames = filter(filt_func, filenames)
  59.     for fn in filenames:
  60.         fp = os.path.join(dirpath, fn)
  61.         fp = resolve_file_path(fp, followlinks=args.f)
  62.         if fp:
  63.             real_path = os.path.realpath(fp)
  64.             if real_path not in args.processed_files:
  65.                 args.processed_files.append(real_path)
  66.                 yield fp
  67.             else:
  68.                 print(f"File '{real_path}' has been already processed")
  69.  
  70. def file_list_replace(filenames, dirpath, args):
  71.     filepaths = filter_files(filenames, dirpath, args)
  72.     for fp in filepaths:
  73.         replace_in_place(fp, args)
  74.  
  75. def recursive_replace(path, args):
  76.     walk = os.walk(path, onerror=lambda e: print(e), followlinks=args.f)
  77.     for dirpath, dirnames, filenames in walk:
  78.         file_list_replace(filenames, dirpath, args)
  79.  
  80.  
  81. def parse_arguments():
  82.     desc = 'Replaces text in files scanning directories recursively. ' \
  83.            'Matched files names are printed to stdout.'
  84.     epi = f'Example: {sys.argv[0]} old.txt new.txt /path/to/dir'
  85.    
  86.     arg_parser = ArgumentParser(description=desc, epilog=epi)
  87.     sys_encoding = sys.getdefaultencoding()
  88.     files_encoding = f'utf-8,cp1251'
  89.     args = {
  90.         '-c':   {   'metavar': 'N',
  91.                     'type': int,
  92.                     'default': 1,
  93.                     'help': 'maximum replacements count in one file; '
  94.                             '0 disables replacement, so only search will '
  95.                             'be performed'
  96.                 },
  97.         '-ext': {   'metavar': 'EXTENSIONS_LIST',
  98.                     'default': '',
  99.                     'help': 'comma separated lower case list of file extensions; '
  100.                             'only files with extensions specified will '
  101.                             'be processed; default is to process all files'
  102.                 },
  103.         '-e':   {   'metavar': 'ENCODINGS_LIST',
  104.                     'default': files_encoding,
  105.                     'help': 'comma separated list of encodings '
  106.                             f"to open files with; default is '{files_encoding}'"
  107.                 },
  108.         '-es':  {   'metavar': 'ENCODING',
  109.                     'default': sys_encoding,
  110.                     'help': 'encodings for `search_for.txt`; '
  111.                             f"default is '{sys_encoding}' (system)"
  112.                 },
  113.         '-er':  {   'metavar': 'ENCODING',
  114.                     'default': sys_encoding,
  115.                     'help': 'encodings for `replace_with.txt`; '
  116.                             f"default is '{sys_encoding}' (system)"
  117.                 },
  118.         '-f':   {   'action': 'store_true',
  119.                     'help': 'follow symbolic links; default is not to follow '
  120.                             'except those that explicitly passed as arguments'
  121.                 },
  122.         'old':  {   'metavar': '<search_for.txt>',
  123.                     'help': 'contents of this file will be searched'
  124.                 },
  125.         'new':  {   'metavar': '<replace_with.txt>',
  126.                     'help': 'if found in searched files, contents of '
  127.                             '`search_for.txt` will be replaced with contents '
  128.                             'of `replace_with.txt`'
  129.                 },
  130.         'dirs': {   'metavar': 'DIR_OR_FILE',
  131.                     'nargs': '+',
  132.                     'help': 'directory or file to be processed; '
  133.                             'directories are scanned recursively'
  134.                 }
  135.     }
  136.  
  137.     for a in args:
  138.         arg_parser.add_argument(a, **args[a])
  139.     args = arg_parser.parse_args()
  140.     return args
  141.  
  142.  
  143. def main():
  144.     args = parse_arguments()
  145.    
  146.     try:
  147.         args.old = open(args.old, encoding=args.es).read()
  148.         args.new = open(args.new, encoding=args.er).read()
  149.     except Exception as e:
  150.         print(str(e).capitalize())
  151.         exit(1)
  152.    
  153.     dirs = [d for d in args.dirs if os.path.isdir(d)]
  154.     files = [f for f in args.dirs if os.path.isfile(f)]
  155.  
  156.     if not files and not dirs:
  157.         print('Nothing to do')
  158.         exit()
  159.  
  160.     args.processed_files = []
  161.  
  162.     for d in dirs:
  163.         recursive_replace(d, args)
  164.  
  165.     path = ''
  166.     args.f = True
  167.     file_list_replace(files, path, args)
  168.  
  169.  
  170. if __name__ == '__main__':
  171.     main()
Add Comment
Please, Sign In to add comment