Advertisement
Guest User

Search/[delete] multiple word processor files

a guest
Sep 5th, 2014
320
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env python3
  2.  
  3. #Dependencies: catdoc, Python 3.x
  4. #on Ubuntu, you can install it with this: sudo apt-get install catdoc
  5.  
  6. import subprocess, shlex, glob, os, zipfile, re, html.parser, sys;
  7.  
  8. def files_with_word(word, path, delete=None):
  9.     if delete==None or delete=="delete" or delete==True or delete==False:
  10.         pass;
  11.     else:
  12.         print("The third argument must either be the word, ‘delete’, or it must be omitted.\n\nNo files deleted.");
  13.         return;
  14.     files_with_word_list=[];
  15.     for x in glob.glob(os.path.join(path, "*.doc")):
  16.         command=shlex.split('catdoc -w "' + x + '"');
  17.         file_text, err=subprocess.Popen(command, stdout=subprocess.PIPE).communicate();
  18.         if word in str(file_text):
  19.             files_with_word_list.append(x);
  20.     for x in glob.glob(os.path.join(path, "*.docx"))+glob.glob(os.path.join(path, "*.odt")):
  21.         z=zipfile.ZipFile(x);
  22.         zippath="word/document.xml";
  23.         if x.endswith("odt")==True:
  24.             zippath="content.xml";
  25.         file_text=z.read(zippath).decode();
  26.         file_text=re.sub(r"<[^>]*>", r"", file_text)[1:];
  27.         file_text=html.parser.HTMLParser().unescape(file_text);
  28.         #print(file_text); #This is for debugging purposes.
  29.         if word in file_text:
  30.             files_with_word_list.append(x);
  31.     print("\n".join(files_with_word_list));
  32.     if delete=="delete" or delete==True:
  33.         for x in files_with_word_list:
  34.             print("Deleting " + x);
  35.             os.remove(x);
  36.  
  37. if len(sys.argv)<=2 or len(sys.argv)>4:
  38.     if len(sys.argv)==2 and sys.argv[1] not in {"--help", "-h", "--h", "-help"}:
  39.         answer=input("Do you wish to delete the files? ");
  40.         if answer.lower().strip() in {"y", "yes", "sure", "okay", "yeah", "yep", "yea", "of course", "certainly", "assuredly", "affirmative", "why not"}:
  41.             files_with_word(sys.argv[1], "", "delete");
  42.         else:
  43.             files_with_word(sys.argv[1], "");
  44.             print("(Files not deleted.)");
  45.     else:
  46.         print("This program searches word processor files (doc, docx and odt) for words or phrases, and optonally allows you to delete any matches it finds.\nUsage:\nfindwpdir.py phrase path [delete]\n(You may omit the path for the current directory.)");
  47. else:
  48.     try:
  49.         files_with_word(sys.argv[1], sys.argv[2], sys.argv[3]);
  50.     except IndexError:
  51.         files_with_word(sys.argv[1], sys.argv[2]);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement