Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- #Dependencies: catdoc, Python 3.x
- #on Ubuntu, you can install it with this: sudo apt-get install catdoc
- import subprocess, shlex, glob, os, zipfile, re, html.parser, sys;
def files_with_word(word, path, delete=None):
    """Find word-processor files in *path* that contain *word*.

    Searches ``*.doc`` files (text extracted with the external ``catdoc``
    program) and ``*.docx`` / ``*.odt`` files (XML read straight from the
    zip container) directly inside *path*; sub-directories are not visited.
    The matching paths are printed one per line.

    Args:
        word: Text to look for; matched as a case-sensitive substring.
        path: Directory to search. An empty string means the current
            directory.
        delete: ``"delete"`` or ``True`` removes every matching file after
            listing it. ``None`` or ``False`` only lists. Any other value is
            rejected with a message and nothing is searched or deleted.

    Returns:
        The list of matching file paths, or ``None`` when *delete* was
        rejected.
    """
    # Local import: only needed here, to decode catdoc's output.
    import locale

    if delete not in (None, "delete", True, False):
        print("The third argument must either be the word, ‘delete’, or it must be omitted.\n\nNo files deleted.")
        return None

    matches = []

    # The catdoc output is decoded with the locale encoding, on the
    # assumption that catdoc writes in the locale charset by default.
    # Searching str(bytes) would look at the "b'...'" repr instead of the
    # text.
    output_encoding = locale.getpreferredencoding(False)
    for doc_path in glob.glob(os.path.join(path, "*.doc")):
        # An argv list (not a shell-like string) keeps file names containing
        # quotes or backslashes intact.
        result = subprocess.run(["catdoc", "-w", doc_path], stdout=subprocess.PIPE)
        doc_text = result.stdout.decode(output_encoding, errors="replace")
        if word in doc_text:
            matches.append(doc_path)

    archive_paths = (glob.glob(os.path.join(path, "*.docx"))
                     + glob.glob(os.path.join(path, "*.odt")))
    for archive_path in archive_paths:
        # ODF keeps the body in content.xml; OOXML in word/document.xml.
        if archive_path.lower().endswith(".odt"):
            member = "content.xml"
        else:
            member = "word/document.xml"
        try:
            with zipfile.ZipFile(archive_path) as archive:
                xml_text = archive.read(member).decode()
        except (zipfile.BadZipFile, KeyError, OSError, UnicodeDecodeError) as exc:
            # A damaged file must not abort the whole search; it is skipped
            # and therefore never deleted.
            print("Skipping unreadable file " + archive_path + ": " + str(exc),
                  file=sys.stderr)
            continue
        # Strip every tag (including the XML declaration), then turn
        # entities such as &amp; back into characters.  The text is kept
        # whole: slicing off a leading character could drop a real letter.
        plain_text = html.unescape(re.sub(r"<[^>]*>", "", xml_text))
        #print(plain_text)  # This is for debugging purposes.
        if word in plain_text:
            matches.append(archive_path)

    print("\n".join(matches))

    if delete in ("delete", True):
        for match in matches:
            print("Deleting " + match)
            os.remove(match)

    return matches
def main(argv=None):
    """Command-line entry point.

    Usage: ``findwpdir.py phrase [path [delete]]``

    * ``phrase path [delete]`` searches *path* and, when the third argument
      is the word ``delete``, removes the matches.
    * ``phrase`` alone searches the current directory after asking
      interactively whether the matches should be deleted.
    * A help flag, no arguments, or too many arguments prints the usage text.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv
    argc = len(argv)

    # Checking the argument count up front replaces an ``except IndexError``
    # around the whole call, which would also have swallowed IndexErrors
    # raised inside the search and silently run it a second time.
    if argc == 4:
        files_with_word(argv[1], argv[2], argv[3])
        return
    if argc == 3:
        files_with_word(argv[1], argv[2])
        return

    if argc == 2 and argv[1] not in {"--help", "-h", "--h", "-help"}:
        answer = input("Do you wish to delete the files? ")
        affirmative = {"y", "yes", "sure", "okay", "yeah", "yep", "yea",
                       "of course", "certainly", "assuredly", "affirmative",
                       "why not"}
        if answer.lower().strip() in affirmative:
            files_with_word(argv[1], "", "delete")
        else:
            files_with_word(argv[1], "")
            print("(Files not deleted.)")
        return

    print("This program searches word processor files (doc, docx and odt) for words or phrases, and optionally allows you to delete any matches it finds.\nUsage:\nfindwpdir.py phrase path [delete]\n(You may omit the path for the current directory.)")


# Guarded so that importing this module does not start a search (or a
# deletion prompt) as a side effect.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement