SHOW:
|
|
- or go back to the newest paste.
1 | #!/usr/bin/env python3 | |
2 | ||
3 | #Dependencies: catdoc, Python 3.x | |
4 | #on Ubuntu, you can install it with this: sudo apt-get install catdoc | |
5 | ||
6 | import subprocess, shlex, glob, os, zipfile, re, html.parser, sys; | |
7 | ||
def files_with_word(word, path, delete=None):
    """Find word-processor files directly inside ``path`` that contain ``word``.

    ``*.doc`` files are converted to text by the external ``catdoc`` program;
    ``*.docx`` and ``*.odt`` files are opened as zip archives and the markup
    of their main XML part is stripped before searching. The match is a plain,
    case-sensitive substring test.

    Args:
        word: Word or phrase to look for.
        path: Directory to scan (not recursive). An empty string means the
            current directory.
        delete: ``"delete"`` or ``True`` removes every matching file;
            ``None`` or ``False`` only searches. Any other value prints an
            error message and nothing is searched or deleted.

    Returns:
        The list of matching file paths (already deleted from disk when
        deletion was requested), or an empty list if ``delete`` was invalid.

    Raises:
        FileNotFoundError: If a ``.doc`` file is present but ``catdoc`` is
            not installed.
        zipfile.BadZipFile, KeyError: If a ``.docx``/``.odt`` file is not a
            valid archive or lacks the expected XML member.
    """
    import locale  # local import: only needed to decode catdoc's output

    if delete not in (None, "delete", True, False):
        print("The third argument must either be the word, ‘delete’, or it must be omitted.\n\nNo files deleted.")
        return []

    matches = []

    # NOTE(review): assumes catdoc writes its output in the locale's preferred
    # encoding -- confirm against the catdoc man page for your build.
    output_encoding = locale.getpreferredencoding(False)
    for doc_path in glob.glob(os.path.join(path, "*.doc")):
        # Pass an argument list instead of a shell-style string so file names
        # containing quotes or backslashes reach catdoc unchanged.
        process = subprocess.Popen(["catdoc", "-w", doc_path], stdout=subprocess.PIPE)
        raw_text, _ = process.communicate()
        # Decode the bytes; searching str(bytes) would search the b'...' repr.
        if word in raw_text.decode(output_encoding, errors="replace"):
            matches.append(doc_path)

    zipped_documents = (
        glob.glob(os.path.join(path, "*.docx"))
        + glob.glob(os.path.join(path, "*.odt"))
    )
    for zip_path in zipped_documents:
        member = "content.xml" if zip_path.endswith("odt") else "word/document.xml"
        # Context manager closes the archive, which also matters before
        # os.remove() on platforms that lock open files.
        with zipfile.ZipFile(zip_path) as archive:
            markup = archive.read(member).decode()
        # Strip tags first, then decode entities, so escaped "<"/">" in the
        # document text are not mistaken for markup.
        text = html.unescape(re.sub(r"<[^>]*>", "", markup))
        if word in text:
            matches.append(zip_path)

    if delete in ("delete", True):
        for match in matches:
            os.remove(match)
    return matches
def main(argv=None):
    """Command-line entry point: search for a phrase and optionally delete matches.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    Behaviour by number of arguments after the program name:
        * two or three -- ``phrase path [delete]`` is passed straight to
          ``files_with_word``;
        * one (and it is not a help flag) -- the current directory is
          searched after asking interactively whether to delete matches;
        * anything else -- the usage text is printed.
    """
    args = sys.argv if argv is None else argv

    # Decide on the argument count up front rather than catching IndexError
    # around the call: an IndexError raised inside the search itself must not
    # silently rerun the search without the delete argument.
    if len(args) == 4:
        files_with_word(args[1], args[2], args[3])
        return
    if len(args) == 3:
        files_with_word(args[1], args[2])
        return

    if len(args) == 2 and args[1] not in {"--help", "-h", "--h", "-help"}:
        answer = input("Do you wish to delete the files? ")
        if answer.lower().strip() in {"y", "yes", "sure", "okay", "yeah", "yep", "yea", "of course", "certainly", "assuredly", "affirmative", "why not"}:
            files_with_word(args[1], "", "delete")
        else:
            files_with_word(args[1], "")
            print("(Files not deleted.)")
        return

    print("This program searches word processor files (doc, docx and odt) for words or phrases, and optonally allows you to delete any matches it finds.\nUsage:\nfindwpdir.py phrase path [delete]\n(You may omit the path for the current directory.)")


# Guarded so that importing this module does not run the command-line tool.
if __name__ == "__main__":
    main()