View difference between Paste ID: PJ4r9JBK and HGt2UKGr
SHOW: | | - or go back to the newest paste.
1
#!/usr/bin/env python3
2
3
#Dependencies: catdoc, Python 3.x
4
#On Ubuntu, catdoc can be installed with: sudo apt-get install catdoc
5
6
import subprocess, shlex, glob, os, zipfile, re, html.parser, sys;
7
8
def files_with_word(word, path, delete=None):
    """Find word-processor files in *path* whose text contains *word*.

    Scans ``*.doc`` files (text extracted by running the external
    ``catdoc`` program) and ``*.docx`` / ``*.odt`` files (text taken from
    the archive's main XML part with the markup stripped).  Only *path*
    itself is scanned, not its subdirectories; an empty *path* means the
    current directory.

    Args:
        word: Word or phrase to look for (case-sensitive substring match).
        path: Directory to scan.
        delete: ``"delete"`` or ``True`` removes every matching file;
            ``None`` or ``False`` only searches.

    Returns:
        The list of matching file paths (already removed from disk when
        deletion was requested).  If *delete* is not one of the accepted
        values a message is printed, nothing is scanned or deleted, and an
        empty list is returned.
    """
    if delete not in (None, "delete", True, False):
        print("The third argument must either be the word, ‘delete’, or it must be omitted.\n\nNo files deleted.")
        return []

    matches = []
    # Escape the directory so characters such as "[" in its name are taken
    # literally instead of being interpreted as glob patterns.
    pattern_dir = glob.escape(path)

    for doc_file in glob.glob(os.path.join(pattern_dir, "*.doc")):
        # Pass the arguments as a list: no quoting is needed, so file names
        # containing quotes or spaces cannot break the command line.
        raw_text = subprocess.Popen(
            ["catdoc", "-w", doc_file], stdout=subprocess.PIPE
        ).communicate()[0]
        # Decode the bytes rather than searching their repr().
        # NOTE(review): assumes catdoc emits UTF-8 -- confirm for your locale.
        if word in raw_text.decode(errors="replace"):
            matches.append(doc_file)

    tag_pattern = re.compile(r"<[^>]*>")
    archives = (
        glob.glob(os.path.join(pattern_dir, "*.docx"))
        + glob.glob(os.path.join(pattern_dir, "*.odt"))
    )
    for archive in archives:
        member = "content.xml" if archive.endswith(".odt") else "word/document.xml"
        try:
            # The context manager closes the archive even if reading fails.
            with zipfile.ZipFile(archive) as package:
                raw_xml = package.read(member)
        except (zipfile.BadZipFile, KeyError) as exc:
            # Not a real document (e.g. an editor lock file): report it and
            # keep scanning instead of aborting.  A skipped file is never
            # treated as a match, so it is never deleted.
            print("Skipping {}: {}".format(archive, exc), file=sys.stderr)
            continue
        # Strip the markup, then decode entities so that e.g. "R&D" matches
        # text stored as "R&amp;D".  The whole text is searched; no leading
        # character is dropped, so a match at the very start is not missed.
        file_text = html.unescape(
            tag_pattern.sub("", raw_xml.decode(errors="replace"))
        )
        if word in file_text:
            matches.append(archive)

    if delete in ("delete", True):
        for matched_file in matches:
            os.remove(matched_file)
    return matches
36-
def main(argv=None):
    """Command-line entry point: ``findwpdir.py phrase [path [delete]]``.

    Args:
        argv: Arguments without the program name; defaults to
            ``sys.argv[1:]``.

    Behaviour by number of arguments:
        * one (not a help flag): search the current directory for the
          phrase after asking whether matching files should be deleted;
        * two or three: passed straight to ``files_with_word`` as
          ``word, path[, delete]``;
        * anything else, or a help flag: print the usage text.
    """
    args = sys.argv[1:] if argv is None else list(argv)

    if len(args) in (2, 3):
        # Choose the call by argument count instead of catching IndexError,
        # which would also hide an IndexError raised inside the search and
        # silently run it a second time.
        files_with_word(*args)
        return

    if len(args) == 1 and args[0] not in {"--help", "-h", "--h", "-help"}:
        answer = input("Do you wish to delete the files? ")
        if answer.lower().strip() in {"y", "yes", "sure", "okay", "yeah", "yep", "yea", "of course", "certainly", "assuredly", "affirmative", "why not"}:
            files_with_word(args[0], "", "delete")
        else:
            files_with_word(args[0], "")
            print("(Files not deleted.)")
        return

    print("This program searches word processor files (doc, docx and odt) for words or phrases, and optionally allows you to delete any matches it finds.\nUsage:\nfindwpdir.py phrase path [delete]\n(You may omit the path for the current directory.)")


# Run the command-line interface only when executed as a script, so that
# importing this module does not prompt the user or touch any files.
if __name__ == "__main__":
    main()