View difference between Paste ID: UdmXviCn and dm6GMXmk
SHOW: | | - or go back to the newest paste.
1-
#Dependencies: catdoc
1+
#!/usr/bin/env python3
2
3
#Dependencies: catdoc, Python 3.x
4-
import subprocess, shlex, glob, os, zipfile, re, html.parser;
4+
5
6-
def files_with_word(path, word, delete=False):
6+
import subprocess, shlex, glob, os, zipfile, re, html.parser, sys, shutil;
7-
    files_with_word=[];
7+
8-
    for x in glob.glob(os.path.join(path, "*.doc")):
8+
def _doc_text(path):
    """Return the text that catdoc extracts from the legacy .doc file at *path*."""
    # An argument list hands the path to catdoc verbatim, so quotes, spaces or
    # backslashes in a file name cannot break the command line.  "-d utf-8"
    # pins the output charset so the bytes can be decoded predictably.
    command = ["catdoc", "-w", "-d", "utf-8", path]
    output, _ = subprocess.Popen(command, stdout=subprocess.PIPE).communicate()
    # Decode instead of str(bytes): the repr form escapes non-ASCII characters
    # and line breaks, which made such words impossible to find.
    return output.decode("utf-8", errors="replace")


def _xml_text(path):
    """Return the tag-free text of a .docx/.odt file, or None if it is unreadable."""
    member = "content.xml" if path.endswith(".odt") else "word/document.xml"
    try:
        # The context manager closes the archive before the file may be moved.
        with zipfile.ZipFile(path) as archive:
            raw = archive.read(member)
    except (zipfile.BadZipFile, KeyError) as exc:
        # One damaged document should not abort the whole scan.
        print("Skipping " + path + ": " + str(exc), file=sys.stderr)
        return None
    markup = raw.decode("utf-8", errors="replace")
    # Tags are removed without a separator so that a word split across several
    # formatting runs is joined back together; entities are decoded afterwards.
    return html.unescape(re.sub(r"<[^>]*>", "", markup))


def files_with_word(word, from_dir, to_dir=None):
    """Find the documents in *from_dir* that contain *word* and list or move them.

    Only files directly inside *from_dir* are examined: ``*.doc`` (read through
    the external ``catdoc`` program), ``*.docx`` and ``*.odt`` (read from their
    zipped XML).  The search is a plain, case-sensitive substring test.

    Parameters:
        word: text to look for.
        from_dir: directory to scan; it is made absolute first.
        to_dir: optional directory that matching files are moved into.  When it
            does not exist the user is asked whether to create it; answering no
            falls back to listing the matches.  With ``None`` the matching
            paths are only printed.

    Returns:
        None.  Results are reported on standard output.
    """
    from_dir = os.path.abspath(from_dir)
    if to_dir is not None:
        to_dir = os.path.abspath(to_dir)

    if not os.path.exists(from_dir):
        print("Your source path does not exist.")
        return

    if to_dir is not None and not os.path.exists(to_dir):
        answer = ""
        while answer not in {"y", "yes", "n", "no"}:
            # Normalise once so that " Y " is accepted instead of re-prompting.
            answer = input("Your destination path, " +to_dir+ ", does not exist. Create? (y/n) ").strip().lower()
        if answer in {"y", "yes"}:
            os.makedirs(to_dir)
        else:
            print("Not created (and not used). No files will be moved.")
            to_dir = None

    matches = [
        path
        for path in glob.glob(os.path.join(from_dir, "*.doc"))
        if word in _doc_text(path)
    ]

    zipped = glob.glob(os.path.join(from_dir, "*.docx")) + glob.glob(os.path.join(from_dir, "*.odt"))
    for path in zipped:
        text = _xml_text(path)
        if text is not None and word in text:
            matches.append(path)

    if not matches:
        print("No matches found.")
    elif to_dir is None:
        print("\n".join(matches).strip())

    if to_dir is not None:
        for path in matches:
            target = os.path.join(to_dir, os.path.basename(path))
            print("Found " + path + " moving to " + target)
            shutil.move(path, target)
54
55
# Example run: search the current working directory (an empty from_dir is made
# absolute, which resolves to the cwd) for "test" and move every matching
# document into test/backup.
files_with_word(
    word="test",
    from_dir="",
    to_dir="test/backup",
)