#!/usr/bin/env python3
# NOTE: this header was recovered from a paste-site diff view; the page chrome
# and the line-number and +/- columns have been removed.
# The earlier revision in that diff exposed
# files_with_word(path, word, delete=False).

#Dependencies: catdoc, Python 3.x

import subprocess, shlex, glob, os, zipfile, re, html.parser, sys, shutil;

def files_with_word(word, from_dir, to_dir=None):
    """Report (and optionally move) documents in ``from_dir`` containing ``word``.

    Only the top level of ``from_dir`` is scanned (no recursion).  ``*.doc``
    files are converted to text by the external ``catdoc`` program;
    ``*.docx`` and ``*.odt`` files are opened as zip archives and their main
    XML part is stripped of tags.  Matching is a case-sensitive substring
    test, so ``word`` may also be a phrase.

    Args:
        word: Text to search for.
        from_dir: Directory to scan.  An empty string means the current
            working directory (it goes through ``os.path.abspath``).
        to_dir: Optional directory to move matching files into.  When it does
            not exist the user is asked on stdin whether to create it; if the
            answer is no, matches are only listed.

    Returns:
        None.  Results are printed to standard output.
    """
    from_dir = os.path.abspath(from_dir)
    if to_dir is not None:
        to_dir = os.path.abspath(to_dir)

    if not os.path.exists(from_dir):
        print("Your source path does not exist.")
        return

    if to_dir is not None and not os.path.exists(to_dir):
        answer = ""
        # Normalise once so that input such as "y " or "Yes" is accepted
        # instead of triggering another prompt.
        while answer not in {"y", "yes", "n", "no"}:
            answer = input(
                "Your destination path, " + to_dir
                + ", does not exist. Create? (y/n) "
            ).strip().lower()
        if answer in {"y", "yes"}:
            os.makedirs(to_dir)
        else:
            print("Not created (and not used). No files will be moved.")
            to_dir = None

    matches = []

    for path in glob.glob(os.path.join(from_dir, "*.doc")):
        # Pass an argument list: file names containing quotes or backslashes
        # would be mangled by building a command string and re-splitting it.
        result = subprocess.run(["catdoc", "-w", path], stdout=subprocess.PIPE)
        # Decode rather than str(bytes): the latter searches the "b'...'"
        # repr, which hides non-ASCII text behind escape sequences.
        # NOTE(review): assumes catdoc emits UTF-8 (true under a UTF-8
        # locale) -- confirm for other locales.
        text = result.stdout.decode("utf-8", errors="replace")
        if word in text:
            matches.append(path)

    archives = (
        glob.glob(os.path.join(from_dir, "*.docx"))
        + glob.glob(os.path.join(from_dir, "*.odt"))
    )
    for path in archives:
        member = "content.xml" if path.endswith("odt") else "word/document.xml"
        try:
            with zipfile.ZipFile(path) as archive:
                xml_text = archive.read(member).decode()
        except (zipfile.BadZipFile, KeyError, UnicodeDecodeError):
            # The glob also picks up files that are not real documents (for
            # example Word's "~$name.docx" lock files); one bad file should
            # not abort the whole scan.
            print("Skipping unreadable file " + path)
            continue
        # Drop the markup, then turn entities such as &amp; back into text.
        # html.unescape replaces HTMLParser().unescape, removed in Python 3.9;
        # the name ``html`` is already bound by ``import html.parser``.
        text = html.unescape(re.sub(r"<[^>]*>", "", xml_text))
        if word in text:
            matches.append(path)

    if not matches:
        print("No matches found.")
    elif to_dir is None:
        print("\n".join(matches).strip())
    else:
        for path in matches:
            target = os.path.join(to_dir, os.path.basename(path))
            print("Found " + path + " moving to " + target)
            shutil.move(path, target)
# Script entry: search the current working directory (from_dir="" resolves to
# it via os.path.abspath) for "test" and move any matches into test/backup.
files_with_word(
    word="test",
    from_dir="",
    to_dir="test/backup",
)