Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
# Default directory to inspect. view_dir overwrites this with an absolute path.
# (No trailing backslash: a backslash at end of line is a line continuation and
# would glue the following line onto this assignment.)
ABS_PATH=.
# Colourise the output. The values are stored as literal backslash sequences
# and only become escape characters when printf interprets them.
RED='\033[0;31m' # Red
GRE='\033[0;32m' # Green
YEL='\033[1;33m' # Yellow
NCL='\033[0m'    # No Color
#######################################
# Print a coloured summary of one file: full path, file name, directory,
# name without extension, extension and size.
# Globals:
#   entry          (read)    path of the file to describe
#   indent         (read)    current indentation; treated as 0 when unset
#   GRE, YEL, NCL  (read)    colour escape sequences (may be empty)
#   FILE_NAME, DIR, NAME, EXT, SIZE (written) the computed properties; they
#                  stay global so existing readers keep working
# Arguments: none
# Outputs:   six lines on stdout
#######################################
function file_specification() {
  local pad=$(( ${indent:-0} + 4 ))

  FILE_NAME="$(basename "${entry}")"
  DIR="$(dirname "${entry}")"

  # Only treat the text after the last dot as an extension when something
  # precedes that dot. "README" and ".bashrc" therefore have no extension,
  # instead of reporting the whole name as the extension.
  case "${FILE_NAME}" in
    ?*.*)
      NAME="${FILE_NAME%.*}"
      EXT="${FILE_NAME##*.}"
      ;;
    *)
      NAME="${FILE_NAME}"
      EXT=""
      ;;
  esac

  SIZE="$(du -sh "${entry}" | cut -f1)"

  # %b expands the backslash escapes stored in the colour variables, so the
  # format string itself stays constant and data never ends up inside it.
  printf '%*s%b%s%b\n' "${pad}" '' "${GRE}" "${entry}" "${NCL}"
  printf '%*s\tFile name:\t%b%s%b\n' "${pad}" '' "${YEL}" "${FILE_NAME}" "${NCL}"
  printf '%*s\tDirectory:\t%b%s%b\n' "${pad}" '' "${YEL}" "${DIR}" "${NCL}"
  printf '%*s\tName only:\t%b%s%b\n' "${pad}" '' "${YEL}" "${NAME}" "${NCL}"
  printf '%*s\tExtension:\t%b%s%b\n' "${pad}" '' "${YEL}" "${EXT}" "${NCL}"
  printf '%*s\tFile size:\t%b%s%b\n' "${pad}" '' "${YEL}" "${SIZE}" "${NCL}"
}
#######################################
# Recursively list a directory tree: print the directory name, describe each
# regular file directly inside it, then descend into each sub-directory.
# Globals:
#   RED, NCL (read) colour escape sequences (may be empty)
# Arguments:
#   $1 - directory to list
#   $2 - indentation width for this level (default 0)
# Outputs:   listing on stdout
#######################################
function walk() {
  local indent="${2:-0}"
  # Loop variable; file_specification reads it (and indent) through bash's
  # dynamic scoping, and keeping it local stops recursion from leaking it.
  local entry

  printf '\n%*s%b%s%b\n\n' "${indent}" '' "${RED}" "$1" "${NCL}"

  # If the entry is a file do some operations
  for entry in "$1"/*; do
    if [[ -f "${entry}" ]]; then
      file_specification
    fi
  done

  # If the entry is a directory call walk() == create recursion
  for entry in "$1"/*; do
    if [[ -d "${entry}" ]]; then
      walk "${entry}" $((indent + 4))
    fi
  done
}
#######################################
# List a directory tree with walk. With no argument the current directory is
# used; otherwise the shell changes into the given directory so that a relative
# path becomes absolute.
# Globals:
#   ABS_PATH (written) absolute path of the directory that was listed
# Arguments:
#   $1 - directory to list (optional)
# Outputs:   the listing produced by walk, followed by an empty line
# Returns:   1 if the directory cannot be entered (nothing is listed)
# Note:      the working directory of the calling shell is changed when $1 is
#            given and valid.
#######################################
function view_dir() {
  if [[ -n "${1:-}" ]]; then
    # Stop here on failure rather than walking a stale or empty ABS_PATH.
    cd "${1}" || {
      printf 'view_dir: cannot change to directory: %s\n' "${1}" >&2
      return 1
    }
  fi
  ABS_PATH="${PWD}"
  walk "${ABS_PATH}"
  echo
}
# Search for Keyword*.doc / Keyword*.docx files two directory levels below the
# current directory, and copy each one into a fresh timestamped directory,
# renamed after the "Lastname, Firstname" prefix of its top-level directory.

#######################################
# Extract the leading "Lastname, Firstname" part of a path of the form
# "./Lastname, Firstname.../...".
# The last name may contain letters, digits, whitespace and hyphens; the first
# name letters and digits. POSIX character classes are used because "\s" and
# "\-" are not whitespace/hyphen escapes inside a bracket expression.
# Arguments: $1 - path starting with "./"
# Outputs:   the extracted name on stdout
# Returns:   1 when the path does not start with such a name
#######################################
keyword_owner() {
  local re='^\./([[:alnum:][:space:]-]*,[[:space:]][[:alnum:]]*)'
  [[ "$1" =~ ${re} ]] || return 1
  printf '%s\n' "${BASH_REMATCH[1]}"
}

#######################################
# Copy every ./<dir>/<dir>/Keyword*.<ext> file to <dest>/<owner>_Keyword.<ext>.
# Arguments:
#   $1 - file extension without the dot (doc or docx)
#   $2 - destination directory
# Outputs:   cp -v progress on stdout, skipped files on stderr
#######################################
copy_keyword_files() {
  local ext="$1"
  local dest="$2"
  local f owner
  # "./*/*/" is what "./**/**/" expands to while globstar is off: exactly two
  # directory levels.
  for f in ./*/*/Keyword*."${ext}"; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -e "${f}" ]] || continue
    if ! owner="$(keyword_owner "${f}")"; then
      # Without a name every such file would be written to "_Keyword.<ext>"
      # and silently overwrite the previous one.
      printf 'Skipping %s: no "Lastname, Firstname" directory prefix\n' "${f}" >&2
      continue
    fi
    cp -v "${f}" "${dest}/${owner}_Keyword.${ext}"
  done
}

TIMESTAMP="$(date '+%Y%m%d_%H-%M-%S')"
if mkdir -p "${TIMESTAMP}/Keyword"; then
  echo "Copying source files"
  copy_keyword_files doc "${TIMESTAMP}/Keyword"
  copy_keyword_files docx "${TIMESTAMP}/Keyword"
  echo "Listing contents"
  ls -al "${TIMESTAMP}/Keyword"
else
  printf 'Cannot create %s; keyword files were not copied\n' "${TIMESTAMP}/Keyword" >&2
fi
sleep 1 # Make sure to get a new timestamp
# NOTE(review): this format has an extra "_" after %H compared with the one
# used for TIMESTAMP ("%H_-%M" vs "%H-%M"). It is kept unchanged because it
# determines the output directory name; confirm whether it is intentional.
TIMESTAMP_1="$(date '+%Y%m%d_%H_-%M-%S')"
for subdir in Originals Word Zip Xml Txt; do
  mkdir -p "${TIMESTAMP_1}/${subdir}"
done

#######################################
# Copy each existing file among the given names into a directory.
# Names that do not exist (e.g. an unmatched glob left as a literal pattern)
# are skipped instead of being handed to cp.
# Arguments:
#   $1   - destination directory
#   $2.. - candidate file names
# Outputs:   cp -v progress on stdout
#######################################
copy_existing() {
  local dest="$1"
  shift
  local src
  for src in "$@"; do
    [[ -e "${src}" ]] || continue
    cp -v -- "${src}" "${dest}"
  done
}

echo "convert doc to docx"
if command -v textutil >/dev/null 2>&1; then
  for f in *.doc; do
    [[ -e "${f}" ]] || continue
    textutil -convert docx "${f}"
  done
  # This glob is expanded after the loop above has finished, so it also picks
  # up the .docx files that were just created.
  for f in *.docx; do
    [[ -e "${f}" ]] || continue
    textutil -convert txt "${f}"
  done
else
  printf 'textutil not found; skipping document conversion\n' >&2
fi

copy_existing "${TIMESTAMP_1}/Originals/" *.doc
copy_existing "${TIMESTAMP_1}/Word/" *.docx
copy_existing "${TIMESTAMP_1}/Xml/" *.docx
copy_existing "${TIMESTAMP_1}/Txt" *.txt
#######################################
# Strip a trailing ".docx" from a file name.
# (A bracket expression such as [^.docx] is a set of single characters, not a
# suffix, so it cannot be used for this.)
# Arguments: $1 - file name
# Outputs:   the name without the ".docx" suffix on stdout
#######################################
docx_stem() {
  printf '%s\n' "${1%.docx}"
}

# For every .docx in the Xml directory: keep a .zip copy, unpack it into a
# directory named after the document, then move the .zip copies to ../Zip.
# The work runs in a subshell so the caller's working directory never changes,
# and the source documents are only deleted when that step succeeded. A failed
# cd can therefore no longer cause files to be removed from the wrong place.
if (
  cd "${TIMESTAMP_1:?TIMESTAMP_1 is not set}/Xml/" || exit 1
  for f in *.docx; do
    [[ -e "${f}" ]] || continue
    new_file="$(docx_stem "${f}")"
    cp -v "${f}" "${new_file}.zip"
    unzip "${f}" -d "${new_file}"
  done
  for z in *.zip; do
    [[ -e "${z}" ]] || continue
    mv -v "${z}" ../Zip/
  done
); then
  for f in *.docx *.doc; do
    [[ -e "${f}" ]] || continue
    rm -v -- "${f}"
  done
else
  printf 'Unpacking the .docx files failed; source documents were kept\n' >&2
fi
# Now we have the converted text files: clean and prepare them for SQLite3
# insertion.

#######################################
# Write a copy of a text file with runs of blank lines squeezed to one.
# Arguments: $1 - path of the text file
# Outputs:   creates "<path>_.txt" next to the input
#######################################
squeeze_blank_lines() {
  cat -s "${1}" > "${1}_.txt"
}

for f in *.txt; do
  # An unmatched glob is left as the literal pattern; skip it.
  [[ -e "${f}" ]] || continue
  squeeze_blank_lines "${f}"
done
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement