Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env bash
#
# epubcount.sh - print the number of words in the (X)HTML content of an EPUB.
#
# usage: ./epubcount.sh <epubfile>
# remember to `chmod +x epubcount.sh`
#
# Output (stdout): "<epubfile> :  <count>"
# Exit status:     0 on success, 1 if the archive cannot be processed,
#                  2 on a usage error.
# Requires:        unzip, sed, wc

set -euo pipefail

# Print an error message to stderr and abort with status 1.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

# Write the data of every archive member whose name ends in "html"
# (covers .html and .xhtml) to stdout.
# Arguments: $1 - path to the archive
# Returns:   0 if unzip succeeded or only reported warnings, 1 otherwise
extract_html() {
  local rc=0
  # -p streams member data to stdout, so nothing is written to disk: no
  # scratch directory, no `cd`, no `rm -rf`, and no clashes between
  # same-named files stored in different folders of the archive.
  # The pattern is quoted so that unzip, not the calling shell, expands it.
  unzip -p "$1" '*html' || rc=$?
  # unzip exits 1 for warnings even though processing completed; anything
  # higher (e.g. 11 = no matching members) is treated as a failure.
  (( rc <= 1 ))
}

# Read markup on stdin, replace each tag with a space, and print the number
# of remaining words.
count_words() {
  local n
  # `[^>]*` stops at the first '>' so text following a tag is never swallowed
  # together with it. Tags are replaced by a space (not removed) so that
  # words separated only by markup are not glued together.
  n=$(sed 's/<[^>]*>/ /g' | wc -w)
  # Some wc implementations pad the number with blanks; strip them.
  printf '%s\n' "${n//[[:space:]]/}"
}

main() {
  if (( $# < 1 )); then
    printf 'usage: %s <epubfile>\n' "${0##*/}" >&2
    exit 2
  fi

  local epub=$1 archive=$1 count

  command -v unzip >/dev/null || die "unzip is required but was not found"
  [[ -f "$epub" && -r "$epub" ]] || die "cannot read '$epub'"

  # Keep a file name that starts with '-' from being parsed as an option.
  case "$archive" in
    -*) archive=./$archive ;;
  esac

  # pipefail (set above) makes a failed extraction fail the whole pipeline.
  count=$(extract_html "$archive" | count_words) \
    || die "could not extract any *html content from '$epub'"

  # Output format (including the double space) is kept as the script has
  # always printed it, in case callers parse it.
  printf '%s :  %s\n' "$epub" "$count"
}

main "$@"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement