Advertisement
Guest User

Document Scanning incl. OCR

a guest
Aug 19th, 2013
273
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 1.31 KB | None | 0 0
  #!/bin/bash
  #
  # Scan pages from the scanner's document feeder, OCR them with gocr and
  # write a single PDF made of the page images followed by the recognised
  # text.
  #
  # Usage: $0 DESTINATION          (the result is written to DESTINATION.pdf)
  #
  # Environment:
  #   num_pages      number of pages to scan (default: 1)
  #   scanner_model  text identifying the scanner's line in the output of
  #                  `scanimage -L` (default: HP 7650 Document scanner)
  #
  # Exits non-zero when the argument is missing, num_pages is not a positive
  # integer, a required tool or the scanner cannot be found, or any
  # processing step fails.

  set -x # trace every command, as the script always has
  set -euo pipefail

  # Print a message on stderr and abort.
  die() {
    printf '%s\n' "$*" >&2
    exit 1
  }

  # Test the raw argument: the derived "<arg>.pdf" name can never be empty,
  # so checking that would let a missing argument through.
  if [[ $# -lt 1 || -z "${1:-}" ]]; then
    echo >&2 "Usage: $0 DESTINATION"
    exit 1
  fi

  dest=$1
  target="${dest}.pdf"
  num_pages=${num_pages:-1}
  scanner_model=${scanner_model:-HP 7650 Document scanner}

  [[ "$num_pages" =~ ^[1-9][0-9]*$ ]] \
    || die "num_pages must be a positive integer, got: ${num_pages}"

  for tool in scanimage convert gocr a2ps gs; do
    command -v "$tool" >/dev/null || die "required tool not found: ${tool}"
  done

  # Intermediate OCR files are named after DESTINATION, but only after its
  # last path component, so a DESTINATION containing directories does not
  # point into non-existent subdirectories of the temp dir.
  ocr_name=${dest##*/}
  ocr_name=${ocr_name:-ocr}

  echo "Scanning $num_pages page(s) to $target..."

  tempdir=$(mktemp -d)
  # Remove the scratch directory on every exit path, not only on success.
  trap 'rm -rf -- "${tempdir:?}"' EXIT

  # Pull the SANE device name out of the matching "device `NAME' is a ..."
  # line. A failing pipeline (e.g. grep finding nothing) is tolerated here
  # and reported through the emptiness check below.
  scansource=$(LC_ALL=C scanimage -L \
    | grep -F -- "$scanner_model" \
    | sed "s,^device \`\([^']\+\)'.*,\1,") || true
  scansource=${scansource%%$'\n'*} # keep the first match only
  [[ -n "$scansource" ]] || die "no scanner matching '${scanner_model}' found"

  # Batch-scan in a subshell so the working directory change stays local.
  # No batch file name pattern is given; the rest of the script expects the
  # pages to end up as out0.tif, out1.tif, ... in the temp dir.
  (
    cd "$tempdir"
    scanimage -d"$scansource" -B64 --format=tiff -l0 -t0 -x215 -y297 \
      --batch-start=0 \
      --batch-count="$num_pages" \
      --batch-increment=1 \
      -b \
      --mode Color \
      --resolution 300 \
      --source ADF
  )

  # Collect the page files in arrays so paths survive word splitting, and
  # turn every scanned page into a one-page PDF.
  tiff_pages=()
  pdf_pages=()
  for ((i = 0; i < num_pages; i++)); do
    tiff_page="${tempdir}/out${i}.tif"
    pdf_page="${tempdir}/out${i}.pdf"
    [[ -f "$tiff_page" ]] || die "scanned page missing: ${tiff_page}"
    convert "$tiff_page" "$pdf_page"
    tiff_pages+=("$tiff_page")
    pdf_pages+=("$pdf_page")
  done

  # Stack all pages vertically into one image for OCR. -append acts on the
  # images already read, so it has to follow the complete input list.
  convert "${tiff_pages[@]}" -append "${tempdir}/scanimage.pnm"

  ocr_text="${tempdir}/${ocr_name}.txt"
  ocr_ps="${tempdir}/${ocr_name}.ps"
  gocr -i "${tempdir}/scanimage.pnm" -o "$ocr_text" -f UTF8
  a2ps -Xutf-8 -i "$ocr_text" -o "$ocr_ps"

  # Merge the page PDFs and the OCR text into the final document. gs writes
  # the PDF to stdout; -q keeps its own messages out of that stream.
  gs_args=(
    -dBATCH
    -q
    -dNOPAUSE
    -sDEVICE=pdfwrite
    -sOutputFile=-
    "${pdf_pages[@]}"
    "$ocr_ps"
  )
  gs "${gs_args[@]}" > "$target"

  ls -lh -- "$tempdir"
  ls -lh -- "$target"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement