Guest User

Untitled

a guest
Dec 15th, 2017
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.88 KB | None | 0 0
  1. sudo apt-get install gscan2pdf
  2.  
  3. #!/bin/bash
  4. #scan2PDF
  5. #Requires: tesseract 3.03 for OCR to PDF
  6. # scanimage for scanning, I use 1.0.24
  7. # pdfunite to merge multiple PDF into one, I use 0.26.5
  8. #
  9. # Use scanimage -L to get a list of devices.
  10. # e.g. device `genesys:libusb:006:003' is a Canon LiDE 210 flatbed scanner
  11. # then copy/paste genesys:libusb:006:003 into SCANNER below.
  12. # play with CONTRAST to get good images
  13. DPI=300
  14. TESS_LANG=nor #Language that Tesseract uses for OCR
  15. SCANNER=genesys:libusb:006:003 #My USB scanner
  16. CONTRAST=35 #Contrast to remove paper look
  17.  
  18. FILENAME=$1 #Agrument 1,filename
  19. PAGES=$2 #Argument 2, number of pages
  20.  
  21. re='^[0-9]+$' #Check if second argument is a number
  22. if ! [[ ${PAGES} =~ $re ]] ; then
  23. echo "error: Usage: $0 filename number_of_pages" >&2; exit 1
  24. fi
  25.  
  26. SCRIPT_NAME=`basename "$0" .sh` #Directory to store temporary files
  27. TMP_DIR=${SCRIPT_NAME}-tmp
  28.  
  29. if [ -d ${TMP_DIR} ] #Check if it exists a directory already
  30. then
  31. echo Error: The directory ${TMP_DIR} exists.
  32. exit 2
  33. fi
  34. mkdir ${TMP_DIR} #Make and go to temp dir
  35. cd ${TMP_DIR}
  36.  
  37. echo Starts Scanimage...
  38. scanimage -d ${SCANNER} --format=tiff --mode Color --resolution ${DPI} -p --contrast ${CONTRAST} --batch-start=1 --batch-count=${PAGES} --batch-prompt
  39.  
  40.  
  41. echo Starts Tesseract OCR
  42.  
  43. for file in *.tif #Goes through every tif file in temp dir
  44. do
  45. tesseract $file ${file%.tif} -l ${TESS_LANG} pdf
  46.  
  47. done
  48.  
  49. if [ "$PAGES" = "1" ] #How many pages
  50. then
  51. cp out1.pdf ../${FILENAME}.pdf #Only one page, just copy the PDF back
  52. else
  53. for file in *.pdf #More pages, merge the pages into one PDF and copy back
  54. do
  55. pdfuniteargs+=${file}
  56. pdfuniteargs+=" "
  57. done
  58. pdfunite $pdfuniteargs ../${FILENAME}.pdf
  59. fi
  60. echo ${FILENAME}.pdf done
  61.  
  62. rm * #Done, clean up
  63. cd ..
  64. rmdir ${TMP_DIR}
Add Comment
Please, Sign In to add comment