Advertisement
Guest User

Standalone Tesseract build script

a guest
Apr 21st, 2015
11,258
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 8.90 KB | None | 0 0
  1. #!/bin/bash
  2.  
  3. #
  4. # Build Script for making standalone version of Tesseract
  5. # Wes Fowlks
  6. # 10/01/2014
  7. # Originally posted at:https://code.google.com/p/tesseract-ocr/issues/detail?id=1326
  8. #
  9.  
  10. BUILD_ZLIB=0
  11. BUILD_LIBJPEG=0
  12. BUILD_LIBPNG=0
  13. BUILD_LEPTONICA=0
  14. BUILD_TESSERACT=1
  15.  
  16. # Get the base directory of where the script is
  17. BASE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
  18. BUILD_DIR=$BASE_DIR/build
  19. ARCHIVE_DIR=$BASE_DIR/archives
  20. SRC_DIR=$BASE_DIR/src
  21. TESSERACT_DIR=$BASE_DIR/tesseract
  22.  
  23. #Library Versions
  24. ZLIB_VERSION=1.2.8
  25. LIBPNG_VERSION=1.6.13
  26. LIBJPEG_VERSION=9a
  27. LEPTONICA_VERSION=1.71
  28. TESSERACT_VERSION=3.02.02
  29.  
  30. echo "Base Build Directory: " $BUILD_DIR
  31.  
  32. # Functions usefull throughtout the script
  33. function setupDirs() {
  34.        if [ ! -d "$ARCHIVE_DIR" ]; then
  35.                mkdir $ARCHIVE_DIR
  36.        fi
  37.  
  38.        if [ ! -d "$SRC_DIR" ]; then
  39.                mkdir $SRC_DIR
  40.        fi
  41.  
  42.        if [ ! -d "$BUILD_DIR" ]; then
  43.                mkdir $BUILD_DIR
  44.        fi
  45. }
  46.  
  47. # First check to see if zlib
  48. if [ $BUILD_ZLIB = 1 ]
  49. then
  50.        echo "Building ZLIB"
  51.        setupDirs
  52.  
  53.        # Clean up old files
  54.        rm -rf $SRC_DIR/zlib* $BUILD_DIR/zlib*
  55.  
  56.        if [ ! -f "$ARCHIVE_DIR/zlib-$ZLIB_VERSION.tar.gz" ]; then
  57.                #Download the file
  58.                curl -o $ARCHIVE_DIR/zlib-$ZLIB_VERSION.tar.gz http://zlib.net/zlib-$ZLIB_VERSION.tar.gz
  59.        fi
  60.  
  61.        echo "Extracting archive"
  62.        tar -xzf $ARCHIVE_DIR/zlib-$ZLIB_VERSION.tar.gz -C $SRC_DIR
  63.  
  64.        cd "$SRC_DIR/zlib-$ZLIB_VERSION"
  65.  
  66.        echo "Configuring ZLIB for Standalone"
  67.        ./configure --solo --static
  68.  
  69.        echo "Building Zlib and deploying to $BUILD_DIR"
  70.        make install prefix=$BUILD_DIR
  71.  
  72.        #Check if the build was successful
  73.        if [ -f "$BUILD_DIR/include/zlib.h" ]; then
  74.                echo "ZLIB Build Successful"
  75.        else
  76.                echo "ZLIB build failed. Exiting."
  77.                exit 1
  78.        fi
  79.  
  80. else
  81.        echo "Skipping ZLib"
  82. fi
  83.  
  84. # Build Libjpeg
  85. if [ $BUILD_LIBJPEG = 1 ]
  86. then
  87.  
  88.        echo "Building Lib Jpeg"
  89.        setupDirs
  90.  
  91.        # Clean up old files
  92.        rm -rf $SRC_DIR/jpeg* $BUILD_DIR/jpeg*
  93.  
  94.        if [ ! -f "$ARCHIVE_DIR/jpegsrc.v$LIBJPEG_VERSION.tar.gz" ]; then
  95.                #Download the file
  96.                curl -o $ARCHIVE_DIR/jpeg.v$LIBJPEG_VERSION.tar.gz http://www.ijg.org/files/jpegsrc.v$LIBJPEG_VERSION.tar.gz
  97.        fi
  98.  
  99.        echo "Extracting archive"
  100.        tar -xzf $ARCHIVE_DIR/jpeg.v$LIBJPEG_VERSION.tar.gz -C $SRC_DIR
  101.  
  102.        cd "$SRC_DIR/jpeg-$LIBJPEG_VERSION"
  103.  
  104.        echo "Configuring Lib Jpeg for Standalone"
  105.        ./configure --disable-shared --prefix=$BUILD_DIR
  106.  
  107.        echo "Building LIBJPEG and deploying to $BUILD_DIR"
  108.        make install
  109.  
  110.        #Check if the build was successful
  111.        if [ -f "$BUILD_DIR/include/jpeglib.h" ]; then
  112.                echo "LIB JPEG Build Successful"
  113.        else
  114.                echo "LIBJPEG build failed. Exiting."
  115.                exit 1
  116.        fi
  117.  
  118. else
  119.        echo "Skipping LIBJPEG"
  120. fi
  121.  
  122. # Build Lib PNG
  123. if [ $BUILD_LIBPNG = 1 ]
  124. then
  125.        echo "Building Lib PNG"
  126.        setupDirs
  127.  
  128.        # Clean up old files
  129.        rm -rf $SRC_DIR/libpng* $BUILD_DIR/libpng*
  130.  
  131.        if [ ! -f "$ARCHIVE_DIR/libpng-$LIBPNG_VERSION.tar.gz" ]; then
  132.                #Download the file
  133.                curl -L -o $ARCHIVE_DIR/libpng-$LIBPNG_VERSION.tar.gz http://downloads.sourceforge.net/project/libpng/libpng16/1.6.13/libpng-1.6.13.tar.gz?use_mirror=tcpdiag
  134.        fi
  135.  
  136.        echo "Extracting archive"
  137.        tar -xzf $ARCHIVE_DIR/libpng-$LIBPNG_VERSION.tar.gz -C $SRC_DIR
  138.  
  139.        cd "$SRC_DIR/libpng-$LIBPNG_VERSION"
  140.  
  141.        echo "Copying libz header files to libpng"
  142.        cp $BUILD_DIR/include/zlib.h .
  143.        cp $BUILD_DIR/include/zconf.h .
  144.  
  145.        echo "Configuring Lib PNG for Standalone"
  146.        ./configure --prefix=$BUILD_DIR
  147.  
  148.        echo "Building LIBPNG and deploying to $BUILD_DIR"
  149.        make check
  150.        make install
  151.  
  152.        #Check if the build was successful
  153.        if [ -f "$BUILD_DIR/include/libpng16/png.h" ]; then
  154.                echo "LIB PNG Build Successful"
  155.        else
  156.                echo "LIBPNG build failed. Exiting."
  157.                exit 1
  158.        fi
  159.  
  160. else
  161.        echo "Skipping LIBPNG"
  162. fi
  163.  
  164. # Build Leptonica
  165. if [ $BUILD_LEPTONICA = 1 ]
  166. then
  167.        echo "Building Leptonica"
  168.        setupDirs
  169.  
  170.        # Clean up old files
  171.        rm -rf $SRC_DIR/leptonica* $BUILD_DIR/leptonica*
  172.  
  173.        if [ ! -f "$ARCHIVE_DIR/leptonica-$LEPTONICA_VERSION.tar.gz" ]; then
  174.                #Download the file
  175.                curl -o $ARCHIVE_DIR/leptonica-$LEPTONICA_VERSION.tar.gz http://www.leptonica.com/source/leptonica-$LEPTONICA_VERSION.tar.gz
  176.        fi
  177.  
  178.        echo "Extracting archive"
  179.        tar -xzf $ARCHIVE_DIR/leptonica-$LEPTONICA_VERSION.tar.gz -C $SRC_DIR
  180.  
  181.        cd "$SRC_DIR/leptonica-$LEPTONICA_VERSION"
  182.        
  183.        echo "Configuring leptonica for standalone"
  184.        ./make-for-local
  185.  
  186.        echo "Modifying environ.h"
  187.        cat src/environ.h |sed -e 's/#define  HAVE_LIBTIFF     1/#define  HAVE_LIBTIFF     0/g' > src/environ.test.h
  188.        mv src/environ.test.h src/environ.h
  189.  
  190.        echo "Copying dependencies to leptonica"
  191.        cp -r $BUILD_DIR/include src
  192.        cd src
  193.  
  194.        echo "Building LEPTONICA and deploying to $BUILD_DIR"
  195.        make EXTRAINCLUDES="-I./include -I./include/libpng16"
  196.  
  197.        #Check if the build was successful
  198.        if [ -f "$SRC_DIR/leptonica-$LEPTONICA_VERSION/lib/nodebug/liblept.a" ]; then
  199.                echo "Leptonica Build Successful"
  200.        else
  201.                echo "LEPTONICA build failed. Exiting."
  202.                exit 1
  203.        fi
  204.  
  205.        echo "Copying files for Tesseract"
  206.        cp $SRC_DIR/leptonica-$LEPTONICA_VERSION/lib/nodebug/liblept.a $BUILD_DIR/lib
  207.  
  208.        if [ ! -f "$BUILD_DIR/include/leptonica" ]; then
  209.                mkdir $BUILD_DIR/include/leptonica
  210.        fi
  211.  
  212.        cp $SRC_DIR/leptonica-$LEPTONICA_VERSION/src/*.h $BUILD_DIR/include/leptonica
  213.  
  214. else
  215.        echo "Skipping Leptonica"
  216. fi
  217.  
  218. # Build Tesseract
  219. if [ $BUILD_TESSERACT = 1 ]
  220. then
  221.  
  222.        echo "Building Tesseract"
  223.        rm -rf $SRC_DIR/tesseract*
  224.  
  225.        #Create Tesseract Build Directory
  226.        if [ ! -d "$TESSERACT_DIR" ]; then
  227.                mkdir $TESSERACT_DIR
  228.        else
  229.                rm -rf $TESSERACT_DIR/*
  230.        fi
  231.  
  232.        if [ ! -f "$ARCHIVE_DIR/tesseract-ocr-$TESSERACT_VERSION.tar.gz" ]; then
  233.                #Download the file
  234.                curl -L -o $ARCHIVE_DIR/tesseract-ocr-$TESSERACT_VERSION.tar.gz https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.02.tar.gz
  235.        fi
  236.  
  237.        echo "Extracting archive"
  238.        tar -xzf $ARCHIVE_DIR/tesseract-ocr-$TESSERACT_VERSION.tar.gz -C $SRC_DIR
  239.        cd "$SRC_DIR/tesseract-ocr"
  240.  
  241.        cp -r $BUILD_DIR/include src
  242.        cp -r $BUILD_DIR/bin src
  243.        cp -r $BUILD_DIR/lib src
  244.  
  245.        mv configure configure_old
  246.  
  247.        echo "Putting some magic sauce in the configure script"
  248.        echo "CXXFLAGS=\"-I$BUILD_DIR/include -I$BUILD_DIR/include/libpng16 -I$BUILD_DIR/include/leptonica -lpng -ljpeg -lz\"
  249. LDFLAGS=\"-L$BUILD_DIR/lib\"
  250. LIBLEPT_HEADERSDIR=\"$BUILD_DIR/include/leptonica\"" > configure
  251.  
  252.        cat configure_old >> configure
  253.        rm configure_old
  254.  
  255.        #change the permissions on configure to make it executable again
  256.        chmod 755 configure
  257.  
  258.        echo "Configuring Tesseract"
  259.        ./configure --prefix=$TESSERACT_DIR --disable-tessdata-prefix
  260.  
  261.        echo "Configuration Configuration done, now Building"
  262.        make install
  263.  
  264.        ls $TESSERACT_DIR/bin
  265.  
  266.        if [ -x "$TESSERACT_DIR/bin/tesseract" ]; then
  267.                echo "Tesseract Build Successful"
  268.        else
  269.                echo "Tesseract build failed. Exiting."
  270.                exit 1
  271.        fi
  272.  
  273.        echo "Checking the language files"
  274.        if [ ! -f "$ARCHIVE_DIR/tesseract-ocr-3.02.eng.tar.gz" ]; then
  275.                #Download the file
  276.                curl -L -o $ARCHIVE_DIR/tesseract-ocr-3.02.eng.tar.gz https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.eng.tar.gz
  277.        fi
  278.  
  279.        echo "Checking OSD (Optical Script Detection) models"
  280.        if [ ! -f "$ARCHIVE_DIR/tesseract-ocr-3.01.osd.tar.gz" ]; then
  281.                #Download the file
  282.                curl -L -o $ARCHIVE_DIR/tesseract-ocr-3.01.osd.tar.gz https://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.01.osd.tar.gz
  283.        fi
  284.  
  285.        echo "Installing Languages and OSD"
  286.        tar -xzf $ARCHIVE_DIR/tesseract-ocr-3.02.eng.tar.gz -C $TESSERACT_DIR/bin
  287.        tar -xzf $ARCHIVE_DIR/tesseract-ocr-3.01.osd.tar.gz -C $TESSERACT_DIR/bin
  288.  
  289.        cd $TESSERACT_DIR/bin
  290.  
  291.        echo "Tesseract is now built and can be found at: $BUILD_DIR"
  292.  
  293. else
  294.        echo "Skipping Tesseract"
  295. fi
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement