zdenop

install tessdata from github repository

Jun 28th, 2015
1,025
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /***************************************************************************
  2.  *
  3.  * compile:
  4.  *  g++ get_tessdata.cpp -o get_tessdata -std=c++0x -lcurl -ltesseract
  5.  * run:
  6.  *  ./get_tessdata
  7.  *
  8.  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
  9.  * copies of the Software, and permit persons to whom the Software is
  10.  * furnished to do so, under the terms of the COPYING file.
  11.  *
  12.  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  13.  * KIND, either express or implied.
  14.  *
  15.  ***************************************************************************/
  16.  
#include <cmath>
#include <cstdio>
#include <cstring>
#include <string>

#define CURL_STATICLIB
#include <curl/curl.h>
#include <tesseract/baseapi.h>
  23.  
  24. // source: http://stackoverflow.com/questions/1637587/c-libcurl-console-progress-bar
  25. int progress_func(void* ptr, double TotalToDownload, double NowDownloaded,
  26.                   double TotalToUpload, double NowUploaded) {
  27.     // ensure that the file to be downloaded is not empty
  28.     // because that would cause a division by zero error later on
  29.     if (TotalToDownload <= 0.0) {
  30.         return 0;
  31.     }
  32.  
  33.     // how wide you want the progress meter to be
  34.     int totaldotz = 40;
  35.     double fractiondownloaded = NowDownloaded / TotalToDownload;
  36.     // part of the progressmeter that's already "full"
  37.     int dotz = std::round(fractiondownloaded * totaldotz);
  38.  
  39.     // create the "meter"
  40.     int ii = 0;
  41.     printf("%3.0f%% [", fractiondownloaded * 100);
  42.     // part that's full already
  43.     for ( ; ii < dotz; ii++) {
  44.         printf("=");
  45.     }
  46.     // remaining part (spaces)
  47.     for ( ; ii < totaldotz; ii++) {
  48.         printf(" ");
  49.     }
  50.     // and back to line begin - do not forget the fflush to avoid output buffering problems!
  51.     printf("]\r");
  52.     fflush(stdout);
  53.     // if you don't return 0, the transfer will be aborted - see the documentation
  54.     return 0;
  55. }
  56.  
  57. size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream) {
  58.     size_t written;
  59.     written = fwrite(ptr, size, nmemb, stream);
  60.     return written;
  61. }
  62.  
  63. void receive_file(const char *file_name, char *download_url) {
  64.     CURL *curl;
  65.     FILE *fp;
  66.     CURLcode res;
  67.  
  68.     curl = curl_easy_init();
  69.     if (curl) {
  70.         fp = fopen(file_name,"wb");
  71.         if (!fp) {
  72.             fprintf(stderr, "Can not open output file! Quitting...\n");
  73.         }
  74.  
  75.         curl_easy_setopt(curl, CURLOPT_URL, download_url);
  76.         curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
  77.         curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
  78.         // Internal CURL progressmeter must be disabled if we provide our own callback
  79.         curl_easy_setopt(curl, CURLOPT_NOPROGRESS, false);
  80.         // Install the callback function
  81.         curl_easy_setopt(curl, CURLOPT_PROGRESSFUNCTION, progress_func);
  82.  
  83.         fprintf(stdout, "Starting download of %s:\n", download_url);
  84.         res = curl_easy_perform(curl);
  85.  
  86.         if(res != CURLE_OK) {
  87.             fprintf(stderr, "\nThere were errors during download:\n%s\n",
  88.                     curl_easy_strerror(res));
  89.         } else {
  90.             fprintf(stdout, "\nDownload finished!\n");
  91.         }
  92.  
  93.         /* now extract transfer info */
  94.         double speed, download_time;
  95.         curl_easy_getinfo(curl, CURLINFO_SPEED_DOWNLOAD, &speed);
  96.         curl_easy_getinfo(curl, CURLINFO_TOTAL_TIME, &download_time);
  97.  
  98.         fprintf(stdout, "Speed: %.3f bytes/sec during %.3f seconds\n",
  99.                 speed, download_time);
  100.  
  101.         /* always cleanup */
  102.         curl_easy_cleanup(curl);
  103.         fclose(fp);
  104.     } else {
  105.         fprintf(stdout, "Can not init curl... Closing.\n");
  106.     }
  107. }
  108.  
  109. int main(int argc, char* argv[]) {
  110.     const char* file_name = "eng.traineddata";
  111.     const char* base_url = "https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/";
  112.     bool force = 0;
  113.  
  114.     if (argc < 1) {
  115.         fprintf(stderr, "Usage is:\n\t%s -f <traineddata> [-b url] [--force]\n\nFor example:"
  116.                 "\n\t%s -f eng.%s -b %s\n", argv[0], argv[0], file_name, base_url);
  117.         return(1);
  118.     } else {
  119.         for (int i = 1; i < argc; i++) {
  120.             if (i + 1 < argc) {
  121.                 if (strcmp(argv[i], "-f") == 0) {
  122.                     file_name = argv[i + 1];
  123.                 } else if (strcmp(argv[i], "-b") == 0) {
  124.                     base_url = argv[i + 1];
  125.                 }
  126.             } else if (strcmp(argv[i], "--force") == 0) {
  127.                 // force will trigger download even file is already downloaded
  128.                 force = 1;
  129.             }
  130.         }
  131.     }
  132.  
  133.     char url[strlen(base_url) + strlen(file_name) + 1];
  134.     strcpy(url, base_url);
  135.     strcat(url, file_name);
  136.  
  137.     // install path
  138.     tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
  139.     if (api->Init(NULL, "eng")) {
  140.         fprintf(stderr, "Could not initialize tesseract.\n");
  141.         return 1;
  142.     }
  143.     const char *datapath = api->GetDatapath();
  144.     fprintf(stdout, "datapath is %s\n", datapath);
  145.  
  146.     char destination[strlen(datapath) + strlen(file_name) + 1];
  147.     strcpy(destination, datapath);
  148.     strcat(destination, file_name);
  149.  
  150.     // Check if file is not downloaded already e.g. installation failed
  151.     if (FILE *fp = fopen(destination, "r")) {
  152.         fseek(fp, 0, SEEK_END);
  153.         int size = ftell(fp);
  154.         fclose(fp);
  155.         if (size > 0 && !force) {
  156.             fprintf(stdout, "%s is already downloaded.\n", destination);
  157.         } else {
  158.             receive_file(destination, url);
  159.         }
  160.  
  161.     } else {
  162.         receive_file(destination, url);
  163.     }
  164.  
  165.     return 0;
  166. }
RAW Paste Data