Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /***************************************************************************
- *
- * compile:
- * g++ get_tessdata.cpp -o get_tessdata -std=c++0x -lcurl -ltesseract
- * run:
- * ./get_tessdata
- *
- * You may opt to use, copy, modify, merge, publish, distribute and/or sell
- * copies of the Software, and permit persons to whom the Software is
- * furnished to do so, under the terms of the COPYING file.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- *
- ***************************************************************************/
#include <cmath>
#include <cstdio>
#include <cstring>
#include <string>
#define CURL_STATICLIB
#include <curl/curl.h>
#include <tesseract/baseapi.h>
- // source: http://stackoverflow.com/questions/1637587/c-libcurl-console-progress-bar
/**
 * libcurl progress callback that redraws a one-line text meter on stdout.
 *
 * The line has the form "NNN% [====      ]" followed by a carriage return, so
 * every call overwrites the previous meter instead of scrolling the terminal.
 *
 * @param ptr              User pointer handed over by libcurl; not used.
 * @param TotalToDownload  Total amount expected for the download.
 * @param NowDownloaded    Amount received so far.
 * @param TotalToUpload    Not used.
 * @param NowUploaded      Not used.
 * @return Always 0; a non-zero return would make libcurl abort the transfer.
 */
int progress_func(void* ptr, double TotalToDownload, double NowDownloaded,
                  double TotalToUpload, double NowUploaded) {
  // With no positive total there is no ratio to display, and the division
  // below would be by zero, so leave the terminal untouched.
  if (TotalToDownload <= 0.0) {
    return 0;
  }

  // Width of the meter between the brackets, in characters.
  const int meter_width = 40;
  const double ratio = NowDownloaded / TotalToDownload;
  // Number of cells drawn as "already downloaded".
  const int filled = static_cast<int>(std::round(ratio * meter_width));

  printf("%3.0f%% [", ratio * 100);

  int column = 0;
  // Completed portion of the meter.
  while (column < filled) {
    printf("=");
    ++column;
  }
  // Pad the remainder with blanks so the closing bracket stays in place.
  while (column < meter_width) {
    printf(" ");
    ++column;
  }

  // Return to the start of the line and flush, otherwise buffered output
  // would delay the redraw.
  printf("]\r");
  fflush(stdout);

  return 0;
}
/**
 * libcurl write callback: appends one received chunk to an open stdio stream.
 *
 * libcurl treats the return value as the number of BYTES consumed and aborts
 * the transfer when it differs from size * nmemb. fwrite() reports the number
 * of complete ITEMS written, so the count is scaled by the item size.
 *
 * @param ptr     Start of the received data.
 * @param size    Size of one item in bytes.
 * @param nmemb   Number of items in this chunk.
 * @param stream  Destination stream; a null stream is reported as a failed
 *                write (0 bytes) instead of being passed on to fwrite().
 * @return Number of bytes actually written.
 */
size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream) {
  if (stream == nullptr) {
    return 0;
  }
  const size_t items_written = fwrite(ptr, size, nmemb, stream);
  return items_written * size;
}
- void receive_file(const char *file_name, char *download_url) {
- CURL *curl;
- FILE *fp;
- CURLcode res;
- curl = curl_easy_init();
- if (curl) {
- fp = fopen(file_name,"wb");
- if (!fp) {
- fprintf(stderr, "Can not open output file! Quitting...\n");
- }
- curl_easy_setopt(curl, CURLOPT_URL, download_url);
- curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
- curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
- // Internal CURL progressmeter must be disabled if we provide our own callback
- curl_easy_setopt(curl, CURLOPT_NOPROGRESS, false);
- // Install the callback function
- curl_easy_setopt(curl, CURLOPT_PROGRESSFUNCTION, progress_func);
- fprintf(stdout, "Starting download of %s:\n", download_url);
- res = curl_easy_perform(curl);
- if(res != CURLE_OK) {
- fprintf(stderr, "\nThere were errors during download:\n%s\n",
- curl_easy_strerror(res));
- } else {
- fprintf(stdout, "\nDownload finished!\n");
- }
- /* now extract transfer info */
- double speed, download_time;
- curl_easy_getinfo(curl, CURLINFO_SPEED_DOWNLOAD, &speed);
- curl_easy_getinfo(curl, CURLINFO_TOTAL_TIME, &download_time);
- fprintf(stdout, "Speed: %.3f bytes/sec during %.3f seconds\n",
- speed, download_time);
- /* always cleanup */
- curl_easy_cleanup(curl);
- fclose(fp);
- } else {
- fprintf(stdout, "Can not init curl... Closing.\n");
- }
- }
- int main(int argc, char* argv[]) {
- const char* file_name = "eng.traineddata";
- const char* base_url = "https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/";
- bool force = 0;
- if (argc < 1) {
- fprintf(stderr, "Usage is:\n\t%s -f <traineddata> [-b url] [--force]\n\nFor example:"
- "\n\t%s -f eng.%s -b %s\n", argv[0], argv[0], file_name, base_url);
- return(1);
- } else {
- for (int i = 1; i < argc; i++) {
- if (i + 1 < argc) {
- if (strcmp(argv[i], "-f") == 0) {
- file_name = argv[i + 1];
- } else if (strcmp(argv[i], "-b") == 0) {
- base_url = argv[i + 1];
- }
- } else if (strcmp(argv[i], "--force") == 0) {
- // force will trigger download even file is already downloaded
- force = 1;
- }
- }
- }
- char url[strlen(base_url) + strlen(file_name) + 1];
- strcpy(url, base_url);
- strcat(url, file_name);
- // install path
- tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
- if (api->Init(NULL, "eng")) {
- fprintf(stderr, "Could not initialize tesseract.\n");
- return 1;
- }
- const char *datapath = api->GetDatapath();
- fprintf(stdout, "datapath is %s\n", datapath);
- char destination[strlen(datapath) + strlen(file_name) + 1];
- strcpy(destination, datapath);
- strcat(destination, file_name);
- // Check if file is not downloaded already e.g. installation failed
- if (FILE *fp = fopen(destination, "r")) {
- fseek(fp, 0, SEEK_END);
- int size = ftell(fp);
- fclose(fp);
- if (size > 0 && !force) {
- fprintf(stdout, "%s is already downloaded.\n", destination);
- } else {
- receive_file(destination, url);
- }
- } else {
- receive_file(destination, url);
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement