Advertisement
zdenop

tesseract makebox alternative

Dec 30th, 2012
299
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 1.36 KB | None | 0 0
  1. /*
  2.  * build with:
  3.  *    g++ -o makebox makebox.cpp -ltesseract -llept
  4.  * run with:
  5.  *    ./makebox | sort -k 3nr -k 2n >zzz.box
  6.  */
  7.  
#include <cstdio>
#include <cstring>

#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
  10.  
  11. int main()
  12. {
  13.     Pix *image;
  14.     BOX *box;
  15.     l_int32 i, nwords;
  16.     char *outText, *retStr;
  17.  
  18.     tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
  19.  
  20.     api->Init("/usr/src/tesseract-3.02/", "eng");
  21.  
  22.     image = pixRead("/home/user/Download/zzz.tiff");
  23.     api->SetImage(image);
  24.     api->SetVariable("tessedit_char_whitelist","0123456789-.");
  25.  
  26.     Boxa* boxes = api->GetComponentImages(tesseract::RIL_SYMBOL, true, NULL, NULL);
  27.     nwords = boxaGetCount(boxes);
  28.  
  29.     for (i = 0; i < nwords; i++) {
  30.         box = boxaGetBox(boxes, i, L_CLONE);
  31.         api->SetRectangle(box->x, box->y, box->w, box->h);
  32.         outText = api->GetUTF8Text();
  33.         //remove "\n" from outText
  34.         if (strlen(outText)) {
  35.             retStr = outText;
  36.             retStr[strlen(outText)-2] = 0;
  37.         }
  38.         else retStr = 0;
  39.         // print box style
  40.         if (retStr)
  41.             printf("%s %d %d %d %d 0\n", retStr,
  42.                    box->x, image->h-(box->y + box->h),
  43.                    box->x + box->w,  image->h-box->y);
  44.     }
  45.  
  46.     api->End();
  47.     delete [] outText, retStr;
  48.     pixDestroy(&image);
  49.     boxaDestroy(&boxes);
  50.     return 0;
  51. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement