Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
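/*
 * build with (Leptonica is called directly, so it must be linked as well):
 *   g++ -o makebox makebox.cpp -ltesseract -llept
 * run with (sorts the box lines by the third field, descending, then by the second):
 *   ./makebox | sort -k 3nr -k 2n >zzz.box
 */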
#include <cstdio>
#include <cstring>

#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
- int main()
- {
- Pix *image;
- BOX *box;
- l_int32 i, nwords;
- char *outText, *retStr;
- tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
- api->Init("/usr/src/tesseract-3.02/", "eng");
- image = pixRead("/home/user/Download/zzz.tiff");
- api->SetImage(image);
- api->SetVariable("tessedit_char_whitelist","0123456789-.");
- Boxa* boxes = api->GetComponentImages(tesseract::RIL_SYMBOL, true, NULL, NULL);
- nwords = boxaGetCount(boxes);
- for (i = 0; i < nwords; i++) {
- box = boxaGetBox(boxes, i, L_CLONE);
- api->SetRectangle(box->x, box->y, box->w, box->h);
- outText = api->GetUTF8Text();
- //remove "\n" from outText
- if (strlen(outText)) {
- retStr = outText;
- retStr[strlen(outText)-2] = 0;
- }
- else retStr = 0;
- // print box style
- if (retStr)
- printf("%s %d %d %d %d 0\n", retStr,
- box->x, image->h-(box->y + box->h),
- box->x + box->w, image->h-box->y);
- }
- api->End();
- delete [] outText, retStr;
- pixDestroy(&image);
- boxaDestroy(&boxes);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement