MichaelYoung

Equation OCR

Jan 13th, 2013
3,139
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /*
  2. ayoungprogrammer.blogspot.com
  3.  
  4. Equation OCR
  5.  
  6. */
  7.  
  8. #include <iostream>
  9. #include <baseapi.h>
  10.  
  11.  #include <Windows.h>
  12.  
  13. #ifdef _CH_
  14. #pragma package <opencv>
  15. #endif
  16.  
  17. #ifndef _EiC
  18. #include "cv.h"
  19. #include "highgui.h"
  20. #include "ml.h"
  21. #include <stdio.h>
  22. #include <stdlib.h>
  23. #include <ctype.h>
  24. #endif
  25.  
  26. using namespace std;
  27. using namespace cv;
  28.  
  29.  
  30. std::string lang = "mat"; // file on disk is "my_trained_file.traineddata"
  31.     tesseract::TessBaseAPI tess_api;
  32.  
  33.  
  34.  
  35. class comparator{
  36. public:
  37.     bool operator()(vector<Point> c1,vector<Point>c2){
  38.        
  39.         return boundingRect( Mat(c1)).x<boundingRect( Mat(c2)).x;
  40.  
  41.     }
  42.  
  43. };
  44.  
  45. void sendToWolfram(string eqn){
  46.  
  47.  
  48.     stringstream url;
  49.     url<<"http://www.wolframalpha.com/input/?i=";
  50.  
  51.     int i;
  52.     for(i=0;i<eqn.length();i++){
  53.         if(eqn[i]>='0'&&eqn[i]<='9')url<<eqn[i];
  54.         if(eqn[i]=='-')url<<eqn[i];
  55.         if(eqn[i]=='f')url<<eqn[i];
  56.         if(eqn[i]=='x')url<<eqn[i];
  57.         if(eqn[i]=='+')url<<"%2B";
  58.         if(eqn[i]=='^')url<<"%5E";
  59.         if(eqn[i]=='=')url<<"%3D";
  60.         if(eqn[i]=='(')url<<"%28";
  61.         if(eqn[i]==')')url<<"%29";
  62.        
  63.  
  64.     }
  65.     cout<<url.str()<<endl;
  66.  
  67.  
  68. }
  69.  
  70.  
  71.  
  72.  
  73. //Extracts the eqn from the mat
  74. string extractContours(Mat& image,vector< vector<Point> > contours_poly){
  75.  
  76.     vector<Mat> extracted;
  77.    
  78.     stringstream outputText;
  79.     sort(contours_poly.begin(),contours_poly.end(),comparator());
  80.  
  81.  
  82.     //Loop through all contours to extract
  83.          for( int i = 0; i< contours_poly.size(); i++ ){
  84.  
  85.             Rect r = boundingRect( Mat(contours_poly[i]) );
  86.            
  87.            
  88.             Mat mask = Mat::zeros(image.size(), CV_8UC1);
  89.             drawContours(mask, contours_poly, i, Scalar(255), CV_FILLED); // This is a OpenCV function
  90.  
  91.             //Check for equal sign (2 dashes on top of each other) and merge
  92.             if(i+1<contours_poly.size()){
  93.                 Rect r2 = boundingRect( Mat(contours_poly[i+1]) );
  94.                 if(abs(r2.x-r.x)<20){
  95.                     drawContours(mask, contours_poly, i+1, Scalar(255), CV_FILLED); // This is a OpenCV function
  96.                     i++;
  97.                     int minX = min(r.x,r2.x);
  98.                     int minY = min(r.y,r2.y);
  99.                     int maxX =  max(r.x+r.width,r2.x+r2.width);
  100.                     int maxY = max(r.y+r.height,r2.y+r2.height);
  101.                     r = Rect(minX,minY,maxX - minX,maxY-minY);
  102.  
  103.                     if((double)r2.width/r2.height>3){
  104.                
  105.                         outputText<<"=";
  106.                         continue;
  107.  
  108.                     }else {
  109.                         outputText<<"i";
  110.                         continue;
  111.                     }
  112.  
  113.                 }
  114.             }
  115.            
  116.             if((double)r.width/r.height>3.0){
  117.                
  118.                 outputText<<"-";
  119.                 continue;
  120.  
  121.  
  122.             }
  123.            
  124.             if(r.y+r.height<image.size().height*2.0/3.0){
  125.                 outputText<<"^";
  126.             }
  127.  
  128.            
  129.  
  130.  
  131.              Mat extractPic;
  132.              image.copyTo(extractPic,mask);
  133.              Mat resizedPic = extractPic(r);
  134.  
  135.        
  136.             //Use tesseract to use ocr
  137.             tess_api.TesseractRect( resizedPic .data, 1, resizedPic .step1(), 0, 0, resizedPic .cols, resizedPic .rows);
  138.             tess_api.SetImage(resizedPic .data,resizedPic.size().width,resizedPic .size().height,resizedPic .channels(),resizedPic .step1());
  139.             tess_api.Recognize(0);
  140.             const char* out=tess_api.GetUTF8Text();
  141.  
  142.  
  143.             //Output character to stream
  144.             outputText<<out[0];
  145.          
  146.  
  147.          }
  148.  
  149.  
  150.          cout<<outputText.str()<<endl;
  151.         sendToWolfram(outputText.str());
  152.  
  153.  
  154.     return outputText.str();
  155.  
  156.  
  157.  
  158. }
  159.  
  160. void getContours(const char* filename)
  161. {
  162.   cv::Mat img = cv::imread(filename, 0);
  163.  
  164.  
  165.   //Apply blur to smooth edges and use adapative thresholding
  166.    cv::Size size(3,3);
  167.   cv::GaussianBlur(img,img,size,0);
  168.    adaptiveThreshold(img, img,255,CV_ADAPTIVE_THRESH_MEAN_C, CV_THRESH_BINARY,75,10);
  169.   cv::bitwise_not(img, img);
  170.  
  171.  
  172.  
  173.  
  174.   cv::Mat img2 = img.clone();
  175.  
  176.  
  177.   std::vector<cv::Point> points;
  178.   cv::Mat_<uchar>::iterator it = img.begin<uchar>();
  179.   cv::Mat_<uchar>::iterator end = img.end<uchar>();
  180.   for (; it != end; ++it)
  181.     if (*it)
  182.       points.push_back(it.pos());
  183.  
  184.   cv::RotatedRect box = cv::minAreaRect(cv::Mat(points));
  185.  
  186.    double angle = box.angle;
  187.   if (angle < -45.)
  188.     angle += 90.;
  189.      
  190.   cv::Point2f vertices[4];
  191.   box.points(vertices);
  192.   for(int i = 0; i < 4; ++i)
  193.     cv::line(img, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0), 1, CV_AA);
  194.  
  195.      
  196.  
  197.    cv::Mat rot_mat = cv::getRotationMatrix2D(box.center, angle, 1);
  198.  
  199.    cv::Mat rotated;
  200.   cv::warpAffine(img2, rotated, rot_mat, img.size(), cv::INTER_CUBIC);
  201.  
  202.    
  203.  
  204.   cv::Size box_size = box.size;
  205.   if (box.angle < -45.)
  206.     std::swap(box_size.width, box_size.height);
  207.   cv::Mat cropped;
  208.  
  209.   cv::getRectSubPix(rotated, box_size, box.center, cropped);
  210.    
  211.     Mat cropped2=cropped.clone();
  212. cvtColor(cropped2,cropped2,CV_GRAY2RGB);
  213.  
  214. Mat cropped3 = cropped.clone();
  215. cvtColor(cropped3,cropped3,CV_GRAY2RGB);
  216.  
  217.  vector<vector<Point> > contours;
  218.   vector<Vec4i> hierarchy;
  219.  
  220.   /// Find contours
  221.   cv:: findContours( cropped, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_TC89_KCOS, Point(0, 0) );
  222.  
  223.  
  224.  
  225.   /// Approximate contours to polygons + get bounding rects and circles
  226.   vector<vector<Point> > contours_poly( contours.size() );
  227.   vector<Rect> boundRect( contours.size() );
  228.   vector<Point2f>center( contours.size() );
  229.   vector<float>radius( contours.size() );
  230.  
  231.  
  232.   //Get poly contours
  233.     for( int i = 0; i < contours.size(); i++ )
  234.      {
  235.          approxPolyDP( Mat(contours[i]), contours_poly[i], 3, true );
  236.      }
  237.  
  238.  
  239.   //Get only important contours, merge contours that are within another
  240.   vector<vector<Point> > validContours;
  241.     for (int i=0;i<contours_poly.size();i++){
  242.        
  243.         Rect r = boundingRect(Mat(contours_poly[i]));
  244.         if(r.area()<100)continue;
  245.         bool inside = false;
  246.         for(int j=0;j<contours_poly.size();j++){
  247.             if(j==i)continue;
  248.            
  249.             Rect r2 = boundingRect(Mat(contours_poly[j]));
  250.             if(r2.area()<100||r2.area()<r.area())continue;
  251.             if(r.x>r2.x&&r.x+r.width<r2.x+r2.width&&
  252.                 r.y>r2.y&&r.y+r.height<r2.y+r2.height){
  253.  
  254.                 inside = true;
  255.             }
  256.         }
  257.         if(inside)continue;
  258.         validContours.push_back(contours_poly[i]);
  259.     }
  260.  
  261.  
  262.     //Get bounding rects
  263.     for(int i=0;i<validContours.size();i++){
  264.         boundRect[i] = boundingRect( Mat(validContours[i]) );
  265.     }
  266.  
  267.  
  268.     //Display
  269.   Scalar color = Scalar(0,255,0);
  270.   for( int i = 0; i< validContours.size(); i++ )
  271.      {
  272.     if(boundRect[i].area()<100)continue;
  273.       drawContours( cropped2, validContours, i, color, 1, 8, vector<Vec4i>(), 0, Point() );
  274.        rectangle( cropped2, boundRect[i].tl(), boundRect[i].br(),color, 2, 8, 0 );
  275.      }
  276.  
  277.   imshow("Contours",cropped2);
  278.  
  279. extractContours(cropped3,validContours);
  280.  
  281.  
  282. cv::waitKey(0);
  283.  
  284. }
  285.  
  286. int main(void){
  287.  
  288.  
  289.  
  290.     //Init tesseract
  291.     tess_api.Init("", lang.c_str(), tesseract::OEM_DEFAULT);
  292.     tess_api.SetPageSegMode(static_cast<tesseract::PageSegMode>(10));
  293.  
  294. char fileName[256];
  295.  
  296. //cout<<numOfFiles("C:\\Users\\Michael\\Documents\\Visual Studio 2008\\Projects\\Project1\\OCRTest\\OCRTest\\output\\x\\*.txt");
  297.  
  298. cin>>fileName;
  299. getContours(fileName);
  300.  
  301.  
  302.  
  303.  
  304.  
  305. }
  306.  
  307.  
  308.  
  309. /* Code to put in output folder
  310.             char cCurrentPath[256];
  311.             GetCurrentDirectory(sizeof(cCurrentPath),cCurrentPath );
  312.             fileOutput<<cCurrentPath;
  313.  
  314.              fileOutput<<"\\output\\"<<ch;
  315.              
  316.             cout<<dirExists(fileOutput.str())<<endl;
  317.  
  318.             stringstream fileName;
  319.             fileName<<fileOutput.str();
  320.  
  321.             fileOutput<<"\\*.jpg"; 
  322.  
  323.              int n = numOfFiles((char*)fileOutput.str().c_str());
  324.              cout<<n<<endl;
  325.              fileName<<"\\"<<n<<".jpg";
  326.              imwrite(fileName.str(),image);
  327.             cout<<fileName.str()<<endl;*/
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×