Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /* --Sparse Optical Flow Demo Program--
- * Written by David Stavens ([email protected])
- */
- #include <stdio.h>
- #include <cv.h>
- #include <highgui.h>
- #include <math.h>
- #define NO_FEATURES 400
- #define MIN_VECT_COMP 2000
- static const double pi = 3.14159265358979323846;
/* Square a value in double precision.
 * BUGFIX: the parameter was `int`, but callers pass `double` displacement
 * sums (xt/yt), which were silently truncated; `int` a*a could also
 * overflow. Taking `double` is backward-compatible (ints promote).
 */
inline static double square(double a)
{
	return a * a;
}
- /* This is just an inline that allocates images. I did this to reduce clutter in the
- * actual computer vision algorithmic code. Basically it allocates the requested image
- * unless that image is already non-NULL. It always leaves a non-NULL image as-is even
- * if that image's size, depth, and/or channels are different than the request.
- */
- inline static void allocateOnDemand(IplImage **img, CvSize size, int depth, int channels) {
- if (*img != NULL)
- return;
- *img = cvCreateImage(size, depth, channels);
- if (*img == NULL) {
- fprintf(stderr, "Error: Couldn't allocate image. Out of memory?\n");
- exit(-1);
- }
- }
- int main(void)
- {
- /* Create an object that decodes the input video stream. */
- CvCapture *input_video = cvCaptureFromCAM(0);
- if (input_video == NULL)
- {
- /* Either the video didn't exist OR it uses a codec OpenCV
- * doesn't support.
- */
- fprintf(stderr, "Error: Can't open video.\n");
- return -1;
- }
- /* Read the video's frame size. */
- CvSize frame_size;
- frame_size.height = (int) cvGetCaptureProperty(input_video, CV_CAP_PROP_FRAME_HEIGHT);
- frame_size.width = (int) cvGetCaptureProperty(input_video, CV_CAP_PROP_FRAME_WIDTH);
- /* Create three windows called "N", "N+1", and "Optical Flow"
- * for visualizing the output. Have those windows automatically change their
- * size to match the output.
- */
- cvNamedWindow("N", CV_WINDOW_AUTOSIZE);
- cvNamedWindow("N+1", CV_WINDOW_AUTOSIZE);
- cvNamedWindow("Optical Flow", CV_WINDOW_AUTOSIZE);
- /* Color filters. */
- CvScalar hsv_min = cvScalar(0, 30, 80, 0);
- CvScalar hsv_max = cvScalar(50, 100, 255, 0);
- long current_frame = 0;
- int key_pressed = 0;
- while(key_pressed != 27) {
- static IplImage *frame = NULL, *frame1 = NULL, *frame1_1C = NULL, *frame2_1C = NULL, *eig_image = NULL, *temp_image = NULL, *pyramid1 = NULL, *pyramid2 = NULL;
- /* Go to the frame we want. Important if multiple frames are queried in
- * the loop which they of course are for optical flow. Note that the very
- * first call to this is actually not needed. (Because the correct position
- * is set outsite the for() loop.)
- */
- // cvSetCaptureProperty(input_video, CV_CAP_PROP_POS_FRAMES, current_frame);
- /* Get the next frame of the video.
- * IMPORTANT! cvQueryFrame() always returns a pointer to the _same_
- * memory location. So successive calls:
- * frame1 = cvQueryFrame();
- * frame2 = cvQueryFrame();
- * frame3 = cvQueryFrame();
- * will result in (frame1 == frame2 && frame2 == frame3) being true.
- * The solution is to make a copy of the cvQueryFrame() output.
- */
- frame = cvQueryFrame(input_video);
- if (frame == NULL)
- {
- /* Why did we get a NULL frame? We shouldn't be at the end. */
- fprintf(stderr, "Error: Hmm. The end came sooner than we thought.\n");
- return -1;
- }
- /* Allocate necessary storage for skin detection. */
- allocateOnDemand(&temp_image, frame_size, IPL_DEPTH_8U, 3);
- /* We'll make a full color backup of this frame so that we can draw on it.
- * (It's not the best idea to draw on the static memory space of cvQueryFrame().)
- */
- allocateOnDemand(&frame1, frame_size, IPL_DEPTH_8U, 3);
- cvConvertImage(frame, frame1);
- /* Allocate another image if not already allocated.
- * Image has ONE challenge of color (ie: monochrome) with 8-bit "color" depth.
- * This is the image format OpenCV algorithms actually operate on (mostly).
- */
- allocateOnDemand(&frame1_1C, frame_size, IPL_DEPTH_8U, 1);
- cvCvtColor(frame, temp_image, CV_BGR2HSV);
- cvInRangeS(temp_image, hsv_min, hsv_max, frame1_1C);
- cvSet(temp_image, cvScalar(0, 0, 0, 0));
- cvCopy(frame, temp_image, frame1_1C);
- cvConvertImage(temp_image, frame1_1C);
- /* Convert whatever the AVI image format is into OpenCV's preferred format.
- * AND flip the image vertically. Flip is a shameless hack. OpenCV reads
- * in AVIs upside-down by default. (No comment :-))
- */
- // cvConvertImage(frame, frame1_1C);
- /* Get the second frame of video. Sample principles as the first. */
- frame = cvQueryFrame(input_video);
- if (frame == NULL)
- {
- fprintf(stderr, "Error: Hmm. The end came sooner than we thought.\n");
- return -1;
- }
- allocateOnDemand(&frame2_1C, frame_size, IPL_DEPTH_8U, 1);
- cvCvtColor(frame, temp_image, CV_BGR2HSV);
- cvInRangeS(temp_image, hsv_min, hsv_max, frame2_1C);
- cvSet(temp_image, cvScalar(0, 0, 0, 0));
- cvCopy(frame, temp_image, frame2_1C);
- cvConvertImage(temp_image, frame2_1C);
- // cvConvertImage(frame, frame2_1C);
- /* Shi and Tomasi Feature Tracking! */
- /* Preparation: Allocate the necessary storage. */
- allocateOnDemand(&eig_image, frame_size, IPL_DEPTH_32F, 3);
- allocateOnDemand(&temp_image, frame_size, IPL_DEPTH_32F, 3);
- /* Preparation: This array will contain the features found in frame 1. */
- CvPoint2D32f frame1_features[NO_FEATURES];
- /* Preparation: BEFORE the function call this variable is the array size
- * (or the maximum number of features to find). AFTER the function call
- * this variable is the number of features actually found.
- */
- int number_of_features;
- /* I'm hardcoding this at NO_FEATURES. But you should make this a #define so that you can
- * change the number of features you use for an accuracy/speed tradeoff analysis.
- */
- number_of_features = NO_FEATURES;
- /* Actually run the Shi and Tomasi algorithm!!
- * "frame1_1C" is the input image.
- * "eig_image" and "temp_image" are just workspace for the algorithm.
- * The first ".01" specifies the minimum quality of the features (based on the eigenvalues).
- * The second ".01" specifies the minimum Euclidean distance between features.
- * "NULL" means use the entire input image. You could point to a part of the image.
- * WHEN THE ALGORITHM RETURNS:
- * "frame1_features" will contain the feature points.
- * "number_of_features" will be set to a value <= NO_FEATURES indicating the number of feature points found.
- */
- cvGoodFeaturesToTrack(frame1_1C, eig_image, temp_image, frame1_features, &number_of_features, .05, .001, NULL);
- /* Pyramidal Lucas Kanade Optical Flow! */
- /* This array will contain the locations of the points from frame 1 in frame 2. */
- CvPoint2D32f frame2_features[NO_FEATURES];
- /* The i-th element of this array will be non-zero if and only if the i-th feature of
- * frame 1 was found in frame 2.
- */
- char optical_flow_found_feature[NO_FEATURES];
- /* The i-th element of this array is the error in the optical flow for the i-th feature
- * of frame1 as found in frame 2. If the i-th feature was not found (see the array above)
- * I think the i-th entry in this array is undefined.
- */
- float optical_flow_feature_error[NO_FEATURES];
- /* This is the window size to use to avoid the aperture problem (see slide
- "Optical Flow: Overview"). */
- CvSize optical_flow_window = cvSize(3, 3);
- /* This termination criteria tells the algorithm to stop when it has either done 20 iterations or when
- * epsilon is better than .3. You can play with these parameters for speed vs. accuracy but these values
- * work pretty well in many situations.
- */
- CvTermCriteria optical_flow_termination_criteria = cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, .1);
- /* This is some workspace for the algorithm.
- * (The algorithm actually carves the image into pyramids of different resolutions.)
- */
- allocateOnDemand(&pyramid1, frame_size, IPL_DEPTH_8U, 1);
- allocateOnDemand(&pyramid2, frame_size, IPL_DEPTH_8U, 1);
- /* Actually run Pyramidal Lucas Kanade Optical Flow!!
- * "frame1_1C" is the first frame with the known features.
- * "frame2_1C" is the second frame where we want to find the first frame's features.
- * "pyramid1" and "pyramid2" are workspace for the algorithm.
- * "frame1_features" are the features from the first frame.
- * "frame2_features" is the (outputted) locations of those features in the second frame.
- * "number_of_features" is the number of features in the frame1_features array.
- * "optical_flow_window" is the size of the window to use to avoid the aperture problem.
- * "5" is the maximum number of pyramids to use. 0 would be just one level.
- * "optical_flow_found_feature" is as described above (non-zero iff feature found by the flow).
- * "optical_flow_feature_error" is as described above (error in the flow for this feature).
- * "optical_flow_termination_criteria" is as described above (how long the algorithm should look).
- * "0" means disable enhancements. (For example, the second aray isn't pre-initialized with guesses.)
- */
- cvCalcOpticalFlowPyrLK(frame1_1C, frame2_1C, pyramid1, pyramid2, frame1_features, frame2_features,
- number_of_features, optical_flow_window, 5, optical_flow_found_feature,
- optical_flow_feature_error, optical_flow_termination_criteria, 0);
- double xt = 0, yt = 0;
- /* For fun (and debugging :)), let's draw the flow field. */
- for(int i = 0; i < number_of_features; i++)
- {
- /* If Pyramidal Lucas Kanade didn't really find the feature, skip it. */
- if (optical_flow_found_feature[i] == 0)
- continue;
- CvPoint p,q;
- p.x = (int) frame1_features[i].x;
- p.y = (int) frame1_features[i].y;
- q.x = (int) frame2_features[i].x;
- q.y = (int) frame2_features[i].y;
- double hypotenuse;
- hypotenuse = sqrt(square(p.y - q.y) + square(p.x - q.x));
- if (hypotenuse < 10 || hypotenuse > 100)
- continue;
- int line_thickness;
- line_thickness = 1;
- cvCircle(frame1, p, 2, CV_RGB(0, 0, 255), line_thickness, CV_AA);
- cvCircle(frame1, q, 2, CV_RGB(0, 255, 0), line_thickness, CV_AA);
- cvLine(frame1, p, q, CV_RGB(255, 0, 255), line_thickness, CV_AA);
- xt += (p.x - q.x);
- yt += (p.y - q.y);
- }
- CvPoint o, p;
- o.x = frame_size.width / 2;
- o.y = frame_size.height / 2;
- p.x = o.x + xt;
- p.y = o.y + yt;
- double angle;
- double hypotenuse;
- angle = atan2(yt, xt);
- hypotenuse = sqrt(square(yt) + square(xt));
- int line_thickness;
- line_thickness = 1;
- /* Here we lengthen the arrow by a factor of three. */
- p.x = (int) (o.x - hypotenuse * cos(angle));
- p.y = (int) (o.y - hypotenuse * sin(angle));
- /* Now we draw the main line of the arrow. */
- /* "frame1" is the frame to draw on.
- * "p" is the point where the line begins.
- * "q" is the point where the line stops.
- * "CV_AA" means antialiased drawing.
- * "0" means no fractional bits in the center cooridinate or radius.
- */
- cvLine(frame1, o, p, CV_RGB(255, 0, 0), line_thickness, CV_AA, 0);
- /* Compute relevant direction. */
- xt = (abs(xt) > MIN_VECT_COMP) ? (50 * (xt > 0 ? -1 : 1)) : 0;
- yt = (abs(yt) > MIN_VECT_COMP) ? (50 * (yt > 0 ? -1 : 1)) : 0;
- p.x = o.x + xt;
- p.y = o.y + yt;
- line_thickness = 5;
- cvLine(frame1, o, p, CV_RGB(255, 255, 0), line_thickness, CV_AA, 0);
- /* Now draw the tips of the arrow. I do some scaling so that the
- * tips look proportional to the main line of the arrow.
- */
- o.x = (int) (p.x + 9 * cos(angle + pi / 4));
- o.y = (int) (p.y + 9 * sin(angle + pi / 4));
- cvLine(frame1, o, p, CV_RGB(255, 255, 0), line_thickness, CV_AA, 0);
- o.x = (int) (p.x + 9 * cos(angle - pi / 4));
- o.y = (int) (p.y + 9 * sin(angle - pi / 4));
- cvLine(frame1, o, p, CV_RGB(255, 255, 0), line_thickness, CV_AA, 0);
- printf("%f %f %d\n", xt, yt, number_of_features);
- /* Now display the image we drew on. Recall that "Optical Flow" is the name of
- * the window we created above.
- */
- cvShowImage("Optical Flow", frame1);
- cvShowImage("N", frame1_1C);
- cvShowImage("N+1", frame2_1C);
- /* And wait for the user to press a key (so the user has time to look at the image).
- * If the argument is 0 then it waits forever otherwise it waits that number of milliseconds.
- * The return value is the key the user pressed.
- */
- key_pressed = cvWaitKey(10);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment