// View difference between Paste ID: zYTiuRyA and F9R5qpPz

/*

Compile with:
g++ -Wall -O2 -g -D__STDC_CONSTANT_MACROS -o video-sound-transcoding-test video-sound-transcoding-test.cpp -lm -lavdevice -lavformat -lavfilter -lavcodec -lswresample -lswscale -lavutil

*/
7
8		extern "C"
9		{
10		#include <libavutil/avutil.h>
11		#include <libavutil/parseutils.h>
12		#include <libavutil/mathematics.h>
13		#include <libavutil/opt.h>
14		#include <libavcodec/avcodec.h>
15		#include <libavformat/avformat.h>
16		#include <libswscale/swscale.h>
17		#include <libswresample/swresample.h>
18		#include <libavfilter/avfilter.h>
19		#include <libavfilter/avfiltergraph.h>
20		#include <libavfilter/avcodec.h>
21		#include <libavfilter/buffersink.h>
22		#include <libavfilter/buffersrc.h>
23		#include <libavutil/imgutils.h>
24		}
25
26		#include <iostream>
27
28		using namespace std;
29
30		//
31		// Compilation error work around
32		//
33		#ifndef av_ts2timestr
34		#include <vector>
35		#define AV_TS_MAX_STRING_SIZE 32
36		static inline string av_ts_make_time_string(int64_t ts, AVRational *tb)
37		{
38		vector<char> buf(AV_TS_MAX_STRING_SIZE);
39		if (ts == AV_NOPTS_VALUE) snprintf(buf.data(), AV_TS_MAX_STRING_SIZE, "NOPTS");
40		else snprintf(buf.data(), AV_TS_MAX_STRING_SIZE, "%.6g", av_q2d(tb) ts);
41		return string(buf.data());
42		}
43		#define av_ts2timestr(ts, tb) av_ts_make_time_string(ts, tb)
44		#endif
45
46		struct MediaContext
47		{
48		MediaContext()
49		: formatCtx(0),
50		videoCodec(0),
51		audioCodec(0),
52		videoCodecCtx(0),
53		audioCodecCtx(0),
54		videoStream(0),
55		audioStream(0),
56		swsCtx(0),
57		videoStreamIndex(-1),
58		audioStreamIndex(-1)
59		{}
60
61		// input or output file name or URI
62		string resource;
63
64		AVFormatContext *formatCtx;
65		AVCodec *videoCodec;
66		AVCodec *audioCodec;
67		AVCodecContext *videoCodecCtx;
68		AVCodecContext *audioCodecCtx;
69		AVStream *videoStream;
70		AVStream *audioStream;
71		SwsContext *swsCtx;
72
73		int videoStreamIndex;
74		int audioStreamIndex;
75		};
76
77
78		MediaContext inCtx; // Media source
79		MediaContext outCtx; // Media destination
80
81
/**
 * Print the one-line command-line synopsis to standard output.
 *
 * @param progName  name the program was invoked with
 */
static void usage(const std::string& progName)
{
    std::cout << "Use: "
              << progName
              << " <input media file> <output media file>"
              << std::endl;
}
87
88
89
90		static
91		int openCodecContex(MediaContext &ctx, AVMediaType type)
92		{
93		int ret = -1;
94
95		AVStream **stream;
96		AVCodecContext **codecCtx;
97		AVCodec **codec;
98		AVFormatContext *formatCtx;
99		int *streamIndex;
100
101		formatCtx = ctx.formatCtx;
102
103		if (type == AVMEDIA_TYPE_VIDEO)
104		{
105		stream = &ctx.videoStream;
106		codecCtx = &ctx.videoCodecCtx;
107		streamIndex = &ctx.videoStreamIndex;
108		codec = &ctx.videoCodec;
109		}
110		else if (type == AVMEDIA_TYPE_AUDIO)
111		{
112		stream = &ctx.audioStream;
113		codecCtx = &ctx.audioCodecCtx;
114		streamIndex = &ctx.audioStreamIndex;
115		codec = &ctx.audioCodec;
116		}
117		else
118		{
119		cerr << "Unknown media type: " << av_get_media_type_string(type) << endl;
120		return -1;
121		}
122
123
124		ret = av_find_best_stream(formatCtx, type, -1, -1, 0, 0);
125		if (ret >= 0)
126		{
127		*stream = formatCtx->streams[ret];
128		codecCtx = (stream)->codec;
129
130		codec = avcodec_find_decoder((codecCtx)->codec_id);
131
132		if (!(*codec))
133		{
134		cerr << "Failed to find " << (*codecCtx)->codec_name << " codec" << endl;
135		return -1;
136		}
137
138		int result = avcodec_open2(codecCtx, codec, 0);
139		if (result < 0)
140		{
141		cerr << "Failed to open " << (*codec)->name << " codec" << endl;
142		return -1;
143		}
144		}
145
146		*streamIndex = ret;
147
148		return ret;
149		}
150
151
152
153		static
154		int addStream(MediaContext &ctx, AVMediaType type)
155		{
156		AVCodecID codecId;
157		AVStream **stream;
158		AVCodecContext **codecCtx;
159		AVCodec **codec;
160		AVFormatContext *formatCtx;
161		int *streamIndex;
162
163		formatCtx = ctx.formatCtx;
164
165		if (type == AVMEDIA_TYPE_VIDEO)
166		{
167		stream = &ctx.videoStream;
168		codecCtx = &ctx.videoCodecCtx;
169		streamIndex = &ctx.videoStreamIndex;
170		codec = &ctx.videoCodec;
171		codecId = formatCtx->oformat->video_codec;
172		}
173		else if (type == AVMEDIA_TYPE_AUDIO)
174		{
175		stream = &ctx.audioStream;
176		codecCtx = &ctx.audioCodecCtx;
177		streamIndex = &ctx.audioStreamIndex;
178		codec = &ctx.audioCodec;
179		codecId = formatCtx->oformat->audio_codec;
180		}
181		else
182		{
183		cerr << "Unknown media type: " << av_get_media_type_string(type) << endl;
184		return -1;
185		}
186
187
188		*codec = avcodec_find_encoder(codecId);
189		if (!(*codec))
190		{
191		cerr << "Can't found codec" << endl;
192		return -1;
193		}
194
195		stream = avformat_new_stream(ctx.formatCtx, codec);
196		if (!*stream)
197		{
198		cerr << "Can't alloc stream" << endl;
199		return -1;
200		}
201
202		codecCtx = (stream)->codec;
203
204		avcodec_get_context_defaults3(codecCtx, codec);
205
206		(*codecCtx)->codec_id = codecId;
207
208		if (formatCtx->oformat->flags & AVFMT_GLOBALHEADER)
209		(*codecCtx)->flags \|= CODEC_FLAG_GLOBAL_HEADER;
210
211		*streamIndex = formatCtx->nb_streams - 1;
212		return *streamIndex;
213		}
214
215
216		static
217		AVFrame *allocVideoFrame(PixelFormat pixFmt, int width, int height)
218		{
219		if (pixFmt == PIX_FMT_NONE \|\| width <= 0 \|\| height <= 0)
220		{
221		cerr << "Invalid picture params: format:" << pixFmt << ", w:" << width << ", h:" << height << endl;
222		return 0;
223		}
224
225		AVFrame *outFrame = avcodec_alloc_frame();
226		if (!outFrame)
227		return outFrame;
228
229		outFrame->width = width;
230		outFrame->height = height;
231		outFrame->format = pixFmt;
232
233		int size = avpicture_get_size(pixFmt, width, height);
234		uint8_t *pictBuf = new uint8_t[size];
235
236		avpicture_fill((AVPicture*)outFrame,
237		pictBuf,
238		pixFmt, width, height);
239
240		cout << "Framebuffer: " << (void)pictBuf << ", " << (void)outFrame->data[0] << endl;
241
242		return outFrame;
243		}
244
245
246		static
247		void freeVideoFrame(AVFrame **frame)
248		{
249		if (frame == 0 \|\| *frame == 0)
250		{
251		return;
252		}
253
254		AVFrame ptr = frame;
255
256		delete [] ptr->data[0];
257
258		avcodec_free_frame(frame);
259		*frame = 0;
260		}
261
262
263		/* Helpers to output timebases */
264		static
265		inline ostream& operator<< (ostream& ost, const AVRational &value)
266		{
267		ost << value.num << "/" << value.den;
268		if (value.num != 0)
269		{
270		ost << " (1/" << (double)value.den/(double)value.num << ")";
271		}
272		return ost;
273		}
274
275
276		int main(int argc, char **argv)
277		{
278		// register all formats, protocols and codecs
279		av_register_all();
280		avformat_network_init();
281
282		int stat;
283
284		if (argc < 3)
285		{
286		usage(argv[0]);
287		return 1;
288		}
289
290		inCtx.resource = argv[1];
291		outCtx.resource = argv[2];
292
293		////
294		//// Prepare input file
295		////
296
297		// Open input file
298	-	// TODO
298	+
299		{
300		cerr << "Can't open source file: " << inCtx.resource << endl;
301		return 1;
302		}
303	-	// TODO
303	+
304		// Retrieve stream information
305		if (avformat_find_stream_info(inCtx.formatCtx, 0) < 0)
306		{
307		cerr << "Can't find stream information" << endl;
308		return 1;
309		}
310
311		// Walk via streams
312		if (openCodecContex(inCtx, AVMEDIA_TYPE_VIDEO) >= 0)
313		{
314		cout << "in: VideoStream timebase: " << inCtx.videoStream->time_base << endl;
315		cout << "in: VideoCoder timebase: " << inCtx.videoCodecCtx->time_base << endl;
316		}
317
318		if (openCodecContex(inCtx, AVMEDIA_TYPE_AUDIO) >= 0)
319		{
320		cout << "in: AudioStream timebase: " << inCtx.audioStream->time_base << endl;
321		cout << "in: AudioCoder timebase: " << inCtx.audioCodecCtx->time_base << endl;
322		}
323
324		av_dump_format(inCtx.formatCtx, 0, inCtx.resource.c_str(), 0);
325
326
327
328
329		////
330		//// Prepare output file
331		////
332
333		// alloc context
334		outCtx.formatCtx = avformat_alloc_context();
335
336		outCtx.formatCtx->iformat = 0;
337		outCtx.formatCtx->oformat = av_guess_format(0, outCtx.resource.c_str(), 0);
338
339		if (!outCtx.formatCtx->oformat)
340		{
341		cout << "Fall back to MPEGTS format" << endl;
342		outCtx.formatCtx->oformat = av_guess_format("mpegts", 0, 0);
343
344		if (!outCtx.formatCtx->oformat)
345		{
346		cerr << "Can't found output format for file: " << outCtx.resource << endl;
347		return 1;
348		}
349		}
350
351
352		// Setup video stream
353		if (inCtx.videoStreamIndex >= 0 && addStream(outCtx, AVMEDIA_TYPE_VIDEO) >= 0)
354		{
355		outCtx.videoCodecCtx->bit_rate = 400000;
356
357		outCtx.videoCodecCtx->width = inCtx.videoCodecCtx->width;
358		outCtx.videoCodecCtx->height = inCtx.videoCodecCtx->height;
359		//outCtx.videoCodecCtx->width = 640;
360		//outCtx.videoCodecCtx->height = 480;
361
362
363		outCtx.videoCodecCtx->time_base.den = 25;
364		outCtx.videoCodecCtx->time_base.num = 1;
365
366		outCtx.videoCodecCtx->gop_size = 12;
367		outCtx.videoCodecCtx->pix_fmt = PIX_FMT_YUV420P;
368
369		if (outCtx.videoCodec->pix_fmts)
370	-	if (outCtx.videoCodecCtx->pix_fmt != inCtx.videoCodecCtx->pix_fmt)
370	+
371		outCtx.videoCodecCtx->pix_fmt = *outCtx.videoCodec->pix_fmts;
372		}
373
374		if (outCtx.videoCodecCtx->codec_id == AV_CODEC_ID_MPEG2VIDEO)
375		{
376		outCtx.videoCodecCtx->max_b_frames = 2;
377		}
378
379		if (outCtx.videoCodecCtx->codec_id == AV_CODEC_ID_MPEG1VIDEO)
380		{
381		outCtx.videoCodecCtx->mb_decision = 2;
382		}
383
384		// Open codec
385		if (avcodec_open2(outCtx.videoCodecCtx, outCtx.videoCodec, 0) < 0)
386		{
387		cerr << "Can't open output video codec" << endl;
388		return 1;
389		}
390
391		if (outCtx.videoCodecCtx->pix_fmt != inCtx.videoCodecCtx->pix_fmt \|\|
392		outCtx.videoCodecCtx->width != inCtx.videoCodecCtx->width \|\|
393		outCtx.videoCodecCtx->height != inCtx.videoCodecCtx->height)
394		{
395		outCtx.swsCtx = sws_getContext(inCtx.videoCodecCtx->width,
396		inCtx.videoCodecCtx->height,
397		inCtx.videoCodecCtx->pix_fmt,
398		outCtx.videoCodecCtx->width,
399		outCtx.videoCodecCtx->height,
400		outCtx.videoCodecCtx->pix_fmt,
401		SWS_BICUBIC, 0, 0, 0);
402		if (!outCtx.swsCtx)
403		{
404		cerr << "Can't initialize video conversion context" << endl;
405		return 1;
406		}
407		}
408
409		outCtx.videoStream->time_base = outCtx.videoCodecCtx->time_base;
410
411		cout << "ou: VideoStream timebase: " << outCtx.videoStream->time_base << endl;
412		cout << "ou: VideoCoder timebase: " << outCtx.videoCodecCtx->time_base << endl;
413		}
414
415
416		// Setup audio stream
417		if (inCtx.audioStreamIndex >= 0 && addStream(outCtx, AVMEDIA_TYPE_AUDIO) >= 0)
418		{
419		outCtx.audioStream->id = 1; // WHAT IS IT???
420
421		outCtx.audioCodecCtx->sample_fmt = inCtx.audioCodecCtx->sample_fmt;
422		outCtx.audioCodecCtx->bit_rate = inCtx.audioCodecCtx->bit_rate;
423		outCtx.audioCodecCtx->sample_rate = inCtx.audioCodecCtx->sample_rate;
424		outCtx.audioCodecCtx->channels = inCtx.audioCodecCtx->channels;
425		outCtx.audioCodecCtx->channel_layout = inCtx.audioCodecCtx->channel_layout;
426
427		/*if (outCtx.audioCodec->supported_samplerates)
428		{
429		outCtx.audioCodecCtx->sample_rate = *outCtx.audioCodec->supported_samplerates;
430		}*/
431
432		// Open codec
433		if (avcodec_open2(outCtx.audioCodecCtx, outCtx.audioCodec, 0) < 0)
434		{
435		cerr << "Can't open output audio codec" << endl;
436		return 1;
437		}
438
439		outCtx.audioStream->time_base = outCtx.audioCodecCtx->time_base;
440
441		cout << "ou: AudioStream timebase: " << outCtx.audioStream->time_base << endl;
442		cout << "ou: AudioCoder timebase: " << outCtx.audioCodecCtx->time_base << endl;
443		}
444
445		av_dump_format(outCtx.formatCtx, 0, outCtx.resource.c_str(), 1);
446
447		stat = avio_open2(&outCtx.formatCtx->pb, outCtx.resource.c_str(), AVIO_FLAG_WRITE, 0, 0);
448		if (stat < 0)
449		{
450		cerr << "Can't open output file: " << outCtx.resource << endl;
451		return 1;
452		}
453
454		// write header
455		if (avformat_write_header(outCtx.formatCtx, 0) < 0)
456		{
457		cerr << "Can't write header to output file" << endl;
458		return 1;
459		}
460
461
462		////
463		//// Transcode
464		////
465
466		cout << "streams: " << inCtx.videoStreamIndex << "/" << inCtx.audioStreamIndex << endl;
467		cout << "streams: " << outCtx.videoStreamIndex << "/" << outCtx.audioStreamIndex << endl;
468
469		AVFrame *frame = avcodec_alloc_frame();
470		AVFrame *videoFrame = 0;
471
472		if (outCtx.swsCtx)
473		{
474		videoFrame = allocVideoFrame(outCtx.videoCodecCtx->pix_fmt,
475		outCtx.videoCodecCtx->width,
476		outCtx.videoCodecCtx->height);
477		}
478
479
480		AVPacket pkt;
481		av_init_packet(&pkt);
482		pkt.data = 0;
483		pkt.size = 0;
484
485		int gotFrame;
486		uint64_t inputAudioSamples = 0;
487
488		while (av_read_frame(inCtx.formatCtx, &pkt) >= 0)
489		{
490		stat = -1;
491		AVRational outPktTimeBase;
492		AVPacket outPkt;
493		int gotPacket = 0;
494		av_init_packet(&outPkt);
495		outPkt.data = 0;
496		outPkt.size = 0;
497
498		if (pkt.stream_index == inCtx.videoStreamIndex)
499		{
500		stat = avcodec_decode_video2(inCtx.videoCodecCtx, frame, &gotFrame, &pkt);
501		if (stat < 0)
502		{
503		cerr << "Can't decode video frame" << endl;
504		return 1;
505		}
506
507		if (gotFrame)
508		{
509		cout << "video_frame coded_n:" << frame->coded_picture_number
510		<< " pts:" << av_ts2timestr(frame->pts, &inCtx.videoCodecCtx->time_base)
511		<< endl;
512
513
514		if (outCtx.swsCtx)
515		{
516		sws_scale(outCtx.swsCtx,
517		(const uint8_t * const *)frame->data, frame->linesize, 0, inCtx.videoCodecCtx->height,
518		videoFrame->data, videoFrame->linesize);
519		}
520	-	if (frame->pts == AV_NOPTS_VALUE)
520	+
521		{
522	-	AVRational samplesRateInv = {1, outCtx.audioCodecCtx->sample_rate};
522	+
523	-	int64_t pts = inputAudioSamples;
523	+
524	-	frame->pts = av_rescale_q(pts, samplesRateInv, inCtx.audioCodecCtx->time_base);
524	+
525		// Encode
526		stat = avcodec_encode_video2(outCtx.videoCodecCtx, &outPkt, videoFrame, &gotPacket);
527
528		if (stat >= 0 && gotPacket)
529		{
530		if (outCtx.videoCodecCtx->coded_frame->pts != AV_NOPTS_VALUE)
531	-	<< " timebase: " << inCtx.audioCodecCtx->time_base.num << "/" << inCtx.audioCodecCtx->time_base.den
531	+
532		outPkt.pts = av_rescale_q(outCtx.videoCodecCtx->coded_frame->pts,
533		outCtx.videoCodecCtx->time_base,
534		outCtx.videoStream->time_base);
535		}
536
537		if (outPkt.dts == AV_NOPTS_VALUE)
538		{
539		outPkt.dts = outPkt.pts;
540		}
541		else
542		{
543		outPkt.dts = av_rescale_q(outPkt.dts,
544		outCtx.videoCodecCtx->time_base,
545		outCtx.videoStream->time_base);
546		}
547
548		cout << "Coded PTS: "
549		<< av_ts2timestr(outPkt.pts,
550		&outCtx.videoStream->time_base)
551		<< ", DTS: "
552		<< av_ts2timestr(outPkt.dts,
553		&outCtx.videoStream->time_base)
554		<< endl;
555
556
557		if (outCtx.videoCodecCtx->coded_frame->key_frame)
558		{
559		outPkt.flags \|= AV_PKT_FLAG_KEY;
560		}
561
562		outPkt.stream_index = outCtx.videoStreamIndex;
563		outPktTimeBase = outCtx.videoStream->time_base;
564		}
565		}
566		}
567		else if (pkt.stream_index == inCtx.audioStreamIndex)
568		{
569		stat = avcodec_decode_audio4(inCtx.audioCodecCtx, frame, &gotFrame, &pkt);
570		if (stat < 0)
571		{
572		cerr << "Can't decode audio frame" << endl;
573		return 1;
574		}
575
576		if (gotFrame)
577		{
578		// If this block present, audio muxed incorrectly
579		// if (frame->pts == AV_NOPTS_VALUE)
580		// {
581		// AVRational samplesRateInv = {1, inCtx.audioCodecCtx->sample_rate};
582		// int64_t pts = inputAudioSamples;
583		// frame->pts = av_rescale_q(pts, samplesRateInv, inCtx.audioCodecCtx->time_base);
584		// }
585
586		inputAudioSamples += frame->nb_samples;
587
588		cout << "audio_frame nb_samples:" << frame->nb_samples
589		<< " pts:" << av_ts2timestr(frame->pts, &inCtx.audioCodecCtx->time_base)
590		<< " timebase: " << inCtx.audioCodecCtx->time_base
591		<< ", pkt pts: " << pkt.pts << "/" << av_ts2timestr(pkt.pts, &inCtx.audioCodecCtx->time_base)
592		<< ", pkt dts: " << av_ts2timestr(pkt.dts, &inCtx.audioCodecCtx->time_base)
593		<< endl;
594
595		// Encode
596		stat = avcodec_encode_audio2(outCtx.audioCodecCtx, &outPkt, frame, &gotPacket);
597		if (stat >= 0 && gotPacket)
598		{
599		cout << "Coded PTS: "
600		<< av_ts2timestr(outPkt.pts,
601		&outCtx.audioStream->time_base)
602		<< endl;
603
604		outPkt.stream_index = outCtx.audioStreamIndex;
605		outPktTimeBase = outCtx.audioStream->time_base;
606		}
607		}
608
609		}
610
611		// Muxing
612		if (stat >= 0 && gotPacket && outPkt.data && outPkt.size > 0)
613		{
614		cout << "write frame st:" << outPkt.stream_index
615		<< " pts:" << av_ts2timestr(outPkt.pts, &outPktTimeBase)
616		<< endl;
617		stat = av_interleaved_write_frame(outCtx.formatCtx, &outPkt);
618		if (stat < 0)
619		{
620		cerr << "Error while writing frame for stream: " << outPkt.stream_index << endl;
621		return 1;
622		}
623		}
624		}
625
626		// write trailer
627		av_write_trailer(outCtx.formatCtx);
628
629		//
630		// Free resources
631		//
632
633		// input
634		if (inCtx.videoStream)
635		avcodec_close(inCtx.videoStream->codec);
636		if (inCtx.audioStream)
637		avcodec_close(inCtx.audioStream->codec);
638
639		avformat_close_input(&inCtx.formatCtx);
640
641		// output
642		if (outCtx.videoStream)
643		avcodec_close(outCtx.videoStream->codec);
644		if (outCtx.audioStream)
645		avcodec_close(outCtx.audioStream->codec);
646
647		for (uint i = 0; i < outCtx.formatCtx->nb_streams; ++i)
648		{
649		av_freep(&outCtx.formatCtx->streams[i]->codec);
650		av_freep(&outCtx.formatCtx->streams[i]);
651		}
652
653		if (outCtx.swsCtx)
654		{
655		sws_freeContext(outCtx.swsCtx);
656		freeVideoFrame(&videoFrame);
657		}
658
659		avio_close(outCtx.formatCtx->pb);
660
661		av_free(outCtx.formatCtx);
662
663
664		return 0;
665		}