View difference between Paste ID: zYTiuRyA and F9R5qpPz
SHOW: | | - or go back to the newest paste.
1
/*
2
3
  Compile with:
4
  g++ -Wall -O2 -g -D__STDC_CONSTANT_MACROS  -o video-sound-transcoding-test video-sound-transcoding-test.cpp -lm -lavdevice -lavformat -lavfilter -lavcodec -lswresample -lswscale -lavutil
5
6
*/
7
8
extern "C"
9
{
10
#include <libavutil/avutil.h>
11
#include <libavutil/parseutils.h>
12
#include <libavutil/mathematics.h>
13
#include <libavutil/opt.h>
14
#include <libavcodec/avcodec.h>
15
#include <libavformat/avformat.h>
16
#include <libswscale/swscale.h>
17
#include <libswresample/swresample.h>
18
#include <libavfilter/avfilter.h>
19
#include <libavfilter/avfiltergraph.h>
20
#include <libavfilter/avcodec.h>
21
#include <libavfilter/buffersink.h>
22
#include <libavfilter/buffersrc.h>
23
#include <libavutil/imgutils.h>
24
}
25
26
#include <iostream>
27
28
using namespace std;
29
30
//
31
// Compilation error workaround
32
//
33
#ifndef av_ts2timestr
34
#include <vector>
35
#define AV_TS_MAX_STRING_SIZE 32
36
/**
 * Render a timestamp as text for log output.
 *
 * @param ts  timestamp expressed in units of *tb, or AV_NOPTS_VALUE
 * @param tb  time base used to scale ts; must not be null
 * @return    "NOPTS" when ts is AV_NOPTS_VALUE, otherwise ts multiplied by
 *            the time base and printed with "%.6g"
 */
static inline string av_ts_make_time_string(int64_t ts, AVRational *tb)
{
    char text[AV_TS_MAX_STRING_SIZE];

    if (ts != AV_NOPTS_VALUE)
    {
        snprintf(text, AV_TS_MAX_STRING_SIZE, "%.6g", av_q2d(*tb) * ts);
    }
    else
    {
        snprintf(text, AV_TS_MAX_STRING_SIZE, "NOPTS");
    }

    return string(text);
}
43
#define av_ts2timestr(ts, tb) av_ts_make_time_string(ts, tb)
44
#endif
45
46
struct MediaContext
47
{
48
    MediaContext()
49
        : formatCtx(0),
50
          videoCodec(0),
51
          audioCodec(0),
52
          videoCodecCtx(0),
53
          audioCodecCtx(0),
54
          videoStream(0),
55
          audioStream(0),
56
          swsCtx(0),
57
          videoStreamIndex(-1),
58
          audioStreamIndex(-1)
59
    {}
60
61
    // input or output file name or URI
62
    string resource;
63
64
    AVFormatContext *formatCtx;
65
    AVCodec         *videoCodec;
66
    AVCodec         *audioCodec;
67
    AVCodecContext  *videoCodecCtx;
68
    AVCodecContext  *audioCodecCtx;
69
    AVStream        *videoStream;
70
    AVStream        *audioStream;
71
    SwsContext      *swsCtx;
72
73
    int              videoStreamIndex;
74
    int              audioStreamIndex;
75
};
76
77
78
MediaContext inCtx;   // Media source
79
MediaContext outCtx;  // Media destination
80
81
82
static
83
void usage(const string& progName)
84
{
85
    cout << "Use: " << progName << " <input media file> <output media file>" << endl;
86
}
87
88
89
90
static
91
int openCodecContex(MediaContext &ctx, AVMediaType type)
92
{
93
    int ret = -1;
94
95
    AVStream        **stream;
96
    AVCodecContext  **codecCtx;
97
    AVCodec         **codec;
98
    AVFormatContext *formatCtx;
99
    int             *streamIndex;
100
101
    formatCtx = ctx.formatCtx;
102
103
    if (type == AVMEDIA_TYPE_VIDEO)
104
    {
105
        stream      = &ctx.videoStream;
106
        codecCtx    = &ctx.videoCodecCtx;
107
        streamIndex = &ctx.videoStreamIndex;
108
        codec       = &ctx.videoCodec;
109
    }
110
    else if (type == AVMEDIA_TYPE_AUDIO)
111
    {
112
        stream      = &ctx.audioStream;
113
        codecCtx    = &ctx.audioCodecCtx;
114
        streamIndex = &ctx.audioStreamIndex;
115
        codec       = &ctx.audioCodec;
116
    }
117
    else
118
    {
119
        cerr << "Unknown media type: " << av_get_media_type_string(type) << endl;
120
        return -1;
121
    }
122
123
124
    ret = av_find_best_stream(formatCtx, type, -1, -1, 0, 0);
125
    if (ret >= 0)
126
    {
127
        *stream   = formatCtx->streams[ret];
128
        *codecCtx    = (*stream)->codec;
129
130
        *codec = avcodec_find_decoder((*codecCtx)->codec_id);
131
132
        if (!(*codec))
133
        {
134
            cerr << "Failed to find " << (*codecCtx)->codec_name << " codec" << endl;
135
            return -1;
136
        }
137
138
        int result = avcodec_open2(*codecCtx, *codec, 0);
139
        if (result < 0)
140
        {
141
            cerr << "Failed to open " << (*codec)->name << " codec" << endl;
142
            return -1;
143
        }
144
    }
145
146
    *streamIndex = ret;
147
148
    return ret;
149
}
150
151
152
153
static
154
int addStream(MediaContext &ctx, AVMediaType type)
155
{
156
    AVCodecID       codecId;
157
    AVStream        **stream;
158
    AVCodecContext  **codecCtx;
159
    AVCodec         **codec;
160
    AVFormatContext *formatCtx;
161
    int             *streamIndex;
162
163
    formatCtx = ctx.formatCtx;
164
165
    if (type == AVMEDIA_TYPE_VIDEO)
166
    {
167
        stream      = &ctx.videoStream;
168
        codecCtx    = &ctx.videoCodecCtx;
169
        streamIndex = &ctx.videoStreamIndex;
170
        codec       = &ctx.videoCodec;
171
        codecId     = formatCtx->oformat->video_codec;
172
    }
173
    else if (type == AVMEDIA_TYPE_AUDIO)
174
    {
175
        stream      = &ctx.audioStream;
176
        codecCtx    = &ctx.audioCodecCtx;
177
        streamIndex = &ctx.audioStreamIndex;
178
        codec       = &ctx.audioCodec;
179
        codecId     = formatCtx->oformat->audio_codec;
180
    }
181
    else
182
    {
183
        cerr << "Unknown media type: " << av_get_media_type_string(type) << endl;
184
        return -1;
185
    }
186
187
188
    *codec = avcodec_find_encoder(codecId);
189
    if (!(*codec))
190
    {
191
        cerr << "Can't found codec" << endl;
192
        return -1;
193
    }
194
195
    *stream = avformat_new_stream(ctx.formatCtx, *codec);
196
    if (!*stream)
197
    {
198
        cerr << "Can't alloc stream" << endl;
199
        return -1;
200
    }
201
202
    *codecCtx = (*stream)->codec;
203
204
    avcodec_get_context_defaults3(*codecCtx, *codec);
205
206
    (*codecCtx)->codec_id = codecId;
207
208
    if (formatCtx->oformat->flags & AVFMT_GLOBALHEADER)
209
        (*codecCtx)->flags |= CODEC_FLAG_GLOBAL_HEADER;
210
211
    *streamIndex = formatCtx->nb_streams - 1;
212
    return *streamIndex;
213
}
214
215
216
static
217
AVFrame *allocVideoFrame(PixelFormat pixFmt, int width, int height)
218
{
219
    if (pixFmt == PIX_FMT_NONE || width <= 0 || height <= 0)
220
    {
221
        cerr << "Invalid picture params: format:" << pixFmt << ", w:" << width << ", h:" << height << endl;
222
        return 0;
223
    }
224
225
    AVFrame *outFrame = avcodec_alloc_frame();
226
    if (!outFrame)
227
        return outFrame;
228
229
    outFrame->width  = width;
230
    outFrame->height = height;
231
    outFrame->format = pixFmt;
232
233
    int size = avpicture_get_size(pixFmt, width, height);
234
    uint8_t *pictBuf = new uint8_t[size];
235
236
    avpicture_fill((AVPicture*)outFrame,
237
                   pictBuf,
238
                   pixFmt, width, height);
239
240
    cout << "Framebuffer: " << (void*)pictBuf << ", " << (void*)outFrame->data[0] << endl;
241
242
    return outFrame;
243
}
244
245
246
static
247
void freeVideoFrame(AVFrame **frame)
248
{
249
    if (frame == 0 || *frame == 0)
250
    {
251
        return;
252
    }
253
254
    AVFrame *ptr = *frame;
255
256
    delete [] ptr->data[0];
257
258
    avcodec_free_frame(frame);
259
    *frame = 0;
260
}
261
262
263
/* Helpers to output timebases */
264
static
265
inline ostream& operator<< (ostream& ost, const AVRational &value)
266
{
267
    ost << value.num << "/" << value.den;
268
    if (value.num != 0)
269
    {
270
        ost << " (1/" << (double)value.den/(double)value.num << ")";
271
    }
272
    return ost;
273
}
274
275
276
int main(int argc, char **argv)
277
{
278
    // register all formats, protocols and codecs
279
    av_register_all();
280
    avformat_network_init();
281
282
    int stat;
283
284
    if (argc < 3)
285
    {
286
        usage(argv[0]);
287
        return 1;
288
    }
289
290
    inCtx.resource  = argv[1];
291
    outCtx.resource = argv[2];
292
293
    ////
294
    //// Prepare input file
295
    ////
296
297
    // Open input file
298-
        // TODO
298+
299
    {
300
        cerr << "Can't open source file: " << inCtx.resource << endl;
301
        return 1;
302
    }
303-
        // TODO
303+
304
    // Retrieve stream information
305
    if (avformat_find_stream_info(inCtx.formatCtx, 0) < 0)
306
    {
307
        cerr << "Can't find stream information" << endl;
308
        return 1;
309
    }
310
311
    // Walk via streams
312
    if (openCodecContex(inCtx, AVMEDIA_TYPE_VIDEO) >= 0)
313
    {
314
        cout << "in: VideoStream timebase: " << inCtx.videoStream->time_base   << endl;
315
        cout << "in: VideoCoder  timebase: " << inCtx.videoCodecCtx->time_base << endl;
316
    }
317
318
    if (openCodecContex(inCtx, AVMEDIA_TYPE_AUDIO) >= 0)
319
    {
320
        cout << "in: AudioStream timebase: " << inCtx.audioStream->time_base   << endl;
321
        cout << "in: AudioCoder  timebase: " << inCtx.audioCodecCtx->time_base << endl;
322
    }
323
324
    av_dump_format(inCtx.formatCtx, 0, inCtx.resource.c_str(), 0);
325
326
327
328
329
    ////
330
    //// Prepare output file
331
    ////
332
333
    // alloc context
334
    outCtx.formatCtx = avformat_alloc_context();
335
336
    outCtx.formatCtx->iformat = 0;
337
    outCtx.formatCtx->oformat = av_guess_format(0, outCtx.resource.c_str(), 0);
338
339
    if (!outCtx.formatCtx->oformat)
340
    {
341
        cout << "Fall back to MPEGTS format" << endl;
342
        outCtx.formatCtx->oformat = av_guess_format("mpegts", 0, 0);
343
344
        if (!outCtx.formatCtx->oformat)
345
        {
346
            cerr << "Can't found output format for file: " << outCtx.resource << endl;
347
            return 1;
348
        }
349
    }
350
351
352
    // Setup video stream
353
    if (inCtx.videoStreamIndex >= 0 && addStream(outCtx, AVMEDIA_TYPE_VIDEO) >= 0)
354
    {
355
        outCtx.videoCodecCtx->bit_rate = 400000;
356
357
        outCtx.videoCodecCtx->width    = inCtx.videoCodecCtx->width;
358
        outCtx.videoCodecCtx->height   = inCtx.videoCodecCtx->height;
359
        //outCtx.videoCodecCtx->width    = 640;
360
        //outCtx.videoCodecCtx->height   = 480;
361
362
363
        outCtx.videoCodecCtx->time_base.den = 25;
364
        outCtx.videoCodecCtx->time_base.num = 1;
365
366
        outCtx.videoCodecCtx->gop_size      = 12;
367
        outCtx.videoCodecCtx->pix_fmt       = PIX_FMT_YUV420P;
368
369
        if (outCtx.videoCodec->pix_fmts)
370-
        if (outCtx.videoCodecCtx->pix_fmt != inCtx.videoCodecCtx->pix_fmt)
370+
371
            outCtx.videoCodecCtx->pix_fmt = *outCtx.videoCodec->pix_fmts;
372
        }
373
374
        if (outCtx.videoCodecCtx->codec_id == AV_CODEC_ID_MPEG2VIDEO)
375
        {
376
            outCtx.videoCodecCtx->max_b_frames = 2;
377
        }
378
379
        if (outCtx.videoCodecCtx->codec_id == AV_CODEC_ID_MPEG1VIDEO)
380
        {
381
            outCtx.videoCodecCtx->mb_decision = 2;
382
        }
383
384
        // Open codec
385
        if (avcodec_open2(outCtx.videoCodecCtx, outCtx.videoCodec, 0) < 0)
386
        {
387
            cerr << "Can't open output video codec" << endl;
388
            return 1;
389
        }
390
391
        if (outCtx.videoCodecCtx->pix_fmt != inCtx.videoCodecCtx->pix_fmt ||
392
            outCtx.videoCodecCtx->width   != inCtx.videoCodecCtx->width   ||
393
            outCtx.videoCodecCtx->height  != inCtx.videoCodecCtx->height)
394
        {
395
            outCtx.swsCtx = sws_getContext(inCtx.videoCodecCtx->width,
396
                                           inCtx.videoCodecCtx->height,
397
                                           inCtx.videoCodecCtx->pix_fmt,
398
                                           outCtx.videoCodecCtx->width,
399
                                           outCtx.videoCodecCtx->height,
400
                                           outCtx.videoCodecCtx->pix_fmt,
401
                                           SWS_BICUBIC, 0, 0, 0);
402
            if (!outCtx.swsCtx)
403
            {
404
                cerr << "Can't initialize video conversion context" << endl;
405
                return 1;
406
            }
407
        }
408
409
        outCtx.videoStream->time_base = outCtx.videoCodecCtx->time_base;
410
411
        cout << "ou: VideoStream timebase: " << outCtx.videoStream->time_base   << endl;
412
        cout << "ou: VideoCoder  timebase: " << outCtx.videoCodecCtx->time_base << endl;
413
    }
414
415
416
    // Setup audio stream
417
    if (inCtx.audioStreamIndex >= 0 && addStream(outCtx, AVMEDIA_TYPE_AUDIO) >= 0)
418
    {
419
        outCtx.audioStream->id = 1; // WHAT IS IT???
420
421
        outCtx.audioCodecCtx->sample_fmt     = inCtx.audioCodecCtx->sample_fmt;
422
        outCtx.audioCodecCtx->bit_rate       = inCtx.audioCodecCtx->bit_rate;
423
        outCtx.audioCodecCtx->sample_rate    = inCtx.audioCodecCtx->sample_rate;
424
        outCtx.audioCodecCtx->channels       = inCtx.audioCodecCtx->channels;
425
        outCtx.audioCodecCtx->channel_layout = inCtx.audioCodecCtx->channel_layout;
426
427
        /*if (outCtx.audioCodec->supported_samplerates)
428
        {
429
            outCtx.audioCodecCtx->sample_rate = *outCtx.audioCodec->supported_samplerates;
430
        }*/
431
432
        // Open codec
433
        if (avcodec_open2(outCtx.audioCodecCtx, outCtx.audioCodec, 0) < 0)
434
        {
435
            cerr << "Can't open output audio codec" << endl;
436
            return 1;
437
        }
438
439
        outCtx.audioStream->time_base = outCtx.audioCodecCtx->time_base;
440
441
        cout << "ou: AudioStream timebase: " << outCtx.audioStream->time_base   << endl;
442
        cout << "ou: AudioCoder  timebase: " << outCtx.audioCodecCtx->time_base << endl;
443
    }
444
445
    av_dump_format(outCtx.formatCtx, 0, outCtx.resource.c_str(), 1);
446
447
    stat = avio_open2(&outCtx.formatCtx->pb, outCtx.resource.c_str(), AVIO_FLAG_WRITE, 0, 0);
448
    if (stat < 0)
449
    {
450
        cerr << "Can't open output file: " << outCtx.resource << endl;
451
        return 1;
452
    }
453
454
    // write header
455
    if (avformat_write_header(outCtx.formatCtx, 0) < 0)
456
    {
457
        cerr << "Can't write header to output file" << endl;
458
        return 1;
459
    }
460
461
462
    ////
463
    //// Transcode
464
    ////
465
466
    cout << "streams: " << inCtx.videoStreamIndex << "/" << inCtx.audioStreamIndex << endl;
467
    cout << "streams: " << outCtx.videoStreamIndex << "/" << outCtx.audioStreamIndex << endl;
468
469
    AVFrame *frame      = avcodec_alloc_frame();
470
    AVFrame *videoFrame = 0;
471
472
    if (outCtx.swsCtx)
473
    {
474
        videoFrame = allocVideoFrame(outCtx.videoCodecCtx->pix_fmt,
475
                                     outCtx.videoCodecCtx->width,
476
                                     outCtx.videoCodecCtx->height);
477
    }
478
479
480
    AVPacket pkt;
481
    av_init_packet(&pkt);
482
    pkt.data = 0;
483
    pkt.size = 0;
484
485
    int gotFrame;
486
    uint64_t inputAudioSamples = 0;
487
488
    while (av_read_frame(inCtx.formatCtx, &pkt) >= 0)
489
    {
490
        stat = -1;
491
        AVRational outPktTimeBase;
492
        AVPacket outPkt;
493
        int      gotPacket = 0;
494
        av_init_packet(&outPkt);
495
        outPkt.data = 0;
496
        outPkt.size = 0;
497
498
        if (pkt.stream_index == inCtx.videoStreamIndex)
499
        {
500
            stat = avcodec_decode_video2(inCtx.videoCodecCtx, frame, &gotFrame, &pkt);
501
            if (stat < 0)
502
            {
503
                cerr << "Can't decode video frame" << endl;
504
                return 1;
505
            }
506
507
            if (gotFrame)
508
            {
509
                cout << "video_frame coded_n:" << frame->coded_picture_number
510
                     << " pts:" << av_ts2timestr(frame->pts, &inCtx.videoCodecCtx->time_base)
511
                     << endl;
512
513
514
                if (outCtx.swsCtx)
515
                {
516
                    sws_scale(outCtx.swsCtx,
517
                              (const uint8_t * const *)frame->data, frame->linesize, 0, inCtx.videoCodecCtx->height,
518
                              videoFrame->data, videoFrame->linesize);
519
                }
520-
                if (frame->pts == AV_NOPTS_VALUE)
520+
521
                {
522-
                    AVRational samplesRateInv = {1, outCtx.audioCodecCtx->sample_rate};
522+
523-
                    int64_t pts = inputAudioSamples;
523+
524-
                    frame->pts = av_rescale_q(pts, samplesRateInv, inCtx.audioCodecCtx->time_base);
524+
525
                // Encode
526
                stat = avcodec_encode_video2(outCtx.videoCodecCtx, &outPkt, videoFrame, &gotPacket);
527
528
                if (stat >= 0 && gotPacket)
529
                {
530
                    if (outCtx.videoCodecCtx->coded_frame->pts != AV_NOPTS_VALUE)
531-
                     << " timebase: " << inCtx.audioCodecCtx->time_base.num << "/" << inCtx.audioCodecCtx->time_base.den
531+
532
                        outPkt.pts = av_rescale_q(outCtx.videoCodecCtx->coded_frame->pts,
533
                                                  outCtx.videoCodecCtx->time_base,
534
                                                  outCtx.videoStream->time_base);
535
                    }
536
537
                    if (outPkt.dts == AV_NOPTS_VALUE)
538
                    {
539
                        outPkt.dts = outPkt.pts;
540
                    }
541
                    else
542
                    {
543
                        outPkt.dts = av_rescale_q(outPkt.dts,
544
                                                  outCtx.videoCodecCtx->time_base,
545
                                                  outCtx.videoStream->time_base);
546
                    }
547
548
                    cout << "Coded PTS: "
549
                         << av_ts2timestr(outPkt.pts,
550
                                          &outCtx.videoStream->time_base)
551
                         << ", DTS: "
552
                         << av_ts2timestr(outPkt.dts,
553
                                          &outCtx.videoStream->time_base)
554
                         << endl;
555
556
557
                    if (outCtx.videoCodecCtx->coded_frame->key_frame)
558
                    {
559
                        outPkt.flags |= AV_PKT_FLAG_KEY;
560
                    }
561
562
                    outPkt.stream_index = outCtx.videoStreamIndex;
563
                    outPktTimeBase = outCtx.videoStream->time_base;
564
                }
565
            }
566
        }
567
        else if (pkt.stream_index == inCtx.audioStreamIndex)
568
        {
569
            stat = avcodec_decode_audio4(inCtx.audioCodecCtx, frame, &gotFrame, &pkt);
570
            if (stat < 0)
571
            {
572
                cerr << "Can't decode audio frame" << endl;
573
                return 1;
574
            }
575
576
            if (gotFrame)
577
            {
578
                // If this block present, audio muxed incorrectly
579
//                if (frame->pts == AV_NOPTS_VALUE)
580
//                {
581
//                    AVRational samplesRateInv = {1, inCtx.audioCodecCtx->sample_rate};
582
//                    int64_t pts = inputAudioSamples;
583
//                    frame->pts = av_rescale_q(pts, samplesRateInv, inCtx.audioCodecCtx->time_base);
584
//                }
585
586
                inputAudioSamples += frame->nb_samples;
587
588
                cout << "audio_frame nb_samples:" << frame->nb_samples
589
                     << " pts:" << av_ts2timestr(frame->pts, &inCtx.audioCodecCtx->time_base)
590
                     << " timebase: " << inCtx.audioCodecCtx->time_base
591
                     << ", pkt pts: " << pkt.pts << "/" <<  av_ts2timestr(pkt.pts, &inCtx.audioCodecCtx->time_base)
592
                     << ", pkt dts: " << av_ts2timestr(pkt.dts, &inCtx.audioCodecCtx->time_base)
593
                     << endl;
594
595
                // Encode
596
                stat = avcodec_encode_audio2(outCtx.audioCodecCtx, &outPkt, frame, &gotPacket);
597
                if (stat >= 0 && gotPacket)
598
                {
599
                    cout << "Coded PTS: "
600
                         << av_ts2timestr(outPkt.pts,
601
                                          &outCtx.audioStream->time_base)
602
                         << endl;
603
604
                    outPkt.stream_index = outCtx.audioStreamIndex;
605
                    outPktTimeBase = outCtx.audioStream->time_base;
606
                }
607
            }
608
609
        }
610
611
        // Muxing
612
        if (stat >= 0 && gotPacket && outPkt.data && outPkt.size > 0)
613
        {
614
            cout << "write frame st:" << outPkt.stream_index
615
                 << " pts:" << av_ts2timestr(outPkt.pts, &outPktTimeBase)
616
                 << endl;
617
            stat = av_interleaved_write_frame(outCtx.formatCtx, &outPkt);
618
            if (stat < 0)
619
            {
620
                cerr << "Error while writing frame for stream: " << outPkt.stream_index << endl;
621
                return 1;
622
            }
623
        }
624
    }
625
626
    // write trailer
627
    av_write_trailer(outCtx.formatCtx);
628
629
    //
630
    // Free resources
631
    //
632
633
    // input
634
    if (inCtx.videoStream)
635
        avcodec_close(inCtx.videoStream->codec);
636
    if (inCtx.audioStream)
637
        avcodec_close(inCtx.audioStream->codec);
638
639
    avformat_close_input(&inCtx.formatCtx);
640
641
    // output
642
    if (outCtx.videoStream)
643
        avcodec_close(outCtx.videoStream->codec);
644
    if (outCtx.audioStream)
645
        avcodec_close(outCtx.audioStream->codec);
646
647
    for (uint i = 0; i < outCtx.formatCtx->nb_streams; ++i)
648
    {
649
        av_freep(&outCtx.formatCtx->streams[i]->codec);
650
        av_freep(&outCtx.formatCtx->streams[i]);
651
    }
652
653
    if (outCtx.swsCtx)
654
    {
655
        sws_freeContext(outCtx.swsCtx);
656
        freeVideoFrame(&videoFrame);
657
    }
658
659
    avio_close(outCtx.formatCtx->pb);
660
661
    av_free(outCtx.formatCtx);
662
663
664
    return 0;
665
}