// Untitled


/*
Makefile:

---------------------------------------------------
CC=g++

FFMPEG_LIBS=    libavdevice                        \
                libavformat                        \
                libavfilter                        \
                libavcodec                         \
                libswresample                      \
                libswscale                         \
                libavutil                          \

CPPFLAGS := -Wall -g -std=c++14 $(shell pkg-config --cflags $(FFMPEG_LIBS)) $(CPPFLAGS)

LDFLAGS=-g
LDLIBS := $(shell pkg-config --libs $(FFMPEG_LIBS)) -lm $(LDLIBS)

SOURCES=        MakeVideo                           \

OBJS=$(addsuffix .o,$(SOURCES))

all: $(OBJS) $(SOURCES)
---------------------------------------------------


Run with: ./MakeVideo 1.mp4   (the argument is the output file to create)


*/


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

extern "C" {
    #include <libavutil/avassert.h>
    #include <libavutil/channel_layout.h>
    #include <libavutil/opt.h>
    #include <libavutil/mathematics.h>
    #include <libavutil/timestamp.h>
    #include <libavformat/avformat.h>
    #include <libswscale/swscale.h>
    #include <libswresample/swresample.h>

};

#define     VIDEO_WIDTH         1280
#define     VIDEO_HEIGHT        720

//#define       VIDEO_WIDTH         320
//#define       VIDEO_HEIGHT        240

//#define       VIDEO_WIDTH         1920
//#define       VIDEO_HEIGHT        1080


#define     STREAM_DURATION     3.0
#define     STREAM_FRAME_RATE   20
#define     STREAM_PIX_FMT      AV_PIX_FMT_YUV420P /* default pix_fmt */
//#define STREAM_PIX_FMT        AV_PIX_FMT_YUV422P

#define SCALE_FLAGS         SWS_BICUBIC


// Clamp X to the 8-bit range [0, 255].
// X is expanded more than once, so only pass expressions without side effects.
#define CLIP(X) ( (X) > 255 ? 255 : (X) < 0 ? 0 : (X) )

// RGB -> YCbCr
// Integer arithmetic with coefficients scaled by 2^16
// (19595/65536 ~ 0.299, 38470/65536 ~ 0.587, 7471/65536 ~ 0.114).
// Every argument is parenthesized so that expressions such as CRGB2Y(r + 1, g, b) expand correctly.
#define CRGB2Y(R, G, B)     CLIP((19595 * (R) + 38470 * (G) + 7471 * (B)) >> 16)
#define CRGB2Cb(R, G, B)    CLIP(((36962 * ((B) - CRGB2Y(R, G, B))) >> 16) + 128)
#define CRGB2Cr(R, G, B)    CLIP(((46727 * ((R) - CRGB2Y(R, G, B))) >> 16) + 128)

// RGB -> YUV
// Integer arithmetic with coefficients scaled by 2^8; luma is offset by 16, chroma by 128.
#define RGB2Y(R, G, B) CLIP(( (  66 * (R) + 129 * (G) +  25 * (B) + 128) >> 8) +  16)
#define RGB2U(R, G, B) CLIP(( ( -38 * (R) -  74 * (G) + 112 * (B) + 128) >> 8) + 128)
#define RGB2V(R, G, B) CLIP(( ( 112 * (R) -  94 * (G) -  18 * (B) + 128) >> 8) + 128)


class VideoRenderer {

    private:

        typedef struct OutputStream { // a wrapper around a single output AVStream

            AVStream *st;
            AVCodecContext *enc;

            int64_t next_pts; // pts of the next frame that will be generated
            int samples_count;

            AVFrame *frame;
            AVFrame *tmp_frame;

            float t, tincr, tincr2;

            struct SwsContext *sws_ctx;
            struct SwrContext *swr_ctx;

        } OutputStream;

        struct SwsContext                           *sws_context = NULL;
        //std::vector < std::vector < RGBColor > >  imageArr;

    public:

        void log_packet( const AVFormatContext *fmt_ctx, const AVPacket *pkt ) {

            AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
            printf( "pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n", av_ts2str( pkt->pts ), av_ts2timestr( pkt->pts, time_base ), av_ts2str( pkt->dts ), av_ts2timestr( pkt->dts, time_base ), av_ts2str( pkt->duration ), av_ts2timestr( pkt->duration, time_base ), pkt->stream_index );

        }

        int write_frame( AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt ) {

            av_packet_rescale_ts( pkt, *time_base, st->time_base ); // rescale output packet timestamp values from codec to stream timebase
            pkt->stream_index = st->index;

            log_packet( fmt_ctx, pkt ); // Write the compressed frame to the media file.
            return av_interleaved_write_frame( fmt_ctx, pkt );

        }

        void add_stream( OutputStream *ost, AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id ) { // Add an output stream.

            AVCodecContext *c;
            int i;

            *codec = avcodec_find_encoder( codec_id ); // find the encoder
            if ( !( *codec ) ) {
                fprintf( stderr, "Could not find encoder for '%s'\n", avcodec_get_name( codec_id ) );
                exit( 1 );
            }

            ost->st = avformat_new_stream( oc, NULL );
            if ( !ost->st ) {
                fprintf( stderr, "Could not allocate stream\n" );
                exit( 1 );
            }
            ost->st->id = oc->nb_streams-1;
            c = avcodec_alloc_context3( *codec );
            if ( !c ) {
                fprintf( stderr, "Could not alloc an encoding context\n" );
                exit( 1 );
            }
            ost->enc = c;

            switch ( ( *codec )->type ) {
            case AVMEDIA_TYPE_AUDIO:
                c->sample_fmt  = ( *codec )->sample_fmts ? ( *codec )->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
                c->bit_rate    = 64000;
                c->sample_rate = 44100;
                if ( ( *codec )->supported_samplerates ) {
                    c->sample_rate = ( *codec )->supported_samplerates[0];
                    for ( i = 0; ( *codec )->supported_samplerates[i]; i++ ) {
                        if ( ( *codec )->supported_samplerates[i] == 44100 )
                            c->sample_rate = 44100;
                    }
                }
                c->channels         = av_get_channel_layout_nb_channels( c->channel_layout );
                c->channel_layout   = AV_CH_LAYOUT_STEREO;
                if ( ( *codec )->channel_layouts ) {
                    c->channel_layout = ( *codec )->channel_layouts[0];
                    for ( i = 0; ( *codec )->channel_layouts[i]; i++ ) {
                        if ( ( *codec )->channel_layouts[i] == AV_CH_LAYOUT_STEREO )
                            c->channel_layout = AV_CH_LAYOUT_STEREO;
                    }
                }
                c->channels        = av_get_channel_layout_nb_channels( c->channel_layout );
                ost->st->time_base = ( AVRational ){ 1, c->sample_rate };
                break;

            case AVMEDIA_TYPE_VIDEO:
                c->codec_id = codec_id;

                c->bit_rate         = 3400000;
                c->width            = VIDEO_WIDTH; //320; //1280;
                c->height           = VIDEO_HEIGHT; //240; //720;
                ost->st->time_base  = ( AVRational ){ 1, STREAM_FRAME_RATE }; // timebase: This is the fundamental unit of time ( in seconds ) in term of which frame timestamps are represented. For fixed-fps content, timebase should be 1/framerate and timestamp increments should be identical to 1.
                c->time_base        = ost->st->time_base;

                c->gop_size         = 12; // emit one intra frame every twelve frames at most
                c->pix_fmt          = STREAM_PIX_FMT;

                if ( c->codec_id == AV_CODEC_ID_MPEG2VIDEO ) {
                    c->max_b_frames = 2; // just for testing, we also add B-frames
                }

                if ( c->codec_id == AV_CODEC_ID_MPEG1VIDEO ) {
                    c->mb_decision = 2; // Needed to avoid using macroblocks in which some coeffs overflow. This does not happen with normal video, it just happens here as the motion of the chroma plane does not match the luma plane.
                }

            break;

            default:
                break;

            }

            if ( oc->oformat->flags & AVFMT_GLOBALHEADER ) c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; //Some formats want stream headers to be separate.

        }

        AVFrame *alloc_audio_frame( enum AVSampleFormat sample_fmt, uint64_t channel_layout, int sample_rate, int nb_samples ) { // audio output

            AVFrame *frame = av_frame_alloc(  );
            int ret;

            if ( !frame ) {
                fprintf( stderr, "Error allocating an audio frame\n" );
                exit( 1 );
            }

            frame->format           = sample_fmt;
            frame->channel_layout   = channel_layout;
            frame->sample_rate      = sample_rate;
            frame->nb_samples       = nb_samples;

            if ( nb_samples ) {
                ret = av_frame_get_buffer( frame, 0 );
                if ( ret < 0 ) {
                    fprintf( stderr, "Error allocating an audio buffer\n" );
                    exit( 1 );
                }
            }

            return frame;

        }

        void open_audio( AVFormatContext *oc, AVCodec *codec, OutputStream *ost, AVDictionary *opt_arg ) {

            AVCodecContext *c;
            int nb_samples;
            int ret;
            AVDictionary *opt = NULL;

            c = ost->enc;

            /* open it */
            av_dict_copy( &opt, opt_arg, 0 );
            ret = avcodec_open2( c, codec, &opt );
            av_dict_free( &opt );
            if ( ret < 0 ) {
                fprintf( stderr, "Could not open audio codec: %s\n", av_err2str( ret ) );
                exit( 1 );
            }

            // init signal generator
            ost->t          = 0;
            ost->tincr      = 2 * M_PI * 110.0 / c->sample_rate;
            ost->tincr2     = 2 * M_PI * 110.0 / c->sample_rate / c->sample_rate; // increment frequency by 110 Hz per second

            if ( c->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE ) nb_samples = 10000;
            else nb_samples = c->frame_size;

            ost->frame     = alloc_audio_frame( c->sample_fmt, c->channel_layout, c->sample_rate, nb_samples );
            ost->tmp_frame = alloc_audio_frame( AV_SAMPLE_FMT_S16, c->channel_layout, c->sample_rate, nb_samples );

            ret = avcodec_parameters_from_context( ost->st->codecpar, c ); // copy the stream parameters to the muxer
            if ( ret < 0 ) {
                fprintf( stderr, "Could not copy the stream parameters\n" );
                exit( 1 );
            }

                ost->swr_ctx = swr_alloc(  ); //create resampler context
                if ( !ost->swr_ctx ) {
                    fprintf( stderr, "Could not allocate resampler context\n" );
                    exit( 1 );
                }

                /* set options */
                av_opt_set_int       ( ost->swr_ctx, "in_channel_count",   c->channels,       0 );
                av_opt_set_int       ( ost->swr_ctx, "in_sample_rate",     c->sample_rate,    0 );
                av_opt_set_sample_fmt( ost->swr_ctx, "in_sample_fmt",      AV_SAMPLE_FMT_S16, 0 );
                av_opt_set_int       ( ost->swr_ctx, "out_channel_count",  c->channels,       0 );
                av_opt_set_int       ( ost->swr_ctx, "out_sample_rate",    c->sample_rate,    0 );
                av_opt_set_sample_fmt( ost->swr_ctx, "out_sample_fmt",     c->sample_fmt,     0 );

                /* initialize the resampling context */
                if ( ( ret = swr_init( ost->swr_ctx ) ) < 0 ) {
                    fprintf( stderr, "Failed to initialize the resampling context\n" );
                    exit( 1 );
                }

        }

        AVFrame *get_audio_frame( OutputStream *ost ) { // Prepare a 16 bit dummy audio frame of 'frame_size' samples and 'nb_channels' channels.

            AVFrame *frame = ost->tmp_frame;
            int j, i, v;
            int16_t *q = ( int16_t* )frame->data[0];

            if ( av_compare_ts( ost->next_pts, ost->enc->time_base, STREAM_DURATION, ( AVRational ){ 1, 1 } ) >= 0 ) return NULL; // check if we want to generate more frames

            for ( j = 0; j < frame->nb_samples; j++ ) {

                v = ( int )( sin( ost->t ) * 10000 );

                for ( i = 0; i < ost->enc->channels; i++ ) *q++ = v;

                ost->t     += ost->tincr;
                ost->tincr += ost->tincr2;

            }

            frame->pts      = ost->next_pts;
            ost->next_pts   += frame->nb_samples;

            return frame;

        }

        int write_audio_frame( AVFormatContext *oc, OutputStream *ost ) { // encode one audio frame and send it to the muxer return 1 when encoding is finished, 0 otherwise

            AVCodecContext *c;
            AVPacket pkt = { 0 }; // data and size must be 0;
            AVFrame *frame;
            int ret;
            int got_packet;
            int dst_nb_samples;

            av_init_packet( &pkt );
            c = ost->enc;

            frame = get_audio_frame( ost );

            if ( frame ) {
                    dst_nb_samples = av_rescale_rnd( swr_get_delay( ost->swr_ctx, c->sample_rate ) + frame->nb_samples, c->sample_rate, c->sample_rate, AV_ROUND_UP ); // convert samples from native format to destination codec format, using the resampler compute destination number of samples
                    av_assert0( dst_nb_samples == frame->nb_samples );

                ret = av_frame_make_writable( ost->frame ); // when we pass a frame to the encoder, it may keep a reference to it internally; make sure we do not overwrite it here
                if ( ret < 0 ) exit( 1 );

                    ret = swr_convert( ost->swr_ctx, ost->frame->data, dst_nb_samples, ( const uint8_t ** )frame->data, frame->nb_samples ); // convert to destination format
                    if ( ret < 0 ) {
                        fprintf( stderr, "Error while converting\n" );
                        exit( 1 );
                    }
                    frame = ost->frame;

                frame->pts = av_rescale_q( ost->samples_count, ( AVRational ){1, c->sample_rate}, c->time_base );
                ost->samples_count += dst_nb_samples;
            }

            ret = avcodec_encode_audio2( c, &pkt, frame, &got_packet );
            if ( ret < 0 ) {
                fprintf( stderr, "Error encoding audio frame: %s\n", av_err2str( ret ) );
                exit( 1 );
            }

            if ( got_packet ) {
                ret = write_frame( oc, &c->time_base, ost->st, &pkt );
                if ( ret < 0 ) {
                    fprintf( stderr, "Error while writing audio frame: %s\n",
                            av_err2str( ret ) );
                    exit( 1 );
                }
            }

            return ( frame || got_packet ) ? 0 : 1;
        }

        /**************************************************************/
        /* video output */

        AVFrame *alloc_picture( enum AVPixelFormat pix_fmt, int width, int height ) {

            AVFrame *picture;
            int ret;

            picture = av_frame_alloc(  );
            if ( !picture )
                return NULL;

            picture->format = pix_fmt;
            picture->width  = width;
            picture->height = height;

            ret = av_frame_get_buffer( picture, 32 ); // allocate the buffers for the frame data
            if ( ret < 0 ) {
                fprintf( stderr, "Could not allocate frame data.\n" );
                exit( 1 );
            }

            return picture;

        }

        void open_video( AVFormatContext *oc, AVCodec *codec, OutputStream *ost, AVDictionary *opt_arg ) {

            int ret;
            AVCodecContext *c = ost->enc;
            AVDictionary *opt = NULL;

            av_dict_copy( &opt, opt_arg, 0 );

            ret = avcodec_open2( c, codec, &opt ); // open the codec
            av_dict_free( &opt );
            if ( ret < 0 ) {
                fprintf( stderr, "Could not open video codec: %s\n", av_err2str( ret ) );
                exit( 1 );
            }

            ost->frame = alloc_picture( c->pix_fmt, c->width, c->height ); // allocate and init a re-usable frame
            if ( !ost->frame ) {
                fprintf( stderr, "Could not allocate video frame\n" );
                exit( 1 );
            }

            ost->tmp_frame = NULL;
            if ( c->pix_fmt != AV_PIX_FMT_YUV420P ) { // If the output format is not YUV420P, then a temporary YUV420P picture is needed too. It is then converted to the required output format.
                ost->tmp_frame = alloc_picture( AV_PIX_FMT_YUV420P, c->width, c->height );
                if ( !ost->tmp_frame ) {
                    fprintf( stderr, "Could not allocate temporary picture\n" );
                    exit( 1 );
                }
            }

            ret = avcodec_parameters_from_context( ost->st->codecpar, c ); // copy the stream parameters to the muxer
            if ( ret < 0 ) {
                fprintf( stderr, "Could not copy the stream parameters\n" );
                exit( 1 );
            }

        }

        void ffmpeg_encoder_set_frame_yuv_from_rgb( AVFrame *frame ) {

            uint8_t *rgb = (uint8_t *) malloc( 3 * sizeof( uint8_t ) * frame->width * frame->height );

            int width = frame->width, height = frame->height;
            int x, y, cur;
            for (y = 0; y < height; y++) {
                for (x = 0; x < width; x++) {
                    cur = 3 * (y * width + x);
                    rgb[cur + 0] = 0;
                    rgb[cur + 1] = 0;
                    rgb[cur + 2] = 0;
                    if ((frame->pts / 25) % 2 == 0) {
                        if (y < height / 2) {
                            if (x < width / 2) {
                                /* Black. */
                            } else {
                                rgb[cur + 0] = 255;
                            }
                        } else {
                            if (x < width / 2) {
                                rgb[cur + 1] = 255;
                            } else {
                                rgb[cur + 2] = 255;
                            }
                        }
                    } else {
                        if (y < height / 2) {
                            rgb[cur + 0] = 255;
                            if (x < width / 2) {
                                rgb[cur + 1] = 255;
                            } else {
                                rgb[cur + 2] = 255;
                            }
                        } else {
                            if (x < width / 2) {
                                rgb[cur + 1] = 255;
                                rgb[cur + 2] = 255;
                            } else {
                                rgb[cur + 0] = 255;
                                rgb[cur + 1] = 255;
                                rgb[cur + 2] = 255;
                            }
                        }
                    }
                }
            }


            sws_context = sws_getCachedContext( sws_context, frame->width, frame->height, AV_PIX_FMT_RGB24, frame->width, frame->height, AV_PIX_FMT_YUV420P, 0, 0, 0, 0 );

            const int inLinesize[ 1 ] = { 3 * frame->width }; // RGB stride
            std::cout << "sws_scale BEGIN\n";
            sws_scale( sws_context, ( const uint8_t * const * ) &rgb, inLinesize, 0, frame->height, frame->data, frame->linesize );
            std::cout << "sws_scale END\n";

        }

        void fill_yuv_image( AVFrame *pict, int frame_index, int width, int height ) { // Prepare a dummy image.

            int ret = av_frame_make_writable( pict );     // when we pass a frame to the encoder, it may keep a reference to it internally; make sure we do not overwrite it here
            if ( ret < 0 ) exit( 1 );

            int i = frame_index;

            ffmpeg_encoder_set_frame_yuv_from_rgb( pict );

            //!!! free( rgb );

        }

        AVFrame *get_video_frame( OutputStream *ost ) {

            AVCodecContext *c = ost->enc;

            if ( av_compare_ts( ost->next_pts, c->time_base, STREAM_DURATION, ( AVRational ){ 1, 1 } ) >= 0 ) return NULL; // check if we want to generate more frames

            if ( c->pix_fmt != AV_PIX_FMT_YUV420P ) {

                if ( !ost->sws_ctx ) { // as we only generate a YUV420P picture, we must convert it to the codec pixel format if needed
                    ost->sws_ctx = sws_getContext( c->width, c->height, AV_PIX_FMT_YUV420P, c->width, c->height, c->pix_fmt, SCALE_FLAGS, NULL, NULL, NULL );
                    if ( !ost->sws_ctx ) {
                        fprintf( stderr, "Could not initialize the conversion context\n" );
                        exit( 1 );
                    }
                }
                fill_yuv_image( ost->tmp_frame, ost->next_pts, c->width, c->height );
                sws_scale( ost->sws_ctx, ( const uint8_t * const * )ost->tmp_frame->data, ost->tmp_frame->linesize, 0, c->height, ost->frame->data, ost->frame->linesize );

            } else {
                fill_yuv_image( ost->frame, ost->next_pts, c->width, c->height );
            }

            ost->frame->pts = ost->next_pts++;

            return ost->frame;

        }

        /*
         * encode one video frame and send it to the muxer
         * return 1 when encoding is finished, 0 otherwise
         */
        int write_video_frame( AVFormatContext *oc, OutputStream *ost ) {

            int ret;
            AVCodecContext *c;
            AVFrame *frame;
            int got_packet = 0;
            AVPacket pkt = { 0 };

            c = ost->enc;

            frame = get_video_frame( ost );

            av_init_packet( &pkt );

            /* encode the image */
            ret = avcodec_encode_video2( c, &pkt, frame, &got_packet );
            if ( ret < 0 ) {
                fprintf( stderr, "Error encoding video frame: %s\n", av_err2str( ret ) );
                exit( 1 );
            }

            if ( got_packet ) {
                ret = write_frame( oc, &c->time_base, ost->st, &pkt );
            } else {
                ret = 0;
            }

            if ( ret < 0 ) {
                fprintf( stderr, "Error while writing video frame: %s\n", av_err2str( ret ) );
                exit( 1 );
            }

            return ( frame || got_packet ) ? 0 : 1;

        }

        void close_stream( AVFormatContext *oc, OutputStream *ost ) {

            avcodec_free_context( &ost->enc );
            av_frame_free( &ost->frame );
            av_frame_free( &ost->tmp_frame );
            sws_freeContext( ost->sws_ctx );
            swr_free( &ost->swr_ctx );

        }

        void makeVideo( const char *filename ) {

            OutputStream video_st = { 0 }, audio_st = { 0 };
            AVOutputFormat *fmt;
            AVFormatContext *oc;
            AVCodec *audio_codec, *video_codec;
            int ret;
            int have_video = 0, have_audio = 0;
            int encode_video = 0, encode_audio = 0;
            AVDictionary *opt = NULL;

            av_register_all(); // Initialize libavcodec, and register all codecs and formats.

            avformat_alloc_output_context2( &oc, NULL, NULL, filename ); // allocate the output media context
            if ( !oc ) {
                printf( "Could not deduce output format from file extension: using MPEG.\n" );
                avformat_alloc_output_context2( &oc, NULL, "mpeg", filename );
            }
            if ( !oc ) exit( 0 );

            fmt = oc->oformat;

            if ( fmt->video_codec != AV_CODEC_ID_NONE ) { // Add the audio and video streams using the default format codecs and initialize the codecs.
                add_stream( &video_st, oc, &video_codec, fmt->video_codec );
                have_video = 1;
                encode_video = 1;
            }
            if ( fmt->audio_codec != AV_CODEC_ID_NONE ) {
                add_stream( &audio_st, oc, &audio_codec, fmt->audio_codec );
                have_audio = 1;
                encode_audio = 1;
            }

            if ( have_video ) open_video( oc, video_codec, &video_st, opt ); // Now that all the parameters are set, we can open the audio and video codecs and allocate the necessary encode buffers.
            if ( have_audio ) open_audio( oc, audio_codec, &audio_st, opt );

            av_dump_format( oc, 0, filename, 1 );

            if ( !( fmt->flags & AVFMT_NOFILE ) ) { // open the output file, if needed
                ret = avio_open( &oc->pb, filename, AVIO_FLAG_WRITE );
                if ( ret < 0 ) {
                    fprintf( stderr, "Could not open '%s': %s\n", filename, av_err2str( ret ) );
                    exit( 0 );
                }
            }

            ret = avformat_write_header( oc, &opt ); // Write the stream header, if any.
            if ( ret < 0 ) {
                fprintf( stderr, "Error occurred when opening output file: %s\n", av_err2str( ret ) );
                exit( 0 );
            }

            while ( encode_video || encode_audio ) {
                // select the stream to encode
                if ( encode_video && ( !encode_audio || av_compare_ts( video_st.next_pts, video_st.enc->time_base, audio_st.next_pts, audio_st.enc->time_base ) <= 0 ) ) {
                    encode_video = !write_video_frame( oc, &video_st );
                } else {
                    encode_audio = !write_audio_frame( oc, &audio_st );
                }
            }

            av_write_trailer( oc ); // Write the trailer, if any. The trailer must be written before you close the CodecContexts open when you wrote the header; otherwise av_write_trailer(  ) may try to use memory that was freed on av_codec_close(  ).

            if ( have_video ) close_stream( oc, &video_st );
            if ( have_audio ) close_stream( oc, &audio_st );

            if ( !( fmt->flags & AVFMT_NOFILE ) ) avio_closep( &oc->pb ); // Close the output file.

            avformat_free_context( oc ); // free the stream

        }

};

int main( int argc, char **argv ) {

    VideoRenderer vr;
    vr.makeVideo( argv[ 1 ] );

    return 0;

}