transcoder

```c
#include <stdio.h>
#include <stdlib.h>

#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libavutil/pixdesc.h>
#include <libavutil/avutil.h>

// One media file (input or output) together with its per-stream state.
// All arrays are indexed by stream index.
typedef struct file
{
    AVFormatContext *container; // demuxer (open_media) or muxer (create_output) context
    AVCodecContext **codec;     // one codec context per stream; an entry may be NULL
    int *frames;                // per-stream count of frames handed to the encoder (only allocated for outputs)
    int *last_pts;              // per-stream pts of the newest frame of the clip being streamed (only allocated for outputs)
    int *pre_pts;               // per-stream copy of last_pts taken when a clip ends (only allocated for outputs)

    //Counting packets (same principle as counting frames).
    ////////////////////////////////////////////////////////////////////
    int *packets;               // per-stream count of packets written by encode_frame (only allocated for outputs)
    ////////////////////////////////////////////////////////////////////
} file;

// Thin wrapper around an output `file`.
// NOTE(review): nothing in the visible code uses this type - confirm whether it is still needed.
typedef struct EncoderContext
{
    file *encoder; // the output file being encoded into

} EncoderContext;

// Allocates an output `file` with a muxer context guessed from `filename` and room for `streams` codec contexts.
file *create_output(int streams, const char *filename);
// Creates an output file at `path` with one encoder per audio/video stream of `input` and writes the header.
file *start_output_from_file(const char *path, file *input, const char *video_encoder, const char *audio_encoder);
// Adds a video stream to `container` and opens the named encoder for it; returns 0 on success, 1 on error.
int create_video_encoder(AVCodecContext **cod_ctx, AVFormatContext *container, const char *encoder, int width, int height,
                         int pix_fmt, AVRational sample_aspect_ratio, AVRational frame_rate, int bit_rate, int buffer_size);
// Adds an audio stream to `container` and opens the named encoder for it; returns 0 on success, 1 on error.
int create_audio_encoder(AVCodecContext **cod_ctx, AVFormatContext *container, const char *encoder,
                         int channels, int sample_rate, int bit_rate);
// Reads packets until one frame is decoded; returns 0 on success, 1 on error, -1 when the file ended.
int decode_frame(file *decoder, AVFrame *frame, AVPacket *packet);
// Opens `input_path` and one decoder per stream; returns 0 on success, 1 on error.
int open_media(file *video, const char input_path[], const char *video_codec, const char *audio_codec);
// NOTE(review): no definition of save_gray_frame is visible in this part of the file.
void save_gray_frame(unsigned char *buf, int width, int height);
// Releases the codec contexts, the container and the `file` itself.
int free_file(file *f);
// Sends one frame (or NULL) to the encoder of stream `index` and writes the resulting packets.
int encode_frame(file *encoder, AVFrame *input_frame, int index);
// Decodes all of `input` and appends it, re-encoded, to `output`.
int stream_clip(file *input, file *output);
// Pads the shorter output streams with dummy frames until they reach the longest stream's pts.
int fill_with_empty_frames_until_all_streams_match_pts(file *input, file *output);

/*
    Transcodes "in_short.mp4" into "output2.mp4": the input is streamed into
    the output once, then once more for each of the `inputs_len` extra inputs,
    after which every encoder is flushed and the trailer is written.

    returns:
    1 if error
    0 if success
*/
int main()
{
    int res;
    int status = 0;
    int inputs_len = 1;

    file *input1 = calloc(1, sizeof(file));
    if (!input1)
    {
        printf("Failed allocating input 1");
        return 1;
    }

    res = open_media(input1, "in_short.mp4", "h264_cuvid", NULL);
    if (res == 1)
    {
        printf("Failed opening input 1");
        return 1;
    }
    file *output = start_output_from_file("output2.mp4", input1, "h264_nvenc", NULL);

    // This used to test input1, so a failed output creation went unnoticed
    // and the NULL output was dereferenced further down.
    if (!output)
    {
        printf("Failed opening output");
        return 1;
    }

    if (stream_clip(input1, output) != 0)
    {
        status = 1;
    }
    free_file(input1);

    for (int i = 0; i < inputs_len && status == 0; i++)
    {
        file *input = calloc(1, sizeof(file));
        if (!input)
        {
            printf("Failed allocating input");
            status = 1;
            break;
        }

        res = open_media(input, "in_short.mp4", "h264_cuvid", NULL);
        if (res != 0)
        {
            // Stop adding clips but still finalize what was written so far.
            printf("Failed opening input");
            status = 1;
            break;
        }

        if (stream_clip(input, output) != 0)
        {
            status = 1;
        }
        free_file(input);
    }

    // Flush every encoder of the output, however many streams it has
    // (the indices 0 and 1 used to be hard-coded).
    for (unsigned int i = 0; i < output->container->nb_streams; i++)
    {
        encode_frame(output, NULL, (int)i);
    }

    av_write_trailer(output->container);

    free_file(output);

    return status;
}

/*
    Decodes every frame of `input` and re-encodes it into `output`.
    Frame timestamps are regenerated from the per-stream frame counters of
    `output`, so consecutive clips are appended one after another. When the
    input ends, the shorter output streams are padded with dummy frames.

    returns:
    1 if error
    0 if success
*/
int stream_clip(file *input, file *output)
{
    AVPacket *packet = av_packet_alloc();
    AVFrame *frame = av_frame_alloc();
    int res;

    if (!packet || !frame)
    {
        printf("Failed allocating packet or frame\n");
        av_frame_free(&frame);
        av_packet_free(&packet);
        return 1;
    }

    while (1)
    {
        res = decode_frame(input, frame, packet);

        if (res == 1)
        {
            printf("Error decoding a frame\n");
            av_frame_free(&frame);
            av_packet_free(&packet);

            return 1;
        }
        else if (res == 0)
        {
            int index = packet->stream_index;
            AVCodecContext *enc = output->codec[index];

            // No encoder was created for this stream: drop the frame.
            if (!enc)
            {
                av_frame_unref(frame);
                continue;
            }

            AVCodecParameters *in_par = input->container->streams[index]->codecpar;
            AVRational fps = enc->framerate;
            int frames_per_packet = 1;
            AVRational time_base = output->container->streams[index]->time_base;

            if (in_par->codec_type == AVMEDIA_TYPE_AUDIO)
            {
                // For audio the "frame rate" is the sample rate and one
                // packet carries frame_size samples (typically 1024 or 960).
                fps.den = 1;
                fps.num = in_par->sample_rate;
                frames_per_packet = in_par->frame_size;
            }

            // Duration of one frame expressed in output time_base ticks.
            frame->pkt_duration = (int64_t)(av_q2d(av_div_q((AVRational){time_base.den, 1}, fps))) * (int64_t)frames_per_packet;

            frame->pts = (int64_t)(output->frames[index] * frame->pkt_duration);

            output->last_pts[index] = frame->pts;

            frame->pkt_dts = frame->pts;

            printf("%i FRAME %i PTS %i\n", (int)index, (int)output->frames[index], (int)frame->pts);

            output->frames[index]++;

            res = encode_frame(output, frame, index);
            if (res == 1)
            {
                // The packet used to leak on this path.
                av_frame_free(&frame);
                av_packet_free(&packet);
                printf("Failde encoding frame\n");
                return 1;
            }
            av_frame_unref(frame);
        }

        else if (res == -1)
        {
            printf("\nfile \"%s\" ended\n", input->container->url);
            break;
        }
    }

    // Remember where each stream ended so the padding below can compare them.
    for (unsigned int i = 0; i < input->container->nb_streams; i++)
    {
        output->pre_pts[i] = output->last_pts[i];
        output->last_pts[i] = 0;
    }

    fill_with_empty_frames_until_all_streams_match_pts(input, output);

    av_frame_free(&frame);
    av_packet_free(&packet);

    return 0;
}

/*
    Sends `input_frame` to the encoder of stream `index` and writes every
    packet the encoder produces to the output container. Passing a NULL
    frame flushes the encoder. Packet pts/dts/duration are regenerated from
    the per-stream packet counter of `encoder`.

    returns:
    1 if error
    0 if success (also when the stream has no encoder)
*/
int encode_frame(file *encoder, AVFrame *input_frame, int index)
{
    AVCodecContext *codec = encoder->codec[index];

    // Checked before allocating so that nothing leaks on this early return.
    if (!codec)
        return 0;

    AVPacket *output_packet = av_packet_alloc();
    if (!output_packet)
    {
        printf("ENCODER: Failed mallocing output_package");
        return 1;
    }

    int status = 0;
    int response = avcodec_send_frame(codec, input_frame);

    // EAGAIN/EOF are not failures (e.g. flushing an already flushed encoder);
    // any other error used to be silently reported as success.
    if (response < 0 && response != AVERROR(EAGAIN) && response != AVERROR_EOF)
    {
        printf("ENCODER: Error sending frame");
        status = 1;
    }

    while (response >= 0)
    {
        av_packet_unref(output_packet);

        response = avcodec_receive_packet(codec, output_packet);

        if (response == AVERROR(EAGAIN) || response == AVERROR_EOF)
        {
            break;
        }
        else if (response < 0)
        {
            printf("ENCODER: Error receiving packet");
            status = 1;
            break;
        }

        output_packet->stream_index = index;

        AVRational fps = codec->framerate;
        int frames_per_packet = 1;

        if (encoder->container->streams[index]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
        {
            // For audio the "frame rate" is the sample rate and one packet
            // carries frame_size samples (typically 1024 or 960).
            fps.den = 1;
            fps.num = encoder->container->streams[index]->codecpar->sample_rate;
            frames_per_packet = encoder->container->streams[index]->codecpar->frame_size;
        }

        AVRational time_base = encoder->container->streams[index]->time_base;

        // Packet timing is derived from the packet count, not from the frame.
        output_packet->duration = (int64_t)(av_q2d(av_div_q((AVRational){time_base.den, 1}, fps))) * (int64_t)frames_per_packet;
        output_packet->pts = (int64_t)(output_packet->duration * encoder->packets[index]);
        output_packet->dts = output_packet->pts;

        encoder->packets[index]++; //Count packets

        response = av_interleaved_write_frame(encoder->container, output_packet);

        if (response != 0)
        {
            printf("ENCODER:failed writing frame");
            status = 1;
            break;
        }
    }

    // Single exit point: the packet is released on every path.
    av_packet_free(&output_packet);

    return status;
}

/*
    Releases everything owned by `f`: the codec contexts, the codec array,
    the container, the per-stream counters and `f` itself. Accepts NULL and
    partially initialized files (NULL container or NULL arrays).

    returns:
    0 always
*/
int free_file(file *f)
{
    if (!f)
        return 0;

    // Only muxer contexts have oformat set.
    int is_output = f->container != NULL && f->container->oformat != NULL;

    if (f->container && f->codec)
    {
        // Only the first nb_streams entries are visited, as before.
        for (unsigned int i = 0; i < f->container->nb_streams; i++)
        {
            // avcodec_free_context() ignores entries that are already NULL.
            avcodec_free_context(&f->codec[i]);
        }
    }

    // The array comes from av_calloc() for outputs (create_output) and from
    // calloc() for inputs (open_media), so release it with the matching call.
    if (is_output)
        av_free(f->codec);
    else
        free(f->codec);
    f->codec = NULL;

    if (is_output)
    {
        // avformat_close_input() is meant for demuxers; a muxer context is
        // released by closing its I/O context and freeing the context.
        if (!(f->container->oformat->flags & AVFMT_NOFILE))
            avio_closep(&f->container->pb);
        avformat_free_context(f->container);
        f->container = NULL;
    }
    else
    {
        // Safe when the container is already NULL.
        avformat_close_input(&f->container);
    }

    // free(NULL) is a no-op, so inputs (which never allocate these) are fine.
    free(f->frames);
    free(f->packets);
    free(f->last_pts);
    free(f->pre_pts);

    free(f);

    return 0;
}

/*
    Pads the output streams with dummy frames until every stream has reached
    the pts of the stream that ended last (taken from output->pre_pts and
    compared in a common 1/60000 time base). The dummy frames only get
    buffers allocated; their contents are not initialized here.

    returns:
    1 if error
    0 if success
*/
int fill_with_empty_frames_until_all_streams_match_pts(file *input, file *output)
{
    const AVRational common_time_base = {.num = 1, .den = 60000};
    int status = 0;

    // Find how far the longest stream got.
    int64_t biggest_pts = 0;
    for (unsigned int i = 0; i < output->container->nb_streams; i++)
    {
        int64_t pts = av_rescale_q(output->pre_pts[i], output->container->streams[i]->time_base, common_time_base);

        if (pts > biggest_pts)
            biggest_pts = pts;
    }

    for (unsigned int i = 0; i < output->container->nb_streams; i++)
    {
        AVCodecContext *codec = output->codec[i];
        AVCodecParameters *out_par = output->container->streams[i]->codecpar;
        AVRational time_base = output->container->streams[i]->time_base;
        int frames_per_packet = 1;

        // Nothing to pad when the stream has no encoder.
        if (!codec)
            continue;

        AVRational fps = codec->framerate;

        if (out_par->codec_type == AVMEDIA_TYPE_AUDIO)
        {
            if (i >= input->container->nb_streams)
                continue;

            // For audio the "frame rate" is the sample rate and one packet
            // carries frame_size samples (typically 1024 or 960).
            fps.den = 1;
            fps.num = input->container->streams[i]->codecpar->sample_rate;
            frames_per_packet = input->container->streams[i]->codecpar->frame_size;
        }
        else if (out_par->codec_type != AVMEDIA_TYPE_VIDEO)
        {
            // Decided before allocating the frame so nothing leaks here.
            continue;
        }

        // Loop invariant: duration of one frame in stream time_base ticks.
        int64_t pkt_duration = (int64_t)(av_q2d(av_div_q((AVRational){time_base.den, 1}, fps))) * (int64_t)frames_per_packet;

        // A non-positive duration would never advance the pts and the
        // padding loop below would spin forever.
        if (pkt_duration <= 0)
        {
            printf("Cannot pad stream %i: frame duration is not positive\n", (int)i);
            continue;
        }

        AVFrame *dummy_frame = av_frame_alloc();
        if (!dummy_frame)
        {
            printf("Failed allocating the padding frame\n");
            return 1;
        }

        if (out_par->codec_type == AVMEDIA_TYPE_AUDIO)
        {
            dummy_frame->nb_samples = frames_per_packet;
            dummy_frame->format = out_par->format;
            dummy_frame->channel_layout = out_par->channel_layout;
        }
        else
        {
            dummy_frame->width = out_par->width;
            dummy_frame->height = out_par->height;
            dummy_frame->format = out_par->format;
        }

        if (av_frame_get_buffer(dummy_frame, 0) < 0)
        {
            printf("Failed allocating buffers for the padding frame\n");
            av_frame_free(&dummy_frame);
            return 1;
        }

        while (1)
        {
            int64_t curr_pts_in_new_time_base = av_rescale_q(pkt_duration * output->frames[i], time_base, common_time_base);
            printf("biggest_pts %i\n", (int)biggest_pts);
            printf("curr_pts_in_new_time_base %i\n", (int)curr_pts_in_new_time_base);

            printf("Adding frame\n");
            if (biggest_pts <= curr_pts_in_new_time_base)
                break;
            dummy_frame->pkt_duration = pkt_duration;
            dummy_frame->pts = (int64_t)(dummy_frame->pkt_duration * output->frames[i]);

            dummy_frame->pkt_dts = dummy_frame->pts;

            printf("%i FRAME %i PTS %i\n", (int)i, (int)output->frames[i], (int)dummy_frame->pts);

            output->frames[i]++;

            if (encode_frame(output, dummy_frame, (int)i) != 0)
            {
                // Stop padding this stream; report the failure at the end.
                status = 1;
                break;
            }
        }
        av_frame_free(&dummy_frame);
    }

    return status;
}

/*
    Opens `input_path` for demuxing and opens one decoder per stream.
    `video_codec` / `audio_codec` name the decoder to use for video / audio
    streams; when NULL (or for other stream types) the decoder is looked up
    from the stream's codec id.

    returns:
    1 if error
    0 if success
*/
int open_media(file *video, const char input_path[], const char *video_codec, const char *audio_codec)
{
    video->container = avformat_alloc_context();

    if (!video->container)
    {
        printf("Failed to alloc memory to the container of the input file");
        return 1;
    }
    if (avformat_open_input(&video->container, input_path, NULL, NULL) != 0)
    {
        printf("Failed to open input file");
        return 1;
    }
    if (avformat_find_stream_info(video->container, NULL) < 0)
    {
        printf("Failed to open read stream info");
        return 1;
    }

    video->codec = calloc(video->container->nb_streams, sizeof(AVCodecContext *));
    if (!video->codec && video->container->nb_streams > 0)
    {
        printf("failed to alloc memory for codec context");
        return 1;
    }

    for (unsigned int i = 0; i < video->container->nb_streams; i++)
    {
        const char *curr_codec = NULL;

        AVStream *stream = video->container->streams[i];
        const AVCodec *dec;
        AVCodecContext *codec_ctx;

        if (AVMEDIA_TYPE_VIDEO == stream->codecpar->codec_type)
        {
            curr_codec = video_codec;
        }
        else if (AVMEDIA_TYPE_AUDIO == stream->codecpar->codec_type)
        {
            curr_codec = audio_codec;
        }

        // Look up the decoder selected for THIS stream type; the lookup used
        // to pass video_codec even for audio streams.
        if (curr_codec == NULL)
            dec = avcodec_find_decoder(stream->codecpar->codec_id);
        else
            dec = avcodec_find_decoder_by_name(curr_codec);

        if (!dec)
        {
            printf("failed to find the codec");
            return 1;
        }

        codec_ctx = avcodec_alloc_context3(dec);
        if (!codec_ctx)
        {
            printf("failed to alloc memory for codec context");
            return 1;
        }

        if (avcodec_parameters_to_context(codec_ctx, stream->codecpar) < 0)
        {
            printf("failed to fill codec context");
            avcodec_free_context(&codec_ctx); // not yet stored in video->codec
            return 1;
        }

        if (avcodec_open2(codec_ctx, dec, NULL) < 0)
        {
            printf("failed to open codec");
            avcodec_free_context(&codec_ctx); // not yet stored in video->codec
            return 1;
        }

        video->codec[i] = codec_ctx;
    }
    return 0;
}

/*
    Reads packets from `decoder` until one frame has been decoded into
    `frame`. On return 0, packet->stream_index is the index of the stream
    the frame belongs to. Once the container has no more packets, the
    decoders are drained so that frames still buffered inside them are
    returned before the end of file is reported.

    returns:
    1 if error
    0 if success
    -1 if file ended
*/
int decode_frame(file *decoder, AVFrame *frame, AVPacket *packet)
{
    AVCodecContext *dec;
    int response;

    while (1)
    {
        av_packet_unref(packet);
        if (av_read_frame(decoder->container, packet) < 0)
            break;

        dec = decoder->codec[packet->stream_index];

        response = avcodec_send_packet(dec, packet);

        if (response < 0)
        {
            printf("Error while sending packet to decoder");
            return 1;
        }

        // At most one frame is taken per call, so a single receive replaces
        // the former inner loop (which could never iterate twice).
        response = avcodec_receive_frame(dec, frame);
        if (response >= 0)
        {
            return 0;
        }
        if (response != AVERROR(EAGAIN) && response != AVERROR_EOF)
        {
            printf("Error while receiving frame from decoder");
            return 1;
        }
        // The decoder needs more input: read the next packet.
    }

    // Without a frame to fill there is nothing to drain into.
    if (!frame)
        return -1;

    // End of input: put each decoder into draining mode and hand out the
    // frames it still holds, one per call.
    for (unsigned int i = 0; i < decoder->container->nb_streams; i++)
    {
        dec = decoder->codec[i];
        if (!dec)
            continue;

        // Result ignored on purpose: this fails once the decoder is already
        // draining, which is exactly the state wanted here.
        avcodec_send_packet(dec, NULL);

        response = avcodec_receive_frame(dec, frame);
        if (response >= 0)
        {
            // The packet is blank here, so report the stream explicitly.
            packet->stream_index = (int)i;
            return 0;
        }
        if (response != AVERROR(EAGAIN) && response != AVERROR_EOF)
        {
            printf("Error while receiving frame from decoder");
            return 1;
        }
    }
    return -1;
}
/*
    Adds a new stream to `container` and opens the audio encoder named
    `encoder` for it, using the encoder's first supported sample format.
    On success the opened context is stored in cod_ctx[0]; on failure
    cod_ctx[0] is left NULL.

    returns:
    1 if error
    0 if success
*/
int create_audio_encoder(AVCodecContext **cod_ctx, AVFormatContext *container, const char *encoder,
                         int channels, int sample_rate, int bit_rate)
{
    AVStream *stream = avformat_new_stream(container, NULL);
    if (!stream)
    {
        printf("CREATE AUDIO ENCODER: Failed allocating memory for stream");
        return 1;
    }
    const AVCodec *enc = avcodec_find_encoder_by_name(encoder);
    if (!enc)
    {
        printf("CREATE AUDIO ENCODER: Failed searching encoder");

        return 1;
    }

    // The sample format is taken from the encoder's list, which may be absent.
    if (!enc->sample_fmts)
    {
        printf("CREATE AUDIO ENCODER: encoder lists no sample formats");
        return 1;
    }

    cod_ctx[0] = avcodec_alloc_context3(enc);

    if (!cod_ctx[0])
    {
        printf("CREATE AUDIO ENCODER: Failed allocation codec context");
        return 1;
    }

    cod_ctx[0]->channels = channels;
    cod_ctx[0]->channel_layout = av_get_default_channel_layout(channels);
    cod_ctx[0]->sample_rate = sample_rate;
    cod_ctx[0]->sample_fmt = *enc->sample_fmts;
    cod_ctx[0]->bit_rate = bit_rate;
    cod_ctx[0]->time_base = (AVRational){1, sample_rate}; // 1/48000

    // Containers that store codec headers globally need the encoder to emit
    // them as extradata; this must be requested before avcodec_open2().
    if (container->oformat && (container->oformat->flags & AVFMT_GLOBALHEADER))
        cod_ctx[0]->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    int res = 0;

    res = avcodec_open2(cod_ctx[0], enc, NULL);
    if (res < 0)
    {
        printf("CREATE AUDIO ENCODER: couldn't open codec");
        avcodec_free_context(&cod_ctx[0]);
        return 1;
    }

    res = avcodec_parameters_from_context(stream->codecpar, cod_ctx[0]);

    if (res < 0)
    {
        printf("CREATE AUDIO ENCODER: failed setting codec parameters from context");
        avcodec_free_context(&cod_ctx[0]);
        return 1;
    }

    return 0;
}

/*
    Adds a new stream to `container` and opens the video encoder named
    `encoder` for it with the "fast" preset. The codec and stream time base
    are fixed to 1/60000 so that video timestamps have enough resolution to
    be synchronized with audio. On success the opened context is stored in
    cod_ctx[0]; on failure cod_ctx[0] is left NULL.

    returns:
    1 if error
    0 if success
*/
int create_video_encoder(AVCodecContext **cod_ctx, AVFormatContext *container, const char *encoder, int width, int height,
                         int pix_fmt, AVRational sample_aspect_ratio, AVRational frame_rate, int bit_rate, int buffer_size)
{
    AVStream *stream = avformat_new_stream(container, NULL);
    if (!stream)
    {
        printf("CREATE VIDEO ENCODER: Failed allocating memory for stream");
        return 1;
    }
    const AVCodec *enc = avcodec_find_encoder_by_name(encoder);
    if (!enc)
    {
        printf("CREATE VIDEO ENCODER: Failed searching encoder");

        return 1;
    }

    cod_ctx[0] = avcodec_alloc_context3(enc);

    if (!cod_ctx[0])
    {
        printf("CREATE VIDEO ENCODER: Failed allocation codec context");
        return 1;
    }

    cod_ctx[0]->height = height;
    cod_ctx[0]->width = width;
    cod_ctx[0]->pix_fmt = pix_fmt;

    cod_ctx[0]->sample_aspect_ratio = sample_aspect_ratio;

    //It's not a good idea to set the video time base to 1/60 - we need higher resolution for allowing audio synchronization
    cod_ctx[0]->time_base = av_make_q(1, 60000);

    cod_ctx[0]->framerate = frame_rate;
    cod_ctx[0]->bit_rate = bit_rate;
    cod_ctx[0]->rc_buffer_size = buffer_size;
    // NOTE(review): the min/max RATE is set from the buffer SIZE - confirm this is intended.
    cod_ctx[0]->rc_max_rate = buffer_size;
    cod_ctx[0]->rc_min_rate = buffer_size;

    stream->time_base = cod_ctx[0]->time_base;

    // Containers that store codec headers globally need the encoder to emit
    // them as extradata; this must be requested before avcodec_open2().
    if (container->oformat && (container->oformat->flags & AVFMT_GLOBALHEADER))
        cod_ctx[0]->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    int res = 0;

    res = av_opt_set(cod_ctx[0]->priv_data, "preset", "fast", 0);

    if (res != 0)
    {
        printf("CREATE VIDEO ENCODER: Failed opt set");
        avcodec_free_context(&cod_ctx[0]);
        return 1;
    }

    res = avcodec_open2(cod_ctx[0], enc, NULL);
    if (res < 0)
    {
        printf("CREATE VIDEO ENCODER: couldn't open codec");
        avcodec_free_context(&cod_ctx[0]);
        return 1;
    }

    res = avcodec_parameters_from_context(stream->codecpar, cod_ctx[0]);

    if (res < 0)
    {
        printf("CREATE VIDEO ENCODER: failed setting codec parameters from context");
        avcodec_free_context(&cod_ctx[0]);
        return 1;
    }

    return 0;
}

/*
    Creates the output file `path` with one encoder for every audio and
    video stream of `input`, opens the file for writing and writes the
    container header. When `video_encoder` / `audio_encoder` is NULL the
    name of the corresponding input codec is used as the encoder name.
    Streams that are neither audio nor video get no encoder.

    returns:
    the new output file, or NULL on error
*/
file *start_output_from_file(const char *path, file *input, const char *video_encoder, const char *audio_encoder)
{
    file *output = create_output(input->container->nb_streams, path);
    if (!output)
    {
        return NULL;
    }
    AVCodecContext *codec_ctx;
    output->frames = calloc(input->container->nb_streams, sizeof(int));
    output->packets = calloc(input->container->nb_streams, sizeof(int));
    output->last_pts = calloc(input->container->nb_streams, sizeof(int));
    output->pre_pts = calloc(input->container->nb_streams, sizeof(int));

    if (!output->frames || !output->packets || !output->last_pts || !output->pre_pts)
    {
        printf("Failed allocating the stream counters\n");
        free_file(output);
        return NULL;
    }

    for (int stream = 0; stream < (int)input->container->nb_streams; stream++)
    {
        // Reset per stream: it used to be read uninitialized for streams
        // that are neither audio nor video.
        int res = 0;

        codec_ctx = input->codec[stream];

        switch (codec_ctx->codec_type)
        {
        case AVMEDIA_TYPE_AUDIO:
            if (audio_encoder == NULL)
            {
                audio_encoder = codec_ctx->codec_descriptor->name;
            }
            res = create_audio_encoder(&output->codec[stream], output->container, audio_encoder, codec_ctx->channels, codec_ctx->sample_rate, (int)codec_ctx->bit_rate);

            break;

        case AVMEDIA_TYPE_VIDEO:

            if (video_encoder == NULL)
            {
                video_encoder = codec_ctx->codec_descriptor->name;
            }
            AVRational framerate = av_guess_frame_rate(input->container, input->container->streams[stream], NULL);
            // AV_PIX_FMT_NV12 replaces the literal 23 that was passed here
            // (presumably the same value in the FFmpeg version in use - confirm).
            res = create_video_encoder(&output->codec[stream], output->container, video_encoder, codec_ctx->width, codec_ctx->height,
                                       AV_PIX_FMT_NV12, (AVRational){1, 1}, framerate, (int)codec_ctx->bit_rate, codec_ctx->rc_buffer_size);
            break;

        default:
            // No encoder is created for other stream types.
            break;
        }
        if (res != 0)
        {
            printf("Failed opening encoder stream number %i \n", stream);
            free_file(output);
            return NULL;
        }
    }

    // AV_CODEC_FLAG_GLOBAL_HEADER is a codec-context flag that has to be set
    // before the encoder is opened; OR-ing it into the format context's
    // flags, as was done here, has no effect, so that line is gone.

    if (!(output->container->oformat->flags & AVFMT_NOFILE))
    {
        if (avio_open(&output->container->pb, path, AVIO_FLAG_WRITE) < 0)
        {
            printf("could not open the output file");
            free_file(output);
            return NULL;
        }
    }

    if (avformat_write_header(output->container, NULL) < 0)
    {
        printf("an error occurred when opening output file");
        free_file(output);
        return NULL;
    }

    return output;
}

/*
    Allocates an output `file`: a muxer context whose format is guessed from
    `filename`, plus a zero-filled array of `streams` codec context pointers.
    Nothing is leaked when a step fails.

    returns:
    the new file, or NULL on error
*/
file *create_output(int streams, const char *filename)
{
    int res;

    file *output = calloc(1, sizeof(file));
    if (!output)
    {
        return NULL;
    }
    res = avformat_alloc_output_context2(&output->container, NULL, NULL, filename);
    if (res < 0)
    {
        printf("Failed opening output\n");
        free(output);
        return NULL;
    }

    // av_calloc() zero-fills, so every entry already starts out NULL.
    output->codec = av_calloc(streams, sizeof(AVCodecContext *));

    if (!output->codec)
    {
        printf("Failed allocating ram for codec\n");
        avformat_free_context(output->container);
        free(output);
        return NULL;
    }

    return output;
}