// Untitled

/*
    http://cristobaldobranco.github.io/blog/2015/01/20/compiling-ffmpeg-with-windows-tools/
    Build statically in x86
    ./configure --toolchain=msvc --arch=x86 --prefix=build/ --disable-network
    make
    make install
*/

#pragma comment (lib, "libavutil.a")
#pragma comment (lib, "libavformat.a")
#pragma comment (lib, "libavcodec.a")
#pragma comment (lib, "libswresample.a")
#pragma comment (lib, "libswscale.a")

#define __STDC_CONSTANT_MACROS

#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <vector>
#include <queue>

extern "C" {
    #include <libavutil/opt.h>
    #include <libavutil/mathematics.h>
    #include <libavformat/avformat.h>
    #include <libswscale/swscale.h>
    #include <libswresample/swresample.h>
}

// Build-time configuration of the generated media file. The trailing comments
// on individual lines give the value to use when producing an AVI instead.

// Path of the output file, opened for writing by main().
#define OUT_FILE    "out.mpg" // "out.avi" for avi

// Container format name handed to avformat_alloc_output_context2().
#define STREAM_FORMAT       "mpeg"  // NULL for avi
// Length of the generated streams, compared against a {1, 1} time base (seconds).
#define STREAM_DURATION     15

// Video parameters copied into the encoder context by videoInit().
// The frame rate is used as the denominator of the encoder time base.
#define STREAM_VIDEO_FRAME_RATE     30
#define STREAM_VIDEO_WIDTH          1280
#define STREAM_VIDEO_HEIGHT         720
#define STREAM_VIDEO_BIT_RATE       400000
#define STREAM_VIDEO_PIXEL_FORMAT   AV_PIX_FMT_YUV420P

// Audio parameters copied into the encoder context by audioInit() and used as
// the resampling target when input files are loaded.
// SAMPLE_TYPE is the C type the mixer reads and writes per sample, and
// SAMPLE_MAX / SAMPLE_MIN are the clamping bounds of the mixer; they have to
// be kept in step with SAMPLE_FORMAT by hand.
// NOTE(review): CHANNELS is presumably required to match CHANNEL_LAYOUT - confirm.
#define STREAM_AUDIO_BIT_RATE           320000
#define STREAM_AUDIO_SAMPLE_RATE        44100
#define STREAM_AUDIO_SAMPLE_FORMAT      AV_SAMPLE_FMT_S16 // AV_SAMPLE_FMT_FLTP  for avi
#define STREAM_AUDIO_SAMPLE_TYPE        int16_t // float for avi
#define STREAM_AUDIO_SAMPLE_MAX         SHRT_MAX // 1 for avi
#define STREAM_AUDIO_SAMPLE_MIN         SHRT_MIN // -1 for avi
#define STREAM_AUDIO_CHANNEL_LAYOUT     AV_CH_LAYOUT_STEREO
#define STREAM_AUDIO_CHANNELS           2

using namespace std;

// Fatal-error helper: prints `msg` followed by a newline on stderr and ends
// the process with exit status 1. Never returns to the caller.
void die(const char *msg) {
    fputs(msg, stderr);
    fputc('\n', stderr);
    exit(1);
}


// A file (tested with .mp3, .wav, .ogg, .flac, .wma, .m4a)

typedef struct File {

    vector<AVFrame*> frames;

    File(char* filename);
    ~File();

} File;


// A sound

typedef struct Sound {

    File* file;
    uint64_t play, start, end;
    double volume;

    Sound(File* file, double play, double volume);

} Sound;


// Media file output
// Muxer context of the output file; allocated in main() and used by every
// init/write/flush function below.
AVFormatContext *outContext;

// Video
// Stream, encoder and encoder context are set up by videoInit().
AVStream *videoStream;
AVCodec *videoCodec;
AVCodecContext *videoCodecContext;
// Single frame in the output pixel format that is re-filled for every picture.
AVFrame *videoFrame;
// Copy of the video encoder time base, taken in main().
AVRational videoTimeBase;
// Number of video frames submitted to the encoder so far.
uint64_t videoFrameNum;
// Converts the generated RGB24 image into the output pixel format.
SwsContext *videoSwsContext;

// Audio
// Stream, encoder and encoder context are set up by audioInit().
AVStream *audioStream;
AVCodec *audioCodec;
AVCodecContext *audioCodecContext;
// Duration of one audio frame: encoder frame_size / sample rate (set in main()).
AVRational audioTimeBase;
// Number of audio frames submitted to the encoder so far.
uint64_t audioFrameNum;
// All sounds that audioWrite() mixes into the output.
vector<Sound*> sounds;


// Video

void videoInit() {

    // Find encoder
    videoCodec = avcodec_find_encoder(outContext->oformat->video_codec);
    if (!videoCodec)
        die("Could not find video encoder!");


    // Start stream
    videoStream = avformat_new_stream(outContext, videoCodec);
    if (!videoStream)
        die("Could not allocate video stream!");
    videoCodecContext = videoStream->codec;
    videoStream->id = 0;


    // Setup
    videoCodecContext->codec_id = outContext->oformat->video_codec;
    videoCodecContext->bit_rate = STREAM_VIDEO_BIT_RATE;
    videoCodecContext->width = STREAM_VIDEO_WIDTH;
    videoCodecContext->height = STREAM_VIDEO_HEIGHT;
    videoCodecContext->time_base = { 1, STREAM_VIDEO_FRAME_RATE };
    videoCodecContext->pix_fmt = STREAM_VIDEO_PIXEL_FORMAT;
    videoCodecContext->gop_size = 12; // Emit one intra frame every twelve frames at most
    videoCodecContext->mb_decision = 2;

    if (outContext->oformat->flags & AVFMT_GLOBALHEADER)
        videoCodecContext->flags |= CODEC_FLAG_GLOBAL_HEADER;


    // Open the codec
    if (avcodec_open2(videoCodecContext, videoCodec, NULL) < 0)
        die("Could not open video codec");


    // Allocate and init a re-usable frame
    videoFrame = av_frame_alloc();
    if (!videoFrame)
        die("Could not allocate video frame");

    videoFrame->pts = 0;
    videoFrame->format = STREAM_VIDEO_PIXEL_FORMAT;
    videoFrame->width = STREAM_VIDEO_WIDTH;
    videoFrame->height = STREAM_VIDEO_HEIGHT;

    if (av_frame_get_buffer(videoFrame, 24) < 0)
        die("Could not allocate memory for video frame");


    // Scaling context
    videoSwsContext = sws_getContext(STREAM_VIDEO_WIDTH, STREAM_VIDEO_HEIGHT, AV_PIX_FMT_RGB24,
                                     STREAM_VIDEO_WIDTH, STREAM_VIDEO_HEIGHT, STREAM_VIDEO_PIXEL_FORMAT,
                                     SWS_BILINEAR, NULL, NULL, NULL);
    if (!videoSwsContext)
        die("Could not initialize the conversion context");

}

// Frame

void getImage(uint8_t *data) {
    for (int y = 0; y < STREAM_VIDEO_HEIGHT; y++) {
        for (int x = 0; x < STREAM_VIDEO_WIDTH; x++) {
            int pos = (y * STREAM_VIDEO_WIDTH + x);
            data[pos * 3] = x + y + videoFrameNum * 3;          // R
            data[pos * 3 + 1] = 128 + y + videoFrameNum * 2;    // G
            data[pos * 3 + 2] = 64 + x + videoFrameNum * 5;     // B
        }
    }
}

bool videoWrite() {

    // Check if done
    if (av_compare_ts(videoFrameNum, videoCodecContext->time_base, STREAM_DURATION, { 1, 1 }) >= 0)
        return true;


    // Convert
    uint8_t* data = new uint8_t[STREAM_VIDEO_WIDTH * STREAM_VIDEO_HEIGHT * 3];
    uint8_t* inData[1] = { data }; // RGB24 have one plane
    int inLinesize[1] = { 3 * STREAM_VIDEO_WIDTH }; // RGB stride

    getImage(data);
    sws_scale(videoSwsContext, inData, inLinesize, 0, STREAM_VIDEO_HEIGHT, videoFrame->data, videoFrame->linesize);
    delete data;


    // Init packet
    int gotPacket;
    AVPacket packet;
    av_init_packet(&packet);
    packet.size = 0;
    packet.data = NULL;


    // Encode the image
    videoFrame->pts = av_rescale_q(videoFrameNum, videoCodecContext->time_base, videoStream->time_base);
    if (avcodec_encode_video2(videoCodecContext, &packet, videoFrame, &gotPacket) < 0)
        die("Error encoding video frame");


    // Write the compressed frame to the media file.
    if (gotPacket) {
        packet.stream_index = videoStream->index;

        if (av_interleaved_write_frame(outContext, &packet) != 0)
            die("Error while writing video frame");
    }


    // Advance
    videoFrameNum++;


    // Free
    av_free_packet(&packet);

    return false;
}

void videoFlush() {
    while (1) {
        int gotPacket;
        AVPacket flushPacket;
        av_init_packet(&flushPacket);
        flushPacket.data = NULL;
        flushPacket.size = 0;

        if (avcodec_encode_video2(videoCodecContext, &flushPacket, NULL, &gotPacket) < 0)
            die("Error encoding audio frame");

        if (gotPacket) {
            flushPacket.stream_index = videoStream->index;
            printf("Video packet of size %d flushed!\n ", flushPacket.size);

            if (av_interleaved_write_frame(outContext, &flushPacket) != 0)
                die("Error while writing video frame");
        } else
            break;
    }
}

// Close

void videoClose() {
    avcodec_close(videoCodecContext);
    av_frame_free(&videoFrame);
    sws_freeContext(videoSwsContext);
}


// Audio

void audioInit() {

    // Find encoder
    audioCodec = avcodec_find_encoder(outContext->oformat->audio_codec);
    if (!audioCodec)
        die("Could not find audio encoder!");


    // Start stream
    audioStream = avformat_new_stream(outContext, audioCodec);
    if (!audioStream)
        die("Could not allocate audio stream!");

    audioCodecContext = audioStream->codec;
    audioStream->id = 1;

    // Setup
    audioCodecContext->sample_fmt = STREAM_AUDIO_SAMPLE_FORMAT;
    audioCodecContext->sample_rate = STREAM_AUDIO_SAMPLE_RATE;
    audioCodecContext->bit_rate = STREAM_AUDIO_BIT_RATE;
    audioCodecContext->channels = STREAM_AUDIO_CHANNELS;
    audioCodecContext->channel_layout = STREAM_AUDIO_CHANNEL_LAYOUT;

    if (outContext->oformat->flags & AVFMT_GLOBALHEADER)
        audioCodecContext->flags |= CODEC_FLAG_GLOBAL_HEADER;


    // Open the codec
    if (avcodec_open2(audioCodecContext, audioCodec, NULL) < 0)
        die("Could not open audio codec");

}

// Frame

bool audioWrite() {

    if (av_compare_ts(audioFrameNum, audioTimeBase, STREAM_DURATION, { 1, 1 }) >= 0)
        return true;


    // Allocate frame
    AVFrame *frame = av_frame_alloc();
    if (!frame)
        die("Could not allocate audio frame");

    frame->nb_samples = audioCodecContext->frame_size;
    frame->format = STREAM_AUDIO_SAMPLE_FORMAT;
    frame->channel_layout = STREAM_AUDIO_CHANNEL_LAYOUT;
    frame->channels = STREAM_AUDIO_CHANNELS;
    frame->sample_rate = STREAM_AUDIO_SAMPLE_RATE;

    if (av_frame_get_buffer(frame, 0) < 0)
        die("Could not get buffer for frame");

    if (av_frame_make_writable(frame) < 0)
        die("Could not make frame writable");


    // Find sounds
    vector<Sound*> frameSounds;
    for (unsigned int i = 0; i < sounds.size(); i++)
        if (audioFrameNum >= sounds[i]->play &&  audioFrameNum < sounds[i]->play + sounds[i]->file->frames.size())
            frameSounds.push_back(sounds[i]);


    // Write to frame (mix sounds)
    int dataSize = sizeof(STREAM_AUDIO_SAMPLE_TYPE);
    int isPlanar = av_sample_fmt_is_planar(STREAM_AUDIO_SAMPLE_FORMAT);

    for (int c = 0; c < 1 + isPlanar; c++) {
        for (int i = 0; i < frame->linesize[0]; i += dataSize) {
            STREAM_AUDIO_SAMPLE_TYPE dstVal = 0; // 0=silence

            for (unsigned int j = 0; j < frameSounds.size(); j++) {
                STREAM_AUDIO_SAMPLE_TYPE srcVal;
                memcpy(&srcVal, &frameSounds[j]->file->frames[audioFrameNum - frameSounds[j]->play]->data[c][i], dataSize);

                // Clamp audio
                double tmp = (double)dstVal + (double)(srcVal * frameSounds[j]->volume);
                if (tmp > STREAM_AUDIO_SAMPLE_MAX)
                    tmp = STREAM_AUDIO_SAMPLE_MAX;
                if (tmp < STREAM_AUDIO_SAMPLE_MIN)
                    tmp = STREAM_AUDIO_SAMPLE_MIN;
                dstVal = tmp;
            }

            memcpy(&frame->data[c][i], &dstVal, dataSize);
        }
    }

    frame->pts = av_rescale_q(audioFrameNum, audioTimeBase, audioCodecContext->time_base);


    // Allocate packet
    int gotPacket;
    AVPacket outPacket;
    av_init_packet(&outPacket);
    outPacket.data = NULL;
    outPacket.size = 0;


    // Encode
    if (avcodec_encode_audio2(audioCodecContext, &outPacket, frame, &gotPacket) < 0)
        die("Error encoding audio frame");


    // Write to file
    if (gotPacket) {
        av_packet_rescale_ts(&outPacket, audioCodecContext->time_base, audioStream->time_base);
        outPacket.stream_index = audioStream->index;

        if (av_interleaved_write_frame(outContext, &outPacket) != 0)
            die("Error while writing audio frame");
    }


    // Free
    av_frame_free(&frame);
    av_free_packet(&outPacket);


    // Advance
    audioFrameNum++;

    return false;

}


// Flush

void audioFlush() {
    while (1) {
        int gotPacket;
        AVPacket flushPacket;
        av_init_packet(&flushPacket);
        flushPacket.data = NULL;
        flushPacket.size = 0;

        if (avcodec_encode_audio2(audioCodecContext, &flushPacket, NULL, &gotPacket) < 0)
            die("Error encoding audio frame");

        if (gotPacket) {
            flushPacket.stream_index = audioStream->index;
            printf("Audio packet of size %d flushed!\n ", flushPacket.size);

            if (av_interleaved_write_frame(outContext, &flushPacket) != 0)
                die("Error while writing audio frame");
        } else
            break;
    }
}

// Close

// Closes the audio encoder opened by audioInit(). Only the codec is closed
// here; the context pointer itself was obtained from audioStream.
void audioClose() {
    avcodec_close(audioCodecContext);
}


// File functions

File::File(char* filename) {

    AVFormatContext *formatContext;
    AVCodecContext *codecContext;

    // Create contex
    formatContext = avformat_alloc_context();
    if (avformat_open_input(&formatContext, filename, NULL, NULL)<0)
        die("Could not open file");


    // Find info
    if (avformat_find_stream_info(formatContext, 0)<0)
        die("Could not find file info");

    av_dump_format(formatContext, 0, filename, false);


    // Find audio stream
    AVCodec *codec;
    int streamId = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
    if (streamId < 0)
        die("Could not find Audio Stream");

    codecContext = avcodec_alloc_context3(codec);
    avcodec_copy_context(codecContext, formatContext->streams[streamId]->codec);


    // Open codec
    if (avcodec_open2(codecContext, codec, 0)<0)
        die("Codec cannot be found");


    // Set up resample context
    SwrContext *swrContext = swr_alloc();
    if (!swrContext)
        die("Failed to alloc swr context");

    av_opt_set_int(swrContext, "in_channel_count", codecContext->channels, 0);
    av_opt_set_int(swrContext, "in_channel_layout", codecContext->channel_layout, 0);
    av_opt_set_int(swrContext, "in_sample_rate", codecContext->sample_rate, 0);
    av_opt_set_sample_fmt(swrContext, "in_sample_fmt", codecContext->sample_fmt, 0);

    av_opt_set_int(swrContext, "out_channel_count", STREAM_AUDIO_CHANNELS, 0);
    av_opt_set_int(swrContext, "out_channel_layout", STREAM_AUDIO_CHANNEL_LAYOUT, 0);
    av_opt_set_int(swrContext, "out_sample_rate", STREAM_AUDIO_SAMPLE_RATE, 0);
    av_opt_set_sample_fmt(swrContext, "out_sample_fmt", STREAM_AUDIO_SAMPLE_FORMAT, 0);

    if (swr_init(swrContext))
        die("Failed to init swr context");


    // Allocate re-usable frame
    AVFrame *frameDecoded = av_frame_alloc();
    if (!frameDecoded)
        die("Could not allocate audio frame");

    frameDecoded->format = codecContext->sample_fmt;
    frameDecoded->channel_layout = codecContext->channel_layout;
    frameDecoded->channels = codecContext->channels;
    frameDecoded->sample_rate = codecContext->sample_rate;


    // Load frames
    AVPacket inPacket;
    av_init_packet(&inPacket);
    inPacket.data = NULL;
    inPacket.size = 0;

    int gotFrame, samples = 0;

    while (av_read_frame(formatContext, &inPacket) >= 0) {

        if (inPacket.stream_index == streamId) {

            if (avcodec_decode_audio4(codecContext, frameDecoded, &gotFrame, &inPacket) < 0)
                die("Could not decode packet");

            if (gotFrame) {

                // Begin conversion
                if (swr_convert(swrContext, NULL, 0, (const uint8_t **)frameDecoded->data, frameDecoded->nb_samples) < 0)
                    die("Could not convert");

                while (swr_get_out_samples(swrContext, 0) >= audioCodecContext->frame_size) {

                    // Allocate data
                    uint8_t **convertedData = NULL;

                    if (av_samples_alloc_array_and_samples(&convertedData, NULL, STREAM_AUDIO_CHANNELS, audioCodecContext->frame_size, STREAM_AUDIO_SAMPLE_FORMAT, 0) < 0)
                        die("Could not allocate samples");


                    // Allocate frame
                    AVFrame *frameConverted = av_frame_alloc();
                    if (!frameConverted)
                        die("Could not allocate audio frame");

                    frameConverted->nb_samples = audioCodecContext->frame_size;
                    frameConverted->format = STREAM_AUDIO_SAMPLE_FORMAT;
                    frameConverted->channel_layout = STREAM_AUDIO_CHANNEL_LAYOUT;
                    frameConverted->channels = STREAM_AUDIO_CHANNELS;
                    frameConverted->sample_rate = STREAM_AUDIO_SAMPLE_RATE;


                    // Convert
                    int out = swr_convert(swrContext, convertedData, frameConverted->nb_samples, NULL, 0);
                    if (out < 0)
                        die("Could not convert");


                    // Calculate buffer size
                    size_t bufferSize = av_samples_get_buffer_size(NULL, STREAM_AUDIO_CHANNELS, frameConverted->nb_samples, STREAM_AUDIO_SAMPLE_FORMAT, 0);
                    if (bufferSize < 0)
                        die("Invalid buffer size");


                    // Fill frame
                    if (avcodec_fill_audio_frame(frameConverted, STREAM_AUDIO_CHANNELS, STREAM_AUDIO_SAMPLE_FORMAT, convertedData[0], bufferSize, 0) < 0)
                        die("Could not fill frame");


                    // Store away
                    frames.push_back(frameConverted);

                }
            }
        }
    }

    printf("This file generated %d packets\n\n", frames.size());

    // Clean up
    av_frame_free(&frameDecoded);
    av_free_packet(&inPacket);
    swr_free(&swrContext);
    avcodec_close(codecContext);
}

// Releases every stored frame together with its samples.
File::~File() {
    for (AVFrame*& frame : frames) {
        // A frame without buffer references only points at a sample block
        // that was allocated separately and attached with
        // avcodec_fill_audio_frame(); av_frame_free() would not release that
        // block, so free it here. Frames that own their buffers are left to
        // av_frame_free().
        if (frame && !frame->buf[0])
            av_freep(&frame->data[0]);
        av_frame_free(&frame);
    }
}


// Sound functions

// Schedules `file` for playback.
// file:   audio data to play (not owned)
// play:   start time in seconds; converted to an output audio frame index
//         using audioTimeBase, which therefore has to be set beforehand
// volume: linear gain applied to every sample while mixing
Sound::Sound(File* file, double play, double volume) {
    this->file = file;
    this->play = av_rescale_q(static_cast<int64_t>(play * 1000), { 1, 1000 }, audioTimeBase); // Seconds to frame number
    this->start = 0; // not used in this file; kept defined
    this->end = 0;
    this->volume = volume;

    // this->play is 64 bits wide, so it needs a matching conversion specifier
    printf("Sound created starting at %gs (frame %llu)...\n", play, static_cast<unsigned long long>(this->play));
}


// Main program

int main() {

    // Initialize libavcodec, and register all codecs and formats.
    av_register_all();


    // Allocate the output media context
    avformat_alloc_output_context2(&outContext, NULL, STREAM_FORMAT, &OUT_FILE[0]);
    if (!outContext)
        die("Could not allocate output  context");


    // Add the audio and video streams using the default format codecs
    // and initialize the codecs.
    videoInit();
    audioInit();

    av_dump_format(outContext, 0, &OUT_FILE[0], 1);


    // Open the output file
    if (avio_open(&outContext->pb, &OUT_FILE[0], AVIO_FLAG_WRITE) < 0)
        die("Could not open");

    videoFrameNum = 0;
    videoTimeBase = videoCodecContext->time_base;
    audioFrameNum = 0;
    audioTimeBase = { audioCodecContext->frame_size, STREAM_AUDIO_SAMPLE_RATE };


    // Write the stream header, if any.
    if (avformat_write_header(outContext, NULL) < 0)
        die("Error occurred when opening output file");


    // Load files
    File* song = new File("sounds/frozenparadise.flac");
    File* chicken = new File("sounds/chicken.ogg");
    File* fart = new File("sounds/fart.mp3");


    // Add sounds
    sounds.push_back(new Sound(song, 0, 0.25));
    sounds.push_back(new Sound(fart, 0.9, 1));
    sounds.push_back(new Sound(fart, 1.85, 1));
    sounds.push_back(new Sound(fart, 2.35, 1));
    sounds.push_back(new Sound(fart, 2.95, 1));
    sounds.push_back(new Sound(chicken, 4, 2));
    sounds.push_back(new Sound(chicken, 8, 0.5));
    sounds.push_back(new Sound(fart, 10, 10));


    // Write interleaved frames
    printf("Writing frames\n");

    bool videoDone, audioDone;
    videoDone = audioDone = false;

    while (!videoDone || !audioDone) {
        if (!videoDone && (audioDone || av_compare_ts(videoFrameNum, videoTimeBase, audioFrameNum, audioTimeBase) <= 0))
            videoDone = videoWrite();
        else
            audioDone = audioWrite();
        printf(".");
    }

    videoFlush();
    audioFlush();


    // Write the trailer
    av_write_trailer(outContext);


    // Close each codec.
    videoClose();
    audioClose();


    // Close the output file.
    avio_close(outContext->pb);


    // Free the stream
    avformat_free_context(outContext);


    printf("All done!\n");
    system("pause");

    return 0;
}