// Untitled: decodes audio files to raw samples with FFmpeg, mixes them and encodes the result.

#pragma comment (lib, "libavutil.a")
#pragma comment (lib, "libavformat.a")
#pragma comment (lib, "libavcodec.a")
#pragma comment (lib, "libswresample.a")
#pragma comment (lib, "libswscale.a")

#include <Windows.h>
#include <string.h>
#include <math.h>
#include <vector>
#include <iostream>

extern "C" {
    #include <libavutil/opt.h>
    #include <libavutil/mathematics.h>
    #include <libavformat/avformat.h>
    #include <libswscale/swscale.h>
    #include <libswresample/swresample.h>
}

// Number of samples per channel requested from the resampler on each
// swr_convert() call in audio_file_decode().
#define RAW_AUDIO_FRAME_SIZE            1152
// Value assigned to the encoder's bit_rate in audio_start().
#define STREAM_AUDIO_BIT_RATE           320000
// Sample rate used for the encoder, for the frames built from raw files and
// (divided by the pitch) for the resampler output.
#define STREAM_AUDIO_SAMPLE_RATE        44100
// Sample format of the intermediate raw files (resampler output in
// audio_file_decode(), frames created in audio_file_add()).
#define STREAM_AUDIO_SAMPLE_FORMAT_GM   AV_SAMPLE_FMT_S16P
// Sample format given to the encoder and to the frames mixed in audio_combine().
#define STREAM_AUDIO_SAMPLE_FORMAT_MP3  AV_SAMPLE_FMT_S16P
// C type of a single sample; its size drives the byte arithmetic when raw
// data is written and when frames are mixed.
#define STREAM_AUDIO_SAMPLE_TYPE        int16_t
// Upper and lower clamp bounds applied while mixing in audio_combine().
#define STREAM_AUDIO_SAMPLE_MAX         SHRT_MAX
#define STREAM_AUDIO_SAMPLE_MIN         SHRT_MIN
// Channel layout and channel count shared by the resampler output, the raw
// frames and the encoder.
#define STREAM_AUDIO_CHANNEL_LAYOUT     AV_CH_LAYOUT_STEREO
#define STREAM_AUDIO_CHANNELS           2

using namespace std;

// A file of raw data
typedef struct File {
    vector<AVFrame*> frames;
} File;

// A sound
typedef struct Sound {
    //vector<AVFrame*> frames;
    uint64_t play;
    double volume;
    File* file;
} Sound;

// Media file output
// Muxer context of the export; allocated in audio_start() and freed at the
// end of audio_combine().
AVFormatContext *outContext;
// Number of output frames the export spans; reset in audio_start() and
// extended by audio_sound_add() so it covers every scheduled sound.
uint64_t totalFrames;

// Audio
// Output stream created in audio_start() on outContext.
AVStream *audioStream;
// Encoder selected in audio_start() from the output format's default audio codec.
AVCodec *audioCodec;
// Codec context of audioStream (audioStream->codec), configured and opened
// in audio_start().
AVCodecContext *audioCodecContext;
// Duration of one encoder frame as frame_size / sample rate; set in
// audio_start() and used to convert start times to frame indices.
AVRational audioTimeBase;
// Reset to 0 in audio_start(). audio_combine() declares a local variable of
// the same name that shadows this one.
uint64_t audioFrameNum;

// Files
// Every File created by audio_file_add(); the index in this vector is the id
// handed back to the caller. Emptied by audio_combine().
vector<File*> files;
// Every Sound scheduled by audio_sound_add(). Emptied by audio_combine().
vector<Sound*> sounds;


// Converts a UTF-8, NUL-terminated string to a wide (UTF-16) string.
// Returns an empty string when the conversion fails.
wstring towstr(const string str) {
    // With a source length of -1 the API counts the terminating NUL as well.
    const int needed = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, NULL, 0);
    if (needed <= 0)
        return wstring();

    wstring wide(needed, L'\0');
    const int written = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, &wide[0], needed);
    if (written <= 0)
        return wstring();

    // Drop the terminator the API wrote so it is not part of the result.
    wide.resize(written - 1);
    return wide;
}


// Builds the rational number num / den.
AVRational rat(int num, int den) {
    const AVRational value = { num, den };
    return value;
}


// Initialize codecs
double audio_init() {

    // Initialize libavcodec, and register all codecs and formats.
    av_register_all();
    return 0;

}


// Decode a file into raw audio
double audio_file_decode(const char *source, const char *dest, float pitch) {

    FILE* outStream = NULL;
    AVFormatContext *formatContext = NULL;
    AVCodec *codec = NULL;
    AVCodecContext *codecContext = NULL;
    SwrContext *swrContext = NULL;
    AVFrame *frameDecoded = NULL;
    AVPacket inPacket;
    int ret = 0;
    av_init_packet(&inPacket);

    try {

        // Create writer
        _wfopen_s(&outStream, &towstr(dest)[0], L"wb");

        // Create contex
        formatContext = avformat_alloc_context();
        if (avformat_open_input(&formatContext, source, NULL, NULL) < 0)
            throw -11;

        // Find info
        if (avformat_find_stream_info(formatContext, 0) < 0)
            throw -12;

        // Find audio stream
        int streamId = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
        if (streamId < 0)
            throw -13;

        codecContext = avcodec_alloc_context3(codec);
        avcodec_copy_context(codecContext, formatContext->streams[streamId]->codec);

        // Open codec
        if (avcodec_open2(codecContext, codec, 0) < 0)
            throw -14;

        // Set up resample context
        swrContext = swr_alloc();
        if (!swrContext)
            throw -15;

        av_opt_set_int(swrContext, "in_channel_count", codecContext->channels, 0);
        av_opt_set_int(swrContext, "in_channel_layout", codecContext->channel_layout, 0);
        av_opt_set_int(swrContext, "in_sample_rate", codecContext->sample_rate, 0);
        av_opt_set_sample_fmt(swrContext, "in_sample_fmt", codecContext->sample_fmt, 0);

        av_opt_set_int(swrContext, "out_channel_count", STREAM_AUDIO_CHANNELS, 0);
        av_opt_set_int(swrContext, "out_channel_layout", STREAM_AUDIO_CHANNEL_LAYOUT, 0);
        av_opt_set_int(swrContext, "out_sample_rate", STREAM_AUDIO_SAMPLE_RATE / pitch, 0);
        av_opt_set_sample_fmt(swrContext, "out_sample_fmt", STREAM_AUDIO_SAMPLE_FORMAT_GM, 0);

        if (swr_init(swrContext))
            throw -16;

        // Allocate re-usable frame
        frameDecoded = av_frame_alloc();
        if (!frameDecoded)
            throw -17;

        frameDecoded->format = codecContext->sample_fmt;
        frameDecoded->channel_layout = codecContext->channel_layout;
        frameDecoded->channels = codecContext->channels;
        frameDecoded->sample_rate = codecContext->sample_rate;

        // Load frames
        inPacket.data = NULL;
        inPacket.size = 0;

        int gotFrame, samples = 0;

        while (av_read_frame(formatContext, &inPacket) >= 0) {

            if (inPacket.stream_index != streamId)
                continue;

            if (avcodec_decode_audio4(codecContext, frameDecoded, &gotFrame, &inPacket) < 0)
                throw -18;

            if (!gotFrame)
                continue;

            // Begin conversion
            //if (swr_convert(swrContext, NULL, 0, (const uint8_t **)frameDecoded->data, frameDecoded->nb_samples) < 0)
            //  throw -19;

            //while (swr_get_out_samples(swrContext, 0) >= RAW_AUDIO_FRAME_SIZE) {

                // Allocate data
                uint8_t **convertedData = NULL;
                if (av_samples_alloc_array_and_samples(&convertedData, NULL, STREAM_AUDIO_CHANNELS, RAW_AUDIO_FRAME_SIZE, STREAM_AUDIO_SAMPLE_FORMAT_GM, 0) < 0)
                    throw -20;

                // Convert
                int outSamples = swr_convert(swrContext, convertedData, RAW_AUDIO_FRAME_SIZE, (const uint8_t **)frameDecoded->data, frameDecoded->nb_samples);
                if (outSamples < 0)
                    throw -21;

                // Calculate buffer size
                //size_t bufferSize = av_samples_get_buffer_size(NULL, STREAM_AUDIO_CHANNELS, RAW_AUDIO_FRAME_SIZE, STREAM_AUDIO_SAMPLE_FORMAT_GM, 0);
                //if (bufferSize < 0)
                //  throw -22;

                fwrite(convertedData[0], 1, outSamples * sizeof(STREAM_AUDIO_SAMPLE_TYPE) * STREAM_AUDIO_CHANNELS, outStream);
                av_freep(convertedData);
            //}
        }

        // Flush
        cout << "flushing.." << endl;
        while (1) {

            // Allocate data
            uint8_t **convertedData = NULL;
            if (av_samples_alloc_array_and_samples(&convertedData, NULL, STREAM_AUDIO_CHANNELS, RAW_AUDIO_FRAME_SIZE, STREAM_AUDIO_SAMPLE_FORMAT_GM, 0) < 0)
                throw -20;

            // Convert
            int outSamples = swr_convert(swrContext, convertedData, RAW_AUDIO_FRAME_SIZE, NULL, 0);
            if (outSamples < 0)
                throw -21;

            cout << "Flushed " << outSamples << endl;
            //cout << ret << endl;
            if (outSamples==0)
                break;

            // Calculate buffer size
            //size_t bufferSize = av_samples_get_buffer_size(NULL, STREAM_AUDIO_CHANNELS, RAW_AUDIO_FRAME_SIZE, STREAM_AUDIO_SAMPLE_FORMAT_GM, 0);
            //if (bufferSize < 0)
            //  throw -22;

            fwrite(convertedData[0], 1, outSamples * sizeof(STREAM_AUDIO_SAMPLE_TYPE) * STREAM_AUDIO_CHANNELS, outStream);
            av_freep(convertedData);
        }
    }
    catch (int e) {
        ret = e;
    }

    // Clean up
    if (frameDecoded)
        av_frame_free(&frameDecoded);
    if (swrContext)
        swr_free(&swrContext);
    if (codecContext)
        avcodec_close(codecContext);
    if (formatContext) {
        avformat_close_input(&formatContext);
        avformat_free_context(formatContext);
    }
    av_free_packet(&inPacket);


    // Close
    if (outStream)
        fclose(outStream);

    return ret;

}


// Save audio file
double audio_start(const char* outFile) {

    // Allocate the output media context
    avformat_alloc_output_context2(&outContext, NULL, NULL, outFile);
    if (!outContext)
        return -1;

    // Find audio encoder
    audioCodec = avcodec_find_encoder(outContext->oformat->audio_codec); // CODEC_ID_MP3
    if (!audioCodec)
        return -2;

    // Start audio stream
    audioStream = avformat_new_stream(outContext, audioCodec);
    if (!audioStream)
        return -3;

    audioCodecContext = audioStream->codec;
    audioStream->id = 0;

    // Setup
    audioCodecContext->sample_fmt = STREAM_AUDIO_SAMPLE_FORMAT_MP3;
    audioCodecContext->sample_rate = STREAM_AUDIO_SAMPLE_RATE;
    audioCodecContext->bit_rate = STREAM_AUDIO_BIT_RATE;
    audioCodecContext->channels = STREAM_AUDIO_CHANNELS;
    audioCodecContext->channel_layout = STREAM_AUDIO_CHANNEL_LAYOUT;

    if (outContext->oformat->flags & AVFMT_GLOBALHEADER)
        audioCodecContext->flags |= CODEC_FLAG_GLOBAL_HEADER;

    // Open the codec
    if (avcodec_open2(audioCodecContext, audioCodec, NULL) < 0)
        return -4;

    // Open the output file
    if (avio_open(&outContext->pb, outFile, AVIO_FLAG_WRITE) < 0)
        return -5;

    audioFrameNum = 0;
    audioTimeBase = rat(audioCodecContext->frame_size, STREAM_AUDIO_SAMPLE_RATE);
    cout << "frame_size = " << audioCodecContext->frame_size << endl;

    // Write the stream header, if any.
    if (avformat_write_header(outContext, NULL) < 0)
        return -10;

    totalFrames = 0;

    return 0;

}


// Adds a file with raw audio, returns the id
double audio_file_add(const char* source) {

    // Create file
    File* file = new File();
    files.push_back(file);

    // Create reader
    FILE* inStream;
    _wfopen_s(&inStream, &towstr(source)[0], L"rb");

    // Read to EOF, store data in frames
    size_t bufferSize = av_samples_get_buffer_size(NULL, STREAM_AUDIO_CHANNELS, audioCodecContext->frame_size, STREAM_AUDIO_SAMPLE_FORMAT_GM, 0);

    while (1) {
        uint8_t* rawData = new uint8_t[bufferSize];
        int len = fread(rawData, 1, bufferSize, inStream);

        if (!len)
            break;

        // Allocate frame
        AVFrame *frame = av_frame_alloc();
        if (!frame)
            return -23;

        frame->nb_samples = audioCodecContext->frame_size;
        frame->format = STREAM_AUDIO_SAMPLE_FORMAT_GM;
        frame->channel_layout = STREAM_AUDIO_CHANNEL_LAYOUT;
        frame->channels = STREAM_AUDIO_CHANNELS;
        frame->sample_rate = STREAM_AUDIO_SAMPLE_RATE;

        // Fill frame
        if (avcodec_fill_audio_frame(frame, STREAM_AUDIO_CHANNELS, STREAM_AUDIO_SAMPLE_FORMAT_GM, rawData, len, 0) < 0)
            return -24;

        file->frames.push_back(frame);
    }

    // Close
    fclose(inStream);

    // Return ID
    return files.size() - 1;

}


// Adds a sound to the export
double audio_sound_add(double fileId, double play, double pitch, double volume) {

    Sound* sound = new Sound();
    sound->file = files[(int)fileId];
    sound->play = av_rescale_q(play * 1000, rat(1, 1000), audioTimeBase);
    sound->volume = volume;
    sounds.push_back(sound);

    totalFrames = max(totalFrames, sound->play + sound->file->frames.size());
    cout << "Added sound at time " << play << ", frame " << sound->play << ", volume " << volume << ", pitch " << pitch << endl;

    return 0;

}


// Combine files together
double audio_combine() {

    uint64_t audioFrameNum = 0;
    int dataSize = sizeof(STREAM_AUDIO_SAMPLE_TYPE);
    int isPlanar = av_sample_fmt_is_planar(STREAM_AUDIO_SAMPLE_FORMAT_MP3);

    while (audioFrameNum < totalFrames) {

        // Allocate frame
        AVFrame *frame = av_frame_alloc();
        if (!frame)
            return -1;

        frame->nb_samples = audioCodecContext->frame_size;
        frame->format = STREAM_AUDIO_SAMPLE_FORMAT_MP3;
        frame->channel_layout = STREAM_AUDIO_CHANNEL_LAYOUT;
        frame->channels = STREAM_AUDIO_CHANNELS;
        frame->sample_rate = STREAM_AUDIO_SAMPLE_RATE;

        if (av_frame_get_buffer(frame, 0) < 0)
            return -2;

        if (av_frame_make_writable(frame) < 0)
            return -3;

        // Find sounds
        vector<Sound*> frameSounds;
        for (unsigned int i = 0; i < sounds.size(); i++)
            if (audioFrameNum >= sounds[i]->play &&  audioFrameNum < sounds[i]->play + sounds[i]->file->frames.size())
                frameSounds.push_back(sounds[i]);

        // Write to frame (mix sounds)

        for (int c = 0; c < 1 + isPlanar; c++) {
            for (int i = 0; i < frame->linesize[0]; i += dataSize) {
                STREAM_AUDIO_SAMPLE_TYPE dstVal = 0; // 0=silence

                for (unsigned int j = 0; j < frameSounds.size(); j++) {
                    STREAM_AUDIO_SAMPLE_TYPE srcVal;
                    memcpy(&srcVal, &frameSounds[j]->file->frames[audioFrameNum - frameSounds[j]->play]->data[c][i], dataSize);

                    // Clamp audio
                    double tmp = (double)dstVal + (double)(srcVal * frameSounds[j]->volume);
                    if (tmp > STREAM_AUDIO_SAMPLE_MAX)
                        tmp = STREAM_AUDIO_SAMPLE_MAX;
                    if (tmp < STREAM_AUDIO_SAMPLE_MIN)
                        tmp = STREAM_AUDIO_SAMPLE_MIN;
                    dstVal = tmp;
                }

                memcpy(&frame->data[c][i], &dstVal, dataSize);
            }
        }

        frame->pts = av_rescale_q(audioFrameNum, audioTimeBase, audioCodecContext->time_base);

        // Allocate packet
        int gotPacket;
        AVPacket outPacket;
        av_init_packet(&outPacket);
        outPacket.data = NULL;
        outPacket.size = 0;

        // Encode
        if (avcodec_encode_audio2(audioCodecContext, &outPacket, frame, &gotPacket) < 0)
            return -4;

        // Write to file
        if (gotPacket) {
            av_packet_rescale_ts(&outPacket, audioCodecContext->time_base, audioStream->time_base);
            outPacket.stream_index = audioStream->index;

            if (av_interleaved_write_frame(outContext, &outPacket) != 0)
                return -5;
        }

        // Free
        av_frame_free(&frame);
        av_free_packet(&outPacket);

        // Advance
        audioFrameNum++;

    }

    // Flush audio
    while (1) {
        int gotPacket;
        AVPacket flushPacket;
        av_init_packet(&flushPacket);
        flushPacket.data = NULL;
        flushPacket.size = 0;

        if (avcodec_encode_audio2(audioCodecContext, &flushPacket, NULL, &gotPacket) < 0)
            return -36;

        if (gotPacket) {
            flushPacket.stream_index = audioStream->index;

            if (av_interleaved_write_frame(outContext, &flushPacket) != 0)
                return -37;
        }

        av_free_packet(&flushPacket);

        if (!gotPacket)
            break;
    }

    // Clear files
    for (size_t i = 0; i < files.size(); i++) {
        for (size_t j = 0; j < files[i]->frames.size(); j++)
            avcodec_free_frame(&files[i]->frames[j]);
        delete files[i];
    }
    files.clear();

    // Clear sounds
    for (size_t i = 0; i < sounds.size(); i++) {
        delete sounds[i];
    }
    sounds.clear();

    // Write the trailer
    av_write_trailer(outContext);

    // Close audio
    avcodec_close(audioCodecContext);

    // Close the output file.
    avio_close(outContext->pb);

    // Free the stream
    avformat_free_context(outContext);

    return 0;
}

int main() {
    audio_init();
    audio_start("out.mp3");
    for (int i=0; i<25; i++) {
        audio_file_decode("note.ogg", "snd.au", 1-i*0.025);
        double note = audio_file_add("snd.au");
        audio_sound_add(note, i*0.5, 1-i*0.025, 1);
    }
    cout << "Writing to file..." << endl;
    audio_combine();
    cout << "done!" << endl;
    int x;
    cin >> x;
}