// Untitled

// ConsoleApplication1.cpp : main project file.

#include "stdafx.h"

using namespace System;
using namespace System::Runtime::InteropServices;

/** The bit rate value assigned to the output encoder's bit_rate field */
#define OUTPUT_BIT_RATE 16000
/** The number of output channels */
#define OUTPUT_CHANNELS 1
/** The audio sample output format */
#define OUTPUT_SAMPLE_FORMAT AV_SAMPLE_FMT_S16P

const char *get_error_text(const int error)
{
    static char error_buffer[255];
    av_strerror(error, error_buffer, sizeof(error_buffer));
    return error_buffer;
}

// open an input file and load the required decoder
int open_input_file(const char *filename, AVFormatContext **input_format_context, AVCodecContext **input_codec_context)
{
    AVCodec *input_codec;
    int error;

    // open the input file and read to buffer
    if ((error = avformat_open_input(input_format_context, filename, NULL, NULL)) < 0)
    {
        fprintf(stderr, "Could not open input file '%s' (error '$s')\n",
            filename, get_error_text(error));
        *input_format_context = NULL;
        return error;
    }

    // get the input audio info
    if ((error = avformat_find_stream_info(*input_format_context, NULL)) < 0)
    {
        fprintf(stderr, "Could not determine audio info (error '$s')\n",
            get_error_text(error));
        avformat_close_input(input_format_context);
        return error;
    }

    // make sure there's only one audio stream
    if ((*input_format_context)->nb_streams != 1)
    {
        fprintf(stderr, "Expected a single audio input stream, but found %d\n",
            (*input_format_context)->nb_streams);

        avformat_close_input(input_format_context);
        return AVERROR_EXIT;
    }

    // find the audio decoder
    if (!(input_codec = avcodec_find_decoder((*input_format_context)->streams[0]->codec->codec_id)))
    {
        fprintf(stderr, "Could not find input codec\n");
        avformat_close_input(input_format_context);
        return AVERROR_EXIT;
    }

    // open the decoder for the audio stream
    if ((error = avcodec_open2((*input_format_context)->streams[0]->codec, input_codec, NULL)) < 0)
    {
        fprintf(stderr, "Could not open input codec (error '%s')\n",
            get_error_text(error));
        avformat_close_input(input_format_context);
        return error;
    }

    // save the decoder context for use later
    *input_codec_context = (*input_format_context)->streams[0]->codec;

    return 0;
}

// open the output file and encoder
int open_output_file(const char *filename,
    AVCodecContext *input_codec_context,
    AVFormatContext **output_format_context,
    AVCodecContext **output_codec_context)
{
    AVIOContext *output_io_context = NULL;
    AVStream *stream = NULL;
    AVCodec *output_codec = NULL;
    int error;

    // open output file
    if ((error = avio_open(&output_io_context, filename, AVIO_FLAG_WRITE)) < 0)
    {
        fprintf(stderr, "Could not open output file '%s' (error '%s')\n",
            filename, get_error_text(error));
        return error;
    }

    // create new output format context
    if (!(*output_format_context = avformat_alloc_context()))
    {
        fprintf(stderr, "Could not allocate output format context\n");
        return AVERROR(ENOMEM);
    }

    // Associate the output file with the container format context
    (*output_format_context)->pb = output_io_context;

    // guess the format based on file extension
    if (!((*output_format_context)->oformat = av_guess_format(NULL, filename, NULL)))
    {
        fprintf(stderr, "Could not find output file format\n");
        goto cleanup;
    }

    av_strlcpy((*output_format_context)->filename, filename,
        sizeof((*output_format_context)->filename));

    // find the required encoder
    if (!(output_codec = avcodec_find_encoder(AV_CODEC_ID_AAC)))
    {
        fprintf(stderr, "Could not find an AAC encoder.\n");
        goto cleanup;
    }

    if (!(stream = avformat_new_stream(*output_format_context, output_codec)))
    {
        fprintf(stderr, "Could not create new stream\n");
        error = AVERROR(ENOMEM);
        goto cleanup;
    }

    // save encoder for easier access later.
    *output_codec_context = stream->codec;

    // set encoder parameters
    (*output_codec_context)->channels = OUTPUT_CHANNELS;
    (*output_codec_context)->channel_layout = av_get_default_channel_layout(OUTPUT_CHANNELS);
    (*output_codec_context)->sample_rate = 16000;
    (*output_codec_context)->sample_fmt = AV_SAMPLE_FMT_S16P;
    (*output_codec_context)->bit_rate = OUTPUT_BIT_RATE;

    /**
    * Some container formats (like MP4) require global headers to be present
    * Mark the encoder so that it behaves accordingly.
    */
    if ((*output_format_context)->oformat->flags & AVFMT_GLOBALHEADER)
        (*output_codec_context)->flags |= CODEC_FLAG_GLOBAL_HEADER;

    // open the encoder for the audio stream
    if ((error = avcodec_open2(*output_codec_context, output_codec, NULL)) < 0)
    {
        fprintf(stderr, "Could not open output codec (error '%s')\n",
            get_error_text(error));
        goto cleanup;
    }

    return 0;

cleanup:
    avio_close((*output_format_context)->pb);
    avformat_free_context(*output_format_context);
    *output_format_context = NULL;
    return error < 0 ? error : AVERROR_EXIT;
}

/**
 * Prepare a packet for reading or writing.
 *
 * After the library initialisation the payload fields are cleared so the
 * packet is seen as empty.
 *
 * @param packet  packet to initialise
 */
void init_packet(AVPacket *packet)
{
    av_init_packet(packet);

    packet->size = 0;
    packet->data = NULL;
}

/**
 * Allocate one frame used for reading decoded input.
 *
 * @param frame  receives the newly allocated frame (NULL on failure)
 * @return 0 on success, AVERROR(ENOMEM) when the allocation fails
 */
int init_input_frame(AVFrame **frame)
{
    *frame = av_frame_alloc();
    if (*frame)
        return 0;

    fprintf(stderr, "Could not allocate input frame\n");
    return AVERROR(ENOMEM);
}

// init audio resampler based on input and output codecs
int init_resampler(AVCodecContext *input_codec_context,
    AVCodecContext *output_codec_context,
    SwrContext **resample_context)
{
    int error;

    /**
    * Create a resampler context for the conversion.
    * Set the conversion parameters.
    * Default channel layouts based on the number of channels
    * are assumed for simplicity (they are sometimes not detected
    * properly by the demuxer and/or decoder).
    */
    *resample_context = swr_alloc_set_opts(NULL,
        av_get_default_channel_layout(output_codec_context->channels),
        output_codec_context->sample_fmt,
        output_codec_context->sample_rate,
        av_get_default_channel_layout(input_codec_context->channels),
        input_codec_context->sample_fmt,
        input_codec_context->sample_rate,
        0, NULL);

    if (!*resample_context)
    {
        fprintf(stderr, "Could not allocate resample context\n");
        return AVERROR(ENOMEM);
    }

    // sanity check
    //av_assert0(output_codec_context->sample_rate == input_codec_context->sample_rate);

    // open resampler with parms
    if ((error = swr_init(*resample_context)) < 0)
    {
        fprintf(stderr, "Could not open resample context\n");
        swr_free(resample_context);
        return error;
    }
    return 0;
}

/**
 * Allocate the FIFO that buffers samples waiting to be encoded.
 *
 * The FIFO is created for OUTPUT_SAMPLE_FORMAT / OUTPUT_CHANNELS with an
 * initial size of one sample.
 *
 * @param fifo  receives the new FIFO (NULL on failure)
 * @return 0 on success, AVERROR(ENOMEM) when the allocation fails
 */
int init_fifo(AVAudioFifo **fifo)
{
    *fifo = av_audio_fifo_alloc(OUTPUT_SAMPLE_FORMAT, OUTPUT_CHANNELS, 1);
    if (*fifo != NULL)
        return 0;

    fprintf(stderr, "Could not allocate FIFO\n");
    return AVERROR(ENOMEM);
}

/**
 * Write the container header of the output file.
 *
 * @param output_format_context  format context of the output file
 * @return 0 on success, otherwise the error code of avformat_write_header
 */
int write_output_file_header(AVFormatContext *output_format_context)
{
    const int status = avformat_write_header(output_format_context, NULL);
    if (status >= 0)
        return 0;

    fprintf(stderr, "Could not write output file header (error '%s')\n",
        get_error_text(status));
    return status;
}

// decode one audio frame
int decode_audio_frame(AVFrame *frame,
    AVFormatContext *input_format_context,
    AVCodecContext *input_codec_context,
    int *data_present, int *finished)
{
    // temp storage
    AVPacket input_packet;
    int error;
    init_packet(&input_packet);

    // read one frame from the input to the temp packet
    if ((error = av_read_frame(input_format_context, &input_packet)) < 0)
    {
        // if we hit the EOF, flush the decoder
        if (error == AVERROR_EOF)
            *finished = 1;
        else
        {
            fprintf(stderr, "Could not read frame (error '%s')\n",
                get_error_text(error));
            return error;
        }
    }

    // decode the audio frame
    if ((error = avcodec_decode_audio4(input_codec_context, frame,
        data_present, &input_packet)) < 0)
    {
        fprintf(stderr, "Could not decode frame (error '%s')\n",
            get_error_text(error));
        av_free_packet(&input_packet);
        return error;
    }

    // if the decoder isn't completely flushed, we have to flush it again
    if (*finished && *data_present)
        *finished = 0;
    av_free_packet(&input_packet);
    return 0;
}

// init temp storage for the audio samples
int init_converted_samples(uint8_t ***converted_input_samples,
    AVCodecContext *output_codec_context,
    int frame_size)
{
    int error;

    // allocate as many pointers as there are audio channels
    if (!(*converted_input_samples = (uint8_t**)calloc(output_codec_context->channels,
        sizeof(**converted_input_samples))))
    {
        fprintf(stderr, "could not allocate converted input sample pointers\n");
        return AVERROR(ENOMEM);
    }

    // allocate memory for the samples
    if ((error = av_samples_alloc(*converted_input_samples, NULL,
        output_codec_context->channels,
        frame_size,
        output_codec_context->sample_fmt, 0)) < 0)
    {
        fprintf(stderr, "Could not allocate converted input samples (error '%s')\n",
            get_error_text(error));
        av_freep(&(*converted_input_samples)[0]);
        free(*converted_input_samples);
        return error;
    }
    return 0;
}

// convert the input audio samples into the output format
int convert_samples(const uint8_t **input_data,
    uint8_t **converted_data, const int output_size, const int frame_size,
    SwrContext *resample_context)
{
    int error;

    // convert samples
    if ((error = swr_convert(resample_context,
        converted_data, output_size,
        input_data, frame_size)) < 0)
    {
        fprintf(stderr, "Could not convert input samples (error '%s')\n",
            get_error_text(error));
        return error;
    }

    return 0;
}

// Add converted input samples to the FIFO buffer
int add_samples_to_fifo(AVAudioFifo *fifo,
    uint8_t **converted_input_samples,
    const int frame_size)
{
    int error;

    // make FIFO the size it needs to hold both old and new samples
    if ((error = av_audio_fifo_realloc(fifo, av_audio_fifo_size(fifo) + frame_size)) < 0)
    {
        fprintf(stderr, "Could not reallocate FIFO\n");
        return error;
    }

    // store the new samples in the FIFO buffer
    if (av_audio_fifo_write(fifo, (void **)converted_input_samples,
        frame_size) < frame_size)
    {
        fprintf(stderr, "Could not write data to FIFO\n");
        return AVERROR_EXIT;
    }
    return 0;
}

// read one audio frame from the input file, decode, convert and store it in the FIFO buffer
int read_decode_convert_and_store(AVAudioFifo *fifo,
    AVFormatContext *input_format_context,
    AVCodecContext *input_codec_context,
    AVCodecContext *output_codec_context,
    SwrContext *resampler_context,
    int *finished)
{
    // temp storage for input frames
    AVFrame *input_frame = NULL;
    // temp storage for input samples
    uint8_t **converted_input_samples = NULL;
    int data_present;
    int ret = AVERROR_EXIT;

    // init temp storage for one frame
    if (init_input_frame(&input_frame))
        goto cleanup;

    // decode one frame
    if (decode_audio_frame(input_frame, input_format_context,
        input_codec_context, &data_present, finished))
        goto cleanup;

    /**
    * If we are at the end of the file and there are no more samples
    * in the decoder which are delayed, we are actually finished.
    * This must not be treated as an error.
    */
    if (*finished && !data_present)
    {
        ret = 0;
        goto cleanup;
    }
    int max_samples = av_rescale_rnd(input_frame->nb_samples, output_codec_context->sample_rate, input_codec_context->sample_rate, AV_ROUND_UP);
    // if there's decoded data, convert and store
    if (data_present)
    {
        int out_samples = av_rescale_rnd(swr_get_delay(resampler_context, input_codec_context->sample_rate) + input_frame->nb_samples,
            output_codec_context->sample_rate, input_codec_context->sample_rate, AV_ROUND_UP);

        // init the temp storage for the samples
        if (init_converted_samples(&converted_input_samples, output_codec_context,
            out_samples))
            goto cleanup;

        // convert samples
        if (convert_samples((const uint8_t**)input_frame->extended_data, converted_input_samples, out_samples,
            input_frame->nb_samples, resampler_context))
            goto cleanup;

        // add the samples to the FIFO buffer
        if (add_samples_to_fifo(fifo, converted_input_samples,
            out_samples))
            goto cleanup;
        ret = 0;
    }
    ret = 0;

cleanup:
    if (converted_input_samples)
    {
        av_freep(&converted_input_samples[0]);
        free(converted_input_samples);
    }
    av_frame_free(&input_frame);
    return ret;
}

// init frame for writing
int init_output_frame(AVFrame **frame,
    AVCodecContext *output_codec_context,
    int frame_size)
{
    int error;

    // create a new frame to store samples
    if (!(*frame = av_frame_alloc()))
    {
        fprintf(stderr, "Could not allocate output frame\n");
        return AVERROR_EXIT;
    }

    /**
    * Set the frame's parameters, especially its size and format.
    * av_frame_get_buffer needs this to allocate memory for the
    * audio samples of the frame.
    * Default channel layouts based on the number of channels
    * are assumed for simplicity.
    */
    (*frame)->nb_samples = frame_size;
    (*frame)->channel_layout = output_codec_context->channel_layout;
    (*frame)->format = output_codec_context->sample_fmt;
    (*frame)->sample_rate = output_codec_context->sample_rate;

    // allocate the samples of the frame
    if ((error = av_frame_get_buffer(*frame, 0)) < 0)
    {
        fprintf(stderr, "Couldn't allocate output frame samples (error '%s')\n",
            get_error_text(error));
        av_frame_free(frame);
        return error;
    }

    return 0;
}

// encode one frame of audio to the output file
int encode_audio_frame(AVFrame *frame,
    AVFormatContext *output_format_context,
    AVCodecContext *output_codec_context,
    int *data_present)
{
    // packet for temp storage
    AVPacket output_packet;
    int error;
    init_packet(&output_packet);

    // encode the audio frame and store it in a packet
    if ((error = avcodec_encode_audio2(output_codec_context, &output_packet,
        frame, data_present)) < 0)
    {
        fprintf(stderr, "Could not encode frame (error '%s')\n",
            get_error_text(error));
        av_free_packet(&output_packet);
        return error;
    }

    // write one audio frame to the output file
    if (*data_present)
    {
        if ((error = av_write_frame(output_format_context, &output_packet)) < 0)
        {
            fprintf(stderr, "Could not write frame (error '%s'\n",
                get_error_text(error));
            av_free_packet(&output_packet);
            return error;
        }

        av_free_packet(&output_packet);
    }
    return 0;
}

// load a frame from the FIFO buffer, encode and then write it to the output
int load_encode_and_write(AVAudioFifo *fifo,
    AVFormatContext *output_format_context,
    AVCodecContext *output_codec_context)
{
    AVFrame *output_frame;
    const int frame_size = FFMIN(av_audio_fifo_size(fifo), output_codec_context->frame_size);

    int data_written;

    // init temp storage for the frame
    if (init_output_frame(&output_frame, output_codec_context, frame_size))
        return AVERROR_EXIT;

    // read the samples required to fill the frame
    if (av_audio_fifo_read(fifo, (void **)output_frame->data, frame_size) < frame_size)
    {
        fprintf(stderr, "Could not read data from FIFO\n");
        av_frame_free(&output_frame);
        return AVERROR_EXIT;
    }

    // Encode one frame worth of audio samples
    if (encode_audio_frame(output_frame, output_format_context,
        output_codec_context, &data_written))
    {
        av_frame_free(&output_frame);
        return AVERROR_EXIT;
    }

    av_frame_free(&output_frame);
    return 0;
}

/**
 * Write the container trailer of the output file.
 *
 * @param output_format_context  format context of the output file
 * @return 0 on success, otherwise the error code of av_write_trailer
 */
int write_output_file_trailer(AVFormatContext *output_format_context)
{
    const int status = av_write_trailer(output_format_context);
    if (status >= 0)
        return 0;

    fprintf(stderr, "Could not write output file trailer (error '%s')\n",
        get_error_text(status));
    return status;
}

// convert an audio file to AAC in a MP4 container
int ConvertFile(String^ inputFile, String^ outputFile)
{
    AVFormatContext *input_format_context = NULL, *output_format_context = NULL;
    AVCodecContext *input_codec_context = NULL, *output_codec_context = NULL;
    SwrContext *resample_context = NULL;
    AVAudioFifo *fifo = NULL;
    int ret = AVERROR_EXIT;

    av_register_all();
    IntPtr ptrInput = Marshal::StringToHGlobalAnsi(inputFile);
    char* nativeInput = static_cast<char*>(ptrInput.ToPointer());

    IntPtr ptrOutput = Marshal::StringToHGlobalAnsi(outputFile);
    char* nativeOutput = static_cast<char*>(ptrOutput.ToPointer());

    if (open_input_file(nativeInput, &input_format_context, &input_codec_context))
        goto cleanup;
    if (open_output_file(nativeOutput, input_codec_context, &output_format_context, &output_codec_context))
        goto cleanup;

    if (init_resampler(input_codec_context, output_codec_context, &resample_context))
        goto cleanup;

    if (init_fifo(&fifo))
        goto cleanup;

    if (write_output_file_header(output_format_context))
        goto cleanup;

    // loop until we run out of samples
    while (1)
    {
        const int output_frame_size = output_codec_context->frame_size;
        int finished = 0;

        while (av_audio_fifo_size(fifo) < output_frame_size)
        {
            if (read_decode_convert_and_store(fifo, input_format_context,
                input_codec_context, output_codec_context, resample_context, &finished))
                goto cleanup;

            if (finished)
                break;
        }

        while (av_audio_fifo_size(fifo) >= output_frame_size || (finished && av_audio_fifo_size(fifo) > 0))
            if (load_encode_and_write(fifo, output_format_context, output_codec_context))
                goto cleanup;

        if (finished)
        {
            int data_written;
            do
            {
                if (encode_audio_frame(NULL, output_format_context, output_codec_context, &data_written))
                    goto cleanup;
            } while (data_written);
            break;
        }
    }

    if (write_output_file_trailer(output_format_context))
        goto cleanup;
    ret = 0;

cleanup:
    if (fifo)
        av_audio_fifo_free(fifo);
    swr_free(&resample_context);

    if (output_codec_context)
        avcodec_close(output_codec_context);
    if (output_format_context)
    {
        avio_close(output_format_context->pb);
        avformat_free_context(output_format_context);
    }
    if (input_codec_context)
        avcodec_close(input_codec_context);
    if (input_format_context)
        avformat_close_input(&input_format_context);

    return ret;
}

/**
 * Program entry point: converts args[0] into args[1].
 *
 * @param args  command-line arguments: input path, then output path
 * @return 0 when the conversion succeeds, 1 on missing arguments or failure
 */
int main(array<System::String ^> ^args)
{
    // both paths are required; indexing a shorter array would throw
    if (args->Length < 2)
    {
        fprintf(stderr, "Usage: <input file> <output file>\n");
        return 1;
    }

    // report the outcome to the caller through the exit code
    return ConvertFile(args[0], args[1]) == 0 ? 0 : 1;
}