// NOTE: trouble with merging audio

#include "com_hlcam_video_services_RenderService.h"

#include <fstream>
#include <vector>
#include <string>
#include <cstdlib>
#include <iostream>

#include <hlcam/pixels/clip.h>
#include <hlcam/pixels/vulcanRender.h>

#include <hlcam-common.h>
#include <hlcam/communication/movie/movie.pb.h>
using std::string;

#ifdef __cplusplus
extern "C" {
#include <libavutil/avassert.h>
#include <libavutil/channel_layout.h>
#include <libavutil/opt.h>
#include <libavutil/mathematics.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>


#define STREAM_DURATION   10.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT    AV_PIX_FMT_YUV420P /* default pix_fmt */
#define SCALE_FLAGS SWS_BICUBIC

#endif

// Forward declarations of the JNI marshalling helpers. Their definitions are
// not in this part of the file, so the notes below are limited to what the
// signatures and the visible call sites show.

// Called by renderProject with the request being built; a false return makes
// renderProject fail with RESULT_ERROR.
static bool marshal_resource_array(JNIEnv *env, movieRequest &request,
                                             jobjectArray j_resources);

// NOTE(review): presumably dispatches one Java resource to the type-specific
// marshal_* helper below -- confirm against the definition.
static clip * marshal_resource_to_clip(JNIEnv *env, const jobject &j_resource);

static clip * marshal_video(JNIEnv *env, const jobject &j_resource);

static clip * marshal_image(JNIEnv *env, const jobject &j_resource);

static clip * marshal_audio(JNIEnv *env, const jobject &j_resource);

static clip * marshal_text(JNIEnv *env, const jobject &j_resource);

// Outputs are written through soundtrack_path and ratio; renderProject only
// calls this when a soundtrack object was supplied.
static void marshal_soundtrack(JNIEnv *env, const jobject &j_soundtrack,
                               string &soundtrack_path, float &ratio);

static string marshal_jstring_to_string(JNIEnv *env, const jstring &j_str);

static int marshal_jstring_to_int(JNIEnv *env, const jstring &j_str);

//static void marshal_color(JNIEnv *env, const jobject &j_color, float &r, float &g, float &b);

// Result codes. renderProject returns RESULT_ERROR when resource marshalling
// fails; RESULT_SUCCESS is not referenced in the visible part of the file.
int com_hlcam_renderer_server_internal_RenderJob_RESULT_ERROR = -1;
int com_hlcam_renderer_server_internal_RenderJob_RESULT_SUCCESS = 0;

// Renders the request into the file at outputPath; defined further down.
int renderMovieRequest(movieRequest *movieRequestObj, string outputPath);

JNIEXPORT jint JNICALL Java_com_hlcam_video_services_RenderService_renderProject(
    JNIEnv *env, jclass jclazz, jobjectArray j_resources, jobject j_soundtrack,
    jstring j_destination, jstring j_edit_points, jstring j_theme,
    jlong j_target_length, jint j_width, jint j_height, jint j_endroll) {

    string soundtrack_path = "";
    float soundtrack_ratio = 0;
    if (j_soundtrack != NULL) {
        marshal_soundtrack(env, j_soundtrack, soundtrack_path, soundtrack_ratio);
    }

    string dest = marshal_jstring_to_string(env, j_destination);

    string edit_points;
    if (j_edit_points) {
        edit_points = marshal_jstring_to_string(env, j_edit_points);
    } else {
        edit_points = "";
    }

    int theme = marshal_jstring_to_int(env, j_theme);
    int target_length = j_target_length;
    int width = j_width;
    int height = j_height;
    int endroll = j_endroll;

    movieRequest request (height, width, endroll, theme, soundtrack_path, soundtrack_ratio, edit_points);
    request.duration = target_length;

    if (!marshal_resource_array(env, request, j_resources)) {
        ELOG("Failed to marshal resources for some reason.");
        return com_hlcam_renderer_server_internal_RenderJob_RESULT_ERROR;
    }

    int renderResult = 0;
    renderResult = renderMovieRequest(&request, dest);
    DLOG("Render status : %i ", renderResult);

    return renderResult;
}

// Multiplier that seek() applies to a frame index to obtain the timestamp it
// passes to av_seek_frame().
// NOTE(review): not assigned anywhere in the visible part of the file, so it
// stays at its zero-initialized value unless set elsewhere -- confirm.
int64_t timeBase;

// Debug helper: dumps one frame as a binary PPM ("P6") image named
// "<sourceFilepath>-frame<iFrame>.ppm".
//
// pFrame         frame whose first plane is written row by row; each row is
//                written as width*3 bytes (presumably packed RGB24 -- the
//                caller is responsible for the pixel format).
// width, height  image dimensions written into the PPM header.
// iFrame         frame number used in the file name.
// sourceFilepath prefix of the output file name.
//
// Nothing is written if the file name cannot be built or the file cannot be
// opened; the failure is logged instead.
void SaveFrameLocal(AVFrame *pFrame, int width, int height, int iFrame, const char *sourceFilepath) {
  static const char kNameFormat[] = "%s-frame%d.ppm";

  // Size the name buffer from the actual path length; a fixed 100-byte buffer
  // overflows as soon as the source path is long.
  const int nameLen = snprintf(NULL, 0, kNameFormat, sourceFilepath, iFrame);
  if (nameLen < 0) {
    ELOG("Unable to write snapshot to file");
    return;
  }
  std::vector<char> szFilename(static_cast<size_t>(nameLen) + 1);
  snprintf(&szFilename[0], szFilename.size(), kNameFormat, sourceFilepath, iFrame);

  // Open file
  FILE *pFile = fopen(&szFilename[0], "wb");
  if (pFile == NULL) {
    ELOG("Unable to write snapshot to file");
    return;  // never hand a NULL stream to fprintf/fwrite/fclose
  }

  // Write header
  fprintf(pFile, "P6\n%d %d\n255\n", width, height);

  // Write pixel data; linesize may include padding, so advance by it per row
  for (int y = 0; y < height; y++) {
    fwrite(pFrame->data[0] + y * pFrame->linesize[0], 1, width * 3, pFile);
  }

  // Close file
  fclose(pFile);
}

// Seeks pFormatCtx to the timestamp frameIndex * timeBase, using stream
// index -1 and AVSEEK_FLAG_ANY.
// Returns false when pFormatCtx is NULL or av_seek_frame() reports an error
// (the latter is also logged); true otherwise.
bool seek(AVFormatContext *pFormatCtx, int frameIndex){
    if (pFormatCtx == NULL) {
        return false;
    }

    const int64_t targetTs = static_cast<int64_t>(frameIndex) * timeBase;
    const bool seekOk = av_seek_frame(pFormatCtx, -1, targetTs, AVSEEK_FLAG_ANY) >= 0;
    if (!seekOk) {
        ELOG("av_seek_frame failed.");
    }
    return seekOk;
}

void processProtobuf(movieRequest *movieRequestObj) {
 GOOGLE_PROTOBUF_VERIFY_VERSION;
    hlcam::communication::movie::MovieBook movie_book;

    fstream input(movieRequestObj->protoBufferName.c_str(), ios::in | ios::binary);
    if(!input)
    {
        printf("protocol buffer file not found\n");
    }
    else
    {
        if(movie_book.ParseFromIstream(&input))
        {
            //get the struct out of the movie book
            //const hlcam::communication::movie::HighLevelScores& high_level_scores = movie_book.high_level_scores();

            size_t correspondingIdx=0;

                DLOG("protobuf has entries for %d videos\n", movie_book.clips_size() );

            int totalOutputMovieDuration = 0;

            //go through all clips in the movie book (all the video clips)
            for(int clipIter=0; clipIter<movie_book.clips_size(); clipIter++)
            {
                //advance the corresponding index to the next video clip in movieRequestObj, skipping over titles and images
                while(correspondingIdx < movieRequestObj->clips.size() && movieRequestObj->clips.at(correspondingIdx)->getClipType()!=VIDEO_CLIP)
                {
                    totalOutputMovieDuration += movieRequestObj->clips.at(correspondingIdx)->duration;
                    correspondingIdx++;
                }

                //printf("video # %d was at position %d among all clips\n",clipIter,correspondingIdx);
                if (correspondingIdx < movieRequestObj->clips.size() && movieRequestObj->clips.at(correspondingIdx)->getClipType()==VIDEO_CLIP) //if there was another video in the movie and we found it
                {
                    //get segments for this clip
                    const hlcam::communication::movie::Clip& clipInfo = movie_book.clips(clipIter);

                    #ifdef DEBUG_PRINTING
                        DLOG("there are %d segments for %s\n", clipInfo.start_size(), (dynamic_pointer_cast<videoClip>(movieRequestObj->clips.at(correspondingIdx)))->vidFileName.c_str());
                    #endif

                    (dynamic_pointer_cast<videoClip>(movieRequestObj->clips.at(correspondingIdx)))->fps = clipInfo.input_video_fps();

                    //we set this to true so we know that we ONLY show this clip if it actually had segments
                    (dynamic_pointer_cast<videoClip>(movieRequestObj->clips.at(correspondingIdx)))->shouldHaveSegments = true;

                    for(int segIter=0; segIter<clipInfo.start_size(); segIter++)
                    {
                        //set the duration/inframes accordingly
                        pair<int,int> thisSeg;
                        thisSeg.first = clipInfo.start(segIter);
                        thisSeg.second = clipInfo.end(segIter) - clipInfo.start(segIter);

//                        if (beatTime!=-1) //if the beat has been found make sure that this clip ends at the closest beat time (4 beats per measure)
//                        {
//                            int tempDuration = totalOutputMovieDuration + thisSeg.second;
//                            tempDuration = (tempDuration - offset)/beatTime;
//                            thisSeg.second = beatTime*(tempDuration) - totalOutputMovieDuration;
//                        }

                        totalOutputMovieDuration += thisSeg.second;

                        int beatTime = -1;
                        #ifdef DEBUG_PRINTING
                            DLOG("segment [%d,%d]\n", thisSeg.first, thisSeg.second);
                        #endif
                        if ( (beatTime==-1 && thisSeg.second < 72) || (beatTime!=-1 && thisSeg.second < beatTime) ) // must be 3 seconds if beat unknown, or 4 beats
                        {
                            #ifdef DEBUG_PRINTING
                                DLOG("a segment was only %d frames long and is being discarded\n", thisSeg.second);
                            #endif
                        }
                        else
                        {
                            (dynamic_pointer_cast<videoClip>(movieRequestObj->clips.at(correspondingIdx)))->segments.push_back(thisSeg);
                        }
                    }
                }
                correspondingIdx++;
            }
        }
        else
        {
            printf("unable to read protocol buffer file\n");
        }
    }

    DLOG("Finished parsing protocol buffer");
 }

/* Per-stream encoding state shared by add_stream(), the open_*(), get_*() and
 * write_*() helpers, and close_stream(). */
typedef struct OutputStream {
    /* muxer stream created by add_stream() */
    AVStream *st;
    /* pts of the next frame that will be generated */
    int64_t next_pts;
    /* running count of samples sent to the audio encoder; write_audio_frame()
     * derives each frame's pts from it */
    int samples_count;
    /* reusable frame in the encoder's own format; this is what gets encoded */
    AVFrame *frame;
    /* staging frame: S16 audio (open_audio) or a YUV420P picture that
     * open_video only allocates when the codec pixel format differs */
    AVFrame *tmp_frame;
    /* sine generator state used by get_audio_frame(): phase, phase step and
     * the amount the step grows per sample */
    float t, tincr, tincr2;
    /* pixel-format converter, created lazily in get_video_frame() */
    struct SwsContext *sws_ctx;
    /* sample-format converter, created in open_audio() */
    struct SwrContext *swr_ctx;
} OutputStream;

/* Packet logging hook called by write_frame() before each packet is muxed.
 * Currently a no-op: the timestamp dump below is commented out. */
static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
{
//    AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
//    printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
//           av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
//           av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
//           av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
//           pkt->stream_index);
}
/*
 * Hand one encoded packet to the muxer.
 *
 * The packet timestamps are converted from *time_base (the codec time base)
 * to the time base of stream st, the packet is tagged with st's index, and it
 * is then written interleaved. Returns the muxer's result code.
 */
static int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt)
{
    const AVRational codec_tb = *time_base;

    av_packet_rescale_ts(pkt, codec_tb, st->time_base);
    pkt->stream_index = st->index;

    log_packet(fmt_ctx, pkt);

    const int mux_status = av_interleaved_write_frame(fmt_ctx, pkt);
    return mux_status;
}
/* Add an output stream. */
static void add_stream(OutputStream *ost, AVFormatContext *oc,
                       AVCodec **codec,
                       enum AVCodecID codec_id) {
    AVCodecContext *c;
    int i;
    /* find the encoder */
    *codec = avcodec_find_encoder(codec_id);
    if (!(*codec)) {
        ELOG("Could not find encoder for '%s'\n", avcodec_get_name(codec_id));
        return;
    }
    ost->st = avformat_new_stream(oc, *codec);
    if (!ost->st) {
        ELOG("Could not allocate stream\n");
        return;
    }
    ost->st->id = oc->nb_streams-1;
    c = ost->st->codec;
    switch ((*codec)->type) {
    case AVMEDIA_TYPE_AUDIO:
        c->sample_fmt  = (*codec)->sample_fmts ?
            (*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
        c->bit_rate    = 64000;
        c->sample_rate = 44100;
        if ((*codec)->supported_samplerates) {
            c->sample_rate = (*codec)->supported_samplerates[0];
            for (i = 0; (*codec)->supported_samplerates[i]; i++) {
                if ((*codec)->supported_samplerates[i] == 44100)
                    c->sample_rate = 44100;
            }
        }
        c->channels        = av_get_channel_layout_nb_channels(c->channel_layout);
        c->channel_layout = AV_CH_LAYOUT_STEREO;
        if ((*codec)->channel_layouts) {
            c->channel_layout = (*codec)->channel_layouts[0];
            for (i = 0; (*codec)->channel_layouts[i]; i++) {
                if ((*codec)->channel_layouts[i] == AV_CH_LAYOUT_STEREO)
                    c->channel_layout = AV_CH_LAYOUT_STEREO;
            }
        }
        c->channels        = av_get_channel_layout_nb_channels(c->channel_layout);
        ost->st->time_base = (AVRational){ 1, c->sample_rate };
        break;
    case AVMEDIA_TYPE_VIDEO:
        c->codec_id = codec_id;
        c->bit_rate = 400000;
        /* Resolution must be a multiple of two. */
//        c->width    = 352;
//        c->height   = 288;
        c->width    = 1280;
        c->height   = 720;

        /* timebase: This is the fundamental unit of time (in seconds) in terms
         * of which frame timestamps are represented. For fixed-fps content,
         * timebase should be 1/framerate and timestamp increments should be
         * identical to 1. */
        ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE };
        c->time_base       = ost->st->time_base;
        c->gop_size      = 12; /* emit one intra frame every twelve frames at most */
        c->pix_fmt       = STREAM_PIX_FMT;
        if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
            /* just for testing, we also add B frames */
            c->max_b_frames = 2;
        }
        if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
            /* Needed to avoid using macroblocks in which some coeffs overflow.
             * This does not happen with normal video, it just happens here as
             * the motion of the chroma plane does not match the luma plane. */
            c->mb_decision = 2;
        }
    break;
    default:
        break;
    }
    /* Some formats want stream headers to be separate. */
    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
        c->flags |= CODEC_FLAG_GLOBAL_HEADER;
}

/**************************************************************/
/* audio output */

void printAudioFrameInfo(const AVCodecContext* codecContext, const AVFrame* frame)
{
    // See the following to know what data type (unsigned char, short, float, etc) to use to access the audio data:
    // http://ffmpeg.org/doxygen/trunk/samplefmt_8h.html#af9a51ca15301871723577c730b5865c5
    std::cout << "Audio frame info:\n"
              << "  Sample count: " << frame->nb_samples << '\n'
              << "  Channel count: " << codecContext->channels << '\n'
              << "  Format: " << av_get_sample_fmt_name(codecContext->sample_fmt) << '\n'
              << "  Bytes per sample: " << av_get_bytes_per_sample(codecContext->sample_fmt) << '\n'
              << "  Is planar? " << av_sample_fmt_is_planar(codecContext->sample_fmt) << '\n';

    DLOG( "Audio frame info: \n\tSample count: %i\n\tChannel count: %i\n\tFormat: %s\n\tBytes per sample: %i\n\tIs Planar? %i\n",
                        frame->nb_samples,
                        codecContext->channels,
                        av_get_sample_fmt_name(codecContext->sample_fmt),
                        av_get_bytes_per_sample(codecContext->sample_fmt),
                        av_sample_fmt_is_planar(codecContext->sample_fmt));


    std::cout << "frame->linesize[0] tells you the size (in bytes) of each plane\n";

    if (codecContext->channels > AV_NUM_DATA_POINTERS && av_sample_fmt_is_planar(codecContext->sample_fmt))
    {
        std::cout << "The audio stream (and its frames) have too many channels to fit in\n"
                  << "frame->data. Therefore, to access the audio data, you need to use\n"
                  << "frame->extended_data to access the audio data. It's planar, so\n"
                  << "each channel is in a different element. That is:\n"
                  << "  frame->extended_data[0] has the data for channel 1\n"
                  << "  frame->extended_data[1] has the data for channel 2\n"
                  << "  etc.\n";
    }
    else
    {
        std::cout << "Either the audio data is not planar, or there is enough room in\n"
                  << "frame->data to store all the channels, so you can either use\n"
                  << "frame->data or frame->extended_data to access the audio data (they\n"
                  << "should just point to the same data).\n";
    }

    std::cout << "If the frame is planar, each channel is in a different element.\n"
              << "That is:\n"
              << "  frame->data[0]/frame->extended_data[0] has the data for channel 1\n"
              << "  frame->data[1]/frame->extended_data[1] has the data for channel 2\n"
              << "  etc.\n";

    std::cout << "If the frame is packed (not planar), then all the data is in\n"
              << "frame->data[0]/frame->extended_data[0] (kind of like how some\n"
              << "image formats have RGB pixels packed together, rather than storing\n"
              << " the red, green, and blue channels separately in different arrays.\n";
}


static AVFrame *alloc_audio_frame(enum AVSampleFormat sample_fmt,
                                  uint64_t channel_layout,
                                  int sample_rate, int nb_samples)
{
    AVFrame *frame = av_frame_alloc();
    int ret;
    if (!frame) {
        fprintf(stderr, "Error allocating an audio frame\n");
        exit(1);
    }
    frame->format = sample_fmt;
    frame->channel_layout = channel_layout;
    frame->sample_rate = sample_rate;
    frame->nb_samples = nb_samples;
    if (nb_samples) {
        ret = av_frame_get_buffer(frame, 0);
        if (ret < 0) {
            fprintf(stderr, "Error allocating an audio buffer\n");
            exit(1);
        }
    }
    return frame;
}
static int open_audio(AVFormatContext *oc, AVCodec *codec, OutputStream *ost, AVDictionary *opt_arg)
{
    AVCodecContext *c;
    int nb_samples;
    int ret;
    AVDictionary *opt = NULL;
    c = ost->st->codec;
    /* open it */
    av_dict_copy(&opt, opt_arg, 0);
    av_dict_set(&opt, "strict", "experimental", 0);

    ret = avcodec_open2(c, codec, &opt);
    av_dict_free(&opt);
    if (ret < 0) {
        ELOG("Could not open audio codec: [%i] %s\n", ret, av_err2str(ret));
        return ret;
    }
    /* init signal generator */
    ost->t     = 0;
    ost->tincr = 2 * M_PI * 110.0 / c->sample_rate;
    /* increment frequency by 110 Hz per second */
    ost->tincr2 = 2 * M_PI * 110.0 / c->sample_rate / c->sample_rate;
    if (c->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE)
        nb_samples = 10000;
    else
        nb_samples = c->frame_size;
    ost->frame     = alloc_audio_frame(c->sample_fmt, c->channel_layout,
                                       c->sample_rate, nb_samples);
    ost->tmp_frame = alloc_audio_frame(AV_SAMPLE_FMT_S16, c->channel_layout,
                                       c->sample_rate, nb_samples);
    /* create resampler context */
        ost->swr_ctx = swr_alloc();
        if (!ost->swr_ctx) {
            ELOG("Could not allocate resampler context\n");
            return -300;
        }
        /* set options */
        av_opt_set_int       (ost->swr_ctx, "in_channel_count",   c->channels,       0);
        av_opt_set_int       (ost->swr_ctx, "in_sample_rate",     c->sample_rate,    0);
        av_opt_set_sample_fmt(ost->swr_ctx, "in_sample_fmt",      AV_SAMPLE_FMT_S16, 0);
        av_opt_set_int       (ost->swr_ctx, "out_channel_count",  c->channels,       0);
        av_opt_set_int       (ost->swr_ctx, "out_sample_rate",    c->sample_rate,    0);
        av_opt_set_sample_fmt(ost->swr_ctx, "out_sample_fmt",     c->sample_fmt,     0);
        /* initialize the resampling context */
        if ((ret = swr_init(ost->swr_ctx)) < 0) {
            ELOG("Failed to initialize the resampling context: %i\n", ret);
            return ret;
        }

        return 0;
}
/* Prepare a 16 bit dummy audio frame of 'frame_size' samples and
 * 'nb_channels' channels. */
static AVFrame *get_audio_frame(OutputStream *ost)
{
    AVFrame *frame = ost->tmp_frame;
    int j, i, v;
    int16_t *q = (int16_t*)frame->data[0];
    /* check if we want to generate more frames */
    if (av_compare_ts(ost->next_pts, ost->st->codec->time_base,
                      STREAM_DURATION, (AVRational){ 1, 1 }) >= 0)
        return NULL;
    for (j = 0; j <frame->nb_samples; j++) {
        v = (int)(sin(ost->t) * 10000);
        for (i = 0; i < ost->st->codec->channels; i++)
            *q++ = v;
        ost->t     += ost->tincr;
        ost->tincr += ost->tincr2;
    }
    frame->pts = ost->next_pts;
    ost->next_pts  += frame->nb_samples;
    return frame;
}
/*
 * encode one audio frame and send it to the muxer
 * return 1 when encoding is finished, 0 otherwise
 */
static int write_audio_frame(AVFormatContext *oc, OutputStream *ost, AVFrame *frame)
{
    AVCodecContext *c;
    AVPacket pkt = { 0 }; // data and size must be 0;
//    AVFrame *frame;
    int ret;
    int got_packet;
    int dst_nb_samples;
    av_init_packet(&pkt);
    c = ost->st->codec;
//    frame = get_audio_frame(ost);
    if (frame) {
        /* convert samples from native format to destination codec format, using the resampler */
            /* compute destination number of samples */
            dst_nb_samples = av_rescale_rnd(swr_get_delay(ost->swr_ctx, c->sample_rate) + frame->nb_samples,
                                            c->sample_rate, c->sample_rate, AV_ROUND_UP);
            av_assert0(dst_nb_samples == frame->nb_samples);
        /* when we pass a frame to the encoder, it may keep a reference to it
         * internally;
         * make sure we do not overwrite it here
         */
        ret = av_frame_make_writable(ost->frame);
        if (ret < 0) {
            ELOG("Unable to prepare frame for writing: Error code: %s", av_err2str(ret));
            return ret;
        }
            /* convert to destination format */
            ret = swr_convert(ost->swr_ctx,
                              ost->frame->data, dst_nb_samples,
                              (const uint8_t **)frame->data, frame->nb_samples);
            if (ret < 0) {
                ELOG("Error while converting: %s\n", av_err2str(ret));
                return -1;
            }
            frame = ost->frame;
        frame->pts = av_rescale_q(ost->samples_count, (AVRational){1, c->sample_rate}, c->time_base);
        ost->samples_count += dst_nb_samples;
    }
    ret = avcodec_encode_audio2(c, &pkt, frame, &got_packet);
    if (ret < 0) {
        ELOG("Error encoding audio frame: %s\n", av_err2str(ret));
        return -1;
    }
    if (got_packet) {
        ret = write_frame(oc, &c->time_base, ost->st, &pkt);
        if (ret < 0) {
            ELOG( "Error while writing audio frame: %s\n", av_err2str(ret));
            return -1;
        }
    }
    return (frame || got_packet) ? 0 : 1;
}


/**************************************************************/
/* video output */
/*
 * Allocate a video frame of the given pixel format and size, including its
 * data buffers (32-byte aligned).
 *
 * Returns the frame, or NULL if either the frame or its buffers cannot be
 * allocated. Both failures are reported the same way so that callers (which
 * already check for NULL) can fail the render instead of the whole process
 * being terminated.
 */
static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
{
    AVFrame *picture;
    int ret;
    picture = av_frame_alloc();
    if (!picture)
        return NULL;
    picture->format = pix_fmt;
    picture->width  = width;
    picture->height = height;
    /* allocate the buffers for the frame data */
    ret = av_frame_get_buffer(picture, 32);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate frame data.\n");
        /* release the half-built frame; av_frame_free() also resets the pointer */
        av_frame_free(&picture);
        return NULL;
    }
    return picture;
}


static int open_video(AVFormatContext *oc, AVCodec *codec, OutputStream *ost, AVDictionary *opt_arg)
{
    int ret;
    AVCodecContext *c = ost->st->codec;
    AVDictionary *opt = NULL;
    av_dict_copy(&opt, opt_arg, 0);
    /* open the codec */
    ret = avcodec_open2(c, codec, &opt);
    av_dict_free(&opt);

    if (ret < 0) {
        ELOG("Could not open video codec: %s\n", av_err2str(ret));
        return ret;
    }
    /* allocate and init a re-usable frame */
    DLOG("Allocate and init a are-usable frame: %i x %i Format: %i", c->width, c->height, c->pix_fmt);
    ost->frame = alloc_picture(c->pix_fmt, c->width, c->height);
    if (!ost->frame) {
        ELOG("Could not allocate video frame\n");
        return -100;
    }

    /* If the output format is not YUV420P, then a temporary YUV420P
     * picture is needed too. It is then converted to the required
     * output format. */
    ost->tmp_frame = NULL;
    if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
        DLOG("input format is not YUV420P converting to size %i x %i", c->width, c->height);
        ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, c->width, c->height);
        if (!ost->tmp_frame) {
            ELOG("Could not allocate temporary picture\n");
            return -200;
        }
    }

    return 0;
}

/* Prepare a dummy image. */
static void fill_yuv_image(AVFrame *pict, int frame_index,

                           int width, int height)
{
    int x, y, i, ret;
    /* when we pass a frame to the encoder, it may keep a reference to it
     * internally;
     * make sure we do not overwrite it here
     */
    ret = av_frame_make_writable(pict);
    if (ret < 0)
        exit(1);
    i = frame_index;
    /* Y */
    for (y = 0; y < height; y++)
        for (x = 0; x < width; x++)
            pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
    /* Cb and Cr */
    for (y = 0; y < height / 2; y++) {
        for (x = 0; x < width / 2; x++) {
            pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
            pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
        }
    }
}
static AVFrame *get_video_frame(OutputStream *ost)
{
    AVCodecContext *c = ost->st->codec;
    /* check if we want to generate more frames */
    if (av_compare_ts(ost->next_pts, ost->st->codec->time_base,
                      STREAM_DURATION, (AVRational){ 1, 1 }) >= 0)
        return NULL;
    if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
        /* as we only generate a YUV420P picture, we must convert it
         * to the codec pixel format if needed */
        if (!ost->sws_ctx) {
            ost->sws_ctx = sws_getContext(c->width, c->height,
                                          AV_PIX_FMT_YUV420P,
                                          c->width, c->height,
                                          c->pix_fmt,
                                          SCALE_FLAGS, NULL, NULL, NULL);
            if (!ost->sws_ctx) {
                fprintf(stderr,
                        "Could not initialize the conversion context\n");
                exit(1);
            }
        }
        fill_yuv_image(ost->tmp_frame, ost->next_pts, c->width, c->height);
        sws_scale(ost->sws_ctx,
                  (const uint8_t * const *)ost->tmp_frame->data, ost->tmp_frame->linesize,
                  0, c->height, ost->frame->data, ost->frame->linesize);
    } else {
        fill_yuv_image(ost->frame, ost->next_pts, c->width, c->height);
    }
    ost->frame->pts = ost->next_pts++;
    return ost->frame;
}
/*
 * encode one video frame and send it to the muxer
 * return 1 when encoding is finished, 0 otherwise
 */
static int write_video_frame(AVFormatContext *oc, OutputStream *ost, AVFrame *frame)
{
    int ret;
    AVCodecContext *c;
//    AVFrame *frame;
    int got_packet = 0;
    c = ost->st->codec;
//    frame = get_video_frame(ost);
    if (oc->oformat->flags & AVFMT_RAWPICTURE) {
        /* a hack to avoid data copy with some raw video muxers */
        AVPacket pkt;
        av_init_packet(&pkt);
        if (!frame)
            return 1;
        pkt.flags        |= AV_PKT_FLAG_KEY;
        pkt.stream_index  = ost->st->index;
        pkt.data          = (uint8_t *)frame;
        pkt.size          = sizeof(AVPicture);
        pkt.pts = pkt.dts = frame->pts;
        av_packet_rescale_ts(&pkt, c->time_base, ost->st->time_base);
        ret = av_interleaved_write_frame(oc, &pkt);
    } else {
        AVPacket pkt = { 0 };
        av_init_packet(&pkt);
        /* encode the image */
        ret = avcodec_encode_video2(c, &pkt, frame, &got_packet);
        if (ret < 0) {
            ELOG("Error encoding video frame: %s\n", av_err2str(ret));
            exit(1);
        }
        if (got_packet) {
            ret = write_frame(oc, &c->time_base, ost->st, &pkt);
        } else {
            ret = 0;
        }
    }
    if (ret < 0) {
        ELOG("Error while writing video frame: %s\n", av_err2str(ret));
        exit(1);
    }
    return (frame || got_packet) ? 0 : 1;
}
/*
 * Release everything an output stream owns: closes its encoder and frees
 * both frames, the scaler context and the resampler context.
 */
static void close_stream(AVFormatContext *oc, OutputStream *ost)
{
    AVCodecContext *enc = ost->st->codec;
    avcodec_close(enc);

    /* reusable frames */
    av_frame_free(&ost->frame);
    av_frame_free(&ost->tmp_frame);

    /* converters */
    sws_freeContext(ost->sws_ctx);
    swr_free(&ost->swr_ctx);
}


int renderMovieRequest(movieRequest *movieRequestObj, string outputPath) {
    AVOutputFormat *ofmt = NULL;
    AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx = NULL;
    AVFormatContext *pFormatCtx = NULL;
    AVCodec *audio_codec, *video_codec;

    OutputStream video_st = { 0 }, audio_st = { 0 };
    size_t            i;
    int             videoStream, audioStream;
    AVCodecContext  *pCodecCtx = NULL;
    AVCodecContext *audioCodecCtx = NULL;
    AVCodec         *pCodec = NULL;
    AVFrame         *aFrame = NULL;
    AVFrame         *pFrame = NULL;
    AVFrame         *pFrameRGB = NULL;
    AVPacket        packet = { 0 };
    int             frameFinished;
    int             audioFrameFinished;
    int             numBytes;
    uint8_t         *buffer = NULL;
    AVDictionary    *optionsDict = NULL;
    AVDictionary *opt = NULL;
    struct SwsContext      *sws_ctx = NULL;

    const char *in_filename, *out_filename;
    int ret;

    int have_audio = 0, have_video = 0;
    int encode_audio = 0, encode_video = 0;

    processProtobuf(movieRequestObj);

    out_filename = outputPath.c_str();

    av_register_all();

    DLOG("attempting to create context for output file %s", out_filename);

    avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, out_filename);
    if (!ofmt_ctx) {
        ELOG("Could not create output context\n");
        ret = AVERROR_UNKNOWN;
        return ret; //goto end;
    }
    ofmt = ofmt_ctx->oformat;

   /* Add the audio and video streams using the default format codecs
       * and initialize the codecs. */
      if (ofmt->video_codec != AV_CODEC_ID_NONE) {
          add_stream(&video_st, ofmt_ctx, &video_codec, ofmt->video_codec);
          have_video = 1;
          encode_video = 1;
      }
      if (ofmt->audio_codec != AV_CODEC_ID_NONE) {
          add_stream(&audio_st, ofmt_ctx, &audio_codec, ofmt->audio_codec);
          have_audio = 1;
          encode_audio = 1;
      }

    DLOG("allocate encode buffers");
 /* Now that all the parameters are set, we can open the audio and
     * video codecs and allocate the necessary encode buffers. */
    if (have_video)
        open_video(ofmt_ctx, video_codec, &video_st, opt);
    if (have_audio) {
        DLOG("Opening audio codec");
        open_audio(ofmt_ctx, audio_codec, &audio_st, opt);
    }

    DLOG("open output file for writing");
   /* open the output file, if needed */
    if (!(ofmt->flags & AVFMT_NOFILE)) {
        ret = avio_open(&ofmt_ctx->pb, out_filename, AVIO_FLAG_WRITE);
        if (ret < 0) {
            ELOG( "Could not open '%s': %s\n", out_filename, av_err2str(ret));
            return 1;
        }
    }

    /* Write the stream header, if any. */
    ret = avformat_write_header(ofmt_ctx, &opt);
    if (ret < 0) {
        ELOG("Error occurred when opening output file: %s\n", av_err2str(ret));
        return 1;
    }

    vector<clipShPtr> * clips = &(movieRequestObj->clips);

    DLOG("ready to process clips: %i", clips->size());
    for (size_t clipIdx = 0; clipIdx < clips->size(); ++clipIdx) {

        shared_ptr<clip> currentClip = clips->at(clipIdx);

        switch (currentClip->getClipType()) {
            case VIDEO_CLIP: {
                shared_ptr<videoClip> vidClip = dynamic_pointer_cast<videoClip>(clips->at(clipIdx));

                if (vidClip->shouldHaveSegments) {
                    // open the file for reading and create a temporary file for output
                    in_filename = vidClip->vidFileName.c_str();
                    DLOG("Opening %s for reading", in_filename);

                    if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0) {
                        ELOG("Could not open input file '%s'", in_filename);
                        return ret; //goto end;
                    }

                    if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0) {
                        ELOG("Failed to retrieve input stream information");
                        return ret; //goto end;
                    }

                    av_dump_format(ifmt_ctx, 0, in_filename, 0);

                    videoStream = -1;
                    audioStream = -1;
                    // setup input format context and output format context;
//                    AVStream *video_in_stream = NULL;
                    for (i = 0; i < ifmt_ctx->nb_streams; i++) {
                        if(ifmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO) {
                            videoStream=i;
//                            video_in_stream = ifmt_ctx->streams[i];
                        }
                        else if(ifmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO) {
                            audioStream=i;
                            DLOG("Found audio stream at index %i", i);
//                            video_in_stream = ifmt_ctx->streams[i];
                        }
                    }

                    audioStream = av_find_best_stream(ifmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, &pCodec, 0);
                    if (audioStream < 0) {
                        DLOG("Could not find audio stream in file");
                        avformat_close_input(&ifmt_ctx);
                        continue;
                    }
                    if (videoStream == -1) {
                        DLOG("not a video stream.");
                        avformat_close_input(&ifmt_ctx);
                        continue;
                    }
                    // Get a pointer to the codec context for the audio stream
                    audioCodecCtx = ifmt_ctx->streams[audioStream]->codec;

                    // Get a pointer to the codec context for the video stream
                    pCodecCtx = ifmt_ctx->streams[videoStream]->codec;

                    if (pCodecCtx == NULL) {
                        ELOG("Error in getting pointer to codec for vidstream");
                    }

                    if (audioCodecCtx == NULL) {
                        ELOG("Error in getting pointer to codec for audio stream");
                    }

                    audioCodecCtx-> codec = pCodec;

                    // Find the decoder for the video stream
                    pCodec=avcodec_find_decoder(pCodecCtx->codec_id);

                    if(pCodec==NULL) {
                        ELOG("Unsupported codec!\n");
                        return -1; // Codec not found
                    }
                    // Open codec
                    if(avcodec_open2(pCodecCtx, pCodec, &optionsDict)<0) {
                        ELOG("Unable to open codec");
                        return -1; // Could not open codec
                    }

                    if (avcodec_open2(audioCodecCtx, audioCodecCtx->codec, NULL) != 0)
                    {
                        av_free(pFrame);
                        avformat_close_input(&ifmt_ctx);
                        ELOG("Couldn't open the context with the decoder" );
                        return 1;

                    }

                    DLOG("This stream(%i) has %i channels and a sample rate of %iHz ", audioStream, audioCodecCtx->channels, audioCodecCtx->sample_rate);
                    DLOG("The data is in the format %s", av_get_sample_fmt_name(audioCodecCtx->sample_fmt));


                    // get the timebase
                    timeBase = (int64_t(pCodecCtx->time_base.num) * AV_TIME_BASE) / int64_t(pCodecCtx->time_base.den);

                    // Allocate video frame
                    pFrame=av_frame_alloc();


                    // Allocate an AVFrame structure
                    pFrameRGB=av_frame_alloc();
                    if(pFrameRGB==NULL)
                        return -1;

                    // Determine required buffer size and allocate buffer
//                    numBytes=avpicture_get_size(PIX_FMT_RGB24, pCodecCtx->width, pCodecCtx->height);
                    numBytes = avpicture_get_size(PIX_FMT_RGB24, movieRequestObj->width, movieRequestObj->height);
                    DLOG("Buffer size allocated: %i x %i: %i ", movieRequestObj->width, movieRequestObj->height, numBytes);
                    buffer=(uint8_t *)av_malloc(numBytes*sizeof(uint8_t));

                    sws_ctx = sws_getContext
                    (
                        pCodecCtx->width,
                        pCodecCtx->height,
                        pCodecCtx->pix_fmt,
                        movieRequestObj->width,
                        movieRequestObj->height,
                        PIX_FMT_RGB24,
                        SWS_BILINEAR,
                        NULL,
                        NULL,
                        NULL
                    );

                    // Assign appropriate parts of buffer to image planes in pFrameRGB
                    // Note that pFrameRGB is an AVFrame, but AVFrame is a superset
                    // of AVPicture
                    avpicture_fill((AVPicture *)pFrameRGB, buffer, PIX_FMT_RGB24, movieRequestObj->width, movieRequestObj->height);
                    size_t numSegments = vidClip->segments.size();

                    DLOG("Found %i segments to process", numSegments);
                    for (size_t segmentIdx = 0; segmentIdx < numSegments; ++segmentIdx) {
                        // seek to the right position
                        int frameOffset = vidClip->segments.at(segmentIdx).first;
                        int clipDuration = vidClip->segments.at(segmentIdx).second;
                        DLOG("Starting Frame Number: %i Duration: %i", frameOffset, clipDuration);

                        seek(ifmt_ctx, frameOffset);
                        // loop for X frames where X is < frameOffset + clipDuration; clipDuration is the length of the clip in terms of frames
                        for (int frameIdx = frameOffset; frameIdx < (frameOffset + clipDuration); ++frameIdx) {
                            av_init_packet(&packet);
                            int avReadResult = 0;
                            int continueRecording = 1;
                            while ((continueRecording == 1) && (frameIdx < (frameOffset + clipDuration) )) {
                                avReadResult = av_read_frame(ifmt_ctx, &packet);
                                if(avReadResult != 0){
                                    if (avReadResult != AVERROR_EOF) {
                                        ELOG("av_read_frame error: %i", avReadResult );
                                    } else {
                                        ILOG("End of input file");
                                    }
                                    continueRecording = 0;
                                }
                                // Is this a packet from the video stream?
                                if(packet.stream_index==videoStream) {
                                    // Decode video frame
                                    avcodec_decode_video2(pCodecCtx, pFrameRGB, &frameFinished, &packet);

                                    // Did we get a video frame?
                                    if(frameFinished) {
                                        // Convert the image from its native format to RGB
                                        sws_scale
                                        (
                                           sws_ctx,
                                           (uint8_t const * const *)pFrame->data,
                                           pFrame->linesize,
                                           0,
                                           pCodecCtx->height,
                                           pFrameRGB->data,
                                           pFrameRGB->linesize
                                        );
                                        write_video_frame(ofmt_ctx, &video_st, pFrameRGB);
                                        frameIdx++;
                                    }

                                }
                                else if (packet.stream_index == audioStream) {
//                                    // Decode audio frame
//                                    int audioErr = avcodec_decode_audio4(pCodecCtx, pFrameRGB, &audioFrameFinished, &packet);
//
//                                    DLOG("Audio frame decoded.  decode status: [%i] %s", audioErr, av_err2str(audioErr));
//                                    if (audioFrameFinished) {
////                                        write the audio frame to file
//                                        write_audio_frame(ofmt_ctx, &audio_st, pFrameRGB);
//
//                                    }

                                    AVPacket decodingPacket = packet;

                                    // Audio packets can have multiple audio frames in a single packet
                                    while (decodingPacket.size > 0)
                                    {
                                        // Try to decode the packet into a frame
                                        // Some frames rely on multiple packets, so we have to make sure the frame is finished before
                                        // we can use it
                                        int gotFrame = 0;
                                        if (!aFrame) {
                                            if (!(aFrame = av_frame_alloc())) {
                                                fprintf(stderr, "Could not allocate audio frame\n");
                                                exit(1);
                                            }
                                        }
                                        int result = avcodec_decode_audio4(audioCodecCtx, aFrame, &gotFrame, &decodingPacket);
                                        DLOG("Audio frame decoded.  decode status: [%i] %s", result, av_err2str(result));

                                        if (result >= 0 && gotFrame)
                                        {
                                            decodingPacket.size -= result;
                                            decodingPacket.data += result;

                                            // We now have a fully decoded audio frame
                                            printAudioFrameInfo(audioCodecCtx, pFrameRGB);
                                            write_audio_frame(ofmt_ctx, &audio_st, pFrameRGB);
                                        }
                                        else
                                        {
                                            decodingPacket.size = 0;
                                            decodingPacket.data = nullptr;
                                        }
                                    }

                                }
                                // Free the packet that was allocated by av_read_frame
                                av_free_packet(&packet);
                            }
                                // Free the RGB image

                        }
                    }

                    DLOG("Cleaning up frame allocations");
                    av_free(buffer);
                    av_free(pFrameRGB);
                    av_free(aFrame);
                    // Free the YUV frame
                    av_free(pFrame);

                } // end video clip processing
            }
            break;

            case TITLE_CLIP: {
              }
            break;

            default:
                ELOG("Failed to identify clip");
                break;
        } // end switch statement

        DLOG("Finished processing clip #%i", clipIdx);
        avformat_close_input(&ifmt_ctx);
    } // end main for loop -> clip iteration


 /* Write the trailer, if any. The trailer must be written before you
     * close the CodecContexts open when you wrote the header; otherwise
     * av_write_trailer() may try to use memory that was freed on
     * av_codec_close(). */
    av_write_trailer(ofmt_ctx);

    /* Close each codec. */
    if (have_video)
        close_stream(ofmt_ctx, &video_st);
    if (have_audio)
        close_stream(ofmt_ctx, &audio_st);

    if (ofmt_ctx && !(ofmt->flags & AVFMT_NOFILE)) {
        /* Close the output file. */
        avio_close(ofmt_ctx->pb);
    }

    DLOG("Closing input format context");
    avformat_close_input(&ifmt_ctx);

    DLOG("Free ouptut format context");
    avformat_free_context(ofmt_ctx);

    if (ret < 0 && ret != AVERROR_EOF) {
        ELOG( "Error occurred: %s\n", av_err2str(ret));
        return 1;
    }

    return 0;
}


// Marshal routines: convert Java objects received over JNI into native C/C++ values.
/**
 * Converts every element of a Java resource array into a native clip and
 * appends it to the movie request, preserving array order.
 *
 * @param env          JNI environment of the calling thread.
 * @param request      Request that receives one clip per resource.
 * @param j_resources  Java array of resource objects.
 * @return true when every resource was marshalled; false as soon as one
 *         element is null or cannot be converted. Clips appended before the
 *         failing element remain in the request.
 */
static bool marshal_resource_array(JNIEnv *env, movieRequest &request, jobjectArray j_resources) {
  const jsize j_length = env->GetArrayLength(j_resources);

  for (jsize i = 0; i < j_length; i++) {
    // GetObjectArrayElement returns a fresh local reference on every call.
    // Release it each iteration so a long array cannot exhaust the local
    // reference table while this native call is still running.
    jobject j_resource = env->GetObjectArrayElement(j_resources, i);
    if (j_resource == NULL) {
      std::cout << "Resource #" << i << " is null" << std::endl;
      return false;
    }

    clip * c = marshal_resource_to_clip(env, j_resource);
    env->DeleteLocalRef(j_resource);

    if (c == NULL) {
      return false;
    }

    // NOTE(review): the clip is heap-allocated by the marshal_* helpers and is
    // handed over by reference here. Whether push_back copies it or adopts the
    // pointer is not visible from this file -- confirm the allocation is not
    // leaked (or sliced) before adding a delete.
    request.push_back(*c);
  }

  return true;
}

/**
 * Builds the native clip that corresponds to a Java resource, choosing the
 * concrete marshaller from the resource's MIME type (getMimeType()).
 *
 *   video/...            -> marshal_video
 *   image/...            -> marshal_image
 *   audio/...            -> marshal_audio
 *   text/..., application/slide... -> marshal_text
 *
 * @param env         JNI environment of the calling thread.
 * @param j_resource  Java resource object; may be null.
 * @return the clip produced by the selected marshaller, or NULL when the
 *         resource is null, its MIME type cannot be read, or the MIME type is
 *         not recognised. When the JNI lookup or call fails, the Java
 *         exception is left pending for the caller.
 */
static clip * marshal_resource_to_clip(JNIEnv * env, const jobject &j_resource) {
  if (j_resource == NULL) {
    std::cout << "Cannot marshal a null resource" << std::endl;
    return NULL;
  }

  jclass j_cls = env->GetObjectClass(j_resource);
  jmethodID j_mid = env->GetMethodID(j_cls, "getMimeType", "()Ljava/lang/String;");
  // The class reference is only needed for the lookup above.
  env->DeleteLocalRef(j_cls);
  if (j_mid == NULL) {
    // Passing a NULL method ID to CallObjectMethod would crash the VM.
    std::cout << "Resource does not provide getMimeType()" << std::endl;
    return NULL;
  }

  jstring j_mime_type = (jstring) env->CallObjectMethod(j_resource, j_mid);
  if (env->ExceptionCheck()) {
    // The getter threw; no further JNI work is safe until the caller handles it.
    return NULL;
  }

  const string mimeType = marshal_jstring_to_string(env, j_mime_type);
  if (j_mime_type != NULL) {
    env->DeleteLocalRef(j_mime_type);
  }

  // True when mimeType begins with the given prefix.
  auto starts_with = [&mimeType](const string &prefix) -> bool {
    return mimeType.compare(0, prefix.size(), prefix) == 0;
  };

  if (starts_with("video/")) {
    return marshal_video(env, j_resource);
  }
  if (starts_with("image/")) {
    return marshal_image(env, j_resource);
  }
  if (starts_with("audio/")) {
    return marshal_audio(env, j_resource);
  }
  // Plain text and slide resources are both rendered as title clips.
  if (starts_with("text/") || starts_with("application/slide")) {
    return marshal_text(env, j_resource);
  }

  std::cout << "Invalid mimetype '" << mimeType << "'" << std::endl;
  return NULL;
}

/**
 * Builds a videoClip from a Java video resource.
 *
 * Reads getAssetUri() into vidFileName and getCaptionText() into captionText.
 * The rotation angle is fixed at 0 and the caption colour at 255/255/255
 * because the corresponding Java getters are currently not queried.
 *
 * @param env         JNI environment of the calling thread.
 * @param j_resource  Java resource object (non-null).
 * @return a newly allocated videoClip owned by the caller, or NULL when a
 *         getter is missing or throws (the Java exception is left pending).
 */
static clip * marshal_video(JNIEnv *env, const jobject &j_resource) {
  jclass j_cls = env->GetObjectClass(j_resource);

  // Invokes a no-argument getter returning java.lang.String and copies the
  // result into `out`. Returns false when the method does not exist or throws;
  // a NULL method ID must never reach CallObjectMethod.
  auto read_string = [&](const char *getter, string &out) -> bool {
    jmethodID j_mid = env->GetMethodID(j_cls, getter, "()Ljava/lang/String;");
    if (j_mid == NULL) {
      return false;
    }
    jstring j_value = (jstring) env->CallObjectMethod(j_resource, j_mid);
    if (env->ExceptionCheck()) {
      return false;
    }
    out = marshal_jstring_to_string(env, j_value);
    if (j_value != NULL) {
      env->DeleteLocalRef(j_value);
    }
    return true;
  };

  string assetUri;
  string caption;
  const bool ok = read_string("getAssetUri", assetUri)
               && read_string("getCaptionText", caption);
  env->DeleteLocalRef(j_cls);
  if (!ok) {
    std::cout << "Unable to read video resource" << std::endl;
    return NULL;
  }

  videoClip * c = new videoClip();
  c->vidFileName = assetUri;

//  j_mid = env->GetMethodID(j_cls, "getRotationAngle", "()I");
  c->rotationAngle = 0; //env->CallIntMethod(j_resource, j_mid);

  c->captionText = caption;

  // Colour marshalling is disabled, so the caption is always white. The >= 0
  // guards are kept for the disabled marshal_color helper, which reports a
  // missing colour as -1 so that the clip's own default is preserved.
//  j_mid = env->GetMethodID(j_cls, "getCaptionColor", "()Ljava/awt/Color;");
//  jobject j_caption_color = env->CallObjectMethod(j_resource, j_mid);
  float captionR = 255.0, captionG = 255.0, captionB = 255.0;
//  marshal_color(env, j_caption_color, captionR, captionG, captionB);
  if (captionR >= 0) {
    c->captionR = captionR;
  }
  if (captionG >= 0) {
    c->captionG = captionG;
  }
  if (captionB >= 0) {
    c->captionB = captionB;
  }

  DLOG("Add Video: [ vidFilename: %s\ncaptionText: %s", c->vidFileName.c_str(), c->captionText.c_str());
  std::cout << "Add Video[";
  std::cout << " vidFileName: " << c->vidFileName;
  std::cout << " rotationAngle: " << c->rotationAngle;
  std::cout << " captionText: " << c->captionText;
  std::cout << " captionR: " << c->captionR;
  std::cout << " captionG: " << c->captionG;
  std::cout << " captionB: " << c->captionB;
  std::cout << "]" << std::endl;
  return c;
}

/**
 * Builds an imageClip from a Java image resource.
 *
 * Reads getAssetUri() into imgFileName and getCaption() into captionText.
 * The rotation angle is fixed at 0 and the caption colour at 255/255/255
 * because the corresponding Java getters are currently not queried.
 *
 * @param env         JNI environment of the calling thread.
 * @param j_resource  Java resource object (non-null).
 * @return a newly allocated imageClip owned by the caller, or NULL when a
 *         getter is missing or throws (the Java exception is left pending).
 */
static clip * marshal_image(JNIEnv *env, const jobject &j_resource) {
  jclass j_cls = env->GetObjectClass(j_resource);

  // Invokes a no-argument getter returning java.lang.String and copies the
  // result into `out`. Returns false when the method does not exist or throws;
  // a NULL method ID must never reach CallObjectMethod.
  auto read_string = [&](const char *getter, string &out) -> bool {
    jmethodID j_mid = env->GetMethodID(j_cls, getter, "()Ljava/lang/String;");
    if (j_mid == NULL) {
      return false;
    }
    jstring j_value = (jstring) env->CallObjectMethod(j_resource, j_mid);
    if (env->ExceptionCheck()) {
      return false;
    }
    out = marshal_jstring_to_string(env, j_value);
    if (j_value != NULL) {
      env->DeleteLocalRef(j_value);
    }
    return true;
  };

  string assetUri;
  string caption;
  const bool ok = read_string("getAssetUri", assetUri)
               && read_string("getCaption", caption);
  env->DeleteLocalRef(j_cls);
  if (!ok) {
    std::cout << "Unable to read image resource" << std::endl;
    return NULL;
  }

  imageClip * c = new imageClip();
  c->imgFileName = assetUri;

//  j_mid = env->GetMethodID(j_cls, "getRotationAngle", "()I");
  c->rotationAngle = 0; //env->CallIntMethod(j_resource, j_mid);

  c->captionText = caption;

  // Colour marshalling is disabled, so the caption is always white. The >= 0
  // guards are kept for the disabled marshal_color helper, which reports a
  // missing colour as -1 so that the clip's own default is preserved.
//  j_mid = env->GetMethodID(j_cls, "getCaptionColor", "()Ljava/awt/Color;");
//  jobject j_caption_color = env->CallObjectMethod(j_resource, j_mid);
  float captionR = 255, captionG = 255, captionB = 255;
//  marshal_color(env, j_caption_color, captionR,captionG, captionB);
  if (captionR >= 0) {
    c->captionR = captionR;
  }
  if (captionG >= 0) {
    c->captionG = captionG;
  }
  if (captionB >= 0) {
    c->captionB = captionB;
  }

  std::cout << "Add Image[";
  std::cout << " imgFileName: " << c->imgFileName;
  std::cout << " rotationAngle: " << c->rotationAngle;
  std::cout << " captionText: " << c->captionText;
  std::cout << " captionR: " << c->captionR;
  std::cout << " captionG: " << c->captionG;
  std::cout << " captionB: " << c->captionB;
  std::cout << "]" << std::endl;

  return c;
}

/**
 * Builds a titleClip from a Java text/slide resource.
 *
 * Reads getCaption() into captionText and getText() into titleText. Colours
 * are fixed because the Java colour getters are currently not queried:
 * caption and text 255/255/255, background 125/125/125.
 *
 * @param env         JNI environment of the calling thread.
 * @param j_resource  Java resource object (non-null).
 * @return a newly allocated titleClip owned by the caller, or NULL when a
 *         getter is missing or throws (the Java exception is left pending).
 */
static clip * marshal_text(JNIEnv *env, const jobject &j_resource) {
  jclass j_cls = env->GetObjectClass(j_resource);

  // Invokes a no-argument getter returning java.lang.String and copies the
  // result into `out`. Returns false when the method does not exist or throws;
  // a NULL method ID must never reach CallObjectMethod.
  auto read_string = [&](const char *getter, string &out) -> bool {
    jmethodID j_mid = env->GetMethodID(j_cls, getter, "()Ljava/lang/String;");
    if (j_mid == NULL) {
      return false;
    }
    jstring j_value = (jstring) env->CallObjectMethod(j_resource, j_mid);
    if (env->ExceptionCheck()) {
      return false;
    }
    out = marshal_jstring_to_string(env, j_value);
    if (j_value != NULL) {
      env->DeleteLocalRef(j_value);
    }
    return true;
  };

  string caption;
  string title;
  const bool ok = read_string("getCaption", caption)
               && read_string("getText", title);
  env->DeleteLocalRef(j_cls);
  if (!ok) {
    std::cout << "Unable to read text resource" << std::endl;
    return NULL;
  }

  titleClip * c = new titleClip();
  c->captionText = caption;

  // Colour marshalling is disabled, so fixed colours are used below. The >= 0
  // guards are kept for the disabled marshal_color helper, which reports a
  // missing colour as -1 so that the clip's own default is preserved.
//  j_mid = env->GetMethodID(j_cls, "getCaptionColor", "()Ljava/awt/Color;");
//  jobject j_caption_color = env->CallObjectMethod(j_resource, j_mid);
  float captionR = 255.0, captionG = 255.0, captionB = 255.0;
//  marshal_color(env, j_caption_color, captionR, captionG, captionB);
  if (captionR >= 0) {
    c->captionR = captionR;
  }
  if (captionG >= 0) {
    c->captionG = captionG;
  }
  if (captionB >= 0) {
    c->captionB = captionB;
  }

  c->titleText = title;

//  j_mid = env->GetMethodID(j_cls, "getForeground", "()Ljava/awt/Color;");
//  jobject j_foreground = env->CallObjectMethod(j_resource, j_mid);
  float textR = 255.0, textG = 255.0, textB = 255.0;
//  marshal_color(env, j_foreground, textR, textG, textB);
  if (textR >= 0) {
    c->textR = textR;
  }
  if (textG >= 0) {
    c->textG = textG;
  }
  if (textB >= 0) {
    c->textB = textB;
  }

//  j_mid = env->GetMethodID(j_cls, "getBackground", "()Ljava/awt/Color;");
//  jobject j_background = env->CallObjectMethod(j_resource, j_mid);
  float backgroundR = 125.0, backgroundG = 125.0, backgroundB = 125.0;
//  marshal_color(env, j_background, backgroundR, backgroundG, backgroundB);
  if (backgroundR >= 0) {
    c->backgroundR = backgroundR;
  }
  if (backgroundG >= 0) {
    c->backgroundG = backgroundG;
  }
  if (backgroundB >= 0) {
    c->backgroundB = backgroundB;
  }

  std::cout << "Add Text[";
  std::cout << " captionText: " << c->captionText;
  std::cout << " captionR: " << c->captionR;
  std::cout << " captionG: " << c->captionG;
  std::cout << " captionB: " << c->captionB;
  std::cout << " titleText: " << c->titleText;
  std::cout << " textR: " << c->textR;
  std::cout << " textG: " << c->textG;
  std::cout << " textB: " << c->textB;
  std::cout << " backgroundR: " << c->backgroundR;
  std::cout << " backgroundG: " << c->backgroundG;
  std::cout << " backgroundB: " << c->backgroundB;
  std::cout << "]" << std::endl;
  return c;
}

/**
 * Stub for audio resources: audio is not supported as a clip, so this only
 * reports the fact on stdout and yields no clip.
 *
 * @param env         JNI environment (unused).
 * @param j_resource  Java resource object (unused).
 * @return always NULL.
 */
static clip * marshal_audio(JNIEnv *env, const jobject &j_resource) {
  static const char unsupported_msg[] = "Audio not supported mimetype for resource";

  std::cout << unsupported_msg << std::endl;

  clip * no_clip = NULL;
  return no_clip;
}

static void marshal_soundtrack(JNIEnv *env, const jobject &j_soundtrack,
                               string &soundtrack_path, float &ratio) {
  jclass j_cls = env->GetObjectClass(j_soundtrack);

  jmethodID j_mid = env->GetMethodID(j_cls, "getURI",
                                     "()Ljava/lang/String;");
  jstring j_uri = (jstring) env->CallObjectMethod(j_soundtrack, j_mid);
  soundtrack_path = marshal_jstring_to_string(env, j_uri);

  j_mid = env->GetMethodID(j_cls, "getRatio", "()F");
  ratio = env->CallFloatMethod(j_soundtrack, j_mid);
}

/**
 * Copies a Java string into a std::string using the bytes returned by
 * GetStringUTFChars.
 *
 * @param env    JNI environment of the calling thread.
 * @param j_str  Java string; may be null.
 * @return the copied characters, or an empty string when j_str is null or
 *         the JVM could not provide the character data.
 */
static string  marshal_jstring_to_string(JNIEnv *env, const jstring &j_str) {
  if (!j_str) {
    return "";
  }

  const char * chars = env->GetStringUTFChars(j_str, NULL);
  if (chars == NULL) {
    // GetStringUTFChars returns NULL when it fails; constructing a
    // std::string from a null pointer is undefined behaviour.
    return "";
  }

  string result(chars);

  env->ReleaseStringUTFChars(j_str, chars);

  return result;
}

/**
 * Parses a Java string as a base-10 integer.
 *
 * Leading whitespace and an optional sign are accepted and parsing stops at
 * the first non-digit, as with atoi. Invalid input defaults to 0: a null
 * string, unavailable character data, text with no leading number, or a
 * value that does not survive the round trip through int.
 *
 * @param env    JNI environment of the calling thread.
 * @param j_str  Java string; may be null.
 * @return the parsed value, or 0 for invalid input.
 */
static int  marshal_jstring_to_int(JNIEnv *env, const jstring &j_str) {
  if (!j_str) {
    return 0; // we want to make sure invalid inputs default to 0
  }

  const char * chars = env->GetStringUTFChars(j_str, NULL);
  if (chars == NULL) {
    // GetStringUTFChars failed; there is nothing to parse.
    return 0;
  }

  // strtol has defined behaviour for out-of-range input, unlike atoi.
  const long parsed = std::strtol(chars, NULL, 10);

  env->ReleaseStringUTFChars(j_str, chars);

  // Reject values that do not fit in int. Where long and int have the same
  // width, strtol's saturated result is returned instead.
  const int result = static_cast<int>(parsed);
  if (static_cast<long>(result) != parsed) {
    return 0;
  }

  return result;
}
//
//static void marshal_color(JNIEnv *env, const jobject &j_color, float &r, float &g, float &b) {
//  if (j_color == NULL) {
//    r = g = b = -1;
//    return;
//  }
//
//  jclass j_cls = env->GetObjectClass(j_color);
//  jmethodID j_mid = env->GetMethodID(j_cls, "getRed", "()I");
//  r = ((float) env->CallIntMethod(j_color, j_mid)) / 255.0;
//  j_mid = env->GetMethodID(j_cls, "getGreen", "()I");
//  g = ((float) env->CallIntMethod(j_color, j_mid)) / 255.0;
//  j_mid = env->GetMethodID(j_cls, "getBlue", "()I");
//  b = ((float) env->CallIntMethod(j_color, j_mid)) / 255.0;
//}


#ifdef __cplusplus
}

#endif