// Untitled

// TODO:
// - Make sure video frames don't get overwritten (avoid re-allocating, because
// that will be hard to manage, instead pop old frames if they are behind the audio)
// - Change returns to exits in some places (change the assert macro)
// - Add stop audio/video
// - Add volume control
// - Add video seeking
// - Check if video/audio is opened before every function related to video/audio
// - Create debug print information functions for video/audio

#ifndef OLCPGEX_MEDIA_H
#define OLCPGEX_MEDIA_H

extern "C" {
// Video and audio dependencies
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>

// Video dependencies
#include <libswscale/swscale.h>
#include <inttypes.h>
#include <libavutil/pixdesc.h>

// Audio dependencies
#include <libavutil/avutil.h>
#include <libswresample/swresample.h>
#include <libavutil/audio_fifo.h>
}

#define MINIAUDIO_IMPLEMENTATION
#include "miniaudio.h"


#include <algorithm>
#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <mutex>
#include <new>
#include <queue>
#include <stdexcept>
#include <string>
#include <thread>


// NDEBUG is normally defined by the build system/IDE for release builds.
//
// OLC_MEDIA_ASSERT(condition, return_value, message):
//   When "condition" is false, returns "return_value" from the enclosing function.
//   Debug builds first print the file, the line and "message" to std::cerr.
//
// The body is wrapped in do { } while (0) so the macro expands to exactly one statement:
// it must be followed by ';' and is safe to use inside an unbraced if/else.
#ifdef NDEBUG
// Release macro
#define OLC_MEDIA_ASSERT(condition, return_value, message) do { if(!(condition)){return return_value;} } while (0)
#else
// Debug macro
#define OLC_MEDIA_ASSERT(condition, return_value, message) do { if(!(condition)){std::cerr << "Assert failed: \"" << __FILE__ << "\" (" << __LINE__ << ")\n" << message << '\n'; return return_value;} } while (0)
#endif

// Declarations
namespace olc {

    enum class MediaResult {
        Success = 0,
        Error = 1,
    };

    // Thread safe "queue" that uses circular buffer
    class VideoQueue {
    private:
        uint16_t _size = 0;
        uint16_t _capacity = 0;

        uint16_t _insert_idx = 0; // idx that points to location where new element will be inserted
        uint16_t _delete_idx = 0; // idx that points to location where oldest element will be deleted from

        AVFrame** _data = nullptr;

        mutable std::mutex _mut;

    public:
        VideoQueue() {
        }

        ~VideoQueue() {
            clear();
            free();
        }

        // Suggested to set capacity to fps (but min capacity must be 2)
        MediaResult init(uint16_t capacity) {
            // Always reset the values
            _size = 0;
            _insert_idx = 0;
            _delete_idx = 0;

            // If video fifo was already in use, reset it first
            if (_data != nullptr) {
                clear();
                free();
            }

            if(capacity <= 1)
                return MediaResult::Error;

            _data = new AVFrame * [capacity];
            if (_data == nullptr)
                return MediaResult::Error;

            _capacity = capacity;
            for (uint16_t i = 0; i < _capacity; ++i) {
                _data[i] = av_frame_alloc();

                // Memory might run out if capacity is too big (which might happen if video fps is insanely large)
                if (_data[i] == nullptr)
                    return MediaResult::Error;
            }

            return MediaResult::Success;
        }

        AVFrame* back() const {
            std::unique_lock<std::mutex> lock(_mut);
            return _data[_insert_idx];
        }

        AVFrame* front() const {
            std::unique_lock<std::mutex> lock(_mut);
            return _data[_delete_idx];
        }

        // Push updated AVFrame from "back()"
        void push() {
            std::unique_lock<std::mutex> lock(_mut);

            _insert_idx = (_insert_idx + 1) % _capacity;
            ++_size;
        }

        // Pop AVFrame from the front() unreferencing it
        // If size is 0, does nothing
        void pop() {
            std::unique_lock<std::mutex> lock(_mut);

            if (_size > 0) {
                av_frame_unref(_data[_delete_idx]);

                _delete_idx = (_delete_idx + 1) % _capacity;
                --_size;
            }
        }

        size_t size() const {
            std::unique_lock<std::mutex> lock(_mut);
            return _size;
        }

        size_t capacity() const {
            std::unique_lock<std::mutex> lock(_mut);
            return _capacity;
        }

        void clear() {
            if (_data != nullptr) {
                for (uint16_t i = 0; i < _capacity; ++i) {
                    av_frame_unref(_data[i]);
                }
            }
        }

        // De-allocates fifo structure
        void free() {
            if (_data != nullptr) {
                for (uint16_t i = 0; i < _capacity; ++i) {
                    av_frame_free(&_data[i]);
                }

                delete[] _data;
                _data = nullptr;
            }
        }
    };

    // Thread safe wrapper for audio fifo read/write
    class AudioQueue {
    private:
        AVAudioFifo* _fifo = nullptr;
        mutable std::mutex _mut;

    public:
        AudioQueue() {
        }

        ~AudioQueue() {
            clear();
            free();
        }

        // Suggested to set capacity to sample rate
        MediaResult init(int channels, int capacity) {
            // If these parameters are 0, there is something wrong
            if (capacity == 0 || channels == 0)
                return MediaResult::Error;

            if (_fifo != nullptr) {
                clear();
                free();
            }

            _fifo = av_audio_fifo_alloc(AV_SAMPLE_FMT_FLT, channels, capacity);

            if (_fifo == nullptr)
                return MediaResult::Error;

            return MediaResult::Success;
        }

        int push(void** data, int samples) {
            std::lock_guard<std::mutex> lock(_mut);

            int space = av_audio_fifo_space(_fifo);

            // If capacity is reached, drain some audio samples
            if (samples > space) {
                av_audio_fifo_drain(_fifo, std::min(av_audio_fifo_size(_fifo), samples - space));
            }


            return av_audio_fifo_write(_fifo, data, samples);
        }

        int pop(void** data, int samples) {
            std::unique_lock<std::mutex> lock(_mut);
            return av_audio_fifo_read(_fifo, data, samples);
        }

        void drain(int samples) {
            std::unique_lock<std::mutex> lock(_mut);
            av_audio_fifo_drain(_fifo, samples);
        }

        int size() {
            std::unique_lock<std::mutex> lock(_mut);
            return av_audio_fifo_size(_fifo);
        }

        int capacity() {
            std::unique_lock<std::mutex> lock(_mut);
            return av_audio_fifo_space(_fifo) + av_audio_fifo_size(_fifo);
        }

        // Empties out all the frames
        void clear() {
            if(_fifo != nullptr)
                av_audio_fifo_drain(_fifo, av_audio_fifo_size(_fifo));
        }

        // De-allocates fifo structure
        void free() {
            av_audio_fifo_free(_fifo);
            _fifo = nullptr;
        }
    };

    class Media {
    private:
        // -- Private internal state --
        AVFormatContext* av_format_ctx = nullptr;

        uint16_t preloaded_frames_scale;

        // If false, video capture wasn't opened, or last frame was put into queue
        std::atomic<bool> finished_reading = false;

        // When true, the frame loading thread keeps on working
        // When set to false, and conditional is called, frame loading thread is halted
        std::atomic<bool> keep_loading = true;

        std::thread frame_loader;
        std::mutex mutex;
        std::condition_variable conditional;


        // -- Video stuff --
        int video_stream_index = -1;
        AVRational video_time_base;
        VideoQueue video_fifo;
        const AVCodec* av_video_codec = nullptr;
        AVCodecContext* av_video_codec_ctx = nullptr;
        SwsContext* sws_video_scaler_ctx = nullptr;
        olc::Renderable video_frame;
        int video_width = 0;
        int video_height = 0;
        int video_delay = 0;
        float delta_time_accumulator = 0.0f;
        double last_video_pts = 0.0;
        bool video_opened = false;


        // -- Audio stuff --
        int audio_stream_index = -1;
        AVRational audio_time_base;
        AudioQueue audio_fifo;
        const AVCodec* av_audio_codec = nullptr;
        AVCodecContext* av_audio_codec_ctx = nullptr;
        SwrContext* swr_audio_resampler = nullptr;
        ma_device audio_device;
        size_t audio_frames_consumed = 0;
        std::atomic<double> audio_time = 0.0;
        int audio_sample_size = 0;
        int audio_channel_count = 0;
        bool audio_opened = false;

    public:
        Media();
        ~Media();

        // -- Video and audio functions --

        // If media is already open, closes it first.
        // preloaded_frames_scale: Affects how many seconds of pre-decoded video/audio frames should be stored. (only suggested to
        // increase it, when you notice that some of your video/audio frames are skipped)
        MediaResult Open(const char* filename, bool open_video, bool open_audio, uint16_t preloaded_frames_scale = 1);
        void Close();
        // Returns true if any of the following is true:
        // - Media file wasn't opened;
        // - No more frames are available in the media file;
        // - Error occured when trying to receive a packet.
        bool FinishedReading();


        // -- Video functions --

        // When only video is playing, delta_time is used to synchronise the video.
        // If video is played together with audio, delta_time is ignored, and video
        // is synchronised based on how many audio frames were consumed.
        // NOTE: the decal's pixel data will change when you call one of "GetVideoFrame" functions again
        olc::Decal* GetVideoFrame(float delta_time);
        // NOTE: the decal's pixel data will change when you call one of "GetVideoFrame" functions again
        olc::Decal* GetVideoFrame();
        MediaResult SkipVideoFrame();
        bool IsVideoOpened();
        // Not all videos have frames of equal length, so FPS can only be average
        double GetAverageVideoFPS();
        void PrintVideoInfo();

        //bool seek_frame(int64_t ts);


        // -- Audio functions --
        // output: pointer to byte array pointed by "void*", where the byte array size must be "channel_count * sample_count * sample_size"
        // Returns amount of samples that were read (if not all samples were written, the rest will be filled with 0s (silence))
        // WARNING: Only use this function if you intend to play the audio yourself. If you do decide to handle audio yourself,
        // note, that when video and audio is played together, video is synchronised according to how many audio samples have been read.
        int GetAudioFrame(void** output, int sample_count);
        bool IsAudioOpened();
        void PrintAudioInfo();

    private:
        // -- Video and audio functions --
        MediaResult OpenFile(const char* filename);
        void CloseFile();
        void StartDecodingThread();
        void StopDecodingThread();
        MediaResult DecodingThread();
        static const char* GetError(int errnum);


        // -- Video functions --
        MediaResult InitVideo();
        void CloseVideo();
        void ConvertFrameToRGBASprite(AVFrame* frame, olc::Sprite* target);
        // Send updated pixel data in olc::Sprite to GPU
        void UpdateResultSprite();
        MediaResult HandleVideoDelay();
        const AVFrame* PeekFrame();
        static AVPixelFormat CorrectDeprecatedPixelFormat(AVPixelFormat pix_fmt);

        // -- Audio functions --
        MediaResult InitAudio();
        void CloseAudio();
    };
}

// Definitions
namespace olc {
    Media::Media() {
    }

    Media::~Media() {
        Close();
    }

    MediaResult Media::Open(const char* filename, bool open_video, bool open_audio, uint16_t preloaded_frames_scale) {
        MediaResult result;

        OLC_MEDIA_ASSERT(preloaded_frames_scale > 0, MediaResult::Error, "\"preloaded_frames_scale\" can't be 0");

        this->preloaded_frames_scale = preloaded_frames_scale;

        // If media is already open, close it first
        if (IsVideoOpened() || IsAudioOpened()) {
            Close();
        }

        printf("opening file\n");

        result = OpenFile(filename);
        if (result != MediaResult::Success) {
            return result;
        }

        printf("initing video\n");

        if (open_video) {
            result = InitVideo();
            if (result != MediaResult::Success) {
                return result;
            }
        }

        printf("initing audio\n");

        if (open_audio) {
            result = InitAudio();
            if (result != MediaResult::Success) {
                return result;
            }
        }

        StartDecodingThread();

        result = HandleVideoDelay();
        OLC_MEDIA_ASSERT(result == MediaResult::Success, result, "Couldn't handle video delay");


        return MediaResult::Success;
    }

    // Stops decoding and releases the file, the video state and the audio state, in that order.
    // Also called by the destructor, i.e. it is invoked on objects that never opened anything.
    void Media::Close() {
        // The decoding thread must be gone before the contexts it uses are released
        StopDecodingThread();
        printf("Closing file\n");
        CloseFile();
        printf("Closing video\n");
        CloseVideo();
        printf("Closing audio\n");
        CloseAudio();
    }

    bool Media::FinishedReading() {
        // If neither of the streams were open, return false
        if (IsVideoOpened() == false && IsAudioOpened() == false)
            return false;

        // Check if decoding thread has finished and all the frames were read from the streams that were open
        if (finished_reading) {
            bool video_finished = true;
            bool audio_finished = true;

            if (IsVideoOpened() && video_fifo.size() > 0) {
                video_finished = false;
            }

            if (IsAudioOpened() && audio_fifo.size() > 0) {
                audio_finished = false;
            }

            return video_finished && audio_finished;
        }

        return false;
    }

    olc::Decal* Media::GetVideoFrame(float delta_time) {
        if (IsVideoOpened() == false) {
            printf("Video isn't open\n");
            return nullptr;
        }

        if (FinishedReading()) {
            printf("Finished reading video\n");
            return nullptr;
        }

        double time_reference;

        // If audio is opened, synchronise video with audio
        if (IsAudioOpened()) {
            time_reference = audio_time;
        }
        // Otherwise synchronise it based on how much time has passed between function calls (or allow user to mess with delta time if he wants)
        else {
            delta_time_accumulator += delta_time;
            time_reference = delta_time_accumulator;
        }

        // If enough time hasn't passed yet, return the same frame
        if (time_reference < last_video_pts)
            return video_frame.Decal();

        while (true) {
            const AVFrame* next_frame = PeekFrame();

            // Check if Decoding thread has a next video frame at all
            if (next_frame == nullptr)
                return video_frame.Decal();

            last_video_pts = double(next_frame->best_effort_timestamp * video_time_base.num) / double(video_time_base.den);

            // Test Decoding thread later by changing "<=" to ">="
            if (time_reference <= last_video_pts)
                break;

            SkipVideoFrame();
        }

        return GetVideoFrame();
    }

    olc::Decal* Media::GetVideoFrame() {
        if (IsVideoOpened() == false) {
            printf("Video isn't open\n");
            return nullptr;
        }

        if (FinishedReading()) {
            printf("Finished reading video\n");
            return nullptr;
        }

        // If decoding thread wasn't quick enough to decode frames return same image.
        // (We don't know if decoding thread isn't quick enough, or if last video frame
        // was decoded, and there are other frames left over, like audio frames)
        if (video_fifo.size() > 0) {
            //printf("v-\n");

            AVFrame* frame_ref = video_fifo.front();

            ConvertFrameToRGBASprite(frame_ref, video_frame.Sprite());
            UpdateResultSprite();

            //printf("vt: %lf\n", double(frame_ref->best_effort_timestamp * video_time_base.num) / double(video_time_base.den));

            video_fifo.pop();

            conditional.notify_one();
        }

        return video_frame.Decal();
    }

    // TODO: return error when no more frames are available and there is nothing to skip
    MediaResult Media::SkipVideoFrame() {
        if (IsVideoOpened() == false) {
            printf("Video isn't open\n");
            return MediaResult::Error;
        }

        if (FinishedReading()) {
            printf("Finished reading video\n");
            return MediaResult::Error;
        }

        // If decoding thread wasn't quick enough to decode frames don't do anything.
        // (We don't know if decoding thread isn't quick enough, or if last video frame
        // was decoded, and there are other frames left over, like audio frames)
        if (video_fifo.size() > 0) {
            video_fifo.pop();

            conditional.notify_one();

            return MediaResult::Success;
        }

        return MediaResult::Error;
    }

    bool Media::IsVideoOpened() {
        return video_opened;
    }

    double Media::GetAverageVideoFPS() {
        return av_q2d(av_format_ctx->streams[video_stream_index]->avg_frame_rate);
    }

    void Media::PrintVideoInfo() {
        if (IsVideoOpened() == false) {
            printf("Video isn't open\n");
            return;
        }

        AVStream* video_stream = av_format_ctx->streams[video_stream_index];
        double frame_rate = av_q2d(video_stream->avg_frame_rate);
        int time_base_num = video_stream->time_base.num;
        int time_base_den = video_stream->time_base.den;
        int frame_rate_num = video_stream->avg_frame_rate.num;
        int frame_rate_den = video_stream->avg_frame_rate.den;

        // Print time base and fps
        printf("----------------------\n");
        printf("Video info\n");
        printf("Codec: %s\n", av_video_codec->long_name);
        printf("Pixel fmt: %s\n", av_get_pix_fmt_name(av_video_codec_ctx->pix_fmt));
        printf("w: %i   h: %i\n", video_width, video_height);
        printf("Frame rate: %lf\n", frame_rate);
        printf("Time base num: %i\n", time_base_num);
        printf("Time base den: %i\n", time_base_den);
        printf("Frame rate num: %i\n", frame_rate_num);
        printf("Frame rate den: %i\n", frame_rate_den);
        printf("----------------------\n");
    }

    int Media::GetAudioFrame(void** output, int sample_count) {
        // Fill buffer with silence, in case not all samples will be read
        memset(*output, 0, audio_channel_count * audio_sample_size * sample_count);
        int samples_read = audio_fifo.pop(output, sample_count);
        conditional.notify_one();

        audio_frames_consumed += samples_read;
        audio_time = double(audio_frames_consumed * audio_time_base.num) / double(audio_time_base.den);
        double a_time = audio_time;
        //printf("at: %lf\n", a_time);

        //printf("sr: %i\n", samples_read);

        //printf("tot: %llu\n", audio_frames_consumed);
        //printf("-----------\n");

        // TEMP
        //memset(*output, 0, audio_channel_count * audio_sample_size * sample_count);

        return samples_read;
    }

    bool Media::IsAudioOpened() {
        return audio_opened;
    }

    void Media::PrintAudioInfo() {
        if (IsAudioOpened() == false) {
            printf("Audio isn't open\n");
            return;
        }

        AVStream* audio_stream = av_format_ctx->streams[audio_stream_index];
        int frame_size = av_format_ctx->streams[audio_stream_index]->codecpar->frame_size;
        int sample_rate = av_format_ctx->streams[audio_stream_index]->codecpar->sample_rate;
        int channels = av_format_ctx->streams[audio_stream_index]->codecpar->channels;
        int time_base_num = audio_stream->time_base.num;
        int time_base_den = audio_stream->time_base.den;

        // Print time base and fps
        printf("----------------------\n");
        printf("Audio info\n");

        printf("Codec: %s\n", av_audio_codec->long_name);
        printf("Frame size: %i\n", frame_size);
        printf("Sample rate: %i\n", sample_rate);
        printf("Channels: %i\n", channels);
        printf("Time base num: %i\n", time_base_num);
        printf("Time base den: %i\n", time_base_den);
        printf("block_align: %i\n", av_format_ctx->streams[audio_stream_index]->codecpar->block_align);
        printf("initial_padding: %i\n", av_format_ctx->streams[audio_stream_index]->codecpar->initial_padding);
        printf("trailing_padding: %i\n", av_format_ctx->streams[audio_stream_index]->codecpar->trailing_padding);
        printf("seek_preroll: %i\n", av_format_ctx->streams[audio_stream_index]->codecpar->seek_preroll);
        printf("----------------------\n");
    }

    MediaResult Media::OpenFile(const char* filename) {
        int response;

        av_format_ctx = avformat_alloc_context();
        OLC_MEDIA_ASSERT(av_format_ctx != nullptr, MediaResult::Error, "Couldn't allocate AVFormatContext");

        response = avformat_open_input(&av_format_ctx, filename, NULL, NULL);
        if (response < 0) {
            printf("avformat_open_input response: %s\n", GetError(response));
        }
        OLC_MEDIA_ASSERT(response == 0, MediaResult::Error, "Couldn't find/open file or file format isn't supported");

        response = avformat_find_stream_info(av_format_ctx, nullptr);
        OLC_MEDIA_ASSERT(response >= 0, MediaResult::Error, "Couldn't find stream info");

        return MediaResult::Success;
    }

    void Media::CloseFile() {
        avformat_close_input(&av_format_ctx);

        // Don't think this function is really needed, but I put it here for sanity reasons
        avformat_free_context(av_format_ctx);
    }

    void Media::StartDecodingThread() {
        printf("Starting thread\n");
        keep_loading = true;
        finished_reading = false;
        this->frame_loader = std::thread(&Media::DecodingThread, this);
    }

    void Media::StopDecodingThread() {
        printf("Stopping thread\n");
        keep_loading = false;
        finished_reading = true;
        conditional.notify_one();
        if (frame_loader.joinable()) {
            printf("Thread is joinable\n");
            frame_loader.join();
        }
    }

    MediaResult Media::DecodingThread() {
        finished_reading = false;

        std::unique_lock<std::mutex> lock(mutex);

        // Maximum amount of frames that can be pre-decoded
        size_t max_video_queue_size;

        // Minimum amount of frames that should be pre-decoded
        size_t min_video_queue_size;
        size_t min_audio_queue_size;

        if (IsVideoOpened()) {
            // "-1" because resizing is disabled, and it allows to avoid overwriting a frame recevied from "GetVideoFrame()"
            max_video_queue_size = video_fifo.capacity() - 1;
            min_video_queue_size = std::max(max_video_queue_size / 2, size_t(1));

            printf("max_v: %llu\n", max_video_queue_size);
            printf("min_v: %llu\n", min_video_queue_size);
        }

        if (IsAudioOpened()) {
            min_audio_queue_size = std::max(size_t(audio_fifo.capacity() / 2), size_t(1));

            printf("min_a: %llu\n", min_audio_queue_size);
        }

        int response;

        AVFrame* av_audio_frame = av_frame_alloc();
        OLC_MEDIA_ASSERT(av_audio_frame != nullptr, MediaResult::Error, "Couldn't allocate resampled AVFrame");

        // Used to store converted "av_audio_frame"
        AVFrame* resampled_audio_frame = av_frame_alloc();
        OLC_MEDIA_ASSERT(resampled_audio_frame != nullptr, MediaResult::Error, "Couldn't allocate resampled AVFrame");

        AVPacket* av_packet = av_packet_alloc();
        OLC_MEDIA_ASSERT(av_packet != nullptr, MediaResult::Error, "Couldn't allocate resampled AVFrame");

        while (true) {
            while (true) {
                if (IsVideoOpened() && video_fifo.size() <= min_video_queue_size)
                    break;

                if (IsAudioOpened() && audio_fifo.size() <= min_audio_queue_size)
                    break;

                if (keep_loading == false)
                    break;

                conditional.wait(lock);
            }

            if (keep_loading == false)
                break;


            // Get remaining audio from previous conversion
            if (IsAudioOpened()) {
                if (swr_get_delay(swr_audio_resampler, std::max(resampled_audio_frame->sample_rate, av_audio_frame->sample_rate)) > 0) {
                    response = swr_convert_frame(swr_audio_resampler, resampled_audio_frame, nullptr);
                    OLC_MEDIA_ASSERT(response == 0, MediaResult::Error, "Couldn't resample the frame");

                    int samples_written = audio_fifo.push((void**)resampled_audio_frame->data, resampled_audio_frame->nb_samples);

                    continue;
                }
            }

            // Try reading next packet
            response = av_read_frame(av_format_ctx, av_packet);

            // Return if error or end of file was encountered
            if (response < 0) {
                printf("Error or end of file happened\n");
                printf("Exit info: %s\n", GetError(response));

                // TODO: check if response is error or end of file
                break;
            }

            if (IsVideoOpened() && av_packet->stream_index == video_stream_index) {
                PiraTimer::start("DecodeVideoFrame");

                // Drain a frame when max size is reached
                if (max_video_queue_size == video_fifo.size()) {
                    video_fifo.pop();
                }


                AVFrame* av_video_frame = video_fifo.back();

                // Send packet to decode
                response = avcodec_send_packet(av_video_codec_ctx, av_packet);
                OLC_MEDIA_ASSERT(response == 0, MediaResult::Error, "Couldn't decode packet");

                // Receive decoded frame
                response = avcodec_receive_frame(av_video_codec_ctx, av_video_frame);
                if (response < 0) {
                    OLC_MEDIA_ASSERT(response == AVERROR_EOF || response == AVERROR(EAGAIN), MediaResult::Error, "Couldn't receive decoded frame");
                }

                video_fifo.push();

                PiraTimer::end("DecodeVideoFrame");
            }
            else if (IsAudioOpened() && av_packet->stream_index == audio_stream_index) {
                PiraTimer::start("DecodeAudioFrame");

                // Send packet to decode
                response = avcodec_send_packet(av_audio_codec_ctx, av_packet);
                if (response < 0) {
                    OLC_MEDIA_ASSERT(response == AVERROR(EAGAIN), MediaResult::Error, "Failed to decode packet");
                }

                // Single packet can contain multiple frames, so receive them in a loop
                while (true) {
                    response = avcodec_receive_frame(av_audio_codec_ctx, av_audio_frame);
                    if (response < 0) {
                        OLC_MEDIA_ASSERT(response == AVERROR_EOF || response == AVERROR(EAGAIN), MediaResult::Error, "Something went wrong when trying to receive decoded frame");
                        break;
                    }

                    // Convert whatever format audio frame is encoded in, to "AV_SAMPLE_FMT_FLT", for simplicity.
                    // We have to manually copy some frame data
                    resampled_audio_frame->sample_rate = av_audio_frame->sample_rate;
                    resampled_audio_frame->channel_layout = av_audio_frame->channel_layout;
                    resampled_audio_frame->channels = av_audio_frame->channels;
                    resampled_audio_frame->format = AV_SAMPLE_FMT_FLT;

                    response = swr_convert_frame(swr_audio_resampler, resampled_audio_frame, av_audio_frame);
                    OLC_MEDIA_ASSERT(response == 0, MediaResult::Error, "Couldn't resample the frame");

                    av_frame_unref(av_audio_frame);

                    // Insert decoded audio samples
                    /*int samples_written = */audio_fifo.push((void**)resampled_audio_frame->data, resampled_audio_frame->nb_samples);
                    //static size_t total_written = 0;
                    //total_written += samples_written;

                    //printf("total_written: %llu\n", total_written);
                    //printf("sw: %i\n", samples_written);
                }

                PiraTimer::end("DecodeAudioFrame");
            }
            //std::this_thread::sleep_for(std::chrono::milliseconds(10));

            av_packet_unref(av_packet);
        }

        finished_reading = true;

        printf("Freeing resources in thread\n");

        // Free the resources
        av_frame_free(&av_audio_frame);
        av_frame_free(&resampled_audio_frame);
        av_packet_free(&av_packet);

        printf("Exiting thread\n");

        return MediaResult::Success;
    }

    // av_err2str returns a temporary array. This doesn't work in gcc.
    // This function can be used as a replacement for av_err2str.
    const char* Media::GetError(int errnum) {
        static char str[AV_ERROR_MAX_STRING_SIZE];
        memset(str, 0, sizeof(str));
        return av_make_error_string(str, AV_ERROR_MAX_STRING_SIZE, errnum);
    }

    MediaResult Media::InitVideo() {
        int response;

        AVCodecParameters* av_video_codec_params = nullptr;

        printf("1\n");

        video_stream_index = av_find_best_stream(av_format_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &av_video_codec, 0);
        if (video_stream_index < 0) {
            if (video_stream_index == AVERROR_STREAM_NOT_FOUND) {
                // TODO: might change it later
                return MediaResult::Success;
            }
            else if (video_stream_index == AVERROR_DECODER_NOT_FOUND) {
                OLC_MEDIA_ASSERT(false, MediaResult::Error, "Couldn't find decoder for any of the video streams");
            }
            else {
                OLC_MEDIA_ASSERT(false, MediaResult::Error, "Unknown error occured when trying to find video stream");
            }
        }

        av_video_codec_params = av_format_ctx->streams[video_stream_index]->codecpar;

        //av_dump_format(av_video_format_ctx, video_stream_index, filename, 0);
        //printf("fps: %f\n", av_q2d(av_video_format_ctx->streams[video_stream_index]->avg_frame_rate));
        //av_video_format_ctx->streams[video_stream_index]->avg_frame_rate;

        printf("2\n");

        // Set up a codec context for the decoder
        av_video_codec_ctx = avcodec_alloc_context3(av_video_codec);
        OLC_MEDIA_ASSERT(av_video_codec_ctx != nullptr, MediaResult::Error, "Couldn't create AVCodecContext");

        printf("3\n");

        response = avcodec_parameters_to_context(av_video_codec_ctx, av_video_codec_params);
        OLC_MEDIA_ASSERT(response >= 0, MediaResult::Error, "Couldn't send parameters to AVCodecContext");

        printf("4\n");

        response = avcodec_open2(av_video_codec_ctx, av_video_codec, NULL);
        OLC_MEDIA_ASSERT(response == 0, MediaResult::Error, "Couldn't initialise AVCodecContext");

        printf("5\n");

        AVPixelFormat source_pix_fmt = Media::CorrectDeprecatedPixelFormat(av_video_codec_ctx->pix_fmt);
        sws_video_scaler_ctx = sws_getContext(
            av_video_codec_params->width, av_video_codec_params->height, source_pix_fmt,
            av_video_codec_params->width, av_video_codec_params->height, AV_PIX_FMT_RGB0,
            SWS_BILINEAR, NULL, NULL, NULL
        );
        OLC_MEDIA_ASSERT(sws_video_scaler_ctx != nullptr, MediaResult::Error, "Couldn't initialise SwsContext");

        printf("6\n");

        // Minimum video fifo capacity must stay 2, regardless of video fps
        MediaResult result = video_fifo.init(std::max(uint16_t(preloaded_frames_scale * GetAverageVideoFPS()), uint16_t(2)));
        OLC_MEDIA_ASSERT(result == MediaResult::Success, result, "Couldn't allocate video fifo");

        printf("7\n");

        video_opened = true;
        printf("01\n");
        video_width = av_video_codec_params->width;
        printf("02\n");
        video_height = av_video_codec_params->height;
        printf("03\n");
        video_frame.Create(video_width, video_height);
        printf("04\n");
        video_time_base = av_format_ctx->streams[video_stream_index]->time_base;
        printf("05\n");
        video_delay = av_video_codec_params->video_delay;
        printf("Video delay: %i\n", video_delay);

        // Reset values if video was previously opened
        delta_time_accumulator = 0.0f;
        last_video_pts = 0.0;

        PrintVideoInfo();

        return MediaResult::Success;
    }

    // Releases the video decoding resources and marks the video as closed:
    // frees the video codec context and the swscale context, clears the
    // "video_opened" flag, then empties and releases the video frame fifo.
    void Media::CloseVideo() {
        // Takes the address of the pointer; FFmpeg writes NULL back into it
        avcodec_free_context(&av_video_codec_ctx);
        // Takes the pointer by value, so it is reset by hand on the next line
        sws_freeContext(sws_video_scaler_ctx);
        sws_video_scaler_ctx = nullptr;

        video_opened = false;
        video_fifo.clear();
        video_fifo.free();
    }

    void Media::ConvertFrameToRGBASprite(AVFrame* frame, olc::Sprite* target) {
        // TODO: implement some error checking

        // Convert pixel format from (most likely) YUV representation to RGBA
        uint8_t* dest[4] = { (uint8_t*)(target->pColData), NULL, NULL, NULL };
        int dest_linesize[4] = { video_width * 4, 0, 0, 0 };
        sws_scale(sws_video_scaler_ctx, frame->data, frame->linesize, 0, frame->height, dest, dest_linesize);
    }

    void Media::UpdateResultSprite() {
        video_frame.Decal()->Update();
    }

    MediaResult Media::HandleVideoDelay() {
        if (IsVideoOpened()) {
            // Skip initial frames provided in "video_delay" after starting the decoding thread
            for (int frames_skipped = 0; frames_skipped < video_delay;) {
                if (SkipVideoFrame() == MediaResult::Success) {
                    ++frames_skipped;
                }
                else {
                    // If video delay is bigger than the video frame count, it probably means video is corrupted or something went wrong
                    if (FinishedReading()) {
                        return MediaResult::Error;
                    }

                    // Otherwise thread didn't decode the frames yet, so let's wait a bit
                    std::this_thread::sleep_for(std::chrono::milliseconds(5));
                }
            }
        }

        return MediaResult::Success;
    }

    const AVFrame* Media::PeekFrame() {
        if (video_fifo.size() > 0) {
            return video_fifo.front();
        }

        return nullptr;
    }

    AVPixelFormat Media::CorrectDeprecatedPixelFormat(AVPixelFormat pix_fmt) {
        // Fix swscaler deprecated pixel format warning
        // (YUVJ has been deprecated, change pixel format to regular YUV)
        switch (pix_fmt) {
        case AV_PIX_FMT_YUVJ420P: return AV_PIX_FMT_YUV420P;
        case AV_PIX_FMT_YUVJ422P: return AV_PIX_FMT_YUV422P;
        case AV_PIX_FMT_YUVJ444P: return AV_PIX_FMT_YUV444P;
        case AV_PIX_FMT_YUVJ440P: return AV_PIX_FMT_YUV440P;
        default:                  return pix_fmt;
        }
    }

    // TODO: figure out what to do with these variables in AVCodecParameters:
    // - block_align
    // - initial_padding
    // - trailing_padding
    // - seek_preroll
    MediaResult Media::InitAudio() {
        int response;

        AVCodecParameters* av_audio_codec_params = nullptr;

        audio_stream_index = av_find_best_stream(av_format_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, &av_audio_codec, 0);
        if (audio_stream_index < 0) {
            if (audio_stream_index == AVERROR_STREAM_NOT_FOUND) {
                // TODO: might change it later
                return MediaResult::Success;
            }
            else if (audio_stream_index == AVERROR_DECODER_NOT_FOUND) {
                OLC_MEDIA_ASSERT(false, MediaResult::Error, "Couldn't find decoder for any of the audio streams");
            }
            else {
                OLC_MEDIA_ASSERT(false, MediaResult::Error, "Unknown error occured when trying to find audio stream");
            }
        }

        av_audio_codec_params = av_format_ctx->streams[audio_stream_index]->codecpar;

        // Set up a codec context for the decoder
        av_audio_codec_ctx = avcodec_alloc_context3(av_audio_codec);
        OLC_MEDIA_ASSERT(av_audio_codec_ctx != nullptr, MediaResult::Error, "Couldn't create AVCodecContext");

        response = avcodec_parameters_to_context(av_audio_codec_ctx, av_audio_codec_params);
        OLC_MEDIA_ASSERT(response >= 0, MediaResult::Error, "Couldn't send parameters to AVCodecContext");

        response = avcodec_open2(av_audio_codec_ctx, av_audio_codec, NULL);
        OLC_MEDIA_ASSERT(response == 0, MediaResult::Error, "Couldn't initialise AVCodecContext");

        swr_audio_resampler = swr_alloc_set_opts(
            nullptr,
            av_audio_codec_params->channel_layout, AV_SAMPLE_FMT_FLT, av_audio_codec_params->sample_rate,
            av_audio_codec_params->channel_layout, (AVSampleFormat)av_audio_codec_params->format, av_audio_codec_params->sample_rate,
            0, nullptr
        );
        OLC_MEDIA_ASSERT(swr_audio_resampler != nullptr, MediaResult::Error, "Couldn't allocate SwrContext");

        audio_opened = true;
        audio_sample_size = 4;
        audio_channel_count = av_audio_codec_params->channels;
        audio_time_base = av_format_ctx->streams[audio_stream_index]->time_base;

        // Reset values if audio was previously opened
        audio_frames_consumed = 0;
        audio_time = 0.0;

        MediaResult result = audio_fifo.init(av_audio_codec_params->channels, preloaded_frames_scale * av_audio_codec_params->sample_rate);
        OLC_MEDIA_ASSERT(result == MediaResult::Success, result, "Couldn't allocate audio fifo");


        // Initialise and immediately start audio device
        ma_device_config audio_device_config;

        audio_device_config = ma_device_config_init(ma_device_type_playback);
        audio_device_config.playback.format = ma_format_f32;
        audio_device_config.playback.channels = av_audio_codec_params->channels;
        audio_device_config.sampleRate = av_audio_codec_params->sample_rate;
        audio_device_config.pUserData = this;
        audio_device_config.dataCallback = [](ma_device* pDevice, void* pOutput, const void* pInput, ma_uint32 frameCount) {
            Media* media = reinterpret_cast<Media*>(pDevice->pUserData);
            int frames_read = media->GetAudioFrame(&pOutput, frameCount);

            //std::this_thread::sleep_for(std::chrono::milliseconds(200));
            (void)pInput;
        };

        OLC_MEDIA_ASSERT(ma_device_init(NULL, &audio_device_config, &audio_device) == MA_SUCCESS, MediaResult::Error, "Couldn't open playback device");

        OLC_MEDIA_ASSERT(ma_device_start(&audio_device) == MA_SUCCESS, MediaResult::Error, "Couldn't start playback device");

        // TODO: figure out if it's possible to neatly put this into assert macro
        /*if (ma_device_start(&audio_device) != MA_SUCCESS) {
            printf("Failed to start playback device.\n");
            ma_device_uninit(&audio_device);
            return MediaResult::Error;
        }*/

        PrintAudioInfo();

        return MediaResult::Success;
    }

    void Media::CloseAudio() {
        avcodec_free_context(&av_audio_codec_ctx);
        swr_free(&swr_audio_resampler);
        ma_device_uninit(&audio_device);

        audio_opened = false;
        video_fifo.clear();
        video_fifo.free();
    }

    /*bool Media2::seek_frame(int64_t ts) {
        av_seek_frame(av_video_format_ctx, video_stream_index, ts, AVSEEK_FLAG_BACKWARD);

        // av_seek_frame takes effect after one frame, so I'm decoding one here
        // so that the next call to video_reader_read_frame() will give the correct
        // frame
        int response;
        while (av_read_frame(av_video_format_ctx, av_video_packet) >= 0) {
            if (av_video_packet->stream_index != video_stream_index) {
                av_packet_unref(av_packet);
                continue;
            }

            response = avcodec_send_packet(av_codec_ctx, av_packet);
            if (response < 0) {
                printf("Failed to decode packet: %s\n", av_make_error(response));
                return false;
            }

            response = avcodec_receive_frame(av_codec_ctx, av_frame);
            if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
                av_packet_unref(av_packet);
                continue;
            }
            else if (response < 0) {
                printf("Failed to decode packet: %s\n", av_make_error(response));
                return false;
            }

            av_packet_unref(av_packet);
            break;
        }

        return true;
    }*/
}

#endif // OLCPGEX_MEDIA_H