// device.h

class GpuDevice : NonCopyable
{
public:
    //--------------------------------------------------------------------------
    //These functions should only be used by the thread that created the device
    //--------------------------------------------------------------------------

    //Call before starting each frame. Do not render if it returns false (lost device, etc, has occurred).
    bool ReadyToRender( void(*preReset)(void*), void(*postReset)(void*), void* arg=0 );

    //End a frame and queue it to be displayed to the window
    void Present( TextureId );

    //On single-threaded APIs, this is the only context that directly uses the native API.
    //Searching for code that calls this function is a good way to find rendering code that's explicitly single-threaded...
    GpuContext& GetImmediateContext();//the 'main' context. Can ONLY be used by the thread that created the device!

    //Submit a finished deferred/compute context to the immediate context. Do not pass the immediate context into these!
    void Submit( GpuContext& );
    void Submit( ComputeContext& );
    void Submit( DmaContext& );

    //Stall the CPU until an event has completed, retrieve it's value
    u64 CpuWait( GpuEventId );

    //--------------------------------------------------------------------------
    //Everything below here is thread-safe - in that multiple threads can use the device at once.
    // No guarantees are made here about using other objects in parallel, such as the loaders, scopes, or resources...
    //--------------------------------------------------------------------------

    GpuCapabilities GetCapabilities() const;
    SingleThread ResourceLoadingThread() const;

    //Signal an event from the CPU, allowing the GPU to proceed
    void CpuSignal( GpuEventId );

    // Multi-threaded command submission:
        GpuContext& AcquireDeferredContext();//must be submitted to the device, or aborted
               void   AbortDeferredContext( GpuContext& );//releases a deferred context without submitting

    ComputeContext& AcquireComputeContext();//must be submitted to the device, or aborted
               void   AbortComputeContext( ComputeContext& );//releases a compute context without submitting

        DmaContext& AcquireDmaContext();//must be submitted to the device, or aborted
               void   AbortDmaContext( DmaContext& );//releases a DMA context without submitting

    // Default data
                          void GetDefaultViewportSize( int& x, int& y );
    static TextureFormat::Type GetDefaultColorFormat();
    static TextureFormat::Type GetDefaultDepthFormat();
             const StateGroup* GetDefaultStates() const;
              const PassState* DefaultPassState() const;

    // Loads a shader pack with the same lifetime as the scope object.
    ShaderPackAsset* LoadShaderPack( AssetName, AssetScope&, BlobLoader& );

    ShaderTechniqueId GetTechnqiue( ShaderPackId, int techniqueIdx );
    ShaderProgramsId GetGraphicsProgram( ShaderTechniqueId, u32 pass, const ShaderOptions&, u32& validCBufferMask, u32& validResourceListMask, u32& validSamplerMask, VertexFormatId& vertexFormat );
    ShaderProgramsId GetComputeProgram( ShaderTechniqueId, u32 pass, const ShaderOptions&, u32& validCBufferMask, u32& validResourceListMask, u32& validSamplerMask, u32& validRwResourceListMask );

    // Loads a stream format pack with the same lifetime as the scope object.
              void LoadStreamFormatPack( AssetName, AssetScope&, BlobLoader& );
    StreamFormatId FindStreamFormat( u32 name );
    static     u32 StreamFormatHash( const char *name );//convert a string into the format used by FindStreamFormat

    InputAssemblerLayoutId FindInputAssemblerLayout( StreamFormatId, VertexFormatId, ShaderProgramsId );

    // Textures
    // If passing an ID as a recycle parameter, the internal resource allocations attached to it will be released, but the same ID will (likely) be returned with the newly requested resource allocations attached.
    // The Array and Volume flags cannot be used simultanously: d is volume depth or array size. If not passing the Volume or Array flags, set d to 1.
    // If the Array and Cube flags are set, the 6 2D-faces-per-cube factor is automatic -- i.e. the internal 2d-texture array size will be d*6, and the alias offset into the array will be startSlice*6.
      TextureId CreateNullTexture( TextureId recycle=TextureId(0) );//reserve a TextureId but make no resource allocations. Using the recycle parameter allows internal resource to be released while retaining the ID.
      TextureId CreateTexture( const char* name, TextureFlags::Type, float fullScreenRatio,         TextureFormat::Type,                     TextureId recycle=TextureId(0), CreateTextureCallback*cb=0 );
      TextureId CreateTexture( const char* name, TextureFlags::Type, u32 w, u32 h, u32 d, u32 mips, TextureFormat::Type, const u8* pixels=0, TextureId recycle=TextureId(0), CreateTextureCallback*cb=0, void* vram=0, u32 vramSize=0 );
      TextureId CreateTexture( const char* name, const u8* header, const u8* vram, u32 vramSize, bool ownsVram,                              TextureId recycle=TextureId(0), CreateTextureCallback*cb=0 );
      TextureId CreateTextureAlias( const char* name, TextureId source, uint startSlice, uint startMip, TextureFlags::Type, u32 w, u32 h, u32 d, u32 mips, TextureFormat::Type, TextureId recycle=TextureId(0) );
           void ReleaseTexture( TextureId ); // Release the ID and the internal allocations.
    TextureInfo GetTextureInfo( TextureId ) const;
           bool IsTextureNull( TextureId ) const; // true if there are no resource allocations attached to this ID
      TextureId GetErrorTexture() const;

    // Buffers hold Vertex, Index, and other data (bytes, structures)
    //      If useExistingAllocation is true, then the buffer object will use the vram pointer directly, and assume that this allocation has a longer lifetime than the buffer object itself (the buffer will not attempt to free the vram pointer).
    //      If useExistingAllocation is false, and vram is not-NULL, it will be memcpy'ed into the new buffer allocation.
    BufferId CreateBuffer( const char* name, const u8* vram, u32 size, BufferBindFlags::Type, UsageFlags::Type = UsageFlags::GpuRead, bool useExistingAllocation=false, BufferId recycle=BufferId(0) );
        void ReleaseBuffer( BufferId );

    // ConstantBuffers supply uniform values to shaders
    ConstantBufferId CreateConstantBuffer( const char* name, const void* initialData, u32 size, UsageFlags::Type = UsageFlags::CpuWrite|UsageFlags::GpuRead, ConstantBufferId recycle=ConstantBufferId(0) );
    ConstantBufferId CreateConstantBuffer( ConstantBufferNative& );
                void ReleaseConstantBuffer( ConstantBufferId );
    // This grants you a writable pointer into the cbuffer's memory allocation. However, you MUST call GpuContext::Synchronize after changing the data but before the data is used by a Submit call.
    void* MapPersistentUnsynchronized( ConstantBufferId );
    // This creates a copy of the supplied constant buffer. However, this relies on the source buffer being created with CpuRead & CpuWrite.
    inline ConstantBufferId CreateConstantBuffer( const char* name, ConstantBufferId source, UsageFlags::Type usageFlags = UsageFlags::CpuWrite|UsageFlags::GpuRead, ConstantBufferId recycle=ConstantBufferId(0) )
    {
        u32 GetConstantBufferSize( ConstantBufferId, GpuDevice& );
        return CreateConstantBuffer(name, MapPersistentUnsynchronized(source), GetConstantBufferSize(source, *this), usageFlags, recycle);
    }

    // ResourceLists supply TextureIds and BufferIds to shaders
    ResourceListId CreateResourceList( const char* name, u32 numItems, u32 initCount, TextureId* initialData );
    ResourceListId CreateResourceList( const char* name, u32 numItems, u32 initCount=0, ResourceId* initialData=0 );
             void ReleaseResourceList( ResourceListId );

protected:
    ~GpuDevice(){}
};
//==============================================================================

//==============================================================================
// Functionality common to Gpu/Compute/DMA context types.
class BaseGpuContext : NoCreate
{
public:
    GpuDevice& Device();//Get the device that owns this context.
};

//==============================================================================
// Allows one thread to send DMA commands to a GPU
class DmaContext : public BaseGpuContext
{
public:
    //Insert a GPU event
    GpuEventId Submit( GpuFenceFlags::Type, u64 arg=0 );

    //Have the GPU stall execution of this context until an event has completed
    void Wait( GpuEventId );

    //Have the GPU signal an event as having been completed
    void Signal( GpuEventId );

    //Have the GPU clone one resource into another with compatible type/format/dimensions
    void Copy( ResourceId dst, ResourceId src );//Copy all subresources
    void Copy( ResourceId dst, const ResourceOffset& dstOffset, ResourceId src, const ResourceArea* srcArea=0 );//Copy [part of] one subresource

    //Upload new data from the CPU to GPU-resources. Best for infrequent changes to static resources.
    void Update( TextureId, const void* data, u32 size, u32 mip=0, u32 arrayIdx=0 );
    void Update( BufferId, const void* data, u32 size );
    void Update( ConstantBufferId, const void* data, u32 size );
    void Update( ResourceListId, u32 idCount, const ResourceId* ids );

    //Map GPU-resources into CPU address space. For dynamic resources, ideally use WriteNoOverwrite for efficiency.
    void Map( ResourceLock&, MapMode::Type, TextureId, u8*& buffer, u32& width, u32& height, u32& pitchBytes, u32 mip=0, u32 arrayIdx=0 );//todo - move these out params into the ResourceLock
    void Map( ResourceLock&, MapMode::Type, BufferId, u32 offset, u32 size );
    void Map( ResourceLock&, MapMode::Type, ConstantBufferId, u32 offset=0, u32 size=0 );
    void Map( ResourceLock&, MapMode::Type, ResourceListId, u32 offset=0, u32 size=0 );
    void Unmap( ResourceLock&, TextureId );
    void Unmap( ResourceLock&, BufferId );
    void Unmap( ResourceLock&, ConstantBufferId );
    void Unmap( ResourceLock&, ResourceListId );

    //For use in conjunction with GpuDevice::MapPersistentUnsynchronized. Makes sure that any changes to the mapped buffer are GPU-visible.
    void Synchronize( ConstantBufferId );
};

//==============================================================================
// Allows one thread to send compute and DMA commands to a GPU
class ComputeContext : public DmaContext
{
public:
    //Submit a stateless compute job (dispatch call)
    using DmaContext::Submit;
    void Submit( const RenderPass&, const DispatchItem& );

    //Mark a texture as having undefined pixel contents.
    void Discard( TextureId );
};

//==============================================================================
// Allows one thread to send draw, compute and DMA commands to a GPU
class GpuContext : public ComputeContext
{
public:
    using DmaContext::Submit;
    using ComputeContext::Submit;

    void Finish();//Call when no more commands will be made via this context on this frame. Must be called on deferred contexts before submitting them!

    //Submit a clear command only
    void Submit( const RenderPass&, const ClearCommand& c );

    //Submit a list of draw-calls, optionally clearing before the first draw
    void Submit( const RenderPass&, const DrawList&, const ClearCommand* c=0 );
};
//==============================================================================

struct DrawList
{
    //Array of pointers
    DrawList( u32 count, const DrawItem*const* items     ) : count(count),    items(items),              stride(sizeof(DrawItem*)),   helper() {}
    DrawList( u32 count, const DrawItemKey*    items     ) : count(count),    items(&items[0].item),     stride(sizeof(DrawItemKey)),   helper() {}
    DrawList( const      rde::vector<      DrawItem*>& v ) : count(v.size()), items(count?&v[0]:0),      stride(sizeof(DrawItem*)),   helper() {}
    DrawList( const      rde::vector<    DrawItemKey>& v ) : count(v.size()), items(count?&v[0].item:0), stride(sizeof(DrawItemKey)), helper() {}
    DrawList( const      rde::vector<const DrawItem*>& v ) : count(v.size()), items(count?&v[0]:0),      stride(sizeof(DrawItem*)),   helper() {}
    template<class T> DrawList( const T& v )               : count(v.size()), items(count?&v[0]:0),      stride(sizeof(DrawItem*)),   helper() {}
    //Contiguous blob of DrawItem instances
    DrawList( u32 count, const DrawItem* items ) : count(count), stride(), items(), helper(items) {}
    //Single DrawItem
    DrawList( const DrawItem& item ) : count(1), stride(), items(&helper), helper(&item) {}

    u32 count;
    u32 stride;
    const void* items;
    const DrawItem* helper;
};