Optimized cmatqueuedrendercontext
This commit is contained in:
parent
651e3ca823
commit
494411928e
5 changed files with 86 additions and 24 deletions
|
@ -367,32 +367,61 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert( pDest );
|
Assert( pDest );
|
||||||
memcpy( (byte *)pDest, pVertexData, nBytesVerts );
|
|
||||||
|
auto pAlignedDest = assume_aligned<16>((byte*)pDest);
|
||||||
|
auto pAlignedVertexData = assume_aligned<16>(pVertexData);
|
||||||
|
|
||||||
|
for (int i = 0; i < nBytesVerts; i++)
|
||||||
|
{
|
||||||
|
pAlignedDest[i] = pAlignedVertexData[i];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( pIndexData && pIndexData != &gm_ScratchIndexBuffer[0] && desc.m_nIndexSize )
|
if ( pIndexData && pIndexData != &gm_ScratchIndexBuffer[0] && desc.m_nIndexSize )
|
||||||
{
|
{
|
||||||
|
static constexpr auto INDICES_TO_AUTOVECTORIZE = 256;
|
||||||
|
int i = 0;
|
||||||
|
auto pAlignedIndexData = assume_aligned<16>(pIndexData);
|
||||||
|
auto pAlignedIndces = (((size_t)desc.m_pIndices) % 16) == 0 ? assume_aligned<16>(desc.m_pIndices) : desc.m_pIndices;
|
||||||
|
|
||||||
if ( !desc.m_nFirstVertex )
|
if ( !desc.m_nFirstVertex )
|
||||||
{
|
{
|
||||||
// AssertMsg(desc.m_pIndices & 0x03 == 0,"desc.m_pIndices is misaligned in CMatQueuedMesh::ExecuteDefferedBuild\n");
|
// AssertMsg(desc.m_pIndices & 0x03 == 0,"desc.m_pIndices is misaligned in CMatQueuedMesh::ExecuteDefferedBuild\n");
|
||||||
// memcpy( (byte *)desc.m_pIndices, (byte *)pIndexData, nIndices * sizeof(*pIndexData) );
|
// memcpy( (byte *)desc.m_pIndices, (byte *)pIndexData, nIndices * sizeof(*pIndexData) );
|
||||||
|
|
||||||
// Let it autovectorize.
|
// Let it autovectorize.
|
||||||
for (int i = 0; i < nIndices; i++)
|
while ( i < nIndices )
|
||||||
{
|
{
|
||||||
desc.m_pIndices[i] = pIndexData[i];
|
int nToCopy = nIndices - i;
|
||||||
|
|
||||||
|
auto pSrc = pAlignedIndexData + i;
|
||||||
|
auto pDst = pAlignedIndces + i;
|
||||||
|
|
||||||
|
if (nToCopy < INDICES_TO_AUTOVECTORIZE)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < nToCopy; j++)
|
||||||
|
{
|
||||||
|
pDst[j] = pSrc[j];
|
||||||
|
}
|
||||||
|
|
||||||
|
i += nToCopy;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (int j = 0; j < INDICES_TO_AUTOVECTORIZE; j++)
|
||||||
|
{
|
||||||
|
pDst[j] = pSrc[j];
|
||||||
|
}
|
||||||
|
|
||||||
|
i += INDICES_TO_AUTOVECTORIZE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
static constexpr auto INDICES_TO_AUTOVECTORIZE = 256;
|
|
||||||
|
|
||||||
// original method
|
|
||||||
int i = 0;
|
|
||||||
|
|
||||||
int firstVertex = desc.m_nFirstVertex;
|
int firstVertex = desc.m_nFirstVertex;
|
||||||
|
|
||||||
uint16 DECL_ALIGN(512) firstVertexMatrix[INDICES_TO_AUTOVECTORIZE];
|
uint16 firstVertexMatrix alignas(16) [INDICES_TO_AUTOVECTORIZE];
|
||||||
|
|
||||||
for (int i = 0; i < INDICES_TO_AUTOVECTORIZE; i++)
|
for (int i = 0; i < INDICES_TO_AUTOVECTORIZE; i++)
|
||||||
{
|
{
|
||||||
|
@ -402,9 +431,8 @@ public:
|
||||||
while ( i < nIndices )
|
while ( i < nIndices )
|
||||||
{
|
{
|
||||||
int nToCopy = nIndices - i;
|
int nToCopy = nIndices - i;
|
||||||
|
auto pSrc = pAlignedIndexData + i;
|
||||||
auto pSrc = pIndexData + i;
|
auto pDst = pAlignedIndces + i;
|
||||||
auto pDst = desc.m_pIndices + i;
|
|
||||||
|
|
||||||
if (nToCopy < INDICES_TO_AUTOVECTORIZE)
|
if (nToCopy < INDICES_TO_AUTOVECTORIZE)
|
||||||
{
|
{
|
||||||
|
@ -641,10 +669,10 @@ private:
|
||||||
IMesh *m_pVertexOverride;
|
IMesh *m_pVertexOverride;
|
||||||
IMesh *m_pIndexOverride;
|
IMesh *m_pIndexOverride;
|
||||||
|
|
||||||
static unsigned short gm_ScratchIndexBuffer[6];
|
alignas(16) static unsigned short gm_ScratchIndexBuffer [6];
|
||||||
};
|
};
|
||||||
|
|
||||||
unsigned short CMatQueuedMesh::gm_ScratchIndexBuffer[6];
|
alignas(16) unsigned short CMatQueuedMesh::gm_ScratchIndexBuffer[6];
|
||||||
|
|
||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
|
@ -27,7 +27,7 @@ CDummyTexture g_DummyTexture;
|
||||||
// ---------------------------------------------------------------------------------------- //
|
// ---------------------------------------------------------------------------------------- //
|
||||||
// IMaterialSystem and IMesh stub classes.
|
// IMaterialSystem and IMesh stub classes.
|
||||||
// ---------------------------------------------------------------------------------------- //
|
// ---------------------------------------------------------------------------------------- //
|
||||||
static unsigned short g_DummyIndices[6];
|
alignas(16) static unsigned short g_DummyIndices[6];
|
||||||
|
|
||||||
class CDummyMesh : public IMesh
|
class CDummyMesh : public IMesh
|
||||||
{
|
{
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#ifndef DYNAMICIB_H
|
#ifndef DYNAMICIB_H
|
||||||
#define DYNAMICIB_H
|
#define DYNAMICIB_H
|
||||||
|
|
||||||
|
#include "memalloc.h"
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#pragma once
|
#pragma once
|
||||||
#endif
|
#endif
|
||||||
|
@ -310,7 +311,7 @@ inline CIndexBuffer::CIndexBuffer( IDirect3DDevice9 *pD3D, int count,
|
||||||
|
|
||||||
if ( g_pShaderUtil->GetThreadMode() != MATERIAL_SINGLE_THREADED || !ThreadInMainThread() )
|
if ( g_pShaderUtil->GetThreadMode() != MATERIAL_SINGLE_THREADED || !ThreadInMainThread() )
|
||||||
{
|
{
|
||||||
m_pSysmemBuffer = ( byte * )malloc( count * IndexSize() );
|
m_pSysmemBuffer = ( byte * )_aligned_malloc( count * IndexSize(), 16 );
|
||||||
m_nSysmemBufferStartBytes = 0;
|
m_nSysmemBufferStartBytes = 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -509,7 +510,7 @@ inline CIndexBuffer::~CIndexBuffer()
|
||||||
|
|
||||||
if ( m_pSysmemBuffer )
|
if ( m_pSysmemBuffer )
|
||||||
{
|
{
|
||||||
free( m_pSysmemBuffer );
|
_aligned_free( m_pSysmemBuffer );
|
||||||
m_pSysmemBuffer = NULL;
|
m_pSysmemBuffer = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -800,7 +801,7 @@ inline unsigned short* CIndexBuffer::Lock( bool bReadOnly, int numIndices, int&
|
||||||
# endif
|
# endif
|
||||||
m_bFlush = false;
|
m_bFlush = false;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -829,12 +830,13 @@ inline unsigned short* CIndexBuffer::Lock( bool bReadOnly, int numIndices, int&
|
||||||
|
|
||||||
HRESULT hr = D3D_OK;
|
HRESULT hr = D3D_OK;
|
||||||
|
|
||||||
|
static ConVar cl_materialsystem_ignore_thread_safe("cl_materialsystem_ignore_thread_safe", "0");
|
||||||
#if !defined( _X360 )
|
#if !defined( _X360 )
|
||||||
// If the caller isn't in the thread that owns the render lock, need to return a system memory pointer--cannot talk to GL from
|
// If the caller isn't in the thread that owns the render lock, need to return a system memory pointer--cannot talk to GL from
|
||||||
// the non-current thread.
|
// the non-current thread.
|
||||||
if ( !m_pSysmemBuffer && !g_pShaderUtil->IsRenderThreadSafe() )
|
if ( !m_pSysmemBuffer && (!g_pShaderUtil->IsRenderThreadSafe() || cl_materialsystem_ignore_thread_safe.GetBool()) )
|
||||||
{
|
{
|
||||||
m_pSysmemBuffer = ( byte * )malloc( m_IndexCount * IndexSize() );
|
m_pSysmemBuffer = ( byte * )_aligned_malloc( m_IndexCount * IndexSize(), 16 );
|
||||||
m_nSysmemBufferStartBytes = position * IndexSize();
|
m_nSysmemBufferStartBytes = position * IndexSize();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1022,7 +1024,7 @@ inline void CIndexBuffer::HandleLateCreation( )
|
||||||
memcpy( pWritePtr, m_pSysmemBuffer + m_nSysmemBufferStartBytes, dataToWriteBytes );
|
memcpy( pWritePtr, m_pSysmemBuffer + m_nSysmemBufferStartBytes, dataToWriteBytes );
|
||||||
ReallyUnlock( dataToWriteBytes );
|
ReallyUnlock( dataToWriteBytes );
|
||||||
|
|
||||||
free( m_pSysmemBuffer );
|
_aligned_free( m_pSysmemBuffer );
|
||||||
m_pSysmemBuffer = NULL;
|
m_pSysmemBuffer = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -57,7 +57,7 @@ extern ConVar mat_debugalttab;
|
||||||
|
|
||||||
//#define DRAW_SELECTION 1
|
//#define DRAW_SELECTION 1
|
||||||
static bool g_bDrawSelection = true; // only used in DRAW_SELECTION
|
static bool g_bDrawSelection = true; // only used in DRAW_SELECTION
|
||||||
static unsigned short g_nScratchIndexBuffer[6]; // large enough for a fast quad; used when device is not active
|
alignas(16) static unsigned short g_nScratchIndexBuffer[6]; // large enough for a fast quad; used when device is not active
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
int CVertexBuffer::s_BufferCount = 0;
|
int CVertexBuffer::s_BufferCount = 0;
|
||||||
int CIndexBuffer::s_BufferCount = 0;
|
int CIndexBuffer::s_BufferCount = 0;
|
||||||
|
@ -627,7 +627,7 @@ private:
|
||||||
CDynamicMeshDX8 *GetDynamicMesh();
|
CDynamicMeshDX8 *GetDynamicMesh();
|
||||||
|
|
||||||
CUtlVector< unsigned char, CUtlMemoryAligned< unsigned char, 32 > > m_VertexData;
|
CUtlVector< unsigned char, CUtlMemoryAligned< unsigned char, 32 > > m_VertexData;
|
||||||
CUtlVector< unsigned short > m_IndexData;
|
CUtlVector< unsigned short, CUtlMemoryAligned< unsigned short, 16 > > m_IndexData;
|
||||||
|
|
||||||
unsigned short m_VertexSize;
|
unsigned short m_VertexSize;
|
||||||
MaterialPrimitiveType_t m_Type;
|
MaterialPrimitiveType_t m_Type;
|
||||||
|
|
|
@ -540,6 +540,38 @@ typedef void * HINSTANCE;
|
||||||
#error
|
#error
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// assume_aligned<N>( ptr )
//
// Returns ptr unchanged while giving the optimizer the hint that ptr is aligned
// to an N-byte boundary, so that copy loops over the result can be vectorized
// with aligned loads/stores.
//
//   N    - required alignment in BYTES (not bits); must be a power of two.
//   ptr  - pointer the caller guarantees to be N-byte aligned.
//
// The guarantee is the caller's responsibility: on compilers where a hint is
// emitted, passing a pointer that is not really N-byte aligned is undefined
// behavior. On unknown compilers the function is a plain pass-through.
template <std::size_t N, typename T>
// The standard attribute goes first so that it is never preceded by a
// vendor-specific specifier.
[[nodiscard]]
#if defined(__clang__) || defined(__GNUC__)
__attribute__((always_inline))
#elif defined(_MSC_VER)
__forceinline
#endif
constexpr T* assume_aligned(T* ptr) noexcept
{
	static_assert(N != 0 && (N & (N - 1)) == 0, "assume_aligned: N must be a non-zero power of two");

#if defined(__clang__) || (defined(__GNUC__) && !defined(__ICC))
	// The builtin returns void*; converting back to T* keeps the pointee type.
	return static_cast<T*>(__builtin_assume_aligned(ptr, N));
#elif defined(_MSC_VER)
	// N is a byte count, so the low bits that must be clear are exactly N - 1.
	// (The mask used to be (1 << N) - 1, which for N == 16 demanded 64 KiB
	// alignment and sent every ordinary 16-byte aligned pointer to __assume(0).)
	if ((reinterpret_cast<std::uintptr_t>(ptr) & (static_cast<std::uintptr_t>(N) - 1)) == 0)
		return ptr;
	else
		__assume(0);
#elif defined(__ICC)
	// __assume_aligned is invoked with a literal alignment in every case,
	// hence the switch over the supported values.
	switch (N) {
	case 2: __assume_aligned(ptr, 2); break;
	case 4: __assume_aligned(ptr, 4); break;
	case 8: __assume_aligned(ptr, 8); break;
	case 16: __assume_aligned(ptr, 16); break;
	case 32: __assume_aligned(ptr, 32); break;
	case 64: __assume_aligned(ptr, 64); break;
	case 128: __assume_aligned(ptr, 128); break;
	}
	return ptr;
#else
	// Unknown compiler — do nothing
	return ptr;
#endif
}
|
||||||
|
|
||||||
// !!! NOTE: if you get a compile error here, you are using VALIGNOF on an abstract type :NOTE !!!
|
// !!! NOTE: if you get a compile error here, you are using VALIGNOF on an abstract type :NOTE !!!
|
||||||
#define VALIGNOF_PORTABLE( type ) ( sizeof( AlignOf_t<type> ) - sizeof( type ) )
|
#define VALIGNOF_PORTABLE( type ) ( sizeof( AlignOf_t<type> ) - sizeof( type ) )
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue