materialsystem: threaded optimizations, fix mat_queue_mode on some android devices
This commit is contained in:
parent
3493fe9b0e
commit
8f1156442e
8 changed files with 66 additions and 140 deletions
|
@ -1029,7 +1029,7 @@ bool CMaterialSystem::AllowThreading( bool bAllow, int nServiceThread )
|
|||
|
||||
bool bOldAllow = m_bAllowQueuedRendering;
|
||||
|
||||
if ( GetCPUInformation()->m_nPhysicalProcessors >= 2 )
|
||||
if ( GetCPUInformation()->m_nLogicalProcessors >= 2 )
|
||||
{
|
||||
m_bAllowQueuedRendering = bAllow;
|
||||
bool bQueued = m_IdealThreadMode != MATERIAL_SINGLE_THREADED;
|
||||
|
@ -1806,11 +1806,7 @@ static ConVar mat_normalmaps( "mat_normalmaps", "0", FCVAR_CHEAT );
|
|||
static ConVar mat_measurefillrate( "mat_measurefillrate", "0", FCVAR_CHEAT );
|
||||
static ConVar mat_fillrate( "mat_fillrate", "0", FCVAR_CHEAT );
|
||||
static ConVar mat_reversedepth( "mat_reversedepth", "0", FCVAR_CHEAT );
|
||||
#ifdef DX_TO_GL_ABSTRACTION
|
||||
static ConVar mat_bufferprimitives( "mat_bufferprimitives", "0" ); // I'm not seeing any benefit speed wise for buffered primitives on GLM/POSIX (checked via TF2 timedemo) - default to zero
|
||||
#else
|
||||
static ConVar mat_bufferprimitives( "mat_bufferprimitives", "1" );
|
||||
#endif
|
||||
static ConVar mat_drawflat( "mat_drawflat","0", FCVAR_CHEAT );
|
||||
static ConVar mat_softwarelighting( "mat_softwarelighting", "0", FCVAR_ALLOWED_IN_COMPETITIVE );
|
||||
static ConVar mat_proxy( "mat_proxy", "0", FCVAR_CHEAT, "", MatProxyCallback );
|
||||
|
@ -2780,8 +2776,8 @@ IMaterial* CMaterialSystem::FindMaterialEx( char const* pMaterialName, const cha
|
|||
{
|
||||
// We need lower-case symbols for this to work
|
||||
int nLen = Q_strlen( pMaterialName ) + 1;
|
||||
char *pFixedNameTemp = (char*)malloc( nLen );
|
||||
char *pTemp = (char*)malloc( nLen );
|
||||
char *pFixedNameTemp = (char*)stackalloc( nLen );
|
||||
char *pTemp = (char*)stackalloc( nLen );
|
||||
Q_strncpy( pFixedNameTemp, pMaterialName, nLen );
|
||||
Q_strlower( pFixedNameTemp );
|
||||
#ifdef POSIX
|
||||
|
@ -2883,9 +2879,6 @@ IMaterial* CMaterialSystem::FindMaterialEx( char const* pMaterialName, const cha
|
|||
}
|
||||
}
|
||||
|
||||
free(pTemp);
|
||||
free(pFixedNameTemp);
|
||||
|
||||
return g_pErrorMaterial->GetRealTimeVersion();
|
||||
}
|
||||
|
||||
|
@ -3103,20 +3096,12 @@ void CMaterialSystem::ResetTempHWMemory( bool bExitingLevel )
|
|||
//-----------------------------------------------------------------------------
|
||||
void CMaterialSystem::CacheUsedMaterials( )
|
||||
{
|
||||
printf("Cache materials\n");
|
||||
|
||||
g_pShaderAPI->EvictManagedResources();
|
||||
size_t count = 0;
|
||||
|
||||
for (MaterialHandle_t i = FirstMaterial(); i != InvalidMaterial(); i = NextMaterial(i) )
|
||||
{
|
||||
// Some (mac) drivers (amd) seem to keep extra resources around on uploads until the next frame swap. This
|
||||
// injects pointless synthetic swaps (between already-static load frames)
|
||||
if ( mat_texture_reload_frame_swap_workaround.GetBool() )
|
||||
{
|
||||
if ( count++ % 20 == 0 )
|
||||
{
|
||||
Flush(true);
|
||||
SwapBuffers(); // Not the right thing to call
|
||||
}
|
||||
}
|
||||
IMaterialInternal* pMat = GetMaterialInternal(i);
|
||||
Assert( pMat->GetReferenceCount() >= 0 );
|
||||
if( pMat->GetReferenceCount() > 0 )
|
||||
|
@ -3703,9 +3688,13 @@ void CMaterialSystem::EndFrame( void )
|
|||
ThreadAcquire( true );
|
||||
}
|
||||
|
||||
IThreadPool* pThreadPool = CreateMatQueueThreadPool();
|
||||
|
||||
if ( m_pActiveAsyncJob && !m_pActiveAsyncJob->IsFinished() )
|
||||
{
|
||||
m_pActiveAsyncJob->WaitForFinish();
|
||||
m_pActiveAsyncJob->WaitForFinish(TT_INFINITE, pThreadPool);
|
||||
|
||||
// Sync with GPU if we had a job for it, even if it finished early on CPU!
|
||||
if ( !IsPC() && g_config.ForceHWSync() )
|
||||
{
|
||||
g_pShaderAPI->ForceHardwareSync();
|
||||
|
@ -3730,7 +3719,6 @@ void CMaterialSystem::EndFrame( void )
|
|||
}
|
||||
}
|
||||
|
||||
IThreadPool *pThreadPool = CreateMatQueueThreadPool();
|
||||
pThreadPool->AddJob( m_pActiveAsyncJob );
|
||||
break;
|
||||
}
|
||||
|
@ -4664,20 +4652,9 @@ void CMaterialSystem::BeginRenderTargetAllocation( void )
|
|||
|
||||
// Ends the render-target allocation phase: flushes buffered primitives,
// clears m_bAllocatingRenderTargets, optionally forces a device resource
// release/reacquire cycle, and finally asks the texture manager to cache the
// external standard render targets.
// NOTE(review): this listing appears to interleave removed and added diff
// lines, so some statements below may not exist in the final revision.
void CMaterialSystem::EndRenderTargetAllocation( void )
|
||||
{
|
||||
// Any GPU newer than 2005 doesn't need to do this, and it eats up ~40% of our level load time!
|
||||
// Read once up front; gates the release/reacquire cycle further down.
const bool cbRequiresRenderTargetAllocationFirst = mat_requires_rt_alloc_first.GetBool();
|
||||
|
||||
g_pShaderAPI->FlushBufferedPrimitives();
|
||||
m_bAllocatingRenderTargets = false;
|
||||
|
||||
// The cycle runs only when all three conditions hold: PC build, the convar
// is set, and the shader API reports that it can download textures.
if ( IsPC() && cbRequiresRenderTargetAllocationFirst && g_pShaderAPI->CanDownloadTextures() )
|
||||
{
|
||||
// Simulate an Alt-Tab...will cause RTs to be allocated first
|
||||
|
||||
g_pShaderDevice->ReleaseResources();
|
||||
g_pShaderDevice->ReacquireResources();
|
||||
}
|
||||
|
||||
TextureManager()->CacheExternalStandardRenderTargets();
|
||||
}
|
||||
|
||||
|
|
|
@ -455,14 +455,11 @@ public:
|
|||
}
|
||||
else
|
||||
{
|
||||
ALIGN16 uint16 tempIndices[16];
|
||||
static ALIGN16 uint16 tempIndices[256];
|
||||
|
||||
// original method
|
||||
int i = 0;
|
||||
if ( (size_t)desc.m_pIndices % 4 == 2 )
|
||||
{
|
||||
desc.m_pIndices[i] = pIndexData[i] + desc.m_nFirstVertex;
|
||||
i++;
|
||||
}
|
||||
|
||||
while ( i < nIndices )
|
||||
{
|
||||
int nToCopy = min( (int)ARRAYSIZE(tempIndices), nIndices - i );
|
||||
|
|
|
@ -2458,13 +2458,6 @@ bool CTexture::AsyncReadTextureFromFile( IVTFTexture* pVTFTexture, unsigned int
|
|||
return false;
|
||||
}
|
||||
|
||||
if ( V_strstr( GetName(), "c_sniperrifle_scope" ) )
|
||||
{
|
||||
int i = 0;
|
||||
i = 3;
|
||||
}
|
||||
|
||||
|
||||
tmZone( TELEMETRY_LEVEL0, TMZF_NONE, "%s - %s", __FUNCTION__, tmDynamicString( TELEMETRY_LEVEL0, pCacheFileName ) );
|
||||
|
||||
// OSX hackery
|
||||
|
@ -4189,12 +4182,6 @@ bool SLoadTextureBitsFromFile( IVTFTexture **ppOutVtfTexture, FileHandle_t hFile
|
|||
// NOTE! NOTE! NOTE! or by the streaming texture code!
|
||||
Assert( ppOutVtfTexture != NULL && *ppOutVtfTexture != NULL );
|
||||
|
||||
if ( V_strstr( pName, "c_rocketlauncher/c_rocketlauncher" ) )
|
||||
{
|
||||
int i = 0;
|
||||
i = 3;
|
||||
}
|
||||
|
||||
CUtlBuffer buf;
|
||||
|
||||
{
|
||||
|
|
|
@ -52,6 +52,12 @@
|
|||
#pragma once
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable:4251)
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void __declspec(dllimport) __stdcall Sleep( unsigned long );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef COMPILER_MSVC64
|
||||
|
@ -194,8 +200,6 @@ PLATFORM_INTERFACE bool ReleaseThreadHandle( ThreadHandle_t );
|
|||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
PLATFORM_INTERFACE void ThreadSleep(unsigned duration = 0);
|
||||
PLATFORM_INTERFACE void ThreadNanoSleep(unsigned ns);
|
||||
PLATFORM_INTERFACE ThreadId_t ThreadGetCurrentId();
|
||||
PLATFORM_INTERFACE ThreadHandle_t ThreadGetCurrentHandle();
|
||||
PLATFORM_INTERFACE int ThreadGetPriority( ThreadHandle_t hThread = NULL );
|
||||
|
@ -229,10 +233,10 @@ inline void ThreadPause()
|
|||
{
|
||||
#if defined( COMPILER_PS3 )
|
||||
__db16cyc();
|
||||
#elif defined(__arm__) || defined(__aarch64__)
|
||||
sched_yield();
|
||||
#elif defined( COMPILER_GCC )
|
||||
#elif defined( COMPILER_GCC ) && (defined( __i386__ ) || defined( __x86_64__ ))
|
||||
__asm __volatile( "pause" );
|
||||
#elif defined( POSIX )
|
||||
sched_yield();
|
||||
#elif defined ( COMPILER_MSVC64 )
|
||||
_mm_pause();
|
||||
#elif defined( COMPILER_MSVC32 )
|
||||
|
@ -247,6 +251,36 @@ inline void ThreadPause()
|
|||
#endif
|
||||
}
|
||||
|
||||
// Suspends the calling thread for about nMilliseconds using the platform
// sleep primitive selected below. A zero duration does not sleep at all: it
// runs ThreadPause() once and returns.
inline void ThreadSleep(unsigned nMilliseconds = 0)
|
||||
{
|
||||
// Zero-duration fast path: no OS sleep call is made.
if( nMilliseconds == 0 )
|
||||
{
|
||||
ThreadPause();
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#ifdef _WIN32_PC
|
||||
// One-time timer-resolution setup, guarded by a plain static flag.
// NOTE(review): the flag is not synchronized, so concurrent first calls
// could both reach timeBeginPeriod() - confirm that is acceptable.
static bool bInitialized = false;
|
||||
if ( !bInitialized )
|
||||
{
|
||||
bInitialized = true;
|
||||
// Set the timer resolution to 1 ms (default is 10.0, 15.6, 2.5, 1.0 or
|
||||
// some other value depending on hardware and software) so that we can
|
||||
// use Sleep( 1 ) to avoid wasting CPU time without missing our frame
|
||||
// rate.
|
||||
timeBeginPeriod( 1 );
|
||||
}
|
||||
#endif
|
||||
Sleep( nMilliseconds );
|
||||
#elif PS3
|
||||
// Milliseconds scaled by 1000 (presumably to microseconds) before the call.
sys_timer_usleep( nMilliseconds * 1000 );
|
||||
#elif defined(POSIX)
|
||||
// Milliseconds scaled by 1000 for usleep().
// NOTE(review): the multiplication is unsigned, so very large durations
// wrap around - confirm callers never pass values that large.
usleep( nMilliseconds * 1000 );
|
||||
#endif
|
||||
}
|
||||
|
||||
PLATFORM_INTERFACE bool ThreadJoin( ThreadHandle_t, unsigned timeout = TT_INFINITE );
|
||||
|
||||
PLATFORM_INTERFACE void ThreadSetDebugName( ThreadHandle_t hThread, const char *pszName );
|
||||
|
|
|
@ -11,21 +11,15 @@ namespace memutils
|
|||
// Copies n elements from src to dest one at a time, using T's assignment
// operator.
// NOTE(review): two loop forms appear in this listing, which seems to
// interleave the removed and added diff lines. The do/while form decrements n
// before its first store, so n == 0 wraps the unsigned counter instead of
// copying nothing. The for form checks n first and copies front to back.
template<typename T>
|
||||
inline void copy( T *dest, const T *src, size_t n )
|
||||
{
|
||||
// do/while form: walks from the last element down to index 0.
do
|
||||
{
|
||||
--n;
|
||||
*(dest+n) = *(src+n);
|
||||
} while( n );
|
||||
// for form: advances both pointers and is a no-op when n is zero.
for(; n; n--)
|
||||
*(dest++) = *(src++);
|
||||
}
|
||||
|
||||
// Assigns `value` to each of the n elements starting at dest, using T's
// assignment operator.
// NOTE(review): two signatures and two loop forms appear in this listing,
// which seems to interleave the removed and added diff lines. One signature
// takes `value` by copy, the other by const reference. The do/while form
// decrements n before its first store, so n == 0 wraps the unsigned counter;
// the for form checks n first.
template<typename T>
|
||||
inline void set( T *dest, T value, size_t n )
|
||||
inline void set( T *dest, const T& value, size_t n )
|
||||
{
|
||||
// do/while form: fills from the last element down to index 0.
do
|
||||
{
|
||||
--n;
|
||||
*(dest+n) = value;
|
||||
} while( n );
|
||||
// for form: fills front to back and is a no-op when n is zero.
for(; n; n--)
|
||||
*(dest++) = value;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -492,8 +492,8 @@ public:
|
|||
//-----------------------------------------------------
|
||||
// Thread event support (safe for NULL this to simplify code )
|
||||
//-----------------------------------------------------
|
||||
bool WaitForFinish( uint32 dwTimeout = TT_INFINITE ) { if (!this) return true; return ( !IsFinished() ) ? g_pThreadPool->YieldWait( this, dwTimeout ) : true; }
|
||||
bool WaitForFinishAndRelease( uint32 dwTimeout = TT_INFINITE ) { if (!this) return true; bool bResult = WaitForFinish( dwTimeout); Release(); return bResult; }
|
||||
// Returns true at once when the job is already finished; otherwise forwards
// to pool->YieldWait( this, dwTimeout ) and returns its result (presumably
// whether the job completed within the timeout - confirm against YieldWait).
// `pool` defaults to the global g_pThreadPool.
// NOTE(review): the `!this` guard only takes effect when the method is called
// through a null pointer, which standard C++ treats as undefined behavior, so
// optimizing compilers may drop the check.
inline bool WaitForFinish( uint32 dwTimeout = TT_INFINITE, IThreadPool *pool = g_pThreadPool ) { if (!this) return true; return ( !IsFinished() ) ? pool->YieldWait( this, dwTimeout ) : true; }
|
||||
inline bool WaitForFinishAndRelease( uint32 dwTimeout = TT_INFINITE ) { if (!this) return true; bool bResult = WaitForFinish( dwTimeout); Release(); return bResult; }
|
||||
CThreadEvent *AccessEvent() { return &m_CompleteEvent; }
|
||||
|
||||
//-----------------------------------------------------
|
||||
|
|
|
@ -485,59 +485,6 @@ bool ReleaseThreadHandle( ThreadHandle_t hThread )
|
|||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
// Out-of-line sleep: suspends the calling thread for about nMilliseconds
// using the platform primitive selected below. Only the PS3 branch treats a
// zero duration specially; the other branches pass it straight through.
void ThreadSleep(unsigned nMilliseconds)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
|
||||
#ifdef _WIN32_PC
|
||||
// One-time timer-resolution setup, guarded by a plain static flag.
// NOTE(review): the flag is not synchronized, so concurrent first calls
// could both reach timeBeginPeriod() - confirm that is acceptable.
static bool bInitialized = false;
|
||||
if ( !bInitialized )
|
||||
{
|
||||
bInitialized = true;
|
||||
// Set the timer resolution to 1 ms (default is 10.0, 15.6, 2.5, 1.0 or
|
||||
// some other value depending on hardware and software) so that we can
|
||||
// use Sleep( 1 ) to avoid wasting CPU time without missing our frame
|
||||
// rate.
|
||||
timeBeginPeriod( 1 );
|
||||
}
|
||||
#endif
|
||||
|
||||
Sleep( nMilliseconds );
|
||||
#elif PS3
|
||||
if( nMilliseconds == 0 )
|
||||
{
|
||||
// NOTE(review): the comment below says to sleep instead of yielding, but
// the sleep call is commented out and the yield is what actually runs.
// sys_ppu_thread_yield doesn't seem to function properly, so sleep instead.
|
||||
// sys_timer_usleep( 60 );
|
||||
sys_ppu_thread_yield();
|
||||
}
|
||||
else
|
||||
{
|
||||
// Milliseconds scaled by 1000 (presumably to microseconds) before the call.
sys_timer_usleep( nMilliseconds * 1000 );
|
||||
}
|
||||
#elif defined(POSIX)
|
||||
// Milliseconds scaled by 1000 for usleep().
// NOTE(review): the multiplication is unsigned, so very large durations
// wrap around - confirm callers never pass values that large.
usleep( nMilliseconds * 1000 );
|
||||
#endif
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Suspends the calling thread for a duration given as `ns` (nanoseconds,
// going by the parameter and function names), using the platform primitive
// selected below.
void ThreadNanoSleep(unsigned ns)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
// NOTE(review): ( ns + 999 ) / 1000 rounds nanoseconds up to whole
// microseconds, yet Win32 Sleep() is documented to take milliseconds -
// confirm the divisor; as written the wait looks ~1000x too long.
// ceil
|
||||
Sleep( ( ns + 999 ) / 1000 );
|
||||
#elif PS3
|
||||
// NOTE(review): ns is passed unscaled to a call whose name suggests a
// microsecond argument - confirm the intended unit.
sys_timer_usleep( ns );
|
||||
#elif defined(POSIX)
|
||||
struct timespec tm;
|
||||
tm.tv_sec = 0;
|
||||
// NOTE(review): the whole duration goes into tv_nsec. POSIX nanosleep()
// requires tv_nsec below 1,000,000,000, so inputs of one second or more
// would presumably be rejected - confirm callers stay under that.
tm.tv_nsec = ns;
|
||||
// Return value is ignored, so an interrupted or rejected sleep goes unreported.
nanosleep( &tm, NULL );
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
#ifndef ThreadGetCurrentId
|
||||
ThreadId_t ThreadGetCurrentId()
|
||||
{
|
||||
|
|
|
@ -214,7 +214,11 @@ public:
|
|||
//-----------------------------------------------------
|
||||
virtual int YieldWait( CThreadEvent **pEvents, int nEvents, bool bWaitAll = true, unsigned timeout = TT_INFINITE );
|
||||
virtual int YieldWait( CJob **, int nJobs, bool bWaitAll = true, unsigned timeout = TT_INFINITE );
|
||||
void Yield( unsigned timeout );
|
||||
// Sleeps the calling thread for `timeout` by forwarding to ThreadSleep().
// Asserts that the caller is the main thread; the sleep itself does not
// depend on that check.
inline void Yield( unsigned timeout )
|
||||
{
|
||||
Assert( ThreadInMainThread() );
|
||||
ThreadSleep( timeout );
|
||||
}
|
||||
|
||||
//-----------------------------------------------------
|
||||
// Add a native job to the queue (master thread)
|
||||
|
@ -656,20 +660,6 @@ int CThreadPool::YieldWait( CJob **ppJobs, int nJobs, bool bWaitAll, unsigned ti
|
|||
return YieldWait( handles.Base(), handles.Count(), bWaitAll, timeout);
|
||||
}
|
||||
|
||||
//---------------------------------------------------------
|
||||
|
||||
// Sleeps the calling thread for `timeout` via ThreadSleep(). Asserts that the
// caller is the main thread.
// NOTE(review): the off-main-thread branch and the fall-through path both
// make the same ThreadSleep( timeout ) call, so the branch does not change
// behavior as written.
void CThreadPool::Yield( unsigned timeout )
|
||||
{
|
||||
// @MULTICORE (toml 10/24/2006): not implemented
|
||||
Assert( ThreadInMainThread() );
|
||||
// Off-main-thread callers: sleep and return early.
if ( !ThreadInMainThread() )
|
||||
{
|
||||
ThreadSleep( timeout );
|
||||
return;
|
||||
}
|
||||
// Main-thread callers: identical sleep.
ThreadSleep( timeout );
|
||||
}
|
||||
|
||||
//---------------------------------------------------------
|
||||
// Add a job to the queue
|
||||
//---------------------------------------------------------
|
||||
|
|
Loading…
Reference in a new issue