css_enhanced_waf/external/vpc/public/tier0/tslist.h
FluorescentCIAAfricanAmerican 3bf9df6b27 1
2020-04-22 12:56:21 -04:00

858 lines
18 KiB
C++

//========== Copyright © 2005, Valve Corporation, All rights reserved. ========
//
// Purpose:
//
// LIFO from disassembly of Windows API and http://perso.wanadoo.fr/gmem/evenements/jim2002/articles/L17_Fober.pdf
// FIFO from http://perso.wanadoo.fr/gmem/evenements/jim2002/articles/L17_Fober.pdf
//
//=============================================================================
#ifndef TSLIST_H
#define TSLIST_H
#if defined( _WIN32 )
#pragma once
#endif
#if ( defined( PLATFORM_X360 ) || defined( PLATFORM_WINDOWS_PC64 ) )
#define USE_NATIVE_SLIST
#endif
#if defined( USE_NATIVE_SLIST ) && !defined( _X360 ) && !defined( _PS3 )
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
#include "tier0/dbg.h"
#include "tier0/threadtools.h"
#include "tier0/memdbgon.h"
//-----------------------------------------------------------------------------
#if defined(PLATFORM_WINDOWS_PC64)
#define TSLIST_HEAD_ALIGNMENT 16 //MEMORY_ALLOCATION_ALIGNMENT
#define TSLIST_NODE_ALIGNMENT 16 //MEMORY_ALLOCATION_ALIGNMENT
#define TSLIST_HEAD_ALIGN ALIGN16
#define TSLIST_NODE_ALIGN ALIGN16
#define TSLIST_HEAD_ALIGN_POST ALIGN16_POST
#define TSLIST_NODE_ALIGN_POST ALIGN16_POST
#else
#define TSLIST_HEAD_ALIGNMENT 8
#define TSLIST_NODE_ALIGNMENT 8
#define TSLIST_HEAD_ALIGN ALIGN8
#define TSLIST_NODE_ALIGN ALIGN8
#define TSLIST_HEAD_ALIGN_POST ALIGN8_POST
#define TSLIST_NODE_ALIGN_POST ALIGN8_POST
#endif
//-----------------------------------------------------------------------------
PLATFORM_INTERFACE bool RunTSQueueTests( int nListSize = 10000, int nTests = 1 );
PLATFORM_INTERFACE bool RunTSListTests( int nListSize = 10000, int nTests = 1 );
//-----------------------------------------------------------------------------
// Lock free list.
//-----------------------------------------------------------------------------
//#define USE_NATIVE_SLIST
#ifdef USE_NATIVE_SLIST
typedef SLIST_ENTRY TSLNodeBase_t;
typedef SLIST_HEADER TSLHead_t;
#else
struct TSLIST_NODE_ALIGN TSLNodeBase_t
{
TSLNodeBase_t *Next; // name to match Windows
}
TSLIST_NODE_ALIGN_POST;
union TSLHead_t
{
struct Value_t
{
TSLNodeBase_t *Next;
// <sergiy> Depth must be in the least significant halfword when atomically loading into register,
// to avoid carrying digits from Sequence. Carrying digits from Depth to Sequence is ok,
// because Sequence can be pretty much random. We could operate on both of them separately,
// but it could perhaps (?) lead to problems with store forwarding. I don't know 'cause I didn't
// performance-test or design original code, I'm just making it work on PowerPC.
#ifdef PLAT_BIG_ENDIAN
int16 Sequence;
int16 Depth;
#else
int16 Depth;
int16 Sequence;
#endif
} value;
struct Value32_t
{
TSLNodeBase_t *Next_do_not_use_me;
int32 DepthAndSequence;
} value32;
int64 value64;
};
#endif
//-------------------------------------
class TSLIST_HEAD_ALIGN CTSListBase
{
public:
CTSListBase()
{
if ( ((size_t)&m_Head) % TSLIST_HEAD_ALIGNMENT != 0 )
{
Error( _T( "CTSListBase: Misaligned list\n" ) );
DebuggerBreak();
}
#ifdef USE_NATIVE_SLIST
InitializeSListHead( &m_Head );
#else
m_Head.value64 = (int64)0;
#endif
}
~CTSListBase()
{
Detach();
}
TSLNodeBase_t *Push( TSLNodeBase_t *pNode )
{
#ifdef _DEBUG
if ( (size_t)pNode % TSLIST_NODE_ALIGNMENT != 0 )
{
Error( _T( "CTSListBase: Misaligned node\n" ) );
DebuggerBreak();
}
#endif
#ifdef USE_NATIVE_SLIST
#ifdef _X360
// integrated write-release barrier
return (TSLNodeBase_t *)InterlockedPushEntrySListRelease( &m_Head, pNode );
#else
return (TSLNodeBase_t *)InterlockedPushEntrySList( &m_Head, pNode );
#endif
#else
TSLHead_t oldHead;
TSLHead_t newHead;
#if defined( PLATFORM_PS3 ) || defined( PLATFORM_X360 )
__lwsync(); // write-release barrier
#endif
for (;;)
{
oldHead.value64 = m_Head.value64;
pNode->Next = oldHead.value.Next;
newHead.value.Next = pNode;
newHead.value32.DepthAndSequence = oldHead.value32.DepthAndSequence + 0x10001;
if ( ThreadInterlockedAssignIf64( &m_Head.value64, newHead.value64, oldHead.value64 ) )
{
break;
}
ThreadPause();
};
return (TSLNodeBase_t *)oldHead.value.Next;
#endif
}
TSLNodeBase_t *Pop()
{
#ifdef USE_NATIVE_SLIST
#ifdef _X360
// integrated read-acquire barrier
TSLNodeBase_t *pNode = (TSLNodeBase_t *)InterlockedPopEntrySListAcquire( &m_Head );
#else
TSLNodeBase_t *pNode = (TSLNodeBase_t *)InterlockedPopEntrySList( &m_Head );
#endif
return pNode;
#else
TSLHead_t oldHead;
TSLHead_t newHead;
for (;;)
{
oldHead.value64 = m_Head.value64;
if ( !oldHead.value.Next )
return NULL;
newHead.value.Next = oldHead.value.Next->Next;
newHead.value32.DepthAndSequence = oldHead.value32.DepthAndSequence - 1;
if ( ThreadInterlockedAssignIf64( &m_Head.value64, newHead.value64, oldHead.value64 ) )
{
#if defined( PLATFORM_PS3 ) || defined( PLATFORM_X360 )
__lwsync(); // read-acquire barrier
#endif
break;
}
ThreadPause();
};
return (TSLNodeBase_t *)oldHead.value.Next;
#endif
}
TSLNodeBase_t *Detach()
{
#ifdef USE_NATIVE_SLIST
TSLNodeBase_t *pBase = (TSLNodeBase_t *)InterlockedFlushSList( &m_Head );
#if defined( _X360 ) || defined( _PS3 )
__lwsync(); // read-acquire barrier
#endif
return pBase;
#else
TSLHead_t oldHead;
TSLHead_t newHead;
do
{
ThreadPause();
oldHead.value64 = m_Head.value64;
if ( !oldHead.value.Next )
return NULL;
newHead.value.Next = NULL;
// <sergiy> the reason for AND'ing it instead of poking a short into memory
// is probably to avoid store forward issues, but I'm not sure because
// I didn't construct this code. In any case, leaving it as is on big-endian
newHead.value32.DepthAndSequence = oldHead.value32.DepthAndSequence & 0xffff0000;
} while( !ThreadInterlockedAssignIf64( &m_Head.value64, newHead.value64, oldHead.value64 ) );
return (TSLNodeBase_t *)oldHead.value.Next;
#endif
}
TSLHead_t *AccessUnprotected()
{
return &m_Head;
}
int Count() const
{
#ifdef USE_NATIVE_SLIST
return QueryDepthSList( const_cast<TSLHead_t*>( &m_Head ) );
#else
return m_Head.value.Depth;
#endif
}
private:
TSLHead_t m_Head;
}
TSLIST_HEAD_ALIGN_POST;
//-------------------------------------
template <typename T>
class TSLIST_HEAD_ALIGN CTSSimpleList : public CTSListBase
{
public:
void Push( T *pNode )
{
Assert( sizeof(T) >= sizeof(TSLNodeBase_t) );
CTSListBase::Push( (TSLNodeBase_t *)pNode );
}
T *Pop()
{
return (T *)CTSListBase::Pop();
}
}
TSLIST_HEAD_ALIGN_POST;
//-------------------------------------
// this is a replacement for CTSList<> and CObjectPool<> that does not
// have a per-item, per-alloc new/delete overhead
// similar to CTSSimpleList except that it allocates it's own pool objects
// and frees them on destruct. Also it does not overlay the TSNodeBase_t memory
// on T's memory
template< class T >
class TSLIST_HEAD_ALIGN CTSPool : public CTSListBase
{
// packs the node and the item (T) into a single struct and pools those
struct TSLIST_NODE_ALIGN simpleTSPoolStruct_t : public TSLNodeBase_t
{
T elem;
} TSLIST_NODE_ALIGN_POST;
public:
~CTSPool()
{
simpleTSPoolStruct_t *pNode = NULL;
while ( 1 )
{
pNode = (simpleTSPoolStruct_t *)CTSListBase::Pop();
if ( !pNode )
break;
delete pNode;
}
}
void PutObject( T *pInfo )
{
char *pElem = (char *)pInfo;
pElem -= offsetof(simpleTSPoolStruct_t,elem);
simpleTSPoolStruct_t *pNode = (simpleTSPoolStruct_t *)pElem;
CTSListBase::Push( pNode );
}
T *GetObject()
{
simpleTSPoolStruct_t *pNode = (simpleTSPoolStruct_t *)CTSListBase::Pop();
if ( !pNode )
{
pNode = new simpleTSPoolStruct_t;
}
return &pNode->elem;
}
// omg windows sdk - why do you #define GetObject()?
FORCEINLINE T *Get()
{
return GetObject();
}
} TSLIST_HEAD_ALIGN_POST;
//-------------------------------------
template <typename T>
class TSLIST_HEAD_ALIGN CTSList : public CTSListBase
{
public:
struct TSLIST_NODE_ALIGN Node_t : public TSLNodeBase_t
{
Node_t() {}
Node_t( const T &init ) : elem( init ) {}
T elem;
} TSLIST_NODE_ALIGN_POST;
~CTSList()
{
Purge();
}
void Purge()
{
Node_t *pCurrent = Detach();
Node_t *pNext;
while ( pCurrent )
{
pNext = (Node_t *)pCurrent->Next;
delete pCurrent;
pCurrent = pNext;
}
}
void RemoveAll()
{
Purge();
}
Node_t *Push( Node_t *pNode )
{
return (Node_t *)CTSListBase::Push( pNode );
}
Node_t *Pop()
{
return (Node_t *)CTSListBase::Pop();
}
void PushItem( const T &init )
{
Push( new Node_t( init ) );
}
bool PopItem( T *pResult)
{
Node_t *pNode = Pop();
if ( !pNode )
return false;
*pResult = pNode->elem;
delete pNode;
return true;
}
Node_t *Detach()
{
return (Node_t *)CTSListBase::Detach();
}
} TSLIST_HEAD_ALIGN_POST;
//-------------------------------------
template <typename T>
class TSLIST_HEAD_ALIGN CTSListWithFreeList : public CTSListBase
{
public:
struct TSLIST_NODE_ALIGN Node_t : public TSLNodeBase_t
{
Node_t() {}
Node_t( const T &init ) : elem( init ) {}
T elem;
} TSLIST_NODE_ALIGN_POST;
~CTSListWithFreeList()
{
Purge();
}
void Purge()
{
Node_t *pCurrent = Detach();
Node_t *pNext;
while ( pCurrent )
{
pNext = (Node_t *)pCurrent->Next;
delete pCurrent;
pCurrent = pNext;
}
pCurrent = (Node_t *)m_FreeList.Detach();
while ( pCurrent )
{
pNext = (Node_t *)pCurrent->Next;
delete pCurrent;
pCurrent = pNext;
}
}
void RemoveAll()
{
Node_t *pCurrent = Detach();
Node_t *pNext;
while ( pCurrent )
{
pNext = (Node_t *)pCurrent->Next;
m_FreeList.Push( pCurrent );
pCurrent = pNext;
}
}
Node_t *Push( Node_t *pNode )
{
return (Node_t *)CTSListBase::Push( pNode );
}
Node_t *Pop()
{
return (Node_t *)CTSListBase::Pop();
}
void PushItem( const T &init )
{
Node_t *pNode = (Node_t *)m_FreeList.Pop();
if ( !pNode )
{
pNode = new Node_t;
}
pNode->elem = init;
Push( pNode );
}
bool PopItem( T *pResult)
{
Node_t *pNode = Pop();
if ( !pNode )
return false;
*pResult = pNode->elem;
m_FreeList.Push( pNode );
return true;
}
Node_t *Detach()
{
return (Node_t *)CTSListBase::Detach();
}
void FreeNode( Node_t *pNode )
{
m_FreeList.Push( pNode );
}
private:
CTSListBase m_FreeList;
} TSLIST_HEAD_ALIGN_POST;
//-----------------------------------------------------------------------------
// Lock free queue
//
// A special consideration: the element type should be simple. This code
// actually dereferences freed nodes as part of pop, but later detects
// that. If the item in the queue is a complex type, only bad things can
// come of that. Also, therefore, if you're using Push/Pop instead of
// push item, be aware that the node memory cannot be freed until
// all threads that might have been popping have completed the pop.
// The PushItem()/PopItem() for handles this by keeping a persistent
// free list. Dont mix Push/PushItem. Note also nodes will be freed at the end,
// and are expected to have been allocated with operator new.
//-----------------------------------------------------------------------------
template <typename T, bool bTestOptimizer = false, bool bFreeList = true>
class TSLIST_HEAD_ALIGN CTSQueue
{
public:
struct TSLIST_NODE_ALIGN Node_t
{
Node_t() {}
Node_t( const T &init ) : elem( init ) {}
Node_t *pNext;
T elem;
} TSLIST_NODE_ALIGN_POST;
union TSLIST_HEAD_ALIGN NodeLink_t
{
struct Value_t
{
Node_t *pNode;
int32 sequence;
} value;
int64 value64;
} TSLIST_HEAD_ALIGN_POST;
CTSQueue()
{
COMPILE_TIME_ASSERT( sizeof(Node_t) >= sizeof(TSLNodeBase_t) );
if ( ((size_t)&m_Head) % TSLIST_HEAD_ALIGNMENT != 0 )
{
Error( "CTSQueue: Misaligned queue\n" );
DebuggerBreak();
}
if ( ((size_t)&m_Tail) % TSLIST_HEAD_ALIGNMENT != 0 )
{
Error( "CTSQueue: Misaligned queue\n" );
DebuggerBreak();
}
m_Count = 0;
m_Head.value.sequence = m_Tail.value.sequence = 0;
m_Head.value.pNode = m_Tail.value.pNode = new Node_t; // list always contains a dummy node
m_Head.value.pNode->pNext = End();
}
~CTSQueue()
{
Purge();
Assert( m_Count == 0 );
Assert( m_Head.value.pNode == m_Tail.value.pNode );
Assert( m_Head.value.pNode->pNext == End() );
delete m_Head.value.pNode;
}
// Note: Purge, RemoveAll, and Validate are *not* threadsafe
void Purge()
{
if ( IsDebug() )
{
Validate();
}
Node_t *pNode;
while ( ( pNode = Pop() ) != NULL )
{
delete pNode;
}
while ( bFreeList && ( pNode = (Node_t *)m_FreeNodes.Pop() ) != NULL )
{
delete pNode;
}
Assert( m_Count == 0 );
Assert( m_Head.value.pNode == m_Tail.value.pNode );
Assert( m_Head.value.pNode->pNext == End() );
m_Head.value.sequence = m_Tail.value.sequence = 0;
}
void RemoveAll()
{
if ( IsDebug() )
{
Validate();
}
Node_t *pNode;
while ( bFreeList && ( pNode = Pop() ) != NULL )
{
m_FreeNodes.Push( (TSLNodeBase_t *)pNode );
}
}
bool Validate()
{
bool bResult = true;
int nNodes = 0;
if ( m_Count == 0 )
{
if ( m_Head.value.pNode != m_Tail.value.pNode )
{
DebuggerBreakIfDebugging();
bResult = false;
}
}
Node_t *pNode = m_Head.value.pNode;
while ( pNode != End() )
{
nNodes++;
pNode = pNode->pNext;
}
nNodes--;// skip dummy node
if ( nNodes != m_Count )
{
DebuggerBreakIfDebugging();
bResult = false;
}
if ( !bResult )
{
Msg( "Corrupt CTSQueueDetected" );
}
return bResult;
}
void FinishPush( Node_t *pNode, const NodeLink_t &oldTail )
{
NodeLink_t newTail;
newTail.value.pNode = pNode;
newTail.value.sequence = oldTail.value.sequence + 1;
ThreadMemoryBarrier();
InterlockedCompareExchangeNodeLink( &m_Tail, newTail, oldTail );
}
Node_t *Push( Node_t *pNode )
{
#ifdef _DEBUG
if ( (size_t)pNode % TSLIST_NODE_ALIGNMENT != 0 )
{
Error( "CTSListBase: Misaligned node\n" );
DebuggerBreak();
}
#endif
NodeLink_t oldTail;
pNode->pNext = End();
for (;;)
{
oldTail.value.sequence = m_Tail.value.sequence;
oldTail.value.pNode = m_Tail.value.pNode;
if ( InterlockedCompareExchangeNode( &(oldTail.value.pNode->pNext), pNode, End() ) == End() )
{
break;
}
else
{
// Another thread is trying to push, help it along
FinishPush( oldTail.value.pNode->pNext, oldTail );
}
}
FinishPush( pNode, oldTail ); // This can fail if another thread pushed between the sequence and node grabs above. Later pushes or pops corrects
m_Count++;
return oldTail.value.pNode;
}
Node_t *Pop()
{
#define TSQUEUE_BAD_NODE_LINK ( (Node_t *)INT_TO_POINTER( 0xdeadbeef ) )
NodeLink_t * volatile pHead = &m_Head;
NodeLink_t * volatile pTail = &m_Tail;
Node_t * volatile * pHeadNode = &m_Head.value.pNode;
volatile int * volatile pHeadSequence = &m_Head.value.sequence;
Node_t * volatile * pTailNode = &pTail->value.pNode;
NodeLink_t head;
NodeLink_t newHead;
Node_t *pNext;
int tailSequence;
T elem;
for (;;)
{
head.value.sequence = *pHeadSequence; // must grab sequence first, which allows condition below to ensure pNext is valid
ThreadMemoryBarrier(); // need a barrier to prevent reordering of these assignments
head.value.pNode = *pHeadNode;
tailSequence = pTail->value.sequence;
pNext = head.value.pNode->pNext;
if ( pNext && head.value.sequence == *pHeadSequence ) // Checking pNext only to force optimizer to not reorder the assignment to pNext and the compare of the sequence
{
if ( bTestOptimizer )
{
if ( pNext == TSQUEUE_BAD_NODE_LINK )
{
Msg( "Bad node link detected\n" );
continue;
}
}
if ( head.value.pNode == *pTailNode )
{
if ( pNext == End() )
{
return NULL;
}
// Another thread is trying to push, help it along
NodeLink_t &oldTail = head; // just reuse local memory for head to build old tail
oldTail.value.sequence = tailSequence; // reuse head pNode
FinishPush( pNext, oldTail );
}
else if ( pNext != End() )
{
elem = pNext->elem; // NOTE: next could be a freed node here, by design
newHead.value.pNode = pNext;
newHead.value.sequence = head.value.sequence + 1;
if ( InterlockedCompareExchangeNodeLink( pHead, newHead, head ) )
{
ThreadMemoryBarrier();
if ( bTestOptimizer )
{
head.value.pNode->pNext = TSQUEUE_BAD_NODE_LINK;
}
break;
}
}
}
}
m_Count--;
head.value.pNode->elem = elem;
return head.value.pNode;
}
void FreeNode( Node_t *pNode )
{
if ( bFreeList )
{
m_FreeNodes.Push( (TSLNodeBase_t *)pNode );
}
else
{
delete pNode;
}
}
void PushItem( const T &init )
{
Node_t *pNode;
if ( bFreeList && ( pNode = (Node_t *)m_FreeNodes.Pop() ) != NULL )
{
pNode->elem = init;
}
else
{
pNode = new Node_t( init );
}
Push( pNode );
}
bool PopItem( T *pResult)
{
Node_t *pNode = Pop();
if ( !pNode )
return false;
*pResult = pNode->elem;
if ( bFreeList )
{
m_FreeNodes.Push( (TSLNodeBase_t *)pNode );
}
else
{
delete pNode;
}
return true;
}
int Count()
{
return m_Count;
}
private:
Node_t *End() { return (Node_t *)this; } // just need a unique signifier
#ifndef _WIN64
Node_t *InterlockedCompareExchangeNode( Node_t * volatile *ppNode, Node_t *value, Node_t *comperand )
{
return (Node_t *)::ThreadInterlockedCompareExchangePointer( (void **)ppNode, value, comperand );
}
bool InterlockedCompareExchangeNodeLink( NodeLink_t volatile *pLink, const NodeLink_t &value, const NodeLink_t &comperand )
{
return ThreadInterlockedAssignIf64( (int64 *)pLink, value.value64, comperand.value64 );
}
#else
Node_t *InterlockedCompareExchangeNode( Node_t * volatile *ppNode, Node_t *value, Node_t *comperand )
{
AUTO_LOCK( m_ExchangeMutex );
Node_t *retVal = *ppNode;
if ( *ppNode == comperand )
*ppNode = value;
return retVal;
}
bool InterlockedCompareExchangeNodeLink( NodeLink_t volatile *pLink, const NodeLink_t &value, const NodeLink_t &comperand )
{
AUTO_LOCK( m_ExchangeMutex );
if ( pLink->value64 == comperand.value64 )
{
pLink->value64 = value.value64;
return true;
}
return false;
}
CThreadFastMutex m_ExchangeMutex;
#endif
NodeLink_t m_Head;
NodeLink_t m_Tail;
CInterlockedInt m_Count;
CTSListBase m_FreeNodes;
} TSLIST_HEAD_ALIGN_POST;
#include "tier0/memdbgoff.h"
#endif // TSLIST_H