arm64 detect macros
This commit is contained in:
parent
5a2e752799
commit
fafb4c71dc
11 changed files with 66 additions and 28 deletions
|
@ -26,7 +26,7 @@ class CMaterialDict;
|
||||||
class IMaterial;
|
class IMaterial;
|
||||||
class IMaterialInternal;
|
class IMaterialInternal;
|
||||||
class FloatBitMap_t;
|
class FloatBitMap_t;
|
||||||
typedef int ShaderAPITextureHandle_t;
|
typedef intp ShaderAPITextureHandle_t;
|
||||||
struct MaterialSystem_SortInfo_t;
|
struct MaterialSystem_SortInfo_t;
|
||||||
typedef unsigned short MaterialHandle_t;
|
typedef unsigned short MaterialHandle_t;
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
#include "tier0/dbg.h"
|
#include "tier0/dbg.h"
|
||||||
#include "mathlib/mathlib.h"
|
#include "mathlib/mathlib.h"
|
||||||
#include "mathlib/vector.h"
|
#include "mathlib/vector.h"
|
||||||
#ifdef __arm__
|
#if defined(__arm__) || defined(__arm64__)
|
||||||
#include "sse2neon.h"
|
#include "sse2neon.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -180,7 +180,7 @@ float _SSE_RSqrtFast(float x)
|
||||||
Assert( s_bMathlibInitialized );
|
Assert( s_bMathlibInitialized );
|
||||||
|
|
||||||
float rroot;
|
float rroot;
|
||||||
#ifdef __arm__
|
#if defined(__arm__) || defined(__arm64__)
|
||||||
rroot = _SSE_RSqrtAccurate(x);
|
rroot = _SSE_RSqrtAccurate(x);
|
||||||
#elif _WIN32
|
#elif _WIN32
|
||||||
_asm
|
_asm
|
||||||
|
@ -217,7 +217,7 @@ float FASTCALL _SSE_VectorNormalize (Vector& vec)
|
||||||
// be much of a performance win, considering you will very likely miss 3 branch predicts in a row.
|
// be much of a performance win, considering you will very likely miss 3 branch predicts in a row.
|
||||||
if ( v[0] || v[1] || v[2] )
|
if ( v[0] || v[1] || v[2] )
|
||||||
{
|
{
|
||||||
#ifdef __arm__
|
#if defined(__arm__) || defined(__arm64__)
|
||||||
float rsqrt = _SSE_RSqrtAccurate( v[0] * v[0] + v[1] * v[1] + v[2] * v[2] );
|
float rsqrt = _SSE_RSqrtAccurate( v[0] * v[0] + v[1] * v[1] + v[2] * v[2] );
|
||||||
r[0] = v[0] * rsqrt;
|
r[0] = v[0] * rsqrt;
|
||||||
r[1] = v[1] * rsqrt;
|
r[1] = v[1] * rsqrt;
|
||||||
|
@ -296,7 +296,7 @@ void FASTCALL _SSE_VectorNormalizeFast (Vector& vec)
|
||||||
float _SSE_InvRSquared(const float* v)
|
float _SSE_InvRSquared(const float* v)
|
||||||
{
|
{
|
||||||
float inv_r2 = 1.f;
|
float inv_r2 = 1.f;
|
||||||
#ifdef __arm__
|
#if defined(__arm__) || defined(__arm64__)
|
||||||
return _SSE_RSqrtAccurate( FLT_EPSILON + v[0] * v[0] + v[1] * v[1] + v[2] * v[2] );
|
return _SSE_RSqrtAccurate( FLT_EPSILON + v[0] * v[0] + v[1] * v[1] + v[2] * v[2] );
|
||||||
#elif _WIN32
|
#elif _WIN32
|
||||||
_asm { // Intel SSE only routine
|
_asm { // Intel SSE only routine
|
||||||
|
@ -391,8 +391,10 @@ typedef __m64 v2si; // vector of 2 int (mmx)
|
||||||
|
|
||||||
void _SSE_SinCos(float x, float* s, float* c)
|
void _SSE_SinCos(float x, float* s, float* c)
|
||||||
{
|
{
|
||||||
#ifdef __arm__
|
#if defined(__arm__) || defined(__arm64__)
|
||||||
#if defined( POSIX )
|
#if defined( OSX )
|
||||||
|
__sincosf(x, s, c);
|
||||||
|
#elif defined( POSIX )
|
||||||
sincosf(x, s, c);
|
sincosf(x, s, c);
|
||||||
#else
|
#else
|
||||||
*s = sin( x );
|
*s = sin( x );
|
||||||
|
@ -605,7 +607,7 @@ void _SSE_SinCos(float x, float* s, float* c)
|
||||||
|
|
||||||
float _SSE_cos( float x )
|
float _SSE_cos( float x )
|
||||||
{
|
{
|
||||||
#ifdef __arm__
|
#if defined(__arm__) || defined(__arm64__)
|
||||||
return cos(x);
|
return cos(x);
|
||||||
#elif _WIN32
|
#elif _WIN32
|
||||||
float temp;
|
float temp;
|
||||||
|
|
|
@ -457,6 +457,8 @@ void inline SinCos( float radians, float *sine, float *cosine )
|
||||||
#elif defined( PLATFORM_WINDOWS_PC64 )
|
#elif defined( PLATFORM_WINDOWS_PC64 )
|
||||||
*sine = sin( radians );
|
*sine = sin( radians );
|
||||||
*cosine = cos( radians );
|
*cosine = cos( radians );
|
||||||
|
#elif defined( OSX )
|
||||||
|
__sincosf(radians, sine, cosine);
|
||||||
#elif defined( POSIX )
|
#elif defined( POSIX )
|
||||||
sincosf(radians, sine, cosine);
|
sincosf(radians, sine, cosine);
|
||||||
#endif
|
#endif
|
||||||
|
@ -1213,7 +1215,7 @@ FORCEINLINE int RoundFloatToInt(float f)
|
||||||
};
|
};
|
||||||
flResult = __fctiw( f );
|
flResult = __fctiw( f );
|
||||||
return pResult[1];
|
return pResult[1];
|
||||||
#elif defined (__arm__)
|
#elif defined (__arm__) || defined (__arm64__)
|
||||||
return (int)(f + 0.5f);
|
return (int)(f + 0.5f);
|
||||||
#else
|
#else
|
||||||
#error Unknown architecture
|
#error Unknown architecture
|
||||||
|
@ -1245,7 +1247,7 @@ FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f)
|
||||||
Assert( pIntResult[1] >= 0 );
|
Assert( pIntResult[1] >= 0 );
|
||||||
return pResult[1];
|
return pResult[1];
|
||||||
#else // !X360
|
#else // !X360
|
||||||
#ifdef __arm__
|
#if defined(__arm__) || defined(__arm64__)
|
||||||
return (unsigned long)(f + 0.5f);
|
return (unsigned long)(f + 0.5f);
|
||||||
#elif defined( PLATFORM_WINDOWS_PC64 )
|
#elif defined( PLATFORM_WINDOWS_PC64 )
|
||||||
uint nRet = ( uint ) f;
|
uint nRet = ( uint ) f;
|
||||||
|
@ -2168,7 +2170,7 @@ inline bool CloseEnough( const Vector &a, const Vector &b, float epsilon = EQUAL
|
||||||
// Fast compare
|
// Fast compare
|
||||||
// maxUlps is the maximum error in terms of Units in the Last Place. This
|
// maxUlps is the maximum error in terms of Units in the Last Place. This
|
||||||
// specifies how big an error we are willing to accept in terms of the value
|
// specifies how big an error we are willing to accept in terms of the value
|
||||||
// of the least significant digit of the floating point number’s
|
// of the least significant digit of the floating point number’s
|
||||||
// representation. maxUlps can also be interpreted in terms of how many
|
// representation. maxUlps can also be interpreted in terms of how many
|
||||||
// representable floats we are willing to accept between A and B.
|
// representable floats we are willing to accept between A and B.
|
||||||
// This function will allow maxUlps-1 floats between A and B.
|
// This function will allow maxUlps-1 floats between A and B.
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
|
|
||||||
#if defined( _X360 )
|
#if defined( _X360 )
|
||||||
#include <xboxmath.h>
|
#include <xboxmath.h>
|
||||||
#elif defined(__arm__)
|
#elif defined(__arm__) || defined(__arm64__)
|
||||||
#include "sse2neon.h"
|
#include "sse2neon.h"
|
||||||
#else
|
#else
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
|
|
|
@ -654,10 +654,10 @@ inline void Vector4DWeightMAD( vec_t w, Vector4DAligned const& vInA, Vector4DAli
|
||||||
vOutB.z += vInB.z * w;
|
vOutB.z += vInB.z * w;
|
||||||
vOutB.w += vInB.w * w;
|
vOutB.w += vInB.w * w;
|
||||||
#else
|
#else
|
||||||
__vector4 temp;
|
__vector4 temp;
|
||||||
|
|
||||||
temp = __lvlx( &w, 0 );
|
temp = __lvlx( &w, 0 );
|
||||||
temp = __vspltw( temp, 0 );
|
temp = __vspltw( temp, 0 );
|
||||||
|
|
||||||
vOutA.AsM128() = __vmaddfp( vInA.AsM128(), temp, vOutA.AsM128() );
|
vOutA.AsM128() = __vmaddfp( vInA.AsM128(), temp, vOutA.AsM128() );
|
||||||
vOutB.AsM128() = __vmaddfp( vInB.AsM128(), temp, vOutB.AsM128() );
|
vOutB.AsM128() = __vmaddfp( vInB.AsM128(), temp, vOutB.AsM128() );
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
//========= Copyright © 1996-2008, Valve LLC, All rights reserved. ============
|
//========= Copyright © 1996-2008, Valve LLC, All rights reserved. ============
|
||||||
//
|
//
|
||||||
// Purpose:
|
// Purpose:
|
||||||
//
|
//
|
||||||
|
@ -24,7 +24,7 @@ typedef unsigned char uint8;
|
||||||
#define POSIX 1
|
#define POSIX 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__x86_64__) || defined(_WIN64)
|
#if defined(__x86_64__) || defined(_WIN64) || defined(__arm64__)
|
||||||
#define X64BITS
|
#define X64BITS
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
// Temporarily turn off Valve defines
|
// Temporarily turn off Valve defines
|
||||||
#include "tier0/valve_off.h"
|
#include "tier0/valve_off.h"
|
||||||
|
|
||||||
#if !defined(_WCHAR_T_DEFINED) && !defined(GNUC)
|
#if !defined(_WCHAR_T_DEFINED) && !defined( __WCHAR_TYPE__ ) && !defined(GNUC)
|
||||||
typedef unsigned short wchar_t;
|
typedef unsigned short wchar_t;
|
||||||
#define _WCHAR_T_DEFINED
|
#define _WCHAR_T_DEFINED
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -22,7 +22,7 @@ const tchar* GetProcessorVendorId();
|
||||||
|
|
||||||
static bool cpuid(unsigned long function, unsigned long& out_eax, unsigned long& out_ebx, unsigned long& out_ecx, unsigned long& out_edx)
|
static bool cpuid(unsigned long function, unsigned long& out_eax, unsigned long& out_ebx, unsigned long& out_ecx, unsigned long& out_edx)
|
||||||
{
|
{
|
||||||
#if defined (__arm__) || defined( _X360 )
|
#if defined (__arm__) || defined (__arm64__) || defined( _X360 )
|
||||||
return false;
|
return false;
|
||||||
#elif defined(GNUC)
|
#elif defined(GNUC)
|
||||||
asm("mov %%ebx, %%esi\n\t"
|
asm("mov %%ebx, %%esi\n\t"
|
||||||
|
|
|
@ -99,6 +99,15 @@ uint64 GetCPUFreqFromPROC()
|
||||||
|
|
||||||
uint64 CalculateCPUFreq()
|
uint64 CalculateCPUFreq()
|
||||||
{
|
{
|
||||||
|
#ifdef __APPLE__
|
||||||
|
uint64 freq_hz = 0;
|
||||||
|
size_t freq_size = sizeof(freq_hz);
|
||||||
|
int retval = sysctlbyname("hw.cpufrequency_max", &freq_hz, &freq_size, NULL, 0);
|
||||||
|
// MoeMod: TODO: don't know how to get the CPU frequency on Apple Silicon
|
||||||
|
if(!freq_hz)
|
||||||
|
freq_hz = 3200000;
|
||||||
|
return freq_hz;
|
||||||
|
#else
|
||||||
// Try to open cpuinfo_max_freq. If the kernel was built with cpu scaling support disabled, this will fail.
|
// Try to open cpuinfo_max_freq. If the kernel was built with cpu scaling support disabled, this will fail.
|
||||||
FILE *fp = fopen( "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r" );
|
FILE *fp = fopen( "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r" );
|
||||||
if ( fp )
|
if ( fp )
|
||||||
|
@ -118,8 +127,9 @@ uint64 CalculateCPUFreq()
|
||||||
return retVal * 1000;
|
return retVal * 1000;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef __arm__
|
#if !defined(__arm__) && !defined(__arm64__)
|
||||||
// Compute the period. Loop until we get 3 consecutive periods that
|
// Compute the period. Loop until we get 3 consecutive periods that
|
||||||
// are the same to within a small error. The error is chosen
|
// are the same to within a small error. The error is chosen
|
||||||
// to be +/- 0.02% on a P-200.
|
// to be +/- 0.02% on a P-200.
|
||||||
|
|
|
@ -6,24 +6,48 @@
|
||||||
// $NoKeywords: $
|
// $NoKeywords: $
|
||||||
//=============================================================================//
|
//=============================================================================//
|
||||||
|
|
||||||
|
#include "platform.h"
|
||||||
|
|
||||||
#if defined __SANITIZE_ADDRESS__
|
#if defined __SANITIZE_ADDRESS__
|
||||||
bool CheckMMXTechnology(void) { return false; }
|
bool CheckMMXTechnology(void) { return false; }
|
||||||
bool CheckSSETechnology(void) { return false; }
|
bool CheckSSETechnology(void) { return false; }
|
||||||
bool CheckSSE2Technology(void) { return false; }
|
bool CheckSSE2Technology(void) { return false; }
|
||||||
bool Check3DNowTechnology(void) { return false; }
|
bool Check3DNowTechnology(void) { return false; }
|
||||||
#elif defined (__arm__)
|
#elif defined (__arm__) || defined (__arm64__)
|
||||||
bool CheckMMXTechnology(void) { return false; }
|
bool CheckMMXTechnology(void) { return false; }
|
||||||
bool CheckSSETechnology(void) { return false; }
|
bool CheckSSETechnology(void) { return false; }
|
||||||
bool CheckSSE2Technology(void) { return false; }
|
bool CheckSSE2Technology(void) { return false; }
|
||||||
bool Check3DNowTechnology(void) { return false; }
|
bool Check3DNowTechnology(void) { return false; }
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define cpuid(in,a,b,c,d) \
|
static void cpuid(uint32 function, uint32& out_eax, uint32& out_ebx, uint32& out_ecx, uint32& out_edx)
|
||||||
asm("pushl %%ebx\n\t" "cpuid\n\t" "movl %%ebx,%%esi\n\t" "pop %%ebx": "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (in));
|
{
|
||||||
|
#if defined(PLATFORM_64BITS)
|
||||||
|
asm("mov %%rbx, %%rsi\n\t"
|
||||||
|
"cpuid\n\t"
|
||||||
|
"xchg %%rsi, %%rbx"
|
||||||
|
: "=a" (out_eax),
|
||||||
|
"=S" (out_ebx),
|
||||||
|
"=c" (out_ecx),
|
||||||
|
"=d" (out_edx)
|
||||||
|
: "a" (function)
|
||||||
|
);
|
||||||
|
#else
|
||||||
|
asm("mov %%ebx, %%esi\n\t"
|
||||||
|
"cpuid\n\t"
|
||||||
|
"xchg %%esi, %%ebx"
|
||||||
|
: "=a" (out_eax),
|
||||||
|
"=S" (out_ebx),
|
||||||
|
"=c" (out_ecx),
|
||||||
|
"=d" (out_edx)
|
||||||
|
: "a" (function)
|
||||||
|
);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
bool CheckMMXTechnology(void)
|
bool CheckMMXTechnology(void)
|
||||||
{
|
{
|
||||||
unsigned long eax,ebx,edx,unused;
|
uint32 eax,ebx,edx,unused;
|
||||||
cpuid(1,eax,ebx,unused,edx);
|
cpuid(1,eax,ebx,unused,edx);
|
||||||
|
|
||||||
return edx & 0x800000;
|
return edx & 0x800000;
|
||||||
|
@ -31,7 +55,7 @@ bool CheckMMXTechnology(void)
|
||||||
|
|
||||||
bool CheckSSETechnology(void)
|
bool CheckSSETechnology(void)
|
||||||
{
|
{
|
||||||
unsigned long eax,ebx,edx,unused;
|
uint32 eax,ebx,edx,unused;
|
||||||
cpuid(1,eax,ebx,unused,edx);
|
cpuid(1,eax,ebx,unused,edx);
|
||||||
|
|
||||||
return edx & 0x2000000L;
|
return edx & 0x2000000L;
|
||||||
|
@ -39,7 +63,7 @@ bool CheckSSETechnology(void)
|
||||||
|
|
||||||
bool CheckSSE2Technology(void)
|
bool CheckSSE2Technology(void)
|
||||||
{
|
{
|
||||||
unsigned long eax,ebx,edx,unused;
|
uint32 eax,ebx,edx,unused;
|
||||||
cpuid(1,eax,ebx,unused,edx);
|
cpuid(1,eax,ebx,unused,edx);
|
||||||
|
|
||||||
return edx & 0x04000000;
|
return edx & 0x04000000;
|
||||||
|
@ -47,7 +71,7 @@ bool CheckSSE2Technology(void)
|
||||||
|
|
||||||
bool Check3DNowTechnology(void)
|
bool Check3DNowTechnology(void)
|
||||||
{
|
{
|
||||||
unsigned long eax, unused;
|
uint32 eax, unused;
|
||||||
cpuid(0x80000000,eax,unused,unused,unused);
|
cpuid(0x80000000,eax,unused,unused,unused);
|
||||||
|
|
||||||
if ( eax > 0x80000000L )
|
if ( eax > 0x80000000L )
|
||||||
|
|
|
@ -87,7 +87,7 @@ int64 CReliableTimer::GetPerformanceCountNow()
|
||||||
uint64 ulNow;
|
uint64 ulNow;
|
||||||
SYS_TIMEBASE_GET( ulNow );
|
SYS_TIMEBASE_GET( ulNow );
|
||||||
return ulNow;
|
return ulNow;
|
||||||
#elif defined( __arm__ ) && defined (POSIX)
|
#elif (defined( __arm__ ) || defined( __arm64__ )) && defined (POSIX)
|
||||||
struct timespec ts;
|
struct timespec ts;
|
||||||
clock_gettime(CLOCK_REALTIME, &ts);
|
clock_gettime(CLOCK_REALTIME, &ts);
|
||||||
return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
|
return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
|
||||||
|
|
Loading…
Reference in a new issue