665 lines
15 KiB
C++
665 lines
15 KiB
C++
//========= Copyright Valve Corporation, All rights reserved. ============//
|
|
//
|
|
// Purpose:
|
|
//
|
|
// $NoKeywords: $
|
|
//
|
|
//===========================================================================//
|
|
|
|
#ifdef _WIN32
|
|
#include <windows.h>
|
|
|
|
#pragma warning( disable : 4530 ) // warning: exception handler -GX option
|
|
|
|
#include "tier0/valve_off.h"
|
|
#include "tier0/pmelib.h"
|
|
#if _MSC_VER >=1300
|
|
#else
|
|
#include "winioctl.h"
|
|
#endif
|
|
#include "tier0/valve_on.h"
|
|
|
|
#include "tier0/ioctlcodes.h"
|
|
|
|
// NOTE: This has to be the last file included!
|
|
#include "tier0/memdbgon.h"
|
|
|
|
|
|
PME* PME::_singleton = 0;
|
|
|
|
// Single interface.
|
|
PME* PME::Instance()
|
|
{
|
|
if (_singleton == 0)
|
|
{
|
|
_singleton = new PME;
|
|
}
|
|
return _singleton;
|
|
}
|
|
|
|
//---------------------------------------------------------------------------
|
|
// Open the device driver and detect the processor
|
|
//---------------------------------------------------------------------------
|
|
HRESULT PME::Init( void )
|
|
{
|
|
OSVERSIONINFO OS;
|
|
|
|
if ( bDriverOpen )
|
|
return E_DRIVER_ALREADY_OPEN;
|
|
|
|
switch( vendor )
|
|
{
|
|
case INTEL:
|
|
case AMD:
|
|
break;
|
|
default:
|
|
bDriverOpen = FALSE; // not an Intel or Athlon processor so return false
|
|
return E_UNKNOWN_CPU_VENDOR;
|
|
}
|
|
|
|
//-----------------------------------------------------------------------
|
|
// Get the operating system version
|
|
//-----------------------------------------------------------------------
|
|
OS.dwOSVersionInfoSize = sizeof( OSVERSIONINFO );
|
|
GetVersionEx( &OS );
|
|
|
|
if ( OS.dwPlatformId == VER_PLATFORM_WIN32_NT )
|
|
{
|
|
hFile = CreateFile( // WINDOWS NT
|
|
"\\\\.\\GDPERF",
|
|
GENERIC_READ,
|
|
0,
|
|
NULL,
|
|
OPEN_EXISTING,
|
|
FILE_ATTRIBUTE_NORMAL,
|
|
NULL);
|
|
}
|
|
else
|
|
{
|
|
hFile = CreateFile( // WINDOWS 95
|
|
"\\\\.\\GDPERF.VXD",
|
|
GENERIC_READ,
|
|
0,
|
|
NULL,
|
|
OPEN_EXISTING,
|
|
FILE_ATTRIBUTE_NORMAL,
|
|
NULL);
|
|
}
|
|
|
|
if (hFile == INVALID_HANDLE_VALUE )
|
|
return E_CANT_OPEN_DRIVER;
|
|
|
|
|
|
bDriverOpen = TRUE;
|
|
|
|
|
|
//-------------------------------------------------------------------
|
|
// We have successfully opened the device driver, get the family
|
|
// of the processor.
|
|
//-------------------------------------------------------------------
|
|
|
|
|
|
|
|
//-------------------------------------------------------------------
|
|
// We need to write to counter 0 on the pro family to enable both
|
|
// of the performance counters. We write to both so they start in a
|
|
// known state. For the pentium this is not necessary.
|
|
//-------------------------------------------------------------------
|
|
if (vendor == INTEL && version.Family == PENTIUMPRO_FAMILY)
|
|
{
|
|
SelectP5P6PerformanceEvent(P6_CLOCK, 0, TRUE, TRUE);
|
|
SelectP5P6PerformanceEvent(P6_CLOCK, 1, TRUE, TRUE);
|
|
}
|
|
|
|
return S_OK;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
//---------------------------------------------------------------------------
|
|
// Close the device driver
|
|
//---------------------------------------------------------------------------
|
|
HRESULT PME::Close(void)
|
|
{
|
|
if (bDriverOpen == false) // driver is not going
|
|
return E_DRIVER_NOT_OPEN;
|
|
|
|
bDriverOpen = false;
|
|
|
|
if (hFile) // if we have no driver handle, return FALSE
|
|
{
|
|
BOOL result = CloseHandle(hFile);
|
|
|
|
hFile = NULL;
|
|
return result ? S_OK : HRESULT_FROM_WIN32( GetLastError() );
|
|
}
|
|
else
|
|
return E_DRIVER_NOT_OPEN;
|
|
|
|
|
|
}
|
|
|
|
//---------------------------------------------------------------------------
|
|
// Select the event to monitor with counter 0
|
|
//
|
|
HRESULT PME::SelectP5P6PerformanceEvent(uint32 dw_event, uint32 dw_counter,
|
|
bool b_user, bool b_kernel)
|
|
{
|
|
HRESULT hr = S_OK;
|
|
|
|
if (dw_counter>1) // is the counter valid
|
|
return E_BAD_COUNTER;
|
|
|
|
if (bDriverOpen == false) // driver is not going
|
|
return E_DRIVER_NOT_OPEN;
|
|
|
|
if ( ((dw_event>>28)&0xF) != (uint32)version.Family)
|
|
{
|
|
return E_ILLEGAL_OPERATION; // this operation is not for this processor
|
|
}
|
|
|
|
if ( (((dw_event & 0x300)>>8) & (dw_counter+1)) == 0 )
|
|
{
|
|
return E_ILLEGAL_OPERATION; // this operation is not for this counter
|
|
}
|
|
|
|
switch(version.Family)
|
|
{
|
|
case PENTIUM_FAMILY:
|
|
{
|
|
uint64 i64_cesr;
|
|
int i_kernel_bit,i_user_bit;
|
|
BYTE u1_event = (BYTE)((dw_event & (0x3F0000))>>16);
|
|
|
|
if (dw_counter==0) // the kernel and user mode bits depend on
|
|
{ // counter being used.
|
|
i_kernel_bit = 6;
|
|
i_user_bit = 7;
|
|
}
|
|
else
|
|
{
|
|
i_kernel_bit = 22;
|
|
i_user_bit = 23;
|
|
}
|
|
|
|
ReadMSR(0x11, &i64_cesr); // get current P5 event select (cesr)
|
|
|
|
// top 32bits of cesr are not valid so ignore them
|
|
i64_cesr &= ((dw_counter == 0)?0xffff0000:0x0000ffff);
|
|
WriteMSR(0x11,i64_cesr); // stop the counter
|
|
WriteMSR((dw_counter==0)?0x12:0x13,0ui64); // clear the p.counter
|
|
|
|
// set the user and kernel mode bits
|
|
i64_cesr |= ( b_user?(1<<7):0 ) | ( b_kernel?(1<<6):0 );
|
|
|
|
// is this the special P5 value that signals count clocks??
|
|
if (u1_event == 0x3f)
|
|
{
|
|
WriteMSR(0x11, i64_cesr|0x100); // Count clocks
|
|
}
|
|
else
|
|
{
|
|
WriteMSR(0x11, i64_cesr|u1_event); // Count events
|
|
}
|
|
|
|
}
|
|
break;
|
|
|
|
case PENTIUMPRO_FAMILY:
|
|
|
|
{
|
|
BYTE u1_event = (BYTE)((dw_event & (0xFF0000))>>16);
|
|
BYTE u1_mask = (BYTE)((dw_event & 0xFF));
|
|
|
|
// Event select 0 and 1 are identical.
|
|
hr = WriteMSR((dw_counter==0)?0x186:0x187,
|
|
|
|
|
|
uint64((u1_event | (b_user?(1<<16):0) | (b_kernel?(1<<17):0) | (1<<22) | (1<<18) | (u1_mask<<8)) )
|
|
);
|
|
}
|
|
break;
|
|
|
|
case PENTIUM4_FAMILY:
|
|
// use the p4 path
|
|
break;
|
|
|
|
default:
|
|
return E_UNKNOWN_CPU;
|
|
}
|
|
|
|
return hr;
|
|
}
|
|
|
|
//---------------------------------------------------------------------------
|
|
// Read model specific register
|
|
//---------------------------------------------------------------------------
|
|
HRESULT PME::ReadMSR(uint32 dw_reg, int64 * pi64_value)
|
|
{
|
|
DWORD dw_ret_len;
|
|
|
|
if (bDriverOpen == false) // driver is not going
|
|
return E_DRIVER_NOT_OPEN;
|
|
|
|
BOOL result = DeviceIoControl
|
|
(
|
|
hFile, // Handle to device
|
|
(DWORD) IOCTL_READ_MSR, // IO Control code for Read
|
|
&dw_reg, // Input Buffer to driver.
|
|
sizeof(uint32), // Length of input buffer.
|
|
pi64_value, // Output Buffer from driver.
|
|
sizeof(int64), // Length of output buffer in bytes.
|
|
&dw_ret_len, // Bytes placed in output buffer.
|
|
NULL // NULL means wait till op. completes
|
|
);
|
|
|
|
HRESULT hr = result ? S_OK : HRESULT_FROM_WIN32( GetLastError() );
|
|
if (hr == S_OK && dw_ret_len != sizeof(int64))
|
|
hr = E_BAD_DATA;
|
|
|
|
return hr;
|
|
}
|
|
|
|
HRESULT PME::ReadMSR(uint32 dw_reg, uint64 * pi64_value)
|
|
{
|
|
DWORD dw_ret_len;
|
|
|
|
if (bDriverOpen == false) // driver is not going
|
|
return E_DRIVER_NOT_OPEN;
|
|
|
|
BOOL result = DeviceIoControl
|
|
(
|
|
hFile, // Handle to device
|
|
(DWORD) IOCTL_READ_MSR, // IO Control code for Read
|
|
&dw_reg, // Input Buffer to driver.
|
|
sizeof(uint32), // Length of input buffer.
|
|
pi64_value, // Output Buffer from driver.
|
|
sizeof(uint64), // Length of output buffer in bytes.
|
|
&dw_ret_len, // Bytes placed in output buffer.
|
|
NULL // NULL means wait till op. completes
|
|
);
|
|
|
|
HRESULT hr = result ? S_OK : HRESULT_FROM_WIN32( GetLastError() );
|
|
if (hr == S_OK && dw_ret_len != sizeof(uint64))
|
|
hr = E_BAD_DATA;
|
|
|
|
return hr;
|
|
}
|
|
|
|
//---------------------------------------------------------------------------
|
|
// Write model specific register
|
|
//---------------------------------------------------------------------------
|
|
HRESULT PME::WriteMSR(uint32 dw_reg, const int64 & i64_value)
|
|
{
|
|
DWORD dw_buffer[3];
|
|
DWORD dw_ret_len;
|
|
|
|
if (bDriverOpen == false) // driver is not going
|
|
return E_DRIVER_NOT_OPEN;
|
|
|
|
dw_buffer[0] = dw_reg; // setup the 12 byte input
|
|
*((int64*)(&dw_buffer[1]))= i64_value;
|
|
|
|
BOOL result = DeviceIoControl
|
|
(
|
|
hFile, // Handle to device
|
|
(DWORD) IOCTL_WRITE_MSR, // IO Control code for Read
|
|
dw_buffer, // Input Buffer to driver.
|
|
12, // Length of Input buffer
|
|
NULL, // Buffer from driver, None for WRMSR
|
|
0, // Length of output buffer in bytes.
|
|
&dw_ret_len, // Bytes placed in DataBuffer.
|
|
NULL // NULL means wait till op. completes.
|
|
);
|
|
|
|
HRESULT hr = result ? S_OK : HRESULT_FROM_WIN32( GetLastError() );
|
|
if (hr == S_OK && dw_ret_len != 0)
|
|
hr = E_BAD_DATA;
|
|
|
|
return hr;
|
|
}
|
|
|
|
|
|
|
|
HRESULT PME::WriteMSR(uint32 dw_reg, const uint64 & i64_value)
|
|
{
|
|
DWORD dw_buffer[3];
|
|
DWORD dw_ret_len;
|
|
|
|
if (bDriverOpen == false) // driver is not going
|
|
return E_DRIVER_NOT_OPEN;
|
|
|
|
dw_buffer[0] = dw_reg; // setup the 12 byte input
|
|
*((uint64*)(&dw_buffer[1]))= i64_value;
|
|
|
|
BOOL result = DeviceIoControl
|
|
(
|
|
hFile, // Handle to device
|
|
(DWORD) IOCTL_WRITE_MSR, // IO Control code for Read
|
|
dw_buffer, // Input Buffer to driver.
|
|
12, // Length of Input buffer
|
|
NULL, // Buffer from driver, None for WRMSR
|
|
0, // Length of output buffer in bytes.
|
|
&dw_ret_len, // Bytes placed in DataBuffer.
|
|
NULL // NULL means wait till op. completes.
|
|
);
|
|
|
|
//E_POINTER
|
|
HRESULT hr = result ? S_OK : HRESULT_FROM_WIN32( GetLastError() );
|
|
if (hr == S_OK && dw_ret_len != 0)
|
|
hr = E_BAD_DATA;
|
|
|
|
return hr;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma hdrstop
|
|
|
|
|
|
|
|
|
|
//---------------------------------------------------------------------------
|
|
// Return the frequency of the processor in Hz.
|
|
//
|
|
|
|
double PME::GetCPUClockSpeedFast(void)
|
|
{
|
|
int64 i64_perf_start, i64_perf_freq, i64_perf_end;
|
|
int64 i64_clock_start,i64_clock_end;
|
|
double d_loop_period, d_clock_freq;
|
|
|
|
//-----------------------------------------------------------------------
|
|
// Query the performance of the Windows high resolution timer.
|
|
//-----------------------------------------------------------------------
|
|
QueryPerformanceFrequency((LARGE_INTEGER*)&i64_perf_freq);
|
|
|
|
//-----------------------------------------------------------------------
|
|
// Query the current value of the Windows high resolution timer.
|
|
//-----------------------------------------------------------------------
|
|
QueryPerformanceCounter((LARGE_INTEGER*)&i64_perf_start);
|
|
i64_perf_end = 0;
|
|
|
|
//-----------------------------------------------------------------------
|
|
// Time of loop of 250000 windows cycles with RDTSC
|
|
//-----------------------------------------------------------------------
|
|
RDTSC(i64_clock_start);
|
|
while(i64_perf_end<i64_perf_start+250000)
|
|
{
|
|
QueryPerformanceCounter((LARGE_INTEGER*)&i64_perf_end);
|
|
}
|
|
RDTSC(i64_clock_end);
|
|
|
|
//-----------------------------------------------------------------------
|
|
// Caclulate the frequency of the RDTSC timer and therefore calculate
|
|
// the frequency of the processor.
|
|
//-----------------------------------------------------------------------
|
|
i64_clock_end -= i64_clock_start;
|
|
|
|
d_loop_period = ((double)(i64_perf_freq)) / 250000.0;
|
|
d_clock_freq = ((double)(i64_clock_end & 0xffffffff))*d_loop_period;
|
|
|
|
return (float)d_clock_freq;
|
|
}
|
|
|
|
|
|
|
|
// takes 1 second
|
|
double PME::GetCPUClockSpeedSlow(void)
|
|
{
|
|
|
|
if (m_CPUClockSpeed != 0)
|
|
return m_CPUClockSpeed;
|
|
|
|
unsigned long start_ms, stop_ms;
|
|
unsigned long start_tsc,stop_tsc;
|
|
|
|
// boosting priority helps with noise. its optional and i dont think
|
|
// it helps all that much
|
|
|
|
PME * pme = PME::Instance();
|
|
|
|
pme->SetProcessPriority(ProcessPriorityHigh);
|
|
|
|
// wait for millisecond boundary
|
|
start_ms = GetTickCount() + 5;
|
|
while (start_ms <= GetTickCount());
|
|
|
|
// read timestamp (you could use QueryPerformanceCounter in hires mode if you want)
|
|
#ifdef COMPILER_MSVC64
|
|
RDTSC(start_tsc);
|
|
#else
|
|
__asm
|
|
{
|
|
rdtsc
|
|
mov dword ptr [start_tsc+0],eax
|
|
mov dword ptr [start_tsc+4],edx
|
|
}
|
|
#endif
|
|
|
|
// wait for end
|
|
stop_ms = start_ms + 1000; // longer wait gives better resolution
|
|
while (stop_ms > GetTickCount());
|
|
|
|
// read timestamp (you could use QueryPerformanceCounter in hires mode if you want)
|
|
#ifdef COMPILER_MSVC64
|
|
RDTSC(stop_tsc);
|
|
#else
|
|
__asm
|
|
{
|
|
rdtsc
|
|
mov dword ptr [stop_tsc+0],eax
|
|
mov dword ptr [stop_tsc+4],edx
|
|
}
|
|
#endif
|
|
|
|
|
|
// normalize priority
|
|
pme->SetProcessPriority(ProcessPriorityNormal);
|
|
|
|
// return clock speed
|
|
// optionally here you could round to known clocks, like speeds that are multimples
|
|
// of 100, 133, 166, etc.
|
|
m_CPUClockSpeed = ((stop_tsc - start_tsc) * 1000.0) / (double)(stop_ms - start_ms);
|
|
return m_CPUClockSpeed;
|
|
|
|
}
|
|
|
|
|
|
|
|
const unsigned short cccr_escr_map[NCOUNTERS][8] =
|
|
{
|
|
{
|
|
0x3B2,
|
|
0x3B4,
|
|
0x3AA,
|
|
0x3B6,
|
|
0x3AC,
|
|
0x3C8,
|
|
0x3A2,
|
|
0x3A0,
|
|
},
|
|
{
|
|
0x3B2,
|
|
0x3B4,
|
|
0x3AA,
|
|
0x3B6,
|
|
0x3AC,
|
|
0x3C8,
|
|
0x3A2,
|
|
0x3A0,
|
|
},
|
|
{
|
|
0x3B3,
|
|
0x3B5,
|
|
0x3AB,
|
|
0x3B7,
|
|
0x3AD,
|
|
0x3C9,
|
|
0x3A3,
|
|
0x3A1,
|
|
},
|
|
{
|
|
0x3B3,
|
|
0x3B5,
|
|
0x3AB,
|
|
0x3B7,
|
|
0x3AD,
|
|
0x3C9,
|
|
0x3A3,
|
|
0x3A1,
|
|
},
|
|
{
|
|
|
|
0x3C0,
|
|
0x3C4,
|
|
0x3C2,
|
|
},
|
|
{
|
|
0x3C0,
|
|
0x3C4,
|
|
0x3C2,
|
|
},
|
|
{
|
|
0x3C1,
|
|
0x3C5,
|
|
0x3C3,
|
|
},
|
|
{
|
|
0x3C1,
|
|
0x3C5,
|
|
0x3C3,
|
|
},
|
|
{
|
|
0x3A6,
|
|
0x3A4,
|
|
0x3AE,
|
|
0x3B0,
|
|
0,
|
|
0x3A8,
|
|
},
|
|
{
|
|
0x3A6,
|
|
0x3A4,
|
|
0x3AE,
|
|
0x3B0,
|
|
0,
|
|
0x3A8,
|
|
},
|
|
{
|
|
|
|
0x3A7,
|
|
0x3A5,
|
|
0x3AF,
|
|
0x3B1,
|
|
0,
|
|
0x3A9,
|
|
},
|
|
{
|
|
|
|
0x3A7,
|
|
0x3A5,
|
|
0x3AF,
|
|
0x3B1,
|
|
0,
|
|
0x3A9,
|
|
},
|
|
{
|
|
|
|
0x3BA,
|
|
0x3CA,
|
|
0x3BC,
|
|
0x3BE,
|
|
0x3B8,
|
|
0x3CC,
|
|
0x3E0,
|
|
},
|
|
{
|
|
|
|
0x3BA,
|
|
0x3CA,
|
|
0x3BC,
|
|
0x3BE,
|
|
0x3B8,
|
|
0x3CC,
|
|
0x3E0,
|
|
},
|
|
{
|
|
|
|
0x3BB,
|
|
0x3CB,
|
|
0x3BD,
|
|
0,
|
|
0x3B9,
|
|
0x3CD,
|
|
0x3E1,
|
|
},
|
|
{
|
|
|
|
|
|
0x3BB,
|
|
0x3CB,
|
|
0x3BD,
|
|
0,
|
|
0x3B9,
|
|
0x3CD,
|
|
0x3E1,
|
|
},
|
|
{
|
|
0x3BA,
|
|
0x3CA,
|
|
0x3BC,
|
|
0x3BE,
|
|
0x3B8,
|
|
0x3CC,
|
|
0x3E0,
|
|
},
|
|
{
|
|
|
|
0x3BB,
|
|
0x3CB,
|
|
0x3BD,
|
|
0,
|
|
0x3B9,
|
|
0x3CD,
|
|
0x3E1,
|
|
},
|
|
};
|
|
|
|
#ifdef DBGFLAG_VALIDATE
|
|
//-----------------------------------------------------------------------------
|
|
// Purpose: Ensure that all of our internal structures are consistent, and
|
|
// account for all memory that we've allocated.
|
|
// Input: validator - Our global validator object
|
|
// pchName - Our name (typically a member var in our container)
|
|
//-----------------------------------------------------------------------------
|
|
void PME::Validate( CValidator &validator, tchar *pchName )
|
|
{
|
|
validator.Push( _T("PME"), this, pchName );
|
|
|
|
validator.ClaimMemory( this );
|
|
|
|
validator.ClaimMemory( cache );
|
|
|
|
validator.ClaimMemory( ( void * ) vendor_name.c_str( ) );
|
|
validator.ClaimMemory( ( void * ) brand.c_str( ) );
|
|
|
|
validator.Pop( );
|
|
}
|
|
#endif // DBGFLAG_VALIDATE
|
|
|
|
#pragma warning( default : 4530 ) // warning: exception handler -GX option
|
|
#endif
|
|
|