Fix shader compilation; replace some OpenGL functions with their OpenGL ES analogs
This commit is contained in:
parent
245e7d1368
commit
3e287a6b70
16 changed files with 3462 additions and 859 deletions
|
@ -263,7 +263,8 @@ public:
|
|||
GLint m_locVertexParams; // "vc" per dx9asmtogl2 convention
|
||||
GLint m_locVertexBoneParams; // "vcbones"
|
||||
GLint m_locVertexInteger0; // "i0"
|
||||
|
||||
GLint m_locAlphaRef; // "alpha_ref"
|
||||
|
||||
enum { cMaxVertexShaderBoolUniforms = 4, cMaxFragmentShaderBoolUniforms = 1 };
|
||||
|
||||
GLint m_locVertexBool[cMaxVertexShaderBoolUniforms]; // "b0", etc.
|
||||
|
|
|
@ -30,18 +30,16 @@ GL_FUNC_VOID(OpenGL,true,glAlphaFunc,(GLenum a,GLclampf b),(a,b))
|
|||
GL_FUNC_VOID(OpenGL,true,glAttachShader,(GLuint a, GLuint b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glBindAttribLocation,(GLuint a,GLuint b,const GLchar *c),(a,b,c))
|
||||
GL_FUNC_VOID(OpenGL,true,glBindBuffer,(GLenum a,GLuint b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glBindProgram,(GLenum a,GLuint b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glBindTexture,(GLenum a,GLuint b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glBlendColor,(GLclampf a,GLclampf b,GLclampf c,GLclampf d),(a,b,c,d))
|
||||
GL_FUNC_VOID(OpenGL,true,glBlendEquation,(GLenum a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glBlendFunc,(GLenum a,GLenum b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glBufferData,(GLenum a, GLsizeiptr b, const GLvoid *c,GLenum d),(a,b,c,d))
|
||||
GL_FUNC_VOID(OpenGL,true,glClear,(GLbitfield a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glClearDepthf,(GLfloat a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glClearColor,(GLclampf a,GLclampf b,GLclampf c,GLclampf d),(a,b,c,d))
|
||||
GL_FUNC_VOID(OpenGL,true,glClearDepth,(GLclampd a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glReadPixels, (GLint a, GLint b, GLsizei c, GLsizei d, GLenum e, GLenum f, void * g), (a,b,c,d,e,f,g))
|
||||
GL_FUNC_VOID(OpenGL,true,glClearStencil,(GLint a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glClipPlane,(GLenum a,const GLdouble *b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glColorMask,(GLboolean a,GLboolean b,GLboolean c,GLboolean d),(a,b,c,d))
|
||||
GL_FUNC_VOID(OpenGL,true,glCompileShader,(GLuint a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glGetShaderiv,(GLuint a, GLenum b, GLint *c),(a,b,c))
|
||||
|
@ -52,20 +50,17 @@ GL_FUNC_VOID(OpenGL,true,glCompressedTexImage3D,(GLenum a,GLint b,GLenum c,GLsiz
|
|||
GL_FUNC(OpenGL,true,GLuint,glCreateProgram,(void),())
|
||||
GL_FUNC(OpenGL,true,GLuint,glCreateShader,(GLenum a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glDeleteBuffers,(GLsizei a,const GLuint *b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glDeleteObject,(GLuint a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glDeletePrograms,(GLsizei a,const GLuint *b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glDeleteProgram,(GLuint a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glDeleteShader,(GLuint a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glDeleteTextures,(GLsizei a,const GLuint *b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glDepthFunc,(GLenum a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glDepthMask,(GLboolean a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glDepthRangef,(GLfloat a,GLfloat b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glDepthRange,(GLclampd a,GLclampd b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glDetachObject,(GLuint a,GLuint b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glDisable,(GLenum a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glDisableVertexAttribArray,(GLuint a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glDrawArrays,(GLenum a,GLint b,GLsizei c),(a,b,c))
|
||||
GL_FUNC_VOID(OpenGL,true,glDrawBuffer,(GLenum a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glDrawBuffers,(GLsizei a,const GLenum *b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glDetachShader,(GLuint a,GLuint b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glDrawRangeElements,(GLenum a,GLuint b,GLuint c,GLsizei d,GLenum e,const GLvoid *f),(a,b,c,d,e,f))
|
||||
#ifndef OSX // 10.6/GL 2.1 compatibility
|
||||
GL_FUNC_VOID(OpenGL,true,glDrawRangeElementsBaseVertex,(GLenum a,GLuint b,GLuint c,GLsizei d,GLenum e,const GLvoid *f, GLenum g),(a,b,c,d,e,f,g))
|
||||
|
@ -76,35 +71,27 @@ GL_FUNC_VOID(OpenGL,true,glFinish,(void),())
|
|||
GL_FUNC_VOID(OpenGL,true,glFlush,(void),())
|
||||
GL_FUNC_VOID(OpenGL,true,glFrontFace,(GLenum a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glGenBuffers,(GLsizei a,GLuint *b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glGenPrograms,(GLsizei a,GLuint *b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glGenTextures,(GLsizei a,GLuint *b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glGetBooleanv,(GLenum a,GLboolean *b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glGetCompressedTexImage,(GLenum a,GLint b,GLvoid *c),(a,b,c))
|
||||
GL_FUNC_VOID(OpenGL,true,glGetDoublev,(GLenum a,GLdouble *b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glGetFloatv,(GLenum a,GLfloat *b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glGetInfoLog,(GLuint a,GLsizei b,GLsizei *c,GLchar *d),(a,b,c,d))
|
||||
//GL_FUNC_VOID(OpenGL,true,glGetInfoLog,(GLuint a,GLsizei b,GLsizei *c,GLchar *d),(a,b,c,d))
|
||||
GL_FUNC_VOID(OpenGL,true,glGetIntegerv,(GLenum a,GLint *b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glGetObjectParameteriv,(GLuint a,GLenum b,GLint *c),(a,b,c))
|
||||
GL_FUNC_VOID(OpenGL,true,glGetProgramiv,(GLenum a,GLenum b,GLint *c),(a,b,c))
|
||||
GL_FUNC(OpenGL,true,const GLubyte *,glGetString,(GLenum a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glGetTexImage,(GLenum a,GLint b,GLenum c,GLenum d,GLvoid *e),(a,b,c,d,e))
|
||||
GL_FUNC(OpenGL,true,GLint,glGetUniformLocation,(GLuint a,const GLchar *b),(a,b))
|
||||
GL_FUNC(OpenGL,true,GLboolean,glIsEnabled,(GLenum a),(a))
|
||||
GL_FUNC(OpenGL,true,GLboolean,glIsTexture,(GLuint a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glLinkProgram,(GLuint a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glOrtho,(GLdouble a,GLdouble b,GLdouble c,GLdouble d,GLdouble e,GLdouble f),(a,b,c,d,e,f))
|
||||
//GL_FUNC_VOID(OpenGL,true,glOrtho,(GLdouble a,GLdouble b,GLdouble c,GLdouble d,GLdouble e,GLdouble f),(a,b,c,d,e,f))
|
||||
GL_FUNC_VOID(OpenGL,true,glPixelStorei,(GLenum a,GLint b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glPolygonMode,(GLenum a,GLenum b),(a,b))
|
||||
//GL_FUNC_VOID(OpenGL,true,glPolygonMode,(GLenum a,GLenum b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glPolygonOffset,(GLfloat a,GLfloat b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glPopAttrib,(void),())
|
||||
GL_FUNC_VOID(OpenGL,true,glPushAttrib,(GLbitfield a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glReadBuffer,(GLenum a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glScissor,(GLint a,GLint b,GLsizei c,GLsizei d),(a,b,c,d))
|
||||
GL_FUNC_VOID(OpenGL,true,glShaderSource,(GLuint a,GLsizei b,const GLchar **c,const GLint *d),(a,b,c,d))
|
||||
GL_FUNC_VOID(OpenGL,true,glStencilFunc,(GLenum a,GLint b,GLuint c),(a,b,c))
|
||||
GL_FUNC_VOID(OpenGL,true,glStencilMask,(GLuint a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glStencilOp,(GLenum a,GLenum b,GLenum c),(a,b,c))
|
||||
GL_FUNC_VOID(OpenGL,true,glTexCoord2f,(GLfloat a,GLfloat b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glTexImage2D,(GLenum a,GLint b,GLint c,GLsizei d,GLsizei e,GLint f,GLenum g,GLenum h,const GLvoid *i),(a,b,c,d,e,f,g,h,i))
|
||||
GL_FUNC_VOID(OpenGL,true,glTexImage3D,(GLenum a,GLint b,GLint c,GLsizei d,GLsizei e,GLsizei f,GLint g,GLenum h,GLenum i,const GLvoid *j),(a,b,c,d,e,f,g,h,i,j))
|
||||
GL_FUNC_VOID(OpenGL,true,glTexParameterfv,(GLenum a,GLenum b,const GLfloat *c),(a,b,c))
|
||||
|
@ -115,16 +102,9 @@ GL_FUNC_VOID(OpenGL,true,glUniform1i,(GLint a,GLint b),(a,b))
|
|||
GL_FUNC_VOID(OpenGL,true,glUniform4fv,(GLint a,GLsizei b,const GLfloat *c),(a,b,c))
|
||||
GL_FUNC(OpenGL,true,GLboolean,glUnmapBuffer,(GLenum a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glUseProgram,(GLuint a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glVertex3f,(GLfloat a,GLfloat b,GLfloat c),(a,b,c))
|
||||
GL_FUNC_VOID(OpenGL,true,glVertexAttribPointer,(GLuint a,GLint b,GLenum c,GLboolean d,GLsizei e,const GLvoid *f),(a,b,c,d,e,f))
|
||||
GL_FUNC_VOID(OpenGL,true,glViewport,(GLint a,GLint b,GLsizei c,GLsizei d),(a,b,c,d))
|
||||
GL_FUNC_VOID(OpenGL,true,glEnableClientState,(GLenum a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glDisableClientState,(GLenum a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glClientActiveTexture,(GLenum a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glVertexPointer,(GLint a,GLenum b,GLsizei c,const GLvoid *d),(a,b,c,d))
|
||||
GL_FUNC_VOID(OpenGL,true,glTexCoordPointer,(GLint a,GLenum b,GLsizei c,const GLvoid *d),(a,b,c,d))
|
||||
GL_FUNC_VOID(OpenGL,true,glProgramEnvParameters4fvEXT,(GLenum a,GLuint b,GLsizei c,const GLfloat *d),(a,b,c,d))
|
||||
GL_FUNC_VOID(OpenGL,true,glColor4sv,(const GLshort *a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glStencilOpSeparate,(GLenum a,GLenum b,GLenum c,GLenum d),(a,b,c,d))
|
||||
GL_FUNC_VOID(OpenGL,true,glStencilFuncSeparate,(GLenum a,GLenum b,GLint c,GLuint d),(a,b,c,d))
|
||||
GL_FUNC_VOID(OpenGL,true,glGetTexLevelParameteriv,(GLenum a,GLint b,GLenum c,GLint *d),(a,b,c,d))
|
||||
|
@ -164,11 +144,6 @@ GL_FUNC_VOID(GL_ARB_sync,false,glWaitSync,(GLsync a, GLbitfield b, GLuint64 c),(
|
|||
GL_FUNC_VOID(GL_ARB_sync,false,glDeleteSync,(GLsync a),(a))
|
||||
GL_FUNC(GL_ARB_sync,false,GLsync,glFenceSync,(GLenum a, GLbitfield b),(a,b))
|
||||
#endif
|
||||
GL_EXT(GL_EXT_draw_buffers2,-1,-1)
|
||||
GL_FUNC_VOID(GL_EXT_draw_buffers2,true,glColorMaskIndexedEXT,(GLuint a,GLboolean b,GLboolean c,GLboolean d,GLboolean e),(a,b,c,d,e))
|
||||
GL_FUNC_VOID(GL_EXT_draw_buffers2,true,glEnableIndexedEXT,(GLenum a,GLuint b),(a,b))
|
||||
GL_FUNC_VOID(GL_EXT_draw_buffers2,true,glDisableIndexedEXT,(GLenum a,GLuint b),(a,b))
|
||||
GL_FUNC_VOID(GL_EXT_draw_buffers2,true,glGetBooleanIndexedvEXT,(GLenum a,GLuint b,GLboolean *c),(a,b,c))
|
||||
GL_EXT(GL_EXT_bindable_uniform,-1,-1)
|
||||
GL_FUNC_VOID(GL_EXT_bindable_uniform,false,glUniformBufferEXT,(GLuint a,GLint b,GLuint c),(a,b,c))
|
||||
GL_FUNC(GL_EXT_bindable_uniform,false,int,glGetUniformBufferSizeEXT,(GLenum a, GLenum b),(a,b))
|
||||
|
@ -212,7 +187,7 @@ GL_FUNC_VOID(OpenGL,false,glDeleteRenderbuffers,(GLsizei a,const GLuint *b),(a,b
|
|||
GL_FUNC_VOID(OpenGL,false,glFramebufferRenderbuffer,(GLenum a,GLenum b,GLenum c,GLuint d),(a,b,c,d))
|
||||
GL_FUNC_VOID(OpenGL,false,glFramebufferTexture2D,(GLenum a,GLenum b,GLenum c,GLuint d,GLint e),(a,b,c,d,e))
|
||||
GL_FUNC_VOID(OpenGL,false,glFramebufferTexture3D,(GLenum a,GLenum b,GLenum c,GLuint d,GLint e,GLint f),(a,b,c,d,e,f))
|
||||
GL_FUNC_VOID(OpenGL,false,glGenFramebuffers,(GLsizei a,GLuint *b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,true,glGenFramebuffers,(GLsizei a,GLuint *b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,false,glGenRenderbuffers,(GLsizei a,GLuint *b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,false,glDeleteFramebuffers,(GLsizei a,const GLuint *b),(a,b))
|
||||
GL_FUNC_VOID(OpenGL,false,glBlitFramebuffer,(GLint a,GLint b,GLint c,GLint d,GLint e,GLint f,GLint g,GLint h,GLbitfield i,GLenum j),(a,b,c,d,e,f,g,h,i,j))
|
||||
|
@ -248,9 +223,6 @@ GL_FUNC_VOID(OpenGL,true,glGenQueries,(GLsizei n, GLuint *ids), (n, ids))
|
|||
GL_FUNC_VOID(OpenGL,true,glDeleteQueries,(GLsizei n, const GLuint *ids),(n, ids))
|
||||
GL_FUNC_VOID(OpenGL,true,glBeginQuery,(GLenum target, GLuint id), (target, id))
|
||||
GL_FUNC_VOID(OpenGL,true,glEndQuery,(GLenum target), (target))
|
||||
GL_FUNC_VOID(OpenGL,true,glQueryCounter,(GLuint id, GLenum target), (id, target))
|
||||
GL_FUNC_VOID(OpenGL,true,glGetQueryObjectiv,(GLuint id, GLenum pname, GLint *params), (id, pname, params))
|
||||
GL_FUNC_VOID(OpenGL,true,glGetQueryObjectui64v,(GLuint id, GLenum pname, GLuint64 *params), (id, pname, params))
|
||||
GL_FUNC_VOID(OpenGL,true,glCopyBufferSubData,(GLenum readtarget, GLenum writetarget, GLintptr readoffset, GLintptr writeoffset, GLsizeiptr size),(readtarget, writetarget, readoffset, writeoffset, size))
|
||||
#endif // !OSX
|
||||
|
||||
|
@ -264,8 +236,6 @@ GL_FUNC_VOID(OpenGL,true,glBindVertexArray,(GLuint a),(a))
|
|||
#endif // !OSX
|
||||
|
||||
GL_EXT(GL_EXT_texture_sRGB_decode,-1,-1)
|
||||
GL_FUNC_VOID(OpenGL,true,glPushClientAttrib,(GLbitfield a),(a))
|
||||
GL_FUNC_VOID(OpenGL,true,glPopClientAttrib,(void),())
|
||||
GL_EXT(GL_NVX_gpu_memory_info,-1,-1)
|
||||
GL_EXT(GL_ATI_meminfo,-1,-1)
|
||||
GL_EXT(GL_EXT_texture_compression_s3tc,-1,-1)
|
||||
|
|
|
@ -198,9 +198,11 @@ FORCEINLINE void glGetEnumv( GLenum which, GLenum *dst )
|
|||
// shorthand macros
|
||||
#define EQ(fff) ( (src.fff) == (fff) )
|
||||
|
||||
|
||||
//rasterizer
|
||||
struct GLAlphaTestEnable_t { GLint enable; inline bool operator==(const GLAlphaTestEnable_t& src) const { return EQ(enable); } };
|
||||
struct GLAlphaTestFunc_t { GLenum func; GLclampf ref; inline bool operator==(const GLAlphaTestFunc_t& src) const { return EQ(func) && EQ(ref); } };
|
||||
struct GLAlphaTest_t { GLint enable; GLenum func; GLclampf ref; };
|
||||
struct GLCullFaceEnable_t { GLint enable; inline bool operator==(const GLCullFaceEnable_t& src) const { return EQ(enable); } };
|
||||
struct GLCullFrontFace_t { GLenum value; inline bool operator==(const GLCullFrontFace_t& src) const { return EQ(value); } };
|
||||
struct GLPolygonMode_t { GLenum values[2]; inline bool operator==(const GLPolygonMode_t& src) const { return EQ(values[0]) && EQ(values[1]); } };
|
||||
|
@ -209,7 +211,7 @@ struct GLScissorEnable_t { GLint enable; inline bool operator==(co
|
|||
struct GLScissorBox_t { GLint x,y; GLsizei width, height; inline bool operator==(const GLScissorBox_t& src) const { return EQ(x) && EQ(y) && EQ(width) && EQ(height); } };
|
||||
struct GLAlphaToCoverageEnable_t{ GLint enable; inline bool operator==(const GLAlphaToCoverageEnable_t& src) const { return EQ(enable); } };
|
||||
struct GLViewportBox_t { GLint x,y; GLsizei width, height; uint widthheight; inline bool operator==(const GLViewportBox_t& src) const { return EQ(x) && EQ(y) && EQ(width) && EQ(height); } };
|
||||
struct GLViewportDepthRange_t { GLdouble flNear,flFar; inline bool operator==(const GLViewportDepthRange_t& src) const { return EQ(flNear) && EQ(flFar); } };
|
||||
struct GLViewportDepthRange_t { GLfloat flNear,flFar; inline bool operator==(const GLViewportDepthRange_t& src) const { return EQ(flNear) && EQ(flFar); } };
|
||||
struct GLClipPlaneEnable_t { GLint enable; inline bool operator==(const GLClipPlaneEnable_t& src) const { return EQ(enable); } };
|
||||
struct GLClipPlaneEquation_t { GLfloat x,y,z,w; inline bool operator==(const GLClipPlaneEquation_t& src) const { return EQ(x) && EQ(y) && EQ(z) && EQ(w); } };
|
||||
|
||||
|
@ -235,7 +237,7 @@ struct GLStencilWriteMask_t { GLint mask; inline bool operator==(c
|
|||
|
||||
//clearing
|
||||
struct GLClearColor_t { GLfloat r,g,b,a; inline bool operator==(const GLClearColor_t& src) const { return EQ(r) && EQ(g) && EQ(b) && EQ(a); } };
|
||||
struct GLClearDepth_t { GLdouble d; inline bool operator==(const GLClearDepth_t& src) const { return EQ(d); } };
|
||||
struct GLClearDepth_t { GLfloat d; inline bool operator==(const GLClearDepth_t& src) const { return EQ(d); } };
|
||||
struct GLClearStencil_t { GLint s; inline bool operator==(const GLClearStencil_t& src) const { return EQ(s); } };
|
||||
|
||||
#undef EQ
|
||||
|
@ -306,15 +308,20 @@ template<typename T> void GLContextGetDefaultIndexed( T *dst, int index );
|
|||
//===============================================================================
|
||||
// template specializations for each type of state
|
||||
|
||||
|
||||
static GLAlphaTest_t g_alpha_test;
|
||||
|
||||
// --- GLAlphaTestEnable ---
|
||||
FORCEINLINE void GLContextSet( GLAlphaTestEnable_t *src )
|
||||
{
|
||||
glSetEnable( GL_ALPHA_TEST, src->enable != 0 );
|
||||
// glSetEnable( GL_ALPHA_TEST, src->enable != 0 );
|
||||
g_alpha_test.enable = src->enable;
|
||||
}
|
||||
|
||||
FORCEINLINE void GLContextGet( GLAlphaTestEnable_t *dst )
|
||||
{
|
||||
dst->enable = gGL->glIsEnabled( GL_ALPHA_TEST );
|
||||
// dst->enable = gGL->glIsEnabled( GL_ALPHA_TEST );
|
||||
dst->enable = g_alpha_test.enable;
|
||||
}
|
||||
|
||||
FORCEINLINE void GLContextGetDefault( GLAlphaTestEnable_t *dst )
|
||||
|
@ -326,12 +333,16 @@ FORCEINLINE void GLContextGetDefault( GLAlphaTestEnable_t *dst )
|
|||
FORCEINLINE void GLContextSet( GLAlphaTestFunc_t *src )
|
||||
{
|
||||
// gGL->glAlphaFunc( src->func, src->ref );
|
||||
g_alpha_test.func = src->func;
|
||||
g_alpha_test.ref = src->ref;
|
||||
}
|
||||
|
||||
FORCEINLINE void GLContextGet( GLAlphaTestFunc_t *dst )
|
||||
{
|
||||
glGetEnumv( GL_ALPHA_TEST_FUNC, &dst->func );
|
||||
gGL->glGetFloatv( GL_ALPHA_TEST_REF, &dst->ref );
|
||||
// glGetEnumv( GL_ALPHA_TEST_FUNC, &dst->func );
|
||||
// gGL->glGetFloatv( GL_ALPHA_TEST_REF, &dst->ref );
|
||||
dst->func = g_alpha_test.func;
|
||||
dst->ref = g_alpha_test.ref;
|
||||
}
|
||||
|
||||
FORCEINLINE void GLContextGetDefault( GLAlphaTestFunc_t *dst )
|
||||
|
@ -502,7 +513,7 @@ FORCEINLINE void GLContextSet( GLViewportDepthRange_t *src )
|
|||
|
||||
FORCEINLINE void GLContextGet( GLViewportDepthRange_t *dst )
|
||||
{
|
||||
gGL->glGetDoublev ( GL_DEPTH_RANGE, &dst->flNear );
|
||||
gGL->glGetFloatv( GL_DEPTH_RANGE, &dst->flNear );
|
||||
}
|
||||
|
||||
FORCEINLINE void GLContextGetDefault( GLViewportDepthRange_t *dst )
|
||||
|
@ -584,12 +595,26 @@ FORCEINLINE void GLContextGetDefault( GLColorMaskSingle_t *dst )
|
|||
// --- GLColorMaskMultiple ---
|
||||
FORCEINLINE void GLContextSetIndexed( GLColorMaskMultiple_t *src, int index )
|
||||
{
|
||||
gGL->glColorMaskIndexedEXT ( index, src->r, src->g, src->b, src->a );
|
||||
GLint Rfbo = 0, Dfbo = 0;
|
||||
|
||||
gGL->glGetIntegerv( GL_DRAW_FRAMEBUFFER_BINDING, &Dfbo );
|
||||
gGL->glGetIntegerv( GL_READ_FRAMEBUFFER_BINDING, &Rfbo );
|
||||
GLint target = Dfbo == Rfbo?GL_FRAMEBUFFER:GL_DRAW_FRAMEBUFFER;
|
||||
gGL->glBindFramebuffer( target, index );
|
||||
gGL->glColorMask ( src->r, src->g, src->b, src->a );
|
||||
gGL->glBindFramebuffer( target, Dfbo );
|
||||
}
|
||||
|
||||
FORCEINLINE void GLContextGetIndexed( GLColorMaskMultiple_t *dst, int index )
|
||||
{
|
||||
gGL->glGetBooleanIndexedvEXT ( GL_COLOR_WRITEMASK, index, (GLboolean*)&dst->r );
|
||||
GLint Rfbo = 0, Dfbo = 0;
|
||||
|
||||
gGL->glGetIntegerv( GL_DRAW_FRAMEBUFFER_BINDING, &Dfbo );
|
||||
gGL->glGetIntegerv( GL_READ_FRAMEBUFFER_BINDING, &Rfbo );
|
||||
GLint target = Dfbo == Rfbo?GL_FRAMEBUFFER:GL_DRAW_FRAMEBUFFER;
|
||||
gGL->glBindFramebuffer( target, index );
|
||||
gGL->glGetBooleanv( GL_COLOR_WRITEMASK, (GLboolean*)&dst->r );
|
||||
gGL->glBindFramebuffer( target, Dfbo );
|
||||
}
|
||||
|
||||
FORCEINLINE void GLContextGetDefaultIndexed( GLColorMaskMultiple_t *dst, int index )
|
||||
|
@ -698,7 +723,7 @@ FORCEINLINE void GLContextSet( GLBlendEnableSRGB_t *src )
|
|||
|
||||
FORCEINLINE void GLContextGet( GLBlendEnableSRGB_t *dst )
|
||||
{
|
||||
//dst->enable = glIsEnabled( GL_FRAMEBUFFER_SRGB_EXT );
|
||||
// dst->enable = gGL->glIsEnabled( GL_FRAMEBUFFER_SRGB_EXT );
|
||||
dst->enable = true; // wtf ?
|
||||
}
|
||||
|
||||
|
@ -864,13 +889,12 @@ FORCEINLINE void GLContextGetDefault( GLClearColor_t *dst )
|
|||
// --- GLClearDepth ---
|
||||
FORCEINLINE void GLContextSet( GLClearDepth_t *src )
|
||||
{
|
||||
// TOFUCK: wut
|
||||
// gGL->glClearDepth ( src->d );
|
||||
gGL->glClearDepthf( src->d );
|
||||
}
|
||||
|
||||
FORCEINLINE void GLContextGet( GLClearDepth_t *dst )
|
||||
{
|
||||
gGL->glGetDoublev ( GL_DEPTH_CLEAR_VALUE, &dst->d );
|
||||
gGL->glGetFloatv( GL_DEPTH_CLEAR_VALUE, &dst->d );
|
||||
}
|
||||
|
||||
FORCEINLINE void GLContextGetDefault( GLClearDepth_t *dst )
|
||||
|
@ -2285,7 +2309,7 @@ public:
|
|||
};
|
||||
|
||||
#define kMaxCrawlFrames 100
|
||||
#define kMaxCrawlText (kMaxCrawlFrames * 256)
|
||||
#define kMaxCrawlText (kMaxCrawlFrames * 256)
|
||||
class CStackCrawlParams
|
||||
{
|
||||
public:
|
||||
|
|
|
@ -718,7 +718,7 @@ CGLMShaderPair::~CGLMShaderPair( )
|
|||
{
|
||||
if (m_program)
|
||||
{
|
||||
gGL->glDeleteObject( m_program );
|
||||
gGL->glDeleteProgram( m_program );
|
||||
m_program = 0;
|
||||
}
|
||||
}
|
||||
|
@ -746,7 +746,7 @@ bool CGLMShaderPair::ValidateProgramPair()
|
|||
|
||||
// check for success
|
||||
GLint result = GL_TRUE;
|
||||
gGL->glGetObjectParameteriv( m_program, GL_OBJECT_LINK_STATUS_ARB, &result ); // want GL_TRUE
|
||||
gGL->glGetProgramiv(m_program, GL_LINK_STATUS, &result);
|
||||
m_bCheckLinkStatus = false;
|
||||
|
||||
if (result == GL_TRUE)
|
||||
|
@ -762,12 +762,6 @@ bool CGLMShaderPair::ValidateProgramPair()
|
|||
GLint laux = 0;
|
||||
|
||||
// do some digging
|
||||
gGL->glGetObjectParameteriv( m_program, GL_OBJECT_INFO_LOG_LENGTH_ARB, &length );
|
||||
|
||||
GLchar *logString = (GLchar *)malloc( length * sizeof(GLchar) );
|
||||
gGL->glGetInfoLog( m_program, length, &laux, logString );
|
||||
|
||||
GLMPRINTF( ("-D- ----- GLSL link failed: \n %s ", logString) );
|
||||
#if !GLM_FREE_SHADER_TEXT
|
||||
char *vtemp = strdup( m_vertexProg->m_text );
|
||||
vtemp[m_vertexProg->m_descs[kGLMGLSL].m_textOffset + m_vertexProg->m_descs[kGLMGLSL].m_textLength] = 0;
|
||||
|
@ -784,8 +778,6 @@ bool CGLMShaderPair::ValidateProgramPair()
|
|||
free( ftemp );
|
||||
free( vtemp );
|
||||
#endif
|
||||
free( logString );
|
||||
|
||||
GLMPRINTF( ("-D- -----end-----") );
|
||||
}
|
||||
|
||||
|
@ -793,11 +785,15 @@ bool CGLMShaderPair::ValidateProgramPair()
|
|||
{
|
||||
gGL->glUseProgram( m_program );
|
||||
|
||||
printf("Sample text\n");
|
||||
|
||||
m_ctx->NewLinkedProgram();
|
||||
|
||||
m_locVertexParams = gGL->glGetUniformLocation( m_program, "vc" );
|
||||
m_locVertexBoneParams = gGL->glGetUniformLocation( m_program, "vcbones" );
|
||||
m_locVertexScreenParams = gGL->glGetUniformLocation( m_program, "vcscreen" );
|
||||
m_locAlphaRef = gGL->glGetUniformLocation( m_program, "alpha_ref" );
|
||||
|
||||
m_nScreenWidthHeight = 0xFFFFFFFF;
|
||||
|
||||
m_locVertexInteger0 = gGL->glGetUniformLocation( m_program, "i0" );
|
||||
|
@ -940,13 +936,13 @@ bool CGLMShaderPair::SetProgramPair( CGLMProgram *vp, CGLMProgram *fp )
|
|||
// attempt link. but first, detach any previously attached programs
|
||||
if (m_vertexProg)
|
||||
{
|
||||
gGL->glDetachObject(m_program, m_vertexProg->m_descs[kGLMGLSL].m_object.glsl);
|
||||
gGL->glDetachShader(m_program, m_vertexProg->m_descs[kGLMGLSL].m_object.glsl);
|
||||
m_vertexProg = NULL;
|
||||
}
|
||||
|
||||
if (m_fragmentProg)
|
||||
{
|
||||
gGL->glDetachObject(m_program, m_fragmentProg->m_descs[kGLMGLSL].m_object.glsl);
|
||||
gGL->glDetachShader(m_program, m_fragmentProg->m_descs[kGLMGLSL].m_object.glsl);
|
||||
m_fragmentProg = NULL;
|
||||
}
|
||||
|
||||
|
@ -987,7 +983,7 @@ bool CGLMShaderPair::SetProgramPair( CGLMProgram *vp, CGLMProgram *fp )
|
|||
gGL->glLinkProgram( m_program );
|
||||
|
||||
GLint isLinked = 0;
|
||||
gGL->glGetShaderiv(m_program, GL_LINK_STATUS, &isLinked);
|
||||
gGL->glGetProgramiv(m_program, GL_LINK_STATUS, &isLinked);
|
||||
if(isLinked == GL_FALSE)
|
||||
{
|
||||
GLint maxLength = 0;
|
||||
|
@ -997,8 +993,8 @@ bool CGLMShaderPair::SetProgramPair( CGLMProgram *vp, CGLMProgram *fp )
|
|||
gGL->glGetProgramInfoLog( m_program, sizeof(log), &maxLength, log );
|
||||
if( maxLength )
|
||||
{
|
||||
printf("vp: \n%s\nfp: \n%s\n", vp->m_text, fp->m_text );
|
||||
printf("shader %d link log: %s\n", m_program, log);
|
||||
Msg("vp: \n%s\nfp: \n%s\n", vp->m_text, fp->m_text );
|
||||
Msg("shader %d link log: %s\n", m_program, log);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -239,8 +239,8 @@ bool CGLMQuery::IsDone( void )
|
|||
{
|
||||
// prepare to pay a big price on drivers prior to 10.6.4+SLGU
|
||||
|
||||
GLint available = 0;
|
||||
gGL->glGetQueryObjectiv(m_name, GL_QUERY_RESULT_AVAILABLE, &available );
|
||||
GLuint available = 0;
|
||||
gGL->glGetQueryObjectuiv(m_name, GL_QUERY_RESULT_AVAILABLE, &available );
|
||||
|
||||
m_done = (available != 0);
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
341
togles/linuxwin/decompress.c
Normal file
341
togles/linuxwin/decompress.c
Normal file
|
@ -0,0 +1,341 @@
|
|||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/*
|
||||
DXT1/DXT3/DXT5 texture decompression
|
||||
|
||||
The original code is from Benjamin Dobell, see below for details. Compared to
|
||||
the original this one adds DXT3 decompression, is valid C89, and is x64
|
||||
compatible as it uses fixed size integers everywhere. It also uses a different
|
||||
PackRGBA order.
|
||||
|
||||
---
|
||||
|
||||
Copyright (c) 2012, Matthäus G. "Anteru" Chajdas (http://anteru.net)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is furnished to do
|
||||
so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
---
|
||||
|
||||
Copyright (C) 2009 Benjamin Dobell, Glass Echidna
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is furnished to do
|
||||
so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
---
|
||||
*/
|
||||
/*
 * Packs four 8-bit channels into one 32-bit pixel: r occupies bits 0-7,
 * g bits 8-15, b bits 16-23 and a bits 24-31.
 *
 * Every channel is widened to uint32_t before it is shifted. A uint8_t
 * operand would otherwise be promoted to (signed) int, and shifting an alpha
 * value of 0x80 or more left by 24 overflows int, which is undefined
 * behaviour.
 */
static uint32_t PackRGBA (uint8_t r, uint8_t g, uint8_t b, uint8_t a)
{
    return (uint32_t)r
         | ((uint32_t)g << 8)
         | ((uint32_t)b << 16)
         | ((uint32_t)a << 24);
}
|
||||
|
||||
/*
 * Decodes the 8-byte colour part of a DXT block into a 4x4 tile of packed
 * RGBA pixels.
 *
 * block:        8 bytes - two 16-bit 5:6:5 endpoint colours followed by a
 *               32-bit word holding one 2-bit selector per pixel.
 * output:       destination of the tile's first pixel.
 * outputStride: distance between two destination rows, in pixels.
 * transparent0: when non-zero, selector 3 of a block with color0 <= color1
 *               produces alpha 0 instead of the value from alphaValues.
 * simpleAlpha:  set to 1 when any decoded pixel has alpha 0; never cleared.
 * complexAlpha: set to 1 when any decoded pixel has alpha in 1..254; never
 *               cleared.
 * alphaValues:  16 alpha values, one per pixel, indexed as [row*4 + column].
 *
 * The block bytes are assembled explicitly in little-endian order rather
 * than read through uint16_t/uint32_t pointer casts: 'block' carries no
 * alignment guarantee, and the casts would also yield byte-swapped values on
 * a big-endian host.
 */
static void DecompressBlockDXT1Internal (const uint8_t* block,
    uint32_t* output,
    uint32_t outputStride,
    int transparent0, int* simpleAlpha, int *complexAlpha,
    const uint8_t* alphaValues)
{
    uint32_t temp, code;

    uint16_t color0, color1;
    uint8_t r0, g0, b0, r1, g1, b1;

    int fourColorMode;
    int i, j;

    color0 = (uint16_t)(block[0] | (block[1] << 8));
    color1 = (uint16_t)(block[2] | (block[3] << 8));

    /* Expand each 5- or 6-bit channel to 8 bits (0 stays 0, maximum becomes 255). */
    temp = (color0 >> 11) * 255 + 16;
    r0 = (uint8_t)((temp/32 + temp)/32);
    temp = ((color0 & 0x07E0) >> 5) * 255 + 32;
    g0 = (uint8_t)((temp/64 + temp)/64);
    temp = (color0 & 0x001F) * 255 + 16;
    b0 = (uint8_t)((temp/32 + temp)/32);

    temp = (color1 >> 11) * 255 + 16;
    r1 = (uint8_t)((temp/32 + temp)/32);
    temp = ((color1 & 0x07E0) >> 5) * 255 + 32;
    g1 = (uint8_t)((temp/64 + temp)/64);
    temp = (color1 & 0x001F) * 255 + 16;
    b1 = (uint8_t)((temp/32 + temp)/32);

    code = (uint32_t)block[4]
         | ((uint32_t)block[5] << 8)
         | ((uint32_t)block[6] << 16)
         | ((uint32_t)block[7] << 24);

    /*
     * color0 > color1 selects the mode with two interpolated colours (at 1/3
     * and 2/3); otherwise selector 2 is the midpoint and selector 3 is black.
     */
    fourColorMode = color0 > color1;

    for (j = 0; j < 4; ++j) {
        for (i = 0; i < 4; ++i) {
            uint32_t finalColor, positionCode;
            uint8_t alpha;

            alpha = alphaValues [j*4+i];

            finalColor = 0;
            positionCode = (code >> 2*(4*j+i)) & 0x03;

            switch (positionCode) {
            case 0:
                finalColor = PackRGBA(r0, g0, b0, alpha);
                break;
            case 1:
                finalColor = PackRGBA(r1, g1, b1, alpha);
                break;
            case 2:
                if (fourColorMode)
                    finalColor = PackRGBA((2*r0+r1)/3, (2*g0+g1)/3, (2*b0+b1)/3, alpha);
                else
                    finalColor = PackRGBA((r0+r1)/2, (g0+g1)/2, (b0+b1)/2, alpha);
                break;
            case 3:
                if (fourColorMode) {
                    finalColor = PackRGBA((r0+2*r1)/3, (g0+2*g1)/3, (b0+2*b1)/3, alpha);
                } else {
                    if(transparent0) alpha=0;
                    finalColor = PackRGBA(0, 0, 0, alpha);
                }
                break;
            }

            /* Report the kind of alpha seen, using the value after any transparent0 override. */
            if(!alpha)
                *simpleAlpha = 1;
            else if(alpha<0xff)
                *complexAlpha = 1;

            output [j*outputStride + i] = finalColor;
        }
    }
}
|
||||
|
||||
/*
void DecompressBlockDXT1(): Decompresses one block of a DXT1 texture and stores the resulting pixels at the appropriate offset in 'image'.

uint32_t x:                  x-coordinate of the first pixel in the block.
uint32_t y:                  y-coordinate of the first pixel in the block.
uint32_t width:              width of the texture being decompressed; also used as the row stride of 'image', in pixels.
const uint8_t *blockStorage: pointer to the block to decompress.
int transparent0:            forwarded unchanged to DecompressBlockDXT1Internal().
int *simpleAlpha:            forwarded unchanged to DecompressBlockDXT1Internal().
int *complexAlpha:           forwarded unchanged to DecompressBlockDXT1Internal().
uint32_t *image:             pointer to image where the decompressed pixel data should be stored.
*/
void DecompressBlockDXT1(uint32_t x, uint32_t y, uint32_t width,
    const uint8_t* blockStorage,
    int transparent0, int* simpleAlpha, int *complexAlpha,
    uint32_t* image)
{
    /* Every pixel is handed to the decoder with an alpha of 255. */
    static const uint8_t opaqueAlpha [16] = {
        255, 255, 255, 255,
        255, 255, 255, 255,
        255, 255, 255, 255,
        255, 255, 255, 255
    };
    uint32_t* firstPixel;

    /* Locate the block's top-left pixel inside the destination image. */
    firstPixel = image + x;
    firstPixel += y * width;

    DecompressBlockDXT1Internal (blockStorage, firstPixel, width,
        transparent0, simpleAlpha, complexAlpha, opaqueAlpha);
}
|
||||
|
||||
/*
 * DecompressBlockDXT5(): decompresses one block of a DXT5 texture and stores
 * the resulting pixels at the appropriate offset in 'image'.
 *
 * uint32_t x:                  x-coordinate of the first pixel in the block.
 * uint32_t y:                  y-coordinate of the first pixel in the block.
 * uint32_t width:              width of the texture being decompressed; also
 *                              used as the row stride (in pixels) of 'image'.
 * const uint8_t *blockStorage: pointer to the 16-byte block to decompress
 *                              (bytes 0-7 alpha, bytes 8-15 colour).
 * int transparent0:            unused by this function; kept so the three
 *                              DecompressBlockDXT* entry points share one
 *                              signature.
 * int *simpleAlpha:            set to 1 if any decoded pixel has alpha == 0.
 *                              Never cleared here.
 * int *complexAlpha:           set to 1 if any decoded pixel has alpha in
 *                              1..254. Never cleared here.
 * uint32_t *image:             pointer to the image where the decompressed
 *                              pixel data should be stored.
 *
 * All multi-byte fields are assembled byte by byte in little-endian order, so
 * the block pointer needs no particular alignment and the result does not
 * depend on host byte order.
 */
void DecompressBlockDXT5(uint32_t x, uint32_t y, uint32_t width,
    const uint8_t* blockStorage,
    int transparent0, int* simpleAlpha, int *complexAlpha,
    uint32_t* image)
{
    uint8_t alpha0, alpha1;
    const uint8_t* bits;
    uint32_t alphaCode1;
    uint16_t alphaCode2;

    uint16_t color0, color1;
    uint8_t r0, g0, b0, r1, g1, b1;

    int i, j;

    uint32_t temp, code;

    (void)transparent0;

    alpha0 = blockStorage[0];
    alpha1 = blockStorage[1];

    /* The sixteen 3-bit alpha indices occupy 48 bits in bits[0..5]:
     * alphaCode2 holds the low 16 bits, alphaCode1 the upper 32.
     * Each byte is widened to uint32_t before shifting so that a byte
     * >= 0x80 shifted by 24 cannot overflow a signed int. */
    bits = blockStorage + 2;
    alphaCode1 = (uint32_t)bits[2]
               | ((uint32_t)bits[3] << 8)
               | ((uint32_t)bits[4] << 16)
               | ((uint32_t)bits[5] << 24);
    alphaCode2 = (uint16_t)((uint32_t)bits[0] | ((uint32_t)bits[1] << 8));

    /* Two RGB565 endpoint colours, read without pointer type-punning. */
    color0 = (uint16_t)((uint32_t)blockStorage[8]  | ((uint32_t)blockStorage[9]  << 8));
    color1 = (uint16_t)((uint32_t)blockStorage[10] | ((uint32_t)blockStorage[11] << 8));

    /* Expand the 5/6/5-bit channels to the 0..255 range with rounding. */
    temp = (color0 >> 11) * 255 + 16;
    r0 = (uint8_t)((temp/32 + temp)/32);
    temp = ((color0 & 0x07E0) >> 5) * 255 + 32;
    g0 = (uint8_t)((temp/64 + temp)/64);
    temp = (color0 & 0x001F) * 255 + 16;
    b0 = (uint8_t)((temp/32 + temp)/32);

    temp = (color1 >> 11) * 255 + 16;
    r1 = (uint8_t)((temp/32 + temp)/32);
    temp = ((color1 & 0x07E0) >> 5) * 255 + 32;
    g1 = (uint8_t)((temp/64 + temp)/64);
    temp = (color1 & 0x001F) * 255 + 16;
    b1 = (uint8_t)((temp/32 + temp)/32);

    /* Sixteen 2-bit colour indices, pixel 0 in the lowest bits. */
    code = (uint32_t)blockStorage[12]
         | ((uint32_t)blockStorage[13] << 8)
         | ((uint32_t)blockStorage[14] << 16)
         | ((uint32_t)blockStorage[15] << 24);

    for (j = 0; j < 4; j++) {
        for (i = 0; i < 4; i++) {
            uint8_t finalAlpha;
            int alphaCode, alphaCodeIndex;
            uint8_t colorCode;
            uint32_t finalColor;

            /* Bit offset of this pixel's 3-bit alpha index in the 48-bit field. */
            alphaCodeIndex = 3*(4*j+i);
            if (alphaCodeIndex <= 12) {
                alphaCode = (alphaCode2 >> alphaCodeIndex) & 0x07;
            } else if (alphaCodeIndex == 15) {
                /* This index straddles the 16-bit / 32-bit boundary:
                 * one bit from alphaCode2, two bits from alphaCode1. */
                alphaCode = (alphaCode2 >> 15) | ((alphaCode1 << 1) & 0x06);
            } else /* alphaCodeIndex >= 18 && alphaCodeIndex <= 45 */ {
                alphaCode = (alphaCode1 >> (alphaCodeIndex - 16)) & 0x07;
            }

            if (alphaCode == 0) {
                finalAlpha = alpha0;
            } else if (alphaCode == 1) {
                finalAlpha = alpha1;
            } else if (alpha0 > alpha1) {
                /* Eight-value mode: six values interpolated between the endpoints. */
                finalAlpha = (uint8_t)(((8-alphaCode)*alpha0 + (alphaCode-1)*alpha1)/7);
            } else if (alphaCode == 6) {
                /* Six-value mode: codes 6 and 7 are fixed at 0 and 255. */
                finalAlpha = 0;
            } else if (alphaCode == 7) {
                finalAlpha = 255;
            } else {
                finalAlpha = (uint8_t)(((6-alphaCode)*alpha0 + (alphaCode-1)*alpha1)/5);
            }

            colorCode = (code >> 2*(4*j+i)) & 0x03;
            finalColor = 0;

            switch (colorCode) {
            case 0:
                finalColor = PackRGBA(r0, g0, b0, finalAlpha);
                break;
            case 1:
                finalColor = PackRGBA(r1, g1, b1, finalAlpha);
                break;
            case 2:
                finalColor = PackRGBA((2*r0+r1)/3, (2*g0+g1)/3, (2*b0+b1)/3, finalAlpha);
                break;
            case 3:
                finalColor = PackRGBA((r0+2*r1)/3, (g0+2*g1)/3, (b0+2*b1)/3, finalAlpha);
                break;
            default:
                /* Unreachable: colorCode is masked to two bits. */
                break;
            }

            /* Report which kind of transparency the block contains. */
            if (finalAlpha == 0) {
                *simpleAlpha = 1;
            } else if (finalAlpha < 0xff) {
                *complexAlpha = 1;
            }

            image [i + x + (width* (y+j))] = finalColor;
        }
    }
}
|
||||
|
||||
/*
 * DecompressBlockDXT3(): decompresses one block of a DXT3 texture and stores
 * the resulting pixels at the appropriate offset in 'image'.
 *
 * uint32_t x:                  x-coordinate of the first pixel in the block.
 * uint32_t y:                  y-coordinate of the first pixel in the block.
 * uint32_t width:              width of the texture being decompressed; also
 *                              used as the row stride (in pixels) of 'image'.
 * const uint8_t *blockStorage: pointer to the block to decompress: 8 bytes of
 *                              explicit 4-bit alpha followed by the colour
 *                              data handed to DecompressBlockDXT1Internal().
 * int transparent0:            forwarded unchanged to
 *                              DecompressBlockDXT1Internal().
 * int *simpleAlpha:            forwarded unchanged to
 *                              DecompressBlockDXT1Internal().
 * int *complexAlpha:           forwarded unchanged to
 *                              DecompressBlockDXT1Internal().
 * uint32_t *image:             pointer to the image where the decompressed
 *                              pixel data should be stored.
 *
 * The alpha rows are assembled byte by byte in little-endian order, so the
 * block pointer needs no particular alignment and the result does not depend
 * on host byte order.
 */
void DecompressBlockDXT3(uint32_t x, uint32_t y, uint32_t width,
    const uint8_t* blockStorage,
    int transparent0, int* simpleAlpha, int *complexAlpha,
    uint32_t* image)
{
    int i;

    uint8_t alphaValues [16] = { 0 };

    for (i = 0; i < 4; ++i) {
        /* One 16-bit word per row: four 4-bit alpha values, lowest nibble first. */
        const uint32_t alphaRow = (uint32_t)blockStorage[2*i]
                                | ((uint32_t)blockStorage[2*i + 1] << 8);

        /* Multiplying by 17 maps a 4-bit value 0..15 onto 0..255. */
        alphaValues [i*4 + 0] = (uint8_t)(((alphaRow >> 0)  & 0xF) * 17);
        alphaValues [i*4 + 1] = (uint8_t)(((alphaRow >> 4)  & 0xF) * 17);
        alphaValues [i*4 + 2] = (uint8_t)(((alphaRow >> 8)  & 0xF) * 17);
        alphaValues [i*4 + 3] = (uint8_t)(((alphaRow >> 12) & 0xF) * 17);
    }

    /* The colour data starts right after the 8 alpha bytes. */
    DecompressBlockDXT1Internal (blockStorage + 8,
        image + x + (y * width), width, transparent0, simpleAlpha, complexAlpha, alphaValues);
}
|
||||
|
||||
// Texture DXT1 / DXT5 compression
|
||||
// Using the STB single-file ("one file") library
|
||||
// go there https://github.com/nothings/stb
|
||||
// for more details and other libs
|
||||
|
||||
#define STB_DXT_IMPLEMENTATION
|
||||
#include "stb_dxt_104.h"
|
19
togles/linuxwin/decompress.h
Normal file
19
togles/linuxwin/decompress.h
Normal file
|
@ -0,0 +1,19 @@
|
|||
#ifndef _GL4ES_DECOMPRESS_H_
#define _GL4ES_DECOMPRESS_H_

/* The prototypes below use uint8_t / uint32_t; include the definitions here
 * so this header can be included on its own. */
#include <stdint.h>

/*
 * Software S3TC block decoders. Each call decodes a single compressed block
 * whose first pixel is at (x, y) and writes the pixels into 'image', using
 * 'width' as the row stride in pixels. The three entry points deliberately
 * share one signature.
 */

/* Decode one DXT1 block. */
void DecompressBlockDXT1(uint32_t x, uint32_t y, uint32_t width,
    const uint8_t* blockStorage,
    int transparent0, int* simpleAlpha, int *complexAlpha,
    uint32_t* image);

/* Decode one DXT3 block. */
void DecompressBlockDXT3(uint32_t x, uint32_t y, uint32_t width,
    const uint8_t* blockStorage,
    int transparent0, int* simpleAlpha, int *complexAlpha,
    uint32_t* image);

/* Decode one DXT5 block. */
void DecompressBlockDXT5(uint32_t x, uint32_t y, uint32_t width,
    const uint8_t* blockStorage,
    int transparent0, int* simpleAlpha, int *complexAlpha,
    uint32_t* image);

#endif // _GL4ES_DECOMPRESS_H_
|
BIN
togles/linuxwin/decompress.o
Normal file
BIN
togles/linuxwin/decompress.o
Normal file
Binary file not shown.
|
@ -69,13 +69,13 @@ static char g_szShadow2D[] =
|
|||
"vec2 p2 = suv.xy+vec2(0.0,invSize);\n"
|
||||
"vec2 p3 = suv.xy+vec2(invSize,0.0);\n"
|
||||
"vec2 p4 = suv.xy+vec2(invSize);\n"
|
||||
"float d = texture2D(u_depthTex,p1).r;\n"
|
||||
"float d = texture(u_depthTex,p1).r;\n"
|
||||
"float r = float(d>suv.z);\n"
|
||||
"d = texture2D(u_depthTex,p2).r;\n"
|
||||
"d = texture(u_depthTex,p2).r;\n"
|
||||
"float r2 = float(d>suv.z);\n"
|
||||
"d = texture2D(u_depthTex,p3).r;\n"
|
||||
"d = texture(u_depthTex,p3).r;\n"
|
||||
"float r3 = float(d>suv.z);\n"
|
||||
"d = texture2D(u_depthTex,p4).r;\n"
|
||||
"d = texture(u_depthTex,p4).r;\n"
|
||||
"float r4 = float(d>suv.z);\n"
|
||||
"p1*=size;\n"
|
||||
"float a = p1.y-floor(p1.y);\n"
|
||||
|
@ -955,7 +955,7 @@ void D3DToGL::PrintUsageAndIndexToString( uint32 dwToken, char* strUsageUsageInd
|
|||
// if ( fSemanticFlags & SEMANTIC_OUTPUT )
|
||||
// V_snprintf( strUsageUsageIndexName, nBufLen, dwUsageIndex != 0 ? "gl_BackColor" : "gl_FrontColor" );
|
||||
// else
|
||||
V_snprintf( strUsageUsageIndexName, nBufLen, dwUsageIndex != 0 ? "_gl_SecondaryColor" : "_gl_Color" );
|
||||
V_snprintf( strUsageUsageIndexName, nBufLen, dwUsageIndex != 0 ? "_gl_FrontSecondaryColor" : "_gl_FrontColor" );
|
||||
break;
|
||||
case D3DDECLUSAGE_FOG:
|
||||
TranslationError();
|
||||
|
@ -1220,7 +1220,7 @@ void D3DToGL::PrintParameterToString ( uint32 dwToken, uint32 dwSourceOrDest, ch
|
|||
}
|
||||
else
|
||||
{
|
||||
V_snprintf( buff, sizeof( buff ), dwRegNum == 0 ? "_gl_Color" : "_gl_SecondaryColor" );
|
||||
V_snprintf( buff, sizeof( buff ), dwRegNum == 0 ? "_gl_FrontColor" : "_gl_FrontSecondaryColor" );
|
||||
}
|
||||
strcat_s( pRegisterName, nBufLen, buff );
|
||||
}
|
||||
|
@ -1456,7 +1456,6 @@ void D3DToGL::PrintParameterToString ( uint32 dwToken, uint32 dwSourceOrDest, ch
|
|||
m_dwConstIntUsageMask |= 0x00000001 << dwRegNum; // Keep track of the use of this integer constant
|
||||
break;
|
||||
case D3DSPR_COLOROUT:
|
||||
// TODO(nillerusr): go fck urself
|
||||
if( dwRegNum+1 > m_iFragDataCount )
|
||||
m_iFragDataCount = dwRegNum+1;
|
||||
|
||||
|
@ -2546,7 +2545,7 @@ void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL )
|
|||
V_snprintf( szExtra, sizeof( szExtra ), ".%c", GetSwizzleComponent( pSrc0Reg, 3 ) );
|
||||
V_strncat( szLOD, szExtra, sizeof( szLOD ) );
|
||||
|
||||
PrintToBufWithIndents( *m_pBufALUCode, "%s = %s( %s, %s, %s );\n", pDestReg, bIsShadowSampler ? "shadow2DLod" : "texture2DLod", pSrc1Reg, sCoordVar.String(), szLOD );
|
||||
PrintToBufWithIndents( *m_pBufALUCode, "%s = %s( %s, %s, %s );\n", pDestReg, bIsShadowSampler ? "shadow2DLod" : "textureLod", pSrc1Reg, sCoordVar.String(), szLOD );
|
||||
}
|
||||
else if ( bIsShadowSampler )
|
||||
{
|
||||
|
@ -2564,12 +2563,12 @@ void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL )
|
|||
// We use the vec4 variant of texture2DProj() intentionally here, since it lines up well with Direct3D.
|
||||
|
||||
CUtlString s4DProjCoords = EnsureNumSwizzleComponents( pSrc0Reg, 4 ); // Ensure vec4 variant
|
||||
PrintToBufWithIndents( *m_pBufALUCode, "%s = texture2DProj( %s, %s );\n", pDestReg, pSrc1Reg, s4DProjCoords.String() );
|
||||
PrintToBufWithIndents( *m_pBufALUCode, "%s = textureProj( %s, %s );\n", pDestReg, pSrc1Reg, s4DProjCoords.String() );
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
CUtlString sCoordVar = EnsureNumSwizzleComponents( pSrc0Reg, bIsShadowSampler ? 3 : 2 );
|
||||
PrintToBufWithIndents( *m_pBufALUCode, "%s = texture2D( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
|
||||
PrintToBufWithIndents( *m_pBufALUCode, "%s = texture( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
|
||||
}
|
||||
}
|
||||
else if ( nSamplerType == SAMPLER_TYPE_3D )
|
||||
|
@ -2580,7 +2579,7 @@ void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL )
|
|||
}
|
||||
|
||||
CUtlString sCoordVar = EnsureNumSwizzleComponents( pSrc0Reg, 3 );
|
||||
PrintToBufWithIndents( *m_pBufALUCode, "%s = texture3D( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
|
||||
PrintToBufWithIndents( *m_pBufALUCode, "%s = texture( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
|
||||
}
|
||||
else if ( nSamplerType == SAMPLER_TYPE_CUBE )
|
||||
{
|
||||
|
@ -2590,7 +2589,7 @@ void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL )
|
|||
}
|
||||
|
||||
CUtlString sCoordVar = EnsureNumSwizzleComponents( pSrc0Reg, 3 );
|
||||
PrintToBufWithIndents( *m_pBufALUCode, "%s = textureCube( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
|
||||
PrintToBufWithIndents( *m_pBufALUCode, "%s = texture( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -3049,7 +3048,7 @@ void D3DToGL::WriteGLSLInputVariableAssignments()
|
|||
|
||||
if ( dwUsage == D3DDECLUSAGE_COLOR )
|
||||
{
|
||||
PrintToBufWithIndents( *m_pBufAttribCode, "vec4 oTempT%d = %s;\n", i, dwUsageIndex ? "_gl_SecondaryColor" : "_gl_Color" );
|
||||
PrintToBufWithIndents( *m_pBufAttribCode, "vec4 oTempT%d = %s;\n", i, dwUsageIndex ? "_gl_FrontSecondaryColor" : "_gl_FrontColor" );
|
||||
}
|
||||
else if ( dwUsage == D3DDECLUSAGE_TEXCOORD )
|
||||
{
|
||||
|
@ -3182,6 +3181,10 @@ int D3DToGL::TranslateShader( uint32* code, CUtlBuffer *pBufDisassembledCode, bo
|
|||
m_bPutHexCodesAfterLines = (options & D3DToGL_PutHexCommentsAfterLines) != 0;
|
||||
m_bGeneratingDebugText = (options & D3DToGL_GeneratingDebugText) != 0;
|
||||
m_bGenerateSRGBWriteSuffix = (options & D3DToGL_OptionSRGBWriteSuffix) != 0;
|
||||
// m_bGenerateSRGBWriteSuffix = true;
|
||||
|
||||
if( debugLabel && ( V_strstr( debugLabel ,"vertexlit_and_unlit_generic_bump_ps") ))
|
||||
m_bGenerateSRGBWriteSuffix = true;
|
||||
|
||||
m_NumIndentTabs = 1; // start code indented one tab
|
||||
m_nLoopDepth = 0;
|
||||
|
@ -3675,6 +3678,7 @@ int D3DToGL::TranslateShader( uint32* code, CUtlBuffer *pBufDisassembledCode, bo
|
|||
}
|
||||
|
||||
// Control bit for sRGB Write suffix
|
||||
|
||||
if ( m_bGenerateSRGBWriteSuffix )
|
||||
{
|
||||
// R500 Hookup
|
||||
|
@ -3889,33 +3893,38 @@ int D3DToGL::TranslateShader( uint32* code, CUtlBuffer *pBufDisassembledCode, bo
|
|||
{
|
||||
StrcatToHeaderCode( g_szShadow2D );
|
||||
StrcatToHeaderCode( g_szShadow2DProj );
|
||||
|
||||
}
|
||||
else if( FindSubcode("shadow2D") )
|
||||
StrcatToHeaderCode( g_szShadow2D );
|
||||
|
||||
if( FindSubcode("_gl_Color") )
|
||||
StrcatToHeaderCode( "vec4 _gl_Color;\n" );
|
||||
|
||||
if( FindSubcode("_gl_SecondaryColor") )
|
||||
StrcatToHeaderCode( "vec4 _gl_SecondaryColor;\n" );
|
||||
StrcatToHeaderCode( g_szShadow2D );
|
||||
|
||||
if( FindSubcode("_gl_FrontColor") && !m_bFrontColor )
|
||||
StrcatToHeaderCode( "in vec4 _gl_FrontColor;\n" );
|
||||
|
||||
if( FindSubcode("_gl_FrontSecondaryColor") && !m_bFrontSecondaryColor )
|
||||
StrcatToHeaderCode( "in vec4 _gl_FrontSecondaryColor;\n" );
|
||||
|
||||
if( m_iFragDataCount && bVertexShader )
|
||||
StrcatToHeaderCode( "\nuniform float alpha_ref;\n" );
|
||||
|
||||
StrcatToHeaderCode( "\nvoid main()\n{\n" );
|
||||
if ( m_bUsedAtomicTempVar )
|
||||
{
|
||||
PrintToBufWithIndents( *m_pBufHeaderCode, "vec4 %s;\n\n", g_pAtomicTempVarName );
|
||||
}
|
||||
|
||||
|
||||
// sRGB Write suffix
|
||||
if ( m_bGenerateSRGBWriteSuffix )
|
||||
{
|
||||
StrcatToALUCode( "vec3 sRGBFragData;\n" );
|
||||
StrcatToALUCode( "sRGBFragData.xyz = log( gl_FragData[0].xyz );\n" );
|
||||
StrcatToALUCode( "sRGBFragData.xyz = sRGBFragData.xyz * vec3( 0.454545f, 0.454545f, 0.454545f );\n" );
|
||||
StrcatToALUCode( "sRGBFragData.xyz = exp( sRGBFragData.xyz );\n" );
|
||||
StrcatToALUCode( "gl_FragData[0].xyz = mix( gl_FragData[0].xyz, sRGBFragData, flSRGBWrite );\n" );
|
||||
// StrcatToALUCode( "vec3 sRGBFragData;\n" );
|
||||
// StrcatToALUCode( "sRGBFragData.xyz = log( gl_FragData[0].xyz );\n" );
|
||||
// StrcatToALUCode( "sRGBFragData.xyz = sRGBFragData.xyz * vec3( 0.754545f, 0.754545f, 0.754545f );\n" );
|
||||
// StrcatToALUCode( "sRGBFragData.xyz = exp( sRGBFragData.xyz );\n" );
|
||||
StrcatToALUCode( "gl_FragData[0].xyz = pow(gl_FragData[0].xyz, vec3(1.0/2.2));\n" );
|
||||
}
|
||||
|
||||
if( m_iFragDataCount && bVertexShader )
|
||||
StrcatToALUCode( "if( gl_FragData[0].a < alpha_ref ) { discard; };\n" );
|
||||
|
||||
strcat_s( (char*)m_pBufALUCode->Base(), m_pBufALUCode->Size(), "}\n" );
|
||||
|
||||
// Put all of the strings together for final program ( pHeaderCode + pAttribCode + pParamCode + pALUCode )
|
||||
|
|
|
@ -1201,8 +1201,8 @@ static void FillD3DCaps9( const GLMRendererInfoFields &glmRendererInfo, D3DCAPS9
|
|||
pCaps->MaxPixelShader30InstructionSlots = 0;
|
||||
|
||||
#if DX_TO_GL_ABSTRACTION
|
||||
pCaps->FakeSRGBWrite = !glmRendererInfo.m_hasGammaWrites;
|
||||
pCaps->CanDoSRGBReadFromRTs = !glmRendererInfo.m_cantAttachSRGB;
|
||||
pCaps->FakeSRGBWrite = true;//!glmRendererInfo.m_hasGammaWrites;
|
||||
pCaps->CanDoSRGBReadFromRTs = true;//!glmRendererInfo.m_cantAttachSRGB;
|
||||
pCaps->MixedSizeTargets = glmRendererInfo.m_hasMixedAttachmentSizes;
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -389,6 +389,14 @@ COpenGLEntryPoints::COpenGLEntryPoints()
|
|||
m_bHave_GL_EXT_framebuffer_blit = true;
|
||||
m_bHave_GL_EXT_framebuffer_multisample = true;
|
||||
m_bHave_GL_ARB_occlusion_query = true;
|
||||
m_bHave_GL_ARB_map_buffer_range = true;
|
||||
m_bHave_GL_ARB_vertex_buffer_object = true;
|
||||
m_bHave_GL_ARB_vertex_array_bgra = true;
|
||||
m_bHave_GL_EXT_vertex_array_bgra = true;
|
||||
m_bHave_GL_ARB_debug_output = true;
|
||||
m_bHave_GL_EXT_direct_state_access = false;
|
||||
m_bHave_GL_EXT_framebuffer_multisample_blit_scaled = true;
|
||||
m_bHave_GL_EXT_texture_sRGB_decode = true;
|
||||
|
||||
glBindFramebuffer.Force(glBindFramebuffer.Pointer());
|
||||
glBindRenderbuffer.Force(glBindRenderbuffer.Pointer());
|
||||
|
@ -456,12 +464,6 @@ COpenGLEntryPoints::COpenGLEntryPoints()
|
|||
printf( "GL_EXT_buffer_storage: %s\n", m_bHave_GL_EXT_buffer_storage ? "AVAILABLE" : "NOT AVAILABLE" );
|
||||
printf( "GL_EXT_texture_sRGB_decode: %s\n", m_bHave_GL_EXT_texture_sRGB_decode ? "AVAILABLE" : "NOT AVAILABLE" );
|
||||
|
||||
bool bGLCanDecodeS3TCTextures = m_bHave_GL_EXT_texture_compression_s3tc || ( m_bHave_GL_EXT_texture_compression_dxt1 && m_bHave_GL_ANGLE_texture_compression_dxt3 && m_bHave_GL_ANGLE_texture_compression_dxt5 );
|
||||
if ( !bGLCanDecodeS3TCTextures )
|
||||
{
|
||||
Error( "This application requires either the GL_EXT_texture_compression_s3tc, or the GL_EXT_texture_compression_dxt1 + GL_ANGLE_texture_compression_dxt3 + GL_ANGLE_texture_compression_dxt5 OpenGL extensions. Please install S3TC texture support.\n" );
|
||||
}
|
||||
|
||||
#ifdef OSX
|
||||
if ( CommandLine()->FindParm( "-glmnosrgbdecode" ) )
|
||||
{
|
||||
|
|
|
@ -92,11 +92,11 @@ char g_preloadTexVertexProgramText[] = // Гроб гроб кладбище п
|
|||
"precision mediump float;\n"
|
||||
"out vec4 otex;\n"
|
||||
"void main() \n"
|
||||
"{ \n"
|
||||
"{\n"
|
||||
"vec4 pos = vec4( 0.1, 0.1, 0.1, 0.1 );\n"
|
||||
"vec4 tex = vec4( 0.0, 0.0, 0.0, 0.0 );\n"
|
||||
" \n"
|
||||
"gl_Position = pos; \n"
|
||||
"\n"
|
||||
"gl_Position = pos;\n"
|
||||
"otex = tex; \n"
|
||||
"} \n"
|
||||
};
|
||||
|
@ -116,7 +116,7 @@ char g_preload2DTexFragmentProgramText[] =
|
|||
"void main() \n"
|
||||
"{ \n"
|
||||
"vec4 r0; \n"
|
||||
"r0 = texture2D( sampler15, otex.xy ); \n"
|
||||
"r0 = texture( sampler15, otex.xy ); \n"
|
||||
"_gl_FragColor = r0; //discard; \n"
|
||||
"} \n"
|
||||
};
|
||||
|
@ -137,7 +137,7 @@ char g_preload3DTexFragmentProgramText[] =
|
|||
"void main() \n"
|
||||
"{ \n"
|
||||
"vec4 r0; \n"
|
||||
"r0 = texture3D( sampler15, otex.xyz ); \n"
|
||||
"r0 = texture( sampler15, otex.xyz ); \n"
|
||||
"_gl_FragColor = vec4(0,0,0,0); //discard; \n"
|
||||
"} \n"
|
||||
};
|
||||
|
@ -157,7 +157,7 @@ char g_preloadCubeTexFragmentProgramText[] =
|
|||
"void main() \n"
|
||||
"{ \n"
|
||||
"vec4 r0; \n"
|
||||
"r0 = textureCube( sampler15, otex.xyz ); \n"
|
||||
"r0 = texture( sampler15, otex.xyz ); \n"
|
||||
"_gl_FragColor = r0; //discard; \n"
|
||||
"} \n"
|
||||
};
|
||||
|
@ -449,6 +449,20 @@ GLMgr::~GLMgr()
|
|||
{
|
||||
}
|
||||
|
||||
extern void CompressedTexImage2D(GLenum target, GLint level, GLenum internalformat,
|
||||
GLsizei width, GLsizei height, GLint border,
|
||||
GLsizei imageSize, const GLvoid *data);
|
||||
|
||||
extern void TexImage2D(GLenum target,
|
||||
GLint level,
|
||||
GLint internalformat,
|
||||
GLsizei width,
|
||||
GLsizei height,
|
||||
GLint border,
|
||||
GLenum format,
|
||||
GLenum type,
|
||||
const void * data);
|
||||
|
||||
//===============================================================================
|
||||
|
||||
GLMContext *GLMgr::NewContext( IDirect3DDevice9 *pDevice, GLMDisplayParams *params )
|
||||
|
@ -953,14 +967,6 @@ void GLMContext::Blit2( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int srcM
|
|||
bool srcGamma = srcTex && ((srcTex->m_layout->m_key.m_texFlags & kGLMTexSRGB) != 0);
|
||||
bool dstGamma = dstTex && ((dstTex->m_layout->m_key.m_texFlags & kGLMTexSRGB) != 0);
|
||||
|
||||
bool doPushPop = (srcGamma != dstGamma) && gl_radar7954721_workaround_mixed.GetInt() && m_caps.m_nv; // workaround for cross gamma blit problems on NV
|
||||
// ^^ need to re-check this on some post-10.6.3 build on NV to see if it was fixed
|
||||
|
||||
if (doPushPop)
|
||||
{
|
||||
gGL->glPushAttrib( 0 );
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------- figure out the plan
|
||||
|
||||
bool blitTwoStep = false; // think positive
|
||||
|
@ -1052,8 +1058,8 @@ void GLMContext::Blit2( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int srcM
|
|||
glAttachTex2DtoFBO ( GL_DRAW_FRAMEBUFFER, formatClass, srcTex->m_texName, 0 );
|
||||
|
||||
// set read and draw buffers appropriately
|
||||
gGL->glReadBuffer ( glAttachFromClass[formatClass] );
|
||||
gGL->glDrawBuffer ( glAttachFromClass[formatClass] );
|
||||
gGL->glReadBuffer( glAttachFromClass[formatClass] );
|
||||
gGL->glDrawBuffers( 1, &glAttachFromClass[formatClass] );
|
||||
|
||||
// blit#1 - to resolve to scratch
|
||||
// implicitly means no scaling, thus will be done with NEAREST sampling
|
||||
|
@ -1117,10 +1123,12 @@ void GLMContext::Blit2( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int srcM
|
|||
if (blitToBack)
|
||||
{
|
||||
// backbuffer is special - FBO0 is left out (either scrubbed already, or not used)
|
||||
|
||||
BindFBOToCtx ( NULL, GL_DRAW_FRAMEBUFFER );
|
||||
gGL->glDrawBuffer ( GL_BACK );
|
||||
|
||||
|
||||
BindFBOToCtx( NULL, GL_DRAW_FRAMEBUFFER );
|
||||
|
||||
GLenum bufs = GL_BACK;
|
||||
gGL->glDrawBuffers( 1, &bufs );
|
||||
|
||||
yflip = true;
|
||||
}
|
||||
else
|
||||
|
@ -1201,12 +1209,6 @@ void GLMContext::Blit2( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int srcM
|
|||
// restore GLM drawing FBO
|
||||
BindFBOToCtx( m_drawingFBO, GL_FRAMEBUFFER );
|
||||
|
||||
if (doPushPop)
|
||||
{
|
||||
gGL->glPopAttrib( );
|
||||
}
|
||||
|
||||
|
||||
//----------------------------------------------------------------- restore old scissor state
|
||||
if (oldsciss.enable)
|
||||
{
|
||||
|
@ -1258,39 +1260,6 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
|
|||
GLMPRINTF(( "-D- dst tex layout is %s", dstTex->m_layout->m_layoutSummary ));
|
||||
}
|
||||
|
||||
int pushed = 0;
|
||||
uint pushmask = gl_radar7954721_workaround_maskval.GetInt();
|
||||
//GL_COLOR_BUFFER_BIT
|
||||
//| GL_CURRENT_BIT
|
||||
//| GL_ENABLE_BIT
|
||||
//| GL_FOG_BIT
|
||||
//| GL_PIXEL_MODE_BIT
|
||||
//| GL_SCISSOR_BIT
|
||||
//| GL_STENCIL_BUFFER_BIT
|
||||
//| GL_TEXTURE_BIT
|
||||
//GL_VIEWPORT_BIT
|
||||
//;
|
||||
|
||||
if (gl_radar7954721_workaround_all.GetInt()!=0)
|
||||
{
|
||||
gGL->glPushAttrib( pushmask );
|
||||
pushed++;
|
||||
}
|
||||
else
|
||||
{
|
||||
bool srcGamma = (srcTex->m_layout->m_key.m_texFlags & kGLMTexSRGB) != 0;
|
||||
bool dstGamma = (dstTex->m_layout->m_key.m_texFlags & kGLMTexSRGB) != 0;
|
||||
|
||||
if (srcGamma != dstGamma)
|
||||
{
|
||||
if (gl_radar7954721_workaround_mixed.GetInt())
|
||||
{
|
||||
gGL->glPushAttrib( pushmask );
|
||||
pushed++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (useBlitFB)
|
||||
{
|
||||
// state we need to save
|
||||
|
@ -1354,8 +1323,7 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
|
|||
attparams.m_zslice = 0;
|
||||
m_blitReadFBO->TexAttach( &attparams, attachIndex, GL_READ_FRAMEBUFFER );
|
||||
|
||||
gGL->glReadBuffer( attachIndexGL );
|
||||
|
||||
gGL->glDrawBuffers( 1, &attachIndexGL );
|
||||
|
||||
// set the write fb and buffer, and attach write tex
|
||||
BindFBOToCtx( m_blitDrawFBO, GL_DRAW_FRAMEBUFFER );
|
||||
|
@ -1366,7 +1334,7 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
|
|||
attparams.m_zslice = 0;
|
||||
m_blitDrawFBO->TexAttach( &attparams, attachIndex, GL_DRAW_FRAMEBUFFER );
|
||||
|
||||
gGL->glDrawBuffer( attachIndexGL );
|
||||
gGL->glDrawBuffers( 1, &attachIndexGL );
|
||||
|
||||
// do the blit
|
||||
gGL->glBlitFramebuffer( srcRect->xmin, srcRect->ymin, srcRect->xmax, srcRect->ymax,
|
||||
|
@ -1425,8 +1393,8 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
|
|||
attparams.m_zslice = 0;
|
||||
m_blitDrawFBO->TexAttach( &attparams, attachIndex, GL_DRAW_FRAMEBUFFER );
|
||||
|
||||
gGL->glDrawBuffer( attachIndexGL );
|
||||
|
||||
gGL->glDrawBuffers( 1, &attachIndexGL );
|
||||
|
||||
// attempt to just set states directly the way we want them, then use the latched states to repair them afterward.
|
||||
NullProgram(); // out of program mode
|
||||
|
||||
|
@ -1456,25 +1424,24 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
|
|||
|
||||
// immediate mode is fine
|
||||
|
||||
#if 0 // Does it needed?
|
||||
const float topv = 1.0;
|
||||
const float botv = 0.0;
|
||||
|
||||
const float verts[] = {-1.f, -1.f, 1.f, -1.f, 1.f, 1.f, -1.f, 1.f};
|
||||
const float verts_tex[] = {0.f, botv, 1.f, botv, 1.f, topv, 0.f, topv};
|
||||
|
||||
gGL->glEnableClientState(GL_VERTEX_ARRAY);
|
||||
gGL->glEnableClientState(GL_TEXTURE_COORD_ARRAY);
|
||||
|
||||
const float verts_tex[] = {0.f, botv, 1.f, botv, 1.f, topv, 0.f, topv};
|
||||
|
||||
gGL->glVertexPointer(2, GL_FLOAT, 0, verts);
|
||||
gGL->glTexCoordPointer(2, GL_FLOAT, 0, verts_tex);
|
||||
|
||||
glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
|
||||
gGL->glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
|
||||
|
||||
gGL->glDisableClientState(GL_VERTEX_ARRAY);
|
||||
gGL->glDisableClientState(GL_TEXTURE_COORD_ARRAY);
|
||||
|
||||
#endif
|
||||
|
||||
gGL->glBindTexture( GL_TEXTURE_2D, 0 );
|
||||
|
||||
|
||||
gGL->glDisable(GL_TEXTURE_2D);
|
||||
|
||||
BindTexToTMU( m_samplers[0].m_pBoundTex, 0 );
|
||||
|
@ -1509,12 +1476,6 @@ void GLMContext::BlitTex( CGLMTex *srcTex, GLMRect *srcRect, int srcFace, int sr
|
|||
BindFBOToCtx( m_drawingFBO, GL_FRAMEBUFFER );
|
||||
}
|
||||
|
||||
while(pushed)
|
||||
{
|
||||
gGL->glPopAttrib();
|
||||
pushed--;
|
||||
}
|
||||
|
||||
RestoreSavedColorMask();
|
||||
}
|
||||
|
||||
|
@ -1632,7 +1593,7 @@ void GLMContext::ResolveTex( CGLMTex *tex, bool forceDirty )
|
|||
gGL->glFramebufferTexture2D( GL_DRAW_FRAMEBUFFER, attachIndexGL, GL_TEXTURE_2D, tex->m_texName, 0 );
|
||||
}
|
||||
|
||||
gGL->glDrawBuffer( attachIndexGL );
|
||||
gGL->glDrawBuffers( 1, &attachIndexGL );
|
||||
|
||||
//-----------------------------------------------------------------------------------
|
||||
|
||||
|
@ -2369,10 +2330,6 @@ void GLMContext::Present( CGLMTex *tex )
|
|||
tmMessage( TELEMETRY_LEVEL2, TMMF_ICON_EXCLAMATION, "VS Uniform Calls: %u, VS Uniforms: %u|VS Uniform Bone Calls: %u, VS Bone Uniforms: %u|PS Uniform Calls: %u, PS Uniforms: %u", m_nTotalVSUniformCalls, m_nTotalVSUniformsSet, m_nTotalVSUniformBoneCalls, m_nTotalVSUniformsBoneSet, m_nTotalPSUniformCalls, m_nTotalPSUniformsSet );
|
||||
m_nTotalVSUniformCalls = 0, m_nTotalVSUniformBoneCalls = 0, m_nTotalVSUniformsSet = 0, m_nTotalVSUniformsBoneSet = 0, m_nTotalPSUniformCalls = 0, m_nTotalPSUniformsSet = 0;
|
||||
#endif
|
||||
|
||||
#ifndef OSX
|
||||
GLMGPUTimestampManagerTick();
|
||||
#endif
|
||||
}
|
||||
|
||||
//===============================================================================
|
||||
|
@ -2845,7 +2802,6 @@ void GLMContext::BindTexToTMU( CGLMTex *pTex, int tmu )
|
|||
|
||||
if ( !pTex )
|
||||
{
|
||||
gGL->glBindTexture( GL_TEXTURE_1D, 0 );
|
||||
gGL->glBindTexture( GL_TEXTURE_2D, 0 );
|
||||
gGL->glBindTexture( GL_TEXTURE_3D, 0 );
|
||||
gGL->glBindTexture( GL_TEXTURE_CUBE_MAP, 0 );
|
||||
|
@ -2853,7 +2809,6 @@ void GLMContext::BindTexToTMU( CGLMTex *pTex, int tmu )
|
|||
else
|
||||
{
|
||||
const GLenum texGLTarget = pTex->m_texGLTarget;
|
||||
if ( texGLTarget != GL_TEXTURE_1D ) gGL->glBindTexture( GL_TEXTURE_1D, 0 );
|
||||
if ( texGLTarget != GL_TEXTURE_2D ) gGL->glBindTexture( GL_TEXTURE_2D, 0 );
|
||||
if ( texGLTarget != GL_TEXTURE_3D ) gGL->glBindTexture( GL_TEXTURE_3D, 0 );
|
||||
if ( texGLTarget != GL_TEXTURE_CUBE_MAP ) gGL->glBindTexture( GL_TEXTURE_CUBE_MAP, 0 );
|
||||
|
@ -3006,11 +2961,11 @@ void GLMContext::CleanupTex( GLenum texBind, GLMTexLayout* pLayout, GLuint tex )
|
|||
const int dataSize = ( chunks * chunks ) * pLayout->m_format->m_bytesPerSquareChunk;
|
||||
Assert( dataSize <= ( sizeof( uint32) * ARRAYSIZE( g_garbageTextureBits ) ) );
|
||||
|
||||
gGL->glCompressedTexImage2D( texBind, i, pLayout->m_format->m_glIntFormat, mipDim, mipDim, 0, dataSize, 0 );
|
||||
CompressedTexImage2D( texBind, i, pLayout->m_format->m_glIntFormat, mipDim, mipDim, 0, dataSize, 0 );
|
||||
}
|
||||
else
|
||||
{
|
||||
gGL->glTexImage2D( texBind, i, pLayout->m_format->m_glIntFormat, mipDim, mipDim, 0, pLayout->m_format->m_glDataFormat, pLayout->m_format->m_glDataType, 0 );
|
||||
TexImage2D( texBind, i, pLayout->m_format->m_glIntFormat, mipDim, mipDim, 0, pLayout->m_format->m_glDataFormat, pLayout->m_format->m_glDataType, 0 );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4412,8 +4367,7 @@ void GLMContext::DebugHook( GLMDebugHookInfo *info )
|
|||
break;
|
||||
|
||||
case 2:
|
||||
short fakecolor[4] = { 0, 0, 0, 0 };
|
||||
gGL->glColor4sv( fakecolor ); // break to OGLP
|
||||
// What the fuck?
|
||||
break;
|
||||
}
|
||||
// re-flush all GLM states so you can fiddle with them in the debugger. then run the batch again and spin..
|
||||
|
@ -4766,36 +4720,11 @@ void GLMContext::DrawDebugText( float x, float y, float z, float drawCharWidth,
|
|||
|
||||
gGL->glEnable(GL_TEXTURE_2D);
|
||||
|
||||
if (0)
|
||||
{
|
||||
gGL->glEnableClientState(GL_VERTEX_ARRAY);
|
||||
|
||||
gGL->glEnableClientState(GL_TEXTURE_COORD_ARRAY);
|
||||
|
||||
gGL->glVertexPointer( 3, GL_FLOAT, sizeof( vtx[0] ), &vtx[0].x );
|
||||
|
||||
gGL->glClientActiveTexture(GL_TEXTURE0);
|
||||
|
||||
gGL->glTexCoordPointer( 2, GL_FLOAT, sizeof( vtx[0] ), &vtx[0].u );
|
||||
}
|
||||
else
|
||||
{
|
||||
SetVertexAttributes( &vertSetup );
|
||||
}
|
||||
SetVertexAttributes( &vertSetup );
|
||||
|
||||
gGL->glDrawArrays( GL_QUADS, 0, stringlen * 4 );
|
||||
|
||||
// disable all the input streams
|
||||
if (0)
|
||||
{
|
||||
gGL->glDisableClientState(GL_VERTEX_ARRAY);
|
||||
|
||||
gGL->glDisableClientState(GL_TEXTURE_COORD_ARRAY);
|
||||
}
|
||||
else
|
||||
{
|
||||
SetVertexAttributes( NULL );
|
||||
}
|
||||
SetVertexAttributes( NULL );
|
||||
|
||||
gGL->glDisable(GL_TEXTURE_2D);
|
||||
|
||||
|
@ -5288,7 +5217,7 @@ void GLMTester::StdSetup( void )
|
|||
gGL->glScissor( 0,0, (GLsizei) m_drawWidth, (GLsizei) m_drawHeight );
|
||||
CheckGLError("stdsetup scissor");
|
||||
|
||||
gGL->glOrtho( -1,1, -1,1, -1,1 );
|
||||
//gGL->glOrtho( -1,1, -1,1, -1,1 );
|
||||
CheckGLError("stdsetup ortho");
|
||||
|
||||
// activate debug font
|
||||
|
@ -5331,7 +5260,7 @@ void GLMTester::Clear( void )
|
|||
|
||||
gGL->glViewport(0, 0, (GLsizei) m_drawWidth, (GLsizei) m_drawHeight );
|
||||
gGL->glScissor( 0,0, (GLsizei) m_drawWidth, (GLsizei) m_drawHeight );
|
||||
gGL->glOrtho( -1,1, -1,1, -1,1 );
|
||||
//gGL->glOrtho( -1,1, -1,1, -1,1 );
|
||||
CheckGLError("clearing viewport");
|
||||
|
||||
// clear to black
|
||||
|
@ -6000,7 +5929,9 @@ void GLMTester::Test3( void )
|
|||
void GLMTriggerDebuggerBreak()
|
||||
{
|
||||
// we call an obscure GL function which we know has been breakpointed in the OGLP function list
|
||||
static signed short nada[] = { -1,-1,-1,-1 };
|
||||
gGL->glColor4sv( nada );
|
||||
|
||||
// What the fuck is that?
|
||||
// static signed short nada[] = { -1,-1,-1,-1 };
|
||||
// gGL->glColor4sv( nada );
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
// BE VERY VERY CAREFUL what you do in these function. They are extremely hot, and calling the wrong GL API's in here will crush perf. (especially on NVidia threaded drivers).
|
||||
|
||||
#include "togles/linuxwin/glmgr.h"
|
||||
|
||||
FORCEINLINE uint32 bitmix32(uint32 a)
|
||||
{
|
||||
a -= (a<<6);
|
||||
|
@ -433,7 +435,6 @@ FORCEINLINE void GLMContext::FlushDrawStates( uint nStartIndex, uint nEndIndex,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// see if VS uses i0, b0, b1, b2, b3.
|
||||
// use a glUniform1i to set any one of these if active. skip all of them if no dirties reported.
|
||||
// my kingdom for the UBO extension!
|
||||
|
@ -478,6 +479,15 @@ FORCEINLINE void GLMContext::FlushDrawStates( uint nStartIndex, uint nEndIndex,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
if( m_pBoundPair->m_locAlphaRef )
|
||||
{
|
||||
if( !m_AlphaTestEnable.GetData().enable )
|
||||
gGL->glUniform1f( m_pBoundPair->m_locAlphaRef, 0.0 );
|
||||
else
|
||||
gGL->glUniform1f( m_pBoundPair->m_locAlphaRef, m_AlphaTestFunc.GetData().ref );
|
||||
}
|
||||
|
||||
Assert( ( m_pDevice->m_streams[0].m_vtxBuffer && ( m_pDevice->m_streams[0].m_vtxBuffer->m_vtxBuffer == m_pDevice->m_vtx_buffers[0] ) ) || ( ( !m_pDevice->m_streams[0].m_vtxBuffer ) && ( m_pDevice->m_vtx_buffers[0] == m_pDevice->m_pDummy_vtx_buffer ) ) );
|
||||
Assert( ( m_pDevice->m_streams[1].m_vtxBuffer && ( m_pDevice->m_streams[1].m_vtxBuffer->m_vtxBuffer == m_pDevice->m_vtx_buffers[1] ) ) || ( ( !m_pDevice->m_streams[1].m_vtxBuffer ) && ( m_pDevice->m_vtx_buffers[1] == m_pDevice->m_pDummy_vtx_buffer ) ) );
|
||||
Assert( ( m_pDevice->m_streams[2].m_vtxBuffer && ( m_pDevice->m_streams[2].m_vtxBuffer->m_vtxBuffer == m_pDevice->m_vtx_buffers[2] ) ) || ( ( !m_pDevice->m_streams[2].m_vtxBuffer ) && ( m_pDevice->m_vtx_buffers[2] == m_pDevice->m_pDummy_vtx_buffer ) ) );
|
||||
|
|
|
@ -3116,624 +3116,13 @@ void GLMSetIndent( int indent )
|
|||
char sg_pPIXName[128];
|
||||
|
||||
|
||||
#ifndef OSX
|
||||
ConVar gl_telemetry_gpu_pipeline_flushing( "gl_telemetry_gpu_pipeline_flushing", "0" );
|
||||
|
||||
class CGPUTimestampManager
|
||||
{
|
||||
CGPUTimestampManager( const CGPUTimestampManager & );
|
||||
CGPUTimestampManager& operator= ( CGPUTimestampManager & );
|
||||
|
||||
public:
|
||||
CGPUTimestampManager() :
|
||||
m_bInitialized( false ),
|
||||
m_nCurFrame( 0 ),
|
||||
m_flGPUToCPUOffsetInS( 0 ),
|
||||
m_flGPUToS( 0 ),
|
||||
m_flRdtscToS( 0 ),
|
||||
m_flSToRdtsc( 0 ),
|
||||
m_nFreeQueryPoolSize( 0 ),
|
||||
m_nOutstandingQueriesHead( 0 ),
|
||||
m_nOutstandingQueriesTail( 0 ),
|
||||
m_nNumOutstandingQueryZones( 0 ),
|
||||
m_nQueryZoneStackSize( 0 ),
|
||||
m_nNumFinishedZones( 0 ),
|
||||
m_nTotalSpanWorkCount( 0 )
|
||||
{
|
||||
memset( m_FreeQueryPool, 0, sizeof( m_FreeQueryPool ) ) ;
|
||||
memset( m_QueryZoneStack, 0, sizeof( m_QueryZoneStack ) );
|
||||
memset( m_OutstandingQueryZones, 0, sizeof( m_OutstandingQueryZones ) );
|
||||
memset( m_FinishedZones, 0, sizeof( m_FinishedZones ) );
|
||||
}
|
||||
|
||||
// Destructor: tears the manager down through Deinit(), which is a no-op when
// the manager was never initialized.
~CGPUTimestampManager()
|
||||
{
|
||||
Deinit();
|
||||
}
|
||||
|
||||
// True between a completed Init() and the next Deinit().
inline bool IsInitialized() const { return m_bInitialized; }
|
||||
// Frame counter; Tick() increments it on every call and Init() resets it to 0.
inline uint GetCurFrame() const { return m_nCurFrame; }
|
||||
|
||||
void Init()
|
||||
{
|
||||
Deinit();
|
||||
|
||||
memset( m_FreeQueryPool, 0, sizeof( m_FreeQueryPool ) ) ;
|
||||
memset( m_QueryZoneStack, 0, sizeof( m_QueryZoneStack ) );
|
||||
memset( m_OutstandingQueryZones, 0, sizeof( m_OutstandingQueryZones ) );
|
||||
memset( m_FinishedZones, 0, sizeof( m_FinishedZones ) );
|
||||
|
||||
InitRdtsc();
|
||||
|
||||
m_nCurFrame = 0;
|
||||
|
||||
gGL->glGenQueries( cFreeQueryPoolSize, m_FreeQueryPool );
|
||||
m_nFreeQueryPoolSize = cFreeQueryPoolSize;
|
||||
|
||||
m_nOutstandingQueriesHead = 0;
|
||||
m_nOutstandingQueriesTail = 0;
|
||||
m_nNumOutstandingQueryZones = 0;
|
||||
|
||||
m_nQueryZoneStackSize = 0;
|
||||
m_nNumFinishedZones = 0;
|
||||
|
||||
m_bInitialized = true;
|
||||
|
||||
m_nTotalSpanWorkCount = 0;
|
||||
|
||||
Calibrate();
|
||||
}
|
||||
|
||||
void Calibrate()
|
||||
{
|
||||
if ( !m_bInitialized )
|
||||
return;
|
||||
|
||||
PipelineFlush();
|
||||
|
||||
m_flGPUToS = 1.0 / 1000000000.0;
|
||||
|
||||
//0.99997541250006794;
|
||||
//0.99997530000006662;
|
||||
// Correction factor to prevent excessive drift, only calibrated on my system, we need a better way of computing/recording this.
|
||||
double flGPURatio = 0.99997425000007034000;
|
||||
|
||||
const uint NT = 1;
|
||||
for ( uint nTrial = 0; nTrial < NT; nTrial++ )
|
||||
{
|
||||
const uint R = 16;
|
||||
double flClockOffsetsInS[R];
|
||||
for ( uint q = 0; q < R; q++)
|
||||
{
|
||||
uint64 nBestTotalCPUTimestamp = (uint64)-1;
|
||||
uint64 nBestCPUTimestamp = 0;
|
||||
GLuint64 nBestGPUTimestamp = 0;
|
||||
|
||||
for ( uint i = 0; i < 10; i++)
|
||||
{
|
||||
const uint64 nStartCPUTimestamp = Plat_Rdtsc();
|
||||
|
||||
gGL->glQueryCounter( m_FreeQueryPool[0], GL_TIMESTAMP);
|
||||
PipelineFlush();
|
||||
|
||||
const uint64 nEndCPUTimestamp = Plat_Rdtsc();
|
||||
|
||||
GLint nAvailable;
|
||||
do
|
||||
{
|
||||
gGL->glGetQueryObjectiv( m_FreeQueryPool[0], GL_QUERY_RESULT_AVAILABLE, &nAvailable );
|
||||
} while ( !nAvailable );
|
||||
|
||||
GLuint64 nGPUTimestamp;
|
||||
gGL->glGetQueryObjectui64v( m_FreeQueryPool[0], GL_QUERY_RESULT, &nGPUTimestamp );
|
||||
|
||||
const uint64 nTotalCPUTimestamp = nEndCPUTimestamp - nStartCPUTimestamp;
|
||||
if ( nTotalCPUTimestamp < nBestTotalCPUTimestamp )
|
||||
{
|
||||
nBestTotalCPUTimestamp = nTotalCPUTimestamp;
|
||||
nBestCPUTimestamp = nStartCPUTimestamp;
|
||||
nBestGPUTimestamp = nGPUTimestamp;
|
||||
}
|
||||
}
|
||||
|
||||
double flCPUTimestampTimeInSeconds = nBestCPUTimestamp * m_flRdtscToS;
|
||||
double flGPUTimestampTimeInSeconds = nBestGPUTimestamp * m_flGPUToS * flGPURatio;
|
||||
|
||||
flClockOffsetsInS[q] = flCPUTimestampTimeInSeconds - flGPUTimestampTimeInSeconds;
|
||||
|
||||
ThreadSleep(100);
|
||||
|
||||
DbgPrintf("%f %f %1.20f\n", flCPUTimestampTimeInSeconds, flGPUTimestampTimeInSeconds, flClockOffsetsInS[q] );
|
||||
}
|
||||
|
||||
m_flGPUToCPUOffsetInS = 0.0f;
|
||||
for ( uint i = 0; i < R; i++ )
|
||||
m_flGPUToCPUOffsetInS += flClockOffsetsInS[i];
|
||||
m_flGPUToCPUOffsetInS /= R;
|
||||
|
||||
if ( NT > 1 )
|
||||
{
|
||||
DbgPrintf("------- Ratio: %2.20f\n", flGPURatio );
|
||||
|
||||
double flDelta = flClockOffsetsInS[0] - flClockOffsetsInS[R - 1];
|
||||
|
||||
DbgPrintf("------- %1.20f\n", flDelta );
|
||||
|
||||
#if 1
|
||||
if ( flDelta < 0.0000005f )
|
||||
{
|
||||
flGPURatio += .000000125f;
|
||||
}
|
||||
else if ( flDelta > 0.0000005f )
|
||||
{
|
||||
flGPURatio -= .000000125f;
|
||||
}
|
||||
#else
|
||||
if ( flDelta < 0.0000005f )
|
||||
{
|
||||
flGPURatio += .0000000125f;
|
||||
}
|
||||
else if ( flDelta > 0.0000005f )
|
||||
{
|
||||
flGPURatio -= .0000000125f;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
m_flGPUToS *= flGPURatio;
|
||||
|
||||
#if 0
|
||||
// dump drift over time to debugger output
|
||||
double flLatency = 0;
|
||||
for ( ; ; )
|
||||
{
|
||||
// test
|
||||
const uint64 nStartCPUTime = Plat_Rdtsc();
|
||||
|
||||
gGL->glQueryCounter( m_FreeQueryPool[0], GL_TIMESTAMP);
|
||||
|
||||
PipelineFlush();
|
||||
|
||||
GLint nAvailable;
|
||||
do
|
||||
{
|
||||
gGL->glGetQueryObjectiv( m_FreeQueryPool[0], GL_QUERY_RESULT_AVAILABLE, &nAvailable );
|
||||
} while ( !nAvailable );
|
||||
|
||||
GLuint64 nGPUTime;
|
||||
gGL->glGetQueryObjectui64v( m_FreeQueryPool[0], GL_QUERY_RESULT, &nGPUTime );
|
||||
|
||||
double flStartGPUTime = ( ( nGPUTime * m_flGPUToS ) + m_flGPUToCPUOffsetInS );
|
||||
|
||||
flLatency = flStartGPUTime - nStartCPUTime * m_flRdtscToS;
|
||||
DbgPrintf("%f\n", flLatency );
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void Deinit()
|
||||
{
|
||||
if ( !m_bInitialized )
|
||||
return;
|
||||
|
||||
if ( m_nFreeQueryPoolSize )
|
||||
{
|
||||
gGL->glDeleteQueries( m_nFreeQueryPoolSize, m_FreeQueryPool );
|
||||
}
|
||||
m_nFreeQueryPoolSize = 0;
|
||||
|
||||
for ( uint i = 0; i < m_nNumOutstandingQueryZones; i++ )
|
||||
{
|
||||
QueryZone_t &query = m_OutstandingQueryZones[ ( m_nOutstandingQueriesHead + i ) % cMaxQueryZones ];
|
||||
if ( query.m_nBeginQuery )
|
||||
{
|
||||
gGL->glDeleteQueries( 1, &query.m_nBeginQuery );
|
||||
}
|
||||
if ( query.m_nEndQuery )
|
||||
{
|
||||
gGL->glDeleteQueries( 1, &query.m_nEndQuery );
|
||||
}
|
||||
}
|
||||
m_nOutstandingQueriesHead = 0;
|
||||
m_nOutstandingQueriesTail = 0;
|
||||
m_nNumOutstandingQueryZones = 0;
|
||||
|
||||
for ( uint i = 0; i < m_nQueryZoneStackSize; i++ )
|
||||
{
|
||||
QueryZone_t &query = m_QueryZoneStack[i];
|
||||
if ( query.m_nBeginQuery )
|
||||
{
|
||||
gGL->glDeleteQueries( 1, &query.m_nBeginQuery );
|
||||
}
|
||||
if ( query.m_nEndQuery )
|
||||
{
|
||||
gGL->glDeleteQueries( 1, &query.m_nEndQuery );
|
||||
}
|
||||
}
|
||||
m_nQueryZoneStackSize = 0;
|
||||
|
||||
m_flGPUToCPUOffsetInS = 0;
|
||||
m_flGPUToS = 0;
|
||||
m_flRdtscToS = 0;
|
||||
m_flSToRdtsc = 0;
|
||||
|
||||
m_bInitialized = false;
|
||||
}
|
||||
|
||||
// pName is assumed to be a telemetry dynamic string!
|
||||
void BeginZone( const char *pName )
|
||||
{
|
||||
if ( !m_bInitialized )
|
||||
return;
|
||||
|
||||
if ( m_nQueryZoneStackSize >= cMaxQueryZoneStackSize )
|
||||
{
|
||||
Panic( "Increase cMaxQueryZoneStackSize!" );
|
||||
}
|
||||
|
||||
QueryZone_t &zone = m_QueryZoneStack[m_nQueryZoneStackSize];
|
||||
|
||||
zone.m_pName = pName;
|
||||
|
||||
zone.m_nBeginQuery = AllocQueryHandle();
|
||||
zone.m_nEndQuery = 0;
|
||||
zone.m_nStackLevel = m_nQueryZoneStackSize;
|
||||
|
||||
zone.m_nTotalGPUWorkCount = g_nTotalDrawsOrClears;
|
||||
#if GL_TELEMETRY_GPU_ZONES
|
||||
zone.m_nTotalGPUWorkCount += g_TelemetryGPUStats.GetTotal();
|
||||
#endif
|
||||
|
||||
gGL->glQueryCounter( m_QueryZoneStack[m_nQueryZoneStackSize].m_nBeginQuery, GL_TIMESTAMP );
|
||||
|
||||
m_nQueryZoneStackSize++;
|
||||
}
|
||||
|
||||
void EndZone()
|
||||
{
|
||||
if ( !m_bInitialized )
|
||||
return;
|
||||
|
||||
if ( ( !m_nQueryZoneStackSize ) || ( m_nNumOutstandingQueryZones == cMaxQueryZones ) )
|
||||
{
|
||||
Panic( "Query zone error!" );
|
||||
}
|
||||
|
||||
m_nQueryZoneStackSize--;
|
||||
|
||||
uint nCurGPUWorkCount = g_nTotalDrawsOrClears;
|
||||
#if GL_TELEMETRY_GPU_ZONES
|
||||
nCurGPUWorkCount += g_TelemetryGPUStats.GetTotal();
|
||||
#endif
|
||||
|
||||
uint nTotalDraws = nCurGPUWorkCount - m_QueryZoneStack[m_nQueryZoneStackSize].m_nTotalGPUWorkCount;
|
||||
|
||||
m_QueryZoneStack[m_nQueryZoneStackSize].m_nEndQuery = AllocQueryHandle();
|
||||
gGL->glQueryCounter( m_QueryZoneStack[m_nQueryZoneStackSize].m_nEndQuery, GL_TIMESTAMP );
|
||||
m_QueryZoneStack[m_nQueryZoneStackSize].m_nTotalGPUWorkCount = nTotalDraws;
|
||||
|
||||
m_OutstandingQueryZones[m_nOutstandingQueriesHead] = m_QueryZoneStack[m_nQueryZoneStackSize];
|
||||
m_nOutstandingQueriesHead = ( m_nOutstandingQueriesHead + 1 ) % cMaxQueryZones;
|
||||
m_nNumOutstandingQueryZones++;
|
||||
|
||||
COMPILE_TIME_ASSERT( ( int )cMaxQueryZones > ( int )cMaxQueryZoneStackSize );
|
||||
if ( m_nNumOutstandingQueryZones >= ( cMaxQueryZones - cMaxQueryZoneStackSize ) )
|
||||
{
|
||||
tmMessage( TELEMETRY_LEVEL2, TMMF_ICON_NOTE | TMMF_SEVERITY_WARNING, "CGPUTimestampManager::EndZone: Too many outstanding query zones - forcing a pipeline flush! This is probably expensive." );
|
||||
|
||||
FlushOutstandingQueries( true );
|
||||
}
|
||||
|
||||
if ( gl_telemetry_gpu_pipeline_flushing.GetBool() )
|
||||
{
|
||||
PipelineFlush();
|
||||
}
|
||||
}
|
||||
|
||||
void Tick()
|
||||
{
|
||||
m_nCurFrame++;
|
||||
|
||||
if ( !m_bInitialized )
|
||||
return;
|
||||
|
||||
if ( m_nQueryZoneStackSize > 0 )
|
||||
{
|
||||
Panic( "Zone stack is not empty!" );
|
||||
}
|
||||
|
||||
FlushOutstandingQueries( false );
|
||||
|
||||
tmMessage( TELEMETRY_LEVEL2, 0, "Total PIX timespan GPU work count: %u", m_nTotalSpanWorkCount );
|
||||
|
||||
m_nTotalSpanWorkCount = 0;
|
||||
}
|
||||
|
||||
void FlushOutstandingQueries( bool bForce )
|
||||
{
|
||||
tmZone( TELEMETRY_LEVEL2, 0, "FlushOutstandingQueries: %u", m_nNumOutstandingQueryZones );
|
||||
|
||||
if ( bForce )
|
||||
{
|
||||
PipelineFlush();
|
||||
}
|
||||
|
||||
while ( m_nNumOutstandingQueryZones )
|
||||
{
|
||||
QueryZone_t &zone = m_OutstandingQueryZones[m_nOutstandingQueriesTail];
|
||||
|
||||
GLint nEndAvailable = 0;
|
||||
do
|
||||
{
|
||||
gGL->glGetQueryObjectiv( zone.m_nEndQuery, GL_QUERY_RESULT_AVAILABLE, &nEndAvailable );
|
||||
|
||||
} while ( ( bForce ) && ( nEndAvailable == 0 ) );
|
||||
|
||||
if ( !nEndAvailable )
|
||||
{
|
||||
if ( bForce )
|
||||
{
|
||||
Panic( "Query results not available after a full pipeline flush!" );
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
GLuint64 nBeginGPUTime, nEndGPUTime;
|
||||
gGL->glGetQueryObjectui64v( zone.m_nBeginQuery, GL_QUERY_RESULT, &nBeginGPUTime );
|
||||
gGL->glGetQueryObjectui64v( zone.m_nEndQuery, GL_QUERY_RESULT, &nEndGPUTime );
|
||||
|
||||
ReleaseQueryHandle( zone.m_nBeginQuery );
|
||||
zone.m_nBeginQuery = 0;
|
||||
|
||||
ReleaseQueryHandle( zone.m_nEndQuery );
|
||||
zone.m_nEndQuery = 0;
|
||||
|
||||
if ( m_nNumFinishedZones >= cMaxQueryZones )
|
||||
{
|
||||
Panic( "Too many finished zones!" );
|
||||
}
|
||||
|
||||
FinishedQueryZone_t &finishedZone = m_FinishedZones[m_nNumFinishedZones];
|
||||
finishedZone.m_pName = zone.m_pName;
|
||||
finishedZone.m_nBeginGPUTime = nBeginGPUTime;
|
||||
finishedZone.m_nEndGPUTime = nEndGPUTime;
|
||||
finishedZone.m_nStackLevel = zone.m_nStackLevel;
|
||||
finishedZone.m_nTotalGPUWorkCount = zone.m_nTotalGPUWorkCount;
|
||||
m_nNumFinishedZones++;
|
||||
|
||||
if ( !zone.m_nStackLevel )
|
||||
{
|
||||
std::sort( m_FinishedZones, m_FinishedZones + m_nNumFinishedZones );
|
||||
FlushFinishedZones();
|
||||
m_nNumFinishedZones = 0;
|
||||
}
|
||||
|
||||
m_nOutstandingQueriesTail = ( m_nOutstandingQueriesTail + 1 ) % cMaxQueryZones;
|
||||
m_nNumOutstandingQueryZones--;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
bool m_bInitialized;
|
||||
uint m_nCurFrame;
|
||||
|
||||
double m_flGPUToCPUOffsetInS;
|
||||
double m_flGPUToS;
|
||||
double m_flRdtscToS;
|
||||
double m_flSToRdtsc;
|
||||
|
||||
enum { cMaxQueryZones = 4096, cFreeQueryPoolSize = cMaxQueryZones * 2 };
|
||||
GLuint m_FreeQueryPool[cFreeQueryPoolSize ];
|
||||
uint m_nFreeQueryPoolSize;
|
||||
|
||||
// Takes the most recently released query handle out of the free pool.
// Panics when the pool is empty.
GLuint AllocQueryHandle()
{
	if ( m_nFreeQueryPoolSize == 0 )
	{
		Panic( "Out of query handles!");
	}

	m_nFreeQueryPoolSize--;
	return m_FreeQueryPool[m_nFreeQueryPoolSize];
}
|
||||
|
||||
// Returns a query handle to the top of the free pool.
// Panics when the pool is already full.
void ReleaseQueryHandle( GLuint nHandle )
{
	if ( m_nFreeQueryPoolSize >= cFreeQueryPoolSize )
	{
		Panic( "Query handle error!" );
	}

	m_FreeQueryPool[m_nFreeQueryPoolSize] = nHandle;
	m_nFreeQueryPoolSize++;
}
|
||||
|
||||
// One GPU timing zone, either still open on the zone stack or waiting in the outstanding ring.
struct QueryZone_t
|
||||
{
|
||||
// Zone label as passed to BeginZone(); the pointer is stored, not copied.
const char *m_pName;
|
||||
// GL query handle for the begin timestamp (0 when none is held).
GLuint m_nBeginQuery;
|
||||
// GL query handle for the end timestamp; 0 until EndZone() allocates it.
GLuint m_nEndQuery;
|
||||
// Depth of the zone on the zone stack when it was begun (0 = outermost).
uint m_nStackLevel;
|
||||
// BeginZone() stores the work counter at entry; EndZone() overwrites it with the work done inside the zone.
uint m_nTotalGPUWorkCount;
|
||||
};
|
||||
|
||||
// Ring buffer of zones whose end timestamp has been issued but whose results have not been read back yet.
QueryZone_t m_OutstandingQueryZones[cMaxQueryZones];
|
||||
uint m_nOutstandingQueriesHead; // write index: EndZone() stores the next closed zone here (newest)
|
||||
uint m_nOutstandingQueriesTail; // read index: FlushOutstandingQueries() consumes from here (oldest)
|
||||
uint m_nNumOutstandingQueryZones; // number of live entries in the ring
|
||||
|
||||
enum { cMaxQueryZoneStackSize = 256 };
|
||||
QueryZone_t m_QueryZoneStack[cMaxQueryZoneStackSize];
|
||||
uint m_nQueryZoneStackSize;
|
||||
|
||||
struct FinishedQueryZone_t
|
||||
{
|
||||
const char *m_pName;
|
||||
GLuint64 m_nBeginGPUTime;
|
||||
GLuint64 m_nEndGPUTime;
|
||||
uint m_nStackLevel;
|
||||
uint m_nTotalGPUWorkCount;
|
||||
|
||||
inline bool operator< ( const FinishedQueryZone_t &rhs ) const
|
||||
{
|
||||
if ( m_nBeginGPUTime == rhs.m_nBeginGPUTime)
|
||||
return m_nStackLevel < rhs.m_nStackLevel;
|
||||
|
||||
return m_nBeginGPUTime < rhs.m_nBeginGPUTime;
|
||||
}
|
||||
};
|
||||
|
||||
FinishedQueryZone_t m_FinishedZones[cMaxQueryZones];
|
||||
uint m_nNumFinishedZones;
|
||||
|
||||
uint m_nTotalSpanWorkCount;
|
||||
|
||||
void InitRdtsc()
|
||||
{
|
||||
m_flRdtscToS = 0.0f;
|
||||
m_flSToRdtsc = 0.0f;
|
||||
|
||||
for ( uint i = 0; i < 10; i++ )
|
||||
{
|
||||
uint64 t0 = Plat_Rdtsc();
|
||||
double d0 = Plat_FloatTime();
|
||||
|
||||
ThreadSleep( 250 );
|
||||
|
||||
uint64 t1 = Plat_Rdtsc();
|
||||
double d1 = Plat_FloatTime();
|
||||
|
||||
double flRdtscToS = ( d1 - d0 ) / ( t1 - t0 );
|
||||
double flSToRdtsc = ( t1 - t0 ) / ( d1 - d0 );
|
||||
if ( flSToRdtsc > m_flSToRdtsc )
|
||||
{
|
||||
m_flRdtscToS = flRdtscToS;
|
||||
m_flSToRdtsc = flSToRdtsc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Blocks until the GPU has completed all previously submitted commands by waiting on a
// fence sync object. Compiles to an empty function when HAVE_GL_ARB_SYNC is not defined.
void PipelineFlush()
{
#ifdef HAVE_GL_ARB_SYNC
	GLsync nSyncObj = gGL->glFenceSync( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 );
	if ( !nSyncObj )
		return;

	// The timeout argument is 300000000000 (the GL API takes it in nanoseconds).
	gGL->glClientWaitSync( nSyncObj, GL_SYNC_FLUSH_COMMANDS_BIT, 300000000000ULL );
	gGL->glDeleteSync( nSyncObj );
#endif
}
|
||||
|
||||
// Emits one telemetry time span [nStartGPUTime, nEndGPUTime] labelled "<name> [C:<work count>]".
// A null pName is reported as an empty name.
inline void NewTimeSpan( uint64 nStartGPUTime, uint64 nEndGPUTime, const char *pName, uint nTotalDraws )
{
	const char *pLabel = pName ? pName : "";

	// apparently we must use level0 for timespans?
	tmBeginTimeSpanAt( TELEMETRY_LEVEL0, 1, 0, nStartGPUTime, "%s [C:%u]", pLabel, nTotalDraws );
	tmEndTimeSpanAt( TELEMETRY_LEVEL0, 1, 0, nEndGPUTime, "%s [C:%u]", pLabel, nTotalDraws );
}
|
||||
|
||||
void FlushFinishedZones()
|
||||
{
|
||||
for ( uint i = 0; i < m_nNumFinishedZones; i++ )
|
||||
{
|
||||
FinishedQueryZone_t &zone = m_FinishedZones[i];
|
||||
if ( !zone.m_nTotalGPUWorkCount )
|
||||
continue;
|
||||
|
||||
bool bEmit = false;
|
||||
if ( i == ( m_nNumFinishedZones - 1 ) )
|
||||
bEmit = true;
|
||||
else
|
||||
{
|
||||
FinishedQueryZone_t &nextZone = m_FinishedZones[i + 1];
|
||||
bEmit = zone.m_nEndGPUTime <= nextZone.m_nBeginGPUTime;
|
||||
}
|
||||
|
||||
if ( bEmit )
|
||||
{
|
||||
uint64 nStartGPUTime = ( ( zone.m_nBeginGPUTime * m_flGPUToS ) + m_flGPUToCPUOffsetInS ) * m_flSToRdtsc;
|
||||
uint64 nEndGPUTime = ( ( zone.m_nEndGPUTime * m_flGPUToS ) + m_flGPUToCPUOffsetInS ) * m_flSToRdtsc;
|
||||
|
||||
NewTimeSpan( nStartGPUTime, nEndGPUTime, zone.m_pName, zone.m_nTotalGPUWorkCount );
|
||||
|
||||
m_nTotalSpanWorkCount += zone.m_nTotalGPUWorkCount;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reports an internal error: invokes DXABSTRACT_BREAK_ON_ERROR() and then hands pMsg to Error().
// NOTE(review): callers carry on as if this never returns; presumably Error() is fatal - confirm.
void Panic( const char *pMsg )
|
||||
{
|
||||
DXABSTRACT_BREAK_ON_ERROR();
|
||||
Error( "%s", pMsg );
|
||||
}
|
||||
|
||||
static void DbgPrintf( const char *pFmt, ... )
|
||||
{
|
||||
va_list vargs;
|
||||
va_start( vargs, pFmt );
|
||||
char buf[1024];
|
||||
V_vsnprintf( buf, sizeof( buf ), pFmt, vargs );
|
||||
|
||||
#ifdef WIN32
|
||||
OutputDebugStringA( buf );
|
||||
#else
|
||||
printf( "%s", buf );
|
||||
#endif
|
||||
|
||||
va_end( vargs );
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static CGPUTimestampManager g_GPUTimestampManager;
|
||||
|
||||
// Initializes the file-level GPU timestamp manager instance (forwards to its Init()).
void GLMGPUTimestampManagerInit()
|
||||
{
|
||||
g_GPUTimestampManager.Init();
|
||||
}
|
||||
|
||||
// Shuts down the file-level GPU timestamp manager instance (forwards to its Deinit()).
void GLMGPUTimestampManagerDeinit()
|
||||
{
|
||||
g_GPUTimestampManager.Deinit();
|
||||
}
|
||||
|
||||
ConVar gl_telemetry_gpu( "gl_telemetry_gpu", "0" );
|
||||
static bool g_bPrevTelemetryGPU;
|
||||
|
||||
void GLMGPUTimestampManagerTick()
|
||||
{
|
||||
if ( g_bPrevTelemetryGPU != gl_telemetry_gpu.GetBool() )
|
||||
{
|
||||
if ( !gl_telemetry_gpu.GetBool() )
|
||||
g_GPUTimestampManager.Deinit();
|
||||
else
|
||||
{
|
||||
#if !PIX_ENABLE || !GL_TELEMETRY_GPU_ZONES
|
||||
ConMsg( "Must define PIX_ENABLE and GL_TELEMETRY_GPU_ZONES to use this feature" );
|
||||
#else
|
||||
g_GPUTimestampManager.Init();
|
||||
#endif
|
||||
}
|
||||
|
||||
g_bPrevTelemetryGPU = gl_telemetry_gpu.GetBool();
|
||||
}
|
||||
|
||||
g_GPUTimestampManager.Tick();
|
||||
}
|
||||
|
||||
#endif // !OSX
|
||||
|
||||
static uint g_nPIXEventIndex;
|
||||
|
||||
void GLMBeginPIXEvent( const char *str )
|
||||
{
|
||||
#ifndef OSX
|
||||
char szName[1024];
|
||||
V_snprintf( szName, sizeof( szName ), "[ID:%u FR:%u] %s", g_nPIXEventIndex, g_GPUTimestampManager.GetCurFrame(), str );
|
||||
const char *p = tmDynamicString( TELEMETRY_LEVEL2, szName ); //p can be null if tm is getting shut down
|
||||
tmEnter( TELEMETRY_LEVEL2, TMZF_NONE, "PIX %s", p ? p : "" );
|
||||
|
||||
g_nPIXEventIndex++;
|
||||
|
||||
g_GPUTimestampManager.BeginZone( p );
|
||||
#endif // !OSX
|
||||
V_strncpy( sg_pPIXName, str, 128 );
|
||||
|
||||
#if defined( OSX ) && defined( CGLPROFILER_ENABLE )
|
||||
|
@ -3748,10 +3137,6 @@ void GLMBeginPIXEvent( const char *str )
|
|||
|
||||
void GLMEndPIXEvent( void )
|
||||
{
|
||||
#ifndef OSX
|
||||
g_GPUTimestampManager.EndZone();
|
||||
#endif
|
||||
|
||||
#if defined( OSX ) && defined( CGLPROFILER_ENABLE )
|
||||
CGLSetOption( kCGLGOComment, (GLint)sg_pPIXName );
|
||||
#endif
|
||||
|
|
624
togles/linuxwin/stb_dxt_104.h
Normal file
624
togles/linuxwin/stb_dxt_104.h
Normal file
|
@ -0,0 +1,624 @@
|
|||
// stb_dxt.h - v1.04 - DXT1/DXT5 compressor - public domain
|
||||
// original by fabian "ryg" giesen - ported to C by stb
|
||||
// use '#define STB_DXT_IMPLEMENTATION' before including to create the implementation
|
||||
//
|
||||
// USAGE:
|
||||
// call stb_compress_dxt_block() for every block (you must pad)
|
||||
// source should be a 4x4 block of RGBA data in row-major order;
|
||||
// A is ignored if you specify alpha=0; you can turn on dithering
|
||||
// and "high quality" using mode.
|
||||
//
|
||||
// version history:
|
||||
// v1.04 - (ryg) default to no rounding bias for lerped colors (as per S3TC/DX10 spec);
|
||||
// single color match fix (allow for inexact color interpolation);
|
||||
// optimal DXT5 index finder; "high quality" mode that runs multiple refinement steps.
|
||||
// v1.03 - (stb) endianness support
|
||||
// v1.02 - (stb) fix alpha encoding bug
|
||||
// v1.01 - (stb) fix bug converting to RGB that messed up quality, thanks ryg & cbloom
|
||||
// v1.00 - (stb) first release
|
||||
|
||||
#ifndef STB_INCLUDE_STB_DXT_H
|
||||
#define STB_INCLUDE_STB_DXT_H
|
||||
|
||||
// compression mode (bitflags)
|
||||
#define STB_DXT_NORMAL 0
|
||||
#define STB_DXT_DITHER 1 // use dithering. dubious win. never use for normal maps and the like!
|
||||
#define STB_DXT_HIGHQUAL 2 // high quality mode, does two refinement steps instead of 1. ~30-40% slower.
|
||||
|
||||
void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode);
|
||||
#define STB_COMPRESS_DXT_BLOCK
|
||||
|
||||
#ifdef STB_DXT_IMPLEMENTATION
|
||||
|
||||
// configuration options for DXT encoder. set them in the project/makefile or just define
|
||||
// them at the top.
|
||||
|
||||
// STB_DXT_USE_ROUNDING_BIAS
|
||||
// use a rounding bias during color interpolation. this is closer to what "ideal"
|
||||
// interpolation would do but doesn't match the S3TC/DX10 spec. old versions (pre-1.03)
|
||||
// implicitly had this turned on.
|
||||
//
|
||||
// in case you're targeting a specific type of hardware (e.g. console programmers):
|
||||
// NVidia and Intel GPUs (as of 2010) as well as DX9 ref use DXT decoders that are closer
|
||||
// to STB_DXT_USE_ROUNDING_BIAS. AMD/ATI, S3 and DX10 ref are closer to rounding with no bias.
|
||||
// you also see "(a*5 + b*3) / 8" on some old GPU designs.
|
||||
// #define STB_DXT_USE_ROUNDING_BIAS
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string.h> // memset
|
||||
|
||||
static unsigned char stb__Expand5[32];
|
||||
static unsigned char stb__Expand6[64];
|
||||
static unsigned char stb__OMatch5[256][2];
|
||||
static unsigned char stb__OMatch6[256][2];
|
||||
static unsigned char stb__QuantRBTab[256+16];
|
||||
static unsigned char stb__QuantGTab[256+16];
|
||||
|
||||
// Scaled 8-bit multiply: returns ((t + (t >> 8)) >> 8) with t = a*b + 128, the usual
// shift-only stand-in for a rounded a*b/255.
static int stb__Mul8Bit(int a, int b)
{
   const int biased = a*b + 128;
   const int folded = biased + (biased >> 8);
   return folded >> 8;
}
|
||||
|
||||
static void stb__From16Bit(unsigned char *out, unsigned short v)
|
||||
{
|
||||
int rv = (v & 0xf800) >> 11;
|
||||
int gv = (v & 0x07e0) >> 5;
|
||||
int bv = (v & 0x001f) >> 0;
|
||||
|
||||
out[0] = stb__Expand5[rv];
|
||||
out[1] = stb__Expand6[gv];
|
||||
out[2] = stb__Expand5[bv];
|
||||
out[3] = 0;
|
||||
}
|
||||
|
||||
// Packs 8-bit r, g, b into a 5:6:5 value, scaling each channel with stb__Mul8Bit
// (red and blue by 31, green by 63).
static unsigned short stb__As16Bit(int r, int g, int b)
{
   const int r5 = stb__Mul8Bit(r,31);
   const int g6 = stb__Mul8Bit(g,63);
   const int b5 = stb__Mul8Bit(b,31);
   return (r5 << 11) + (g6 << 5) + b5;
}
|
||||
|
||||
// Linear interpolation one third of the way from a to b, i.e. (2a + b) / 3.
// STB_DXT_USE_ROUNDING_BIAS selects the biased variant built on stb__Mul8Bit; the default
// is the unbiased truncating division.
static int stb__Lerp13(int a, int b)
{
#ifndef STB_DXT_USE_ROUNDING_BIAS
   // without rounding bias
   // ("/ 3" can be swapped for "* 0xaaab) >> 17" if the division ever shows up in a profile)
   const int weighted = a + a + b;
   return weighted / 3;
#else
   // with rounding bias
   return a + stb__Mul8Bit(b-a, 0x55);
#endif
}
|
||||
|
||||
// Applies stb__Lerp13 to the first three bytes (R, G, B) of p1 and p2 and writes the
// results to out[0..2]. out[3] is left untouched.
static void stb__Lerp13RGB(unsigned char *out, unsigned char *p1, unsigned char *p2)
{
   int ch;
   for (ch = 0; ch < 3; ++ch)
      out[ch] = stb__Lerp13(p1[ch], p2[ch]);
}
|
||||
|
||||
/****************************************************************************/
|
||||
|
||||
// Builds the table used to reproduce constant colors as accurately as possible: for every
// 8-bit target value, Table[2*v] / Table[2*v+1] receive the (max, min) endpoint indices
// whose 1/3 interpolation of expand[] values has the smallest error against v.
// expand maps endpoint indices to 8-bit values; size is the number of endpoint indices.
static void stb__PrepareOptTable(unsigned char *Table,const unsigned char *expand,int size)
{
   int target;
   for (target = 0; target < 256; ++target) {
      int lowestErr = 256;
      int lo, hi;
      for (lo = 0; lo < size; ++lo) {
         const int loVal = expand[lo];
         for (hi = 0; hi < size; ++hi) {
            const int hiVal = expand[hi];

            // Interpolation error, plus a penalty term: the DX10 spec only requires the
            // interpolation to be within 3% of the "correct" result and does not say the
            // error is unbiased, so endpoint spread is charged at 3% - better safe than sorry.
            const int err = abs(stb__Lerp13(hiVal, loVal) - target)
                          + abs(hiVal - loVal) * 3 / 100;

            // Strictly-better only, so the first best pair found is kept on ties.
            if (err >= lowestErr)
               continue;

            Table[target*2+0] = hi;
            Table[target*2+1] = lo;
            lowestErr = err;
         }
      }
   }
}
|
||||
|
||||
// Fills color[0..15] with the four 4-byte palette entries derived from the packed
// endpoints c0 and c1: entries 0 and 1 are the expanded endpoints, entries 2 and 3 are
// the two 1/3 interpolations between them.
static void stb__EvalColors(unsigned char *color,unsigned short c0,unsigned short c1)
|
||||
{
|
||||
// entry 0 = c0, entry 1 = c1 (each expanded from 5:6:5)
stb__From16Bit(color+ 0, c0);
|
||||
stb__From16Bit(color+ 4, c1);
|
||||
// entry 2 = one third of the way from entry 0 to entry 1
stb__Lerp13RGB(color+ 8, color+0, color+4);
|
||||
// entry 3 = one third of the way from entry 1 to entry 0
stb__Lerp13RGB(color+12, color+4, color+0);
|
||||
}
|
||||
|
||||
// Block dithering function. Simply dithers a block to 565 RGB.
|
||||
// (Floyd-Steinberg)
|
||||
static void stb__DitherBlock(unsigned char *dest, unsigned char *block)
|
||||
{
|
||||
int err[8],*ep1 = err,*ep2 = err+4, *et;
|
||||
int ch,y;
|
||||
|
||||
// process channels seperately
|
||||
for (ch=0; ch<3; ++ch) {
|
||||
unsigned char *bp = block+ch, *dp = dest+ch;
|
||||
unsigned char *quant = (ch == 1) ? stb__QuantGTab+8 : stb__QuantRBTab+8;
|
||||
memset(err, 0, sizeof(err));
|
||||
for(y=0; y<4; ++y) {
|
||||
dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)];
|
||||
ep1[0] = bp[ 0] - dp[ 0];
|
||||
dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)];
|
||||
ep1[1] = bp[ 4] - dp[ 4];
|
||||
dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)];
|
||||
ep1[2] = bp[ 8] - dp[ 8];
|
||||
dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)];
|
||||
ep1[3] = bp[12] - dp[12];
|
||||
bp += 16;
|
||||
dp += 16;
|
||||
et = ep1, ep1 = ep2, ep2 = et; // swap
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The color matching function.
// block : 16 RGBA pixels (4 bytes each, byte 3 ignored), row-major.
// color : the 4 palette entries (4 bytes each) as laid out by stb__EvalColors.
// dither: non-zero to pick indices with Floyd-Steinberg error diffusion.
// Returns the 32-bit index mask: 2 bits per pixel, pixel 0 in the lowest bits.
static unsigned int stb__MatchColorsBlock(unsigned char *block, unsigned char *color,int dither)
{
   unsigned int mask = 0;
   int dirr = color[0*4+0] - color[1*4+0];
   int dirg = color[0*4+1] - color[1*4+1];
   int dirb = color[0*4+2] - color[1*4+2];
   int dots[16];
   int stops[4];
   int i;
   int c0Point, halfPoint, c3Point;

   for(i=0;i<16;i++)
      dots[i] = block[i*4+0]*dirr + block[i*4+1]*dirg + block[i*4+2]*dirb;

   for(i=0;i<4;i++)
      stops[i] = color[i*4+0]*dirr + color[i*4+1]*dirg + color[i*4+2]*dirb;

   // think of the colors as arranged on a line; project point onto that line, then choose
   // next color out of available ones. we compute the crossover points for "best color in top
   // half"/"best in bottom half" and then the same inside that subinterval.
   //
   // relying on this 1d approximation isn't always optimal in terms of euclidean distance,
   // but it's very close and a lot faster.
   // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html

   c0Point   = (stops[1] + stops[3]) >> 1;
   halfPoint = (stops[3] + stops[2]) >> 1;
   c3Point   = (stops[2] + stops[0]) >> 1;

   if(!dither) {
      // the version without dithering is straightforward
      for (i=15;i>=0;i--) {
         int dot = dots[i];
         mask <<= 2;

         if(dot < halfPoint)
            mask |= (dot < c0Point) ? 1 : 3;
         else
            mask |= (dot < c3Point) ? 2 : 0;
      }
   } else {
      // with floyd-steinberg dithering
      int err[8],*ep1 = err,*ep2 = err+4;
      int *dp = dots, y;

      // Scale by 16 with a multiply: the projections can be negative, and left-shifting
      // a negative int is undefined behavior in C.
      c0Point   *= 16;
      halfPoint *= 16;
      c3Point   *= 16;
      for(i=0;i<8;i++)
         err[i] = 0;

      for(y=0;y<4;y++)
      {
         int dot,step;
         // Row indices are collected in an unsigned value: the last row is shifted by 24,
         // which would overflow a signed int whenever the top index bit is set.
         unsigned int lmask;

         dot = dp[0]*16 + (3*ep2[1] + 5*ep2[0]);
         if(dot < halfPoint)
            step = (dot < c0Point) ? 1 : 3;
         else
            step = (dot < c3Point) ? 2 : 0;
         ep1[0] = dp[0] - stops[step];
         lmask = (unsigned int) step;

         dot = dp[1]*16 + (7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]);
         if(dot < halfPoint)
            step = (dot < c0Point) ? 1 : 3;
         else
            step = (dot < c3Point) ? 2 : 0;
         ep1[1] = dp[1] - stops[step];
         lmask |= (unsigned int) step << 2;

         dot = dp[2]*16 + (7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]);
         if(dot < halfPoint)
            step = (dot < c0Point) ? 1 : 3;
         else
            step = (dot < c3Point) ? 2 : 0;
         ep1[2] = dp[2] - stops[step];
         lmask |= (unsigned int) step << 4;

         dot = dp[3]*16 + (7*ep1[2] + 5*ep2[3] + ep2[2]);
         if(dot < halfPoint)
            step = (dot < c0Point) ? 1 : 3;
         else
            step = (dot < c3Point) ? 2 : 0;
         ep1[3] = dp[3] - stops[step];
         lmask |= (unsigned int) step << 6;

         dp += 4;
         mask |= lmask << (y*8);
         { int *et = ep1; ep1 = ep2; ep2 = et; } // swap
      }
   }

   return mask;
}
|
||||
|
||||
// The color optimization function. (Clever code, part 1)
// Picks the two endpoint colors for a 4x4 RGBA block (16 pixels, 4 bytes each, byte 3
// ignored): estimates the principal axis of the color distribution with a few power
// iterations on the covariance matrix, then takes the pixels with the smallest and
// largest projection onto that axis. The endpoints are returned packed as 5:6:5 through
// pmax16 and pmin16.
static void stb__OptimizeColorsBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16)
{
   int mind,maxd;
   unsigned char *minp, *maxp;
   double magn;
   int v_r,v_g,v_b;
   static const int nIterPower = 4;
   float covf[6],vfr,vfg,vfb;

   // determine color distribution
   int cov[6];
   int mu[3],min[3],max[3];
   int ch,i,iter;

   for(ch=0;ch<3;ch++)
   {
      const unsigned char *bp = ((const unsigned char *) block) + ch;
      int muv,minv,maxv;

      muv = minv = maxv = bp[0];
      for(i=4;i<64;i+=4)
      {
         muv += bp[i];
         if (bp[i] < minv) minv = bp[i];
         else if (bp[i] > maxv) maxv = bp[i];
      }

      mu[ch] = (muv + 8) >> 4;   // rounded mean of the 16 samples
      min[ch] = minv;
      max[ch] = maxv;
   }

   // determine covariance matrix (upper triangle: rr, rg, rb, gg, gb, bb)
   for (i=0;i<6;i++)
      cov[i] = 0;

   for (i=0;i<16;i++)
   {
      int r = block[i*4+0] - mu[0];
      int g = block[i*4+1] - mu[1];
      int b = block[i*4+2] - mu[2];

      cov[0] += r*r;
      cov[1] += r*g;
      cov[2] += r*b;
      cov[3] += g*g;
      cov[4] += g*b;
      cov[5] += b*b;
   }

   // convert covariance matrix to float, find principal axis via power iter
   for(i=0;i<6;i++)
      covf[i] = cov[i] / 255.0f;

   // start the iteration from the bounding-box diagonal
   vfr = (float) (max[0] - min[0]);
   vfg = (float) (max[1] - min[1]);
   vfb = (float) (max[2] - min[2]);

   for(iter=0;iter<nIterPower;iter++)
   {
      float r = vfr*covf[0] + vfg*covf[1] + vfb*covf[2];
      float g = vfr*covf[1] + vfg*covf[3] + vfb*covf[4];
      float b = vfr*covf[2] + vfg*covf[4] + vfb*covf[5];

      vfr = r;
      vfg = g;
      vfb = b;
   }

   magn = fabs(vfr);
   if (fabs(vfg) > magn) magn = fabs(vfg);
   if (fabs(vfb) > magn) magn = fabs(vfb);

   if(magn < 4.0f) { // too small, default to luminance
      v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000.
      v_g = 587;
      v_b = 114;
   } else {
      // normalize so the largest component has magnitude 512
      magn = 512.0 / magn;
      v_r = (int) (vfr * magn);
      v_g = (int) (vfg * magn);
      v_b = (int) (vfb * magn);
   }

   // Pick colors at extreme points. The search is seeded with pixel 0 so that minp/maxp
   // are always initialized; the selected pixels are the same as when starting from
   // +/-INT_MAX sentinels, because pixel 0 always beat both sentinels.
   mind = maxd = block[0]*v_r + block[1]*v_g + block[2]*v_b;
   minp = maxp = block;
   for(i=1;i<16;i++)
   {
      int dot = block[i*4+0]*v_r + block[i*4+1]*v_g + block[i*4+2]*v_b;

      if (dot < mind) {
         mind = dot;
         minp = block+i*4;
      }

      if (dot > maxd) {
         maxd = dot;
         maxp = block+i*4;
      }
   }

   *pmax16 = stb__As16Bit(maxp[0],maxp[1],maxp[2]);
   *pmin16 = stb__As16Bit(minp[0],minp[1],minp[2]);
}
|
||||
|
||||
// Truncates y to an int and clamps the result to the inclusive range [p0, p1].
static int stb__sclamp(float y, int p0, int p1)
{
   const int v = (int) y;
   if (v < p0)
      return p0;
   return (v > p1) ? p1 : v;
}
|
||||
|
||||
// The refinement function. (Clever code, part 2)
|
||||
// Tries to optimize colors to suit block contents better.
|
||||
// (By solving a least squares system via normal equations+Cramer's rule)
|
||||
static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16, unsigned int mask)
|
||||
{
|
||||
static const int w1Tab[4] = { 3,0,2,1 };
|
||||
static const int prods[4] = { 0x090000,0x000900,0x040102,0x010402 };
|
||||
// ^some magic to save a lot of multiplies in the accumulating loop...
|
||||
// (precomputed products of weights for least squares system, accumulated inside one 32-bit register)
|
||||
|
||||
float frb,fg;
|
||||
unsigned short oldMin, oldMax, min16, max16;
|
||||
int i, akku = 0, xx,xy,yy;
|
||||
int At1_r,At1_g,At1_b;
|
||||
int At2_r,At2_g,At2_b;
|
||||
unsigned int cm = mask;
|
||||
|
||||
oldMin = *pmin16;
|
||||
oldMax = *pmax16;
|
||||
|
||||
if((mask ^ (mask<<2)) < 4) // all pixels have the same index?
|
||||
{
|
||||
// yes, linear system would be singular; solve using optimal
|
||||
// single-color match on average color
|
||||
int r = 8, g = 8, b = 8;
|
||||
for (i=0;i<16;++i) {
|
||||
r += block[i*4+0];
|
||||
g += block[i*4+1];
|
||||
b += block[i*4+2];
|
||||
}
|
||||
|
||||
r >>= 4; g >>= 4; b >>= 4;
|
||||
|
||||
max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
|
||||
min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
|
||||
} else {
|
||||
At1_r = At1_g = At1_b = 0;
|
||||
At2_r = At2_g = At2_b = 0;
|
||||
for (i=0;i<16;++i,cm>>=2) {
|
||||
int step = cm&3;
|
||||
int w1 = w1Tab[step];
|
||||
int r = block[i*4+0];
|
||||
int g = block[i*4+1];
|
||||
int b = block[i*4+2];
|
||||
|
||||
akku += prods[step];
|
||||
At1_r += w1*r;
|
||||
At1_g += w1*g;
|
||||
At1_b += w1*b;
|
||||
At2_r += r;
|
||||
At2_g += g;
|
||||
At2_b += b;
|
||||
}
|
||||
|
||||
At2_r = 3*At2_r - At1_r;
|
||||
At2_g = 3*At2_g - At1_g;
|
||||
At2_b = 3*At2_b - At1_b;
|
||||
|
||||
// extract solutions and decide solvability
|
||||
xx = akku >> 16;
|
||||
yy = (akku >> 8) & 0xff;
|
||||
xy = (akku >> 0) & 0xff;
|
||||
|
||||
frb = 3.0f * 31.0f / 255.0f / (xx*yy - xy*xy);
|
||||
fg = frb * 63.0f / 31.0f;
|
||||
|
||||
// solve.
|
||||
max16 = stb__sclamp((At1_r*yy - At2_r*xy)*frb+0.5f,0,31) << 11;
|
||||
max16 |= stb__sclamp((At1_g*yy - At2_g*xy)*fg +0.5f,0,63) << 5;
|
||||
max16 |= stb__sclamp((At1_b*yy - At2_b*xy)*frb+0.5f,0,31) << 0;
|
||||
|
||||
min16 = stb__sclamp((At2_r*xx - At1_r*xy)*frb+0.5f,0,31) << 11;
|
||||
min16 |= stb__sclamp((At2_g*xx - At1_g*xy)*fg +0.5f,0,63) << 5;
|
||||
min16 |= stb__sclamp((At2_b*xx - At1_b*xy)*frb+0.5f,0,31) << 0;
|
||||
}
|
||||
|
||||
*pmin16 = min16;
|
||||
*pmax16 = max16;
|
||||
return oldMin != min16 || oldMax != max16;
|
||||
}
|
||||
|
||||
// Color block compression
|
||||
static void stb__CompressColorBlock(unsigned char *dest, unsigned char *block, int mode)
|
||||
{
|
||||
unsigned int mask;
|
||||
int i;
|
||||
int dither;
|
||||
int refinecount;
|
||||
unsigned short max16, min16;
|
||||
unsigned char dblock[16*4],color[4*4];
|
||||
|
||||
dither = mode & STB_DXT_DITHER;
|
||||
refinecount = (mode & STB_DXT_HIGHQUAL) ? 2 : 1;
|
||||
|
||||
// check if block is constant
|
||||
for (i=1;i<16;i++)
|
||||
if (((unsigned int *) block)[i] != ((unsigned int *) block)[0])
|
||||
break;
|
||||
|
||||
if(i == 16) { // constant color
|
||||
int r = block[0], g = block[1], b = block[2];
|
||||
mask = 0xaaaaaaaa;
|
||||
max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
|
||||
min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
|
||||
} else {
|
||||
// first step: compute dithered version for PCA if desired
|
||||
if(dither)
|
||||
stb__DitherBlock(dblock,block);
|
||||
|
||||
// second step: pca+map along principal axis
|
||||
stb__OptimizeColorsBlock(dither ? dblock : block,&max16,&min16);
|
||||
if (max16 != min16) {
|
||||
stb__EvalColors(color,max16,min16);
|
||||
mask = stb__MatchColorsBlock(block,color,dither);
|
||||
} else
|
||||
mask = 0;
|
||||
|
||||
// third step: refine (multiple times if requested)
|
||||
for (i=0;i<refinecount;i++) {
|
||||
unsigned int lastmask = mask;
|
||||
|
||||
if (stb__RefineBlock(dither ? dblock : block,&max16,&min16,mask)) {
|
||||
if (max16 != min16) {
|
||||
stb__EvalColors(color,max16,min16);
|
||||
mask = stb__MatchColorsBlock(block,color,dither);
|
||||
} else {
|
||||
mask = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(mask == lastmask)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// write the color block
|
||||
if(max16 < min16)
|
||||
{
|
||||
unsigned short t = min16;
|
||||
min16 = max16;
|
||||
max16 = t;
|
||||
mask ^= 0x55555555;
|
||||
}
|
||||
|
||||
dest[0] = (unsigned char) (max16);
|
||||
dest[1] = (unsigned char) (max16 >> 8);
|
||||
dest[2] = (unsigned char) (min16);
|
||||
dest[3] = (unsigned char) (min16 >> 8);
|
||||
dest[4] = (unsigned char) (mask);
|
||||
dest[5] = (unsigned char) (mask >> 8);
|
||||
dest[6] = (unsigned char) (mask >> 16);
|
||||
dest[7] = (unsigned char) (mask >> 24);
|
||||
}
|
||||
|
||||
// Alpha block compression.
//
// Reads the alpha byte (offset 3) of each of the 16 four-byte pixels in 'src' and writes
// 8 bytes to 'dest': the largest alpha, the smallest alpha, then sixteen 3-bit indices
// packed starting from the least significant bit (6 bytes).
// 'mode' is accepted for symmetry with the color encoder but is not used.
static void stb__CompressAlphaBlock(unsigned char *dest,unsigned char *src,int mode)
{
   const unsigned char *alpha = src + 3; // alpha of pixel k lives at alpha[k*4]
   unsigned char *out = dest + 2;        // index bytes follow the two endpoint bytes
   int lo, hi;
   int range, range2, range4, offset;
   int acc = 0, nbits = 0;
   int px;

   (void) mode;

   // find the extreme alpha values
   lo = hi = alpha[0];
   for (px = 1; px < 16; ++px)
   {
      int a = alpha[px*4];
      if (a < lo)
         lo = a;
      else if (a > hi)
         hi = a;
   }

   // endpoints: maximum first, then minimum
   dest[0] = hi;
   dest[1] = lo;

   // determine bias and emit indices
   // given the choice of hi/lo, these indices are optimal:
   // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
   range  = hi - lo;
   range4 = range*4;
   range2 = range*2;
   if (range < 8)
      offset = range - 1;
   else
      offset = range/2 + 2;
   offset -= lo * 7;

   for (px = 0; px < 16; ++px) {
      int a = alpha[px*4]*7 + offset;
      int level = 0;
      int idx;

      // "linear scale" position between 0 (value = min) and 7 (value = max),
      // built one binary digit at a time
      if (a >= range4) { level  = 4; a -= range4; }
      if (a >= range2) { level += 2; a -= range2; }
      if (a >= range)    level += 1;

      // turn the linear position into a DXT index: reverse the order (mod 8), then swap
      // 0 and 1 because those two indices select the endpoints themselves
      idx = -level & 7;
      if (idx < 2)
         idx ^= 1;

      // append 3 bits to the accumulator and flush a byte whenever one is complete
      acc |= idx << nbits;
      nbits += 3;
      if (nbits >= 8) {
         *out++ = acc;
         acc >>= 8;
         nbits -= 8;
      }
   }
}
|
||||
|
||||
static void stb__InitDXT()
|
||||
{
|
||||
int i;
|
||||
for(i=0;i<32;i++)
|
||||
stb__Expand5[i] = (i<<3)|(i>>2);
|
||||
|
||||
for(i=0;i<64;i++)
|
||||
stb__Expand6[i] = (i<<2)|(i>>4);
|
||||
|
||||
for(i=0;i<256+16;i++)
|
||||
{
|
||||
int v = i-8 < 0 ? 0 : i-8 > 255 ? 255 : i-8;
|
||||
stb__QuantRBTab[i] = stb__Expand5[stb__Mul8Bit(v,31)];
|
||||
stb__QuantGTab[i] = stb__Expand6[stb__Mul8Bit(v,63)];
|
||||
}
|
||||
|
||||
stb__PrepareOptTable(&stb__OMatch5[0][0],stb__Expand5,32);
|
||||
stb__PrepareOptTable(&stb__OMatch6[0][0],stb__Expand6,64);
|
||||
}
|
||||
|
||||
// Compresses one block of 16 RGBA pixels (64 bytes at 'src').
//
// When 'alpha' is nonzero, an 8-byte alpha block is written to 'dest' first and the 8-byte
// color block follows it (16 bytes in total); otherwise only the 8-byte color block is
// written. 'mode' is forwarded unchanged to both block encoders.
//
// The lookup tables are built on the first call, guarded by a plain static flag.
void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode)
{
   static int tablesPending=1;
   unsigned char *pixels = (unsigned char*) src; // the encoders take non-const pointers
   unsigned char *colorDest = dest;

   if (tablesPending) {
      stb__InitDXT();
      tablesPending=0;
   }

   if (alpha) {
      stb__CompressAlphaBlock(dest,pixels,mode);
      colorDest = dest + 8; // color block goes right after the alpha block
   }

   stb__CompressColorBlock(colorDest,pixels,mode);
}
|
||||
#endif // STB_DXT_IMPLEMENTATION
|
||||
|
||||
#endif // STB_INCLUDE_STB_DXT_H
|
Loading…
Reference in a new issue