css_enhanced_waf/vstdlib/coroutine_win64.masm
FluorescentCIAAfricanAmerican 3bf9df6b27 1
2020-04-22 12:56:21 -04:00

175 lines
5.3 KiB
Text

option casemap:none
.CODE
; import Coroutine_Finish with its mangled Microsoft Visual C++ name
?Coroutine_Finish@@YAXXZ PROTO
; extern "C" void SaveNonVolatileRegs( uintptr_t regs[8] );
;
; ABI:  Microsoft x64
; In:   rcx = regs (room for eight qwords)
; Out:  regs[0..7] = rbx, rbp, rsi, rdi, r12, r13, r14, r15 (in that order)
; Leaf routine: touches no stack and modifies no registers, so the prolog is empty.
SaveNonVolatileRegs PROC FRAME
    .endprolog
    mov     [rcx+00h], rbx              ; regs[0]
    mov     [rcx+08h], rbp              ; regs[1]
    mov     [rcx+10h], rsi              ; regs[2]
    mov     [rcx+18h], rdi              ; regs[3]
    mov     [rcx+20h], r12              ; regs[4]
    mov     [rcx+28h], r13              ; regs[5]
    mov     [rcx+30h], r14              ; regs[6]
    mov     [rcx+38h], r15              ; regs[7]
    ret
SaveNonVolatileRegs ENDP
; extern "C" void NORETURN Coroutine_Launch_ASM( byte **ppStackHigh, uintptr_t **ppLaunchParentFramePtr, void (*pfnExec)( void* ), void *pvParam )
;
; ABI:  Microsoft x64
; In:   rcx = ppStackHigh, rdx = ppLaunchParentFramePtr, r8 = pfnExec, r9 = pvParam
;       rsp is 8 mod 16 on entry (return address was just pushed)
; Out:  *ppStackHigh             = address just above our return address (rsp+70h after the prolog)
;       *ppLaunchParentFramePtr  = address of the eight saved nonvolatile registers (rsp+28h)
; Does not return: calls pfnExec( pvParam ), then Coroutine_Finish.
;
; Frame layout after the prolog (low to high addresses):
;   rsp+00h .. rsp+1Fh   shadow space for the calls made from here
;   rsp+20h .. rsp+27h   padding that brings rsp to 16-byte alignment
;   rsp+28h .. rsp+67h   saved rbx, rbp, rsi, rdi, r12, r13, r14, r15
;                        (same order SaveNonVolatileRegs writes them)
;   rsp+68h              return address
;   rsp+70h              "top of stack" reported through ppStackHigh
Coroutine_Launch_ASM PROC FRAME
    ; x64 prolog and prolog description macros:
    ; save caller's nonvolatile registers (pushed in reverse order to match SaveNonVolatileRegs)
    ; so that we can slam new values in later to trick the x64 callstack unwind procedure
    push    r15
    .pushreg r15
    push    r14
    .pushreg r14
    push    r13
    .pushreg r13
    push    r12
    .pushreg r12
    push    rdi
    .pushreg rdi
    push    rsi
    .pushreg rsi
    push    rbp
    .pushreg rbp
    push    rbx
    .pushreg rbx

    ; stack-allocate Win64 function call shadow space for calls to pfnExec and Coroutine_Finish,
    ; plus 8 additional bytes to align the stack frame properly (comes in off by 8)
    sub     rsp, 28h
    .allocstack 28h
    .endprolog

    ; compute top of stack for coroutine: 40 bytes for stack, 64 for saved regs, 8 for return address
    ; (we do not bother including the additional unused 32 byte shadow space we own above that)
    lea     rax, [rsp+70h]
    mov     qword ptr [rcx], rax

    ; save off the address of our saved regs so that we can memcpy over them later and trick
    ; the x64 stack unwind logic into walking up to a different Internal_Coroutine_Continue
    lea     rax, [rsp+28h]
    mov     qword ptr [rdx], rax

    ; call pfnExec(pvParam)
    mov     rcx, r9
    call    r8

    ; call Coroutine_Finish - does not return
    call    ?Coroutine_Finish@@YAXXZ

    ; Never executed in normal operation. Keeping one instruction after the final call means the
    ; return address pushed by that call still lies inside this procedure, so a stack walk that
    ; looks up unwind data by return address finds this FRAME's data instead of whatever code
    ; follows. It also traps immediately if Coroutine_Finish ever does return.
    int     3
Coroutine_Launch_ASM ENDP
; Needs to match definition found in setjmp.h
; Layout of the x64 jmp_buf as read by Coroutine_LongJmp_Unchecked. The offsets noted on each
; field follow from the field sizes and agree with the offsets used in the reference
; __intrinsic_setjmp() listing kept in this file. Total size is 100h bytes.
_JUMP_BUFFER STRUCT
m_Frame QWORD ? ; +00h  value setjmp receives in rdx
m_Rbx QWORD ? ; +08h
m_Rsp QWORD ? ; +10h  rsp as it will be after setjmp returns
m_Rbp QWORD ? ; +18h
m_Rsi QWORD ? ; +20h
m_Rdi QWORD ? ; +28h
m_R12 QWORD ? ; +30h
m_R13 QWORD ? ; +38h
m_R14 QWORD ? ; +40h
m_R15 QWORD ? ; +48h
m_Rip QWORD ? ; +50h  return address of the setjmp call
m_MxCsr DWORD ? ; +58h  SSE control/status register
m_FpCsr WORD ? ; +5Ch  x87 control word
m_Spare WORD ? ; +5Eh  not read by the code in this file
m_Xmm6 XMMWORD ? ; +60h
m_Xmm7 XMMWORD ? ; +70h
m_Xmm8 XMMWORD ? ; +80h
m_Xmm9 XMMWORD ? ; +90h
m_Xmm10 XMMWORD ? ; +0A0h
m_Xmm11 XMMWORD ? ; +0B0h
m_Xmm12 XMMWORD ? ; +0C0h
m_Xmm13 XMMWORD ? ; +0D0h
m_Xmm14 XMMWORD ? ; +0E0h
m_Xmm15 XMMWORD ? ; +0F0h
_JUMP_BUFFER ENDS
;This is the reference asm for __intrinsic_setjmp() in VS2015
;mov qword ptr [rcx],rdx ; intrinsic call site does "mov rdx,rbp" followed by "add rdx,0FFFFFFFFFFFFFFC0h", looks like a nonstandard abi
;mov qword ptr [rcx+8],rbx
;mov qword ptr [rcx+18h],rbp
;mov qword ptr [rcx+20h],rsi
;mov qword ptr [rcx+28h],rdi
;mov qword ptr [rcx+30h],r12
;mov qword ptr [rcx+38h],r13
;mov qword ptr [rcx+40h],r14
;mov qword ptr [rcx+48h],r15
;lea r8,[rsp+8] ; rsp set to post-return address
;mov qword ptr [rcx+10h],r8
;mov r8,qword ptr [rsp]
;mov qword ptr [rcx+50h],r8
;stmxcsr dword ptr [rcx+58h]
;fnstcw word ptr [rcx+5Ch]
;movdqa xmmword ptr [rcx+60h],xmm6
;movdqa xmmword ptr [rcx+70h],xmm7
;movdqa xmmword ptr [rcx+80h],xmm8
;movdqa xmmword ptr [rcx+90h],xmm9
;movdqa xmmword ptr [rcx+0A0h],xmm10
;movdqa xmmword ptr [rcx+0B0h],xmm11
;movdqa xmmword ptr [rcx+0C0h],xmm12
;movdqa xmmword ptr [rcx+0D0h],xmm13
;movdqa xmmword ptr [rcx+0E0h],xmm14
;movdqa xmmword ptr [rcx+0F0h],xmm15
;xor eax,eax
;ret
; extern "C" void NORETURN Coroutine_LongJmp_Unchecked( jmp_buf buf, int nResult )
;
; ABI:  Microsoft x64
; In:   rcx = buf (a _JUMP_BUFFER filled in by setjmp), edx = nResult
; Out:  does not return to its caller; resumes at buf->m_Rip with rax = nResult (zero-extended),
;       rsp and every nonvolatile integer/XMM register, MXCSR and the x87 control word reloaded
;       from buf
; Notes: no validation or unwinding is performed - the saved state is loaded as-is.
;        movaps faults on a misaligned operand, so buf is assumed to be 16-byte aligned
;        (presumably guaranteed by the jmp_buf declaration - confirm against setjmp.h).
Coroutine_LongJmp_Unchecked PROC
    ; load nResult as the value the original setjmp() will appear to return;
    ; a 32-bit register write zero-extends into the full rax
    mov     eax, edx

    ; restore to setjmp() caller state
    mov     rdx, [rcx]._JUMP_BUFFER.m_Frame     ; appears to be an error checking value of (_JUMP_BUFFER.m_Rbp + 0FFFFFFFFFFFFFFC0h) passed non-standardly through rdx to setjmp()
    mov     rbx, [rcx]._JUMP_BUFFER.m_Rbx
    mov     rsp, [rcx]._JUMP_BUFFER.m_Rsp       ; nothing below pushes or pops, so switching stacks here is safe
    mov     rbp, [rcx]._JUMP_BUFFER.m_Rbp
    mov     rsi, [rcx]._JUMP_BUFFER.m_Rsi
    mov     rdi, [rcx]._JUMP_BUFFER.m_Rdi
    mov     r12, [rcx]._JUMP_BUFFER.m_R12
    mov     r13, [rcx]._JUMP_BUFFER.m_R13
    mov     r14, [rcx]._JUMP_BUFFER.m_R14
    mov     r15, [rcx]._JUMP_BUFFER.m_R15
    mov     r10, [rcx]._JUMP_BUFFER.m_Rip       ; r10 is volatile, so it can carry the resume address

    ; floating-point control state
    ldmxcsr [rcx]._JUMP_BUFFER.m_MxCsr
    fldcw   [rcx]._JUMP_BUFFER.m_FpCsr
    ; m_Spare is intentionally not read

    ; nonvolatile XMM registers
    movaps  xmm6,  [rcx]._JUMP_BUFFER.m_Xmm6
    movaps  xmm7,  [rcx]._JUMP_BUFFER.m_Xmm7
    movaps  xmm8,  [rcx]._JUMP_BUFFER.m_Xmm8
    movaps  xmm9,  [rcx]._JUMP_BUFFER.m_Xmm9
    movaps  xmm10, [rcx]._JUMP_BUFFER.m_Xmm10
    movaps  xmm11, [rcx]._JUMP_BUFFER.m_Xmm11
    movaps  xmm12, [rcx]._JUMP_BUFFER.m_Xmm12
    movaps  xmm13, [rcx]._JUMP_BUFFER.m_Xmm13
    movaps  xmm14, [rcx]._JUMP_BUFFER.m_Xmm14
    movaps  xmm15, [rcx]._JUMP_BUFFER.m_Xmm15

    ; jmp instead of ret to _JUMP_BUFFER.m_Rip because setjmp() already set the _JUMP_BUFFER.m_Rsp to the post-return state.
    ; A near indirect jmp always uses a 64-bit target in 64-bit mode, and the assembler already
    ; emits the REX prefix that r10 requires, so no hand-emitted prefix byte is needed.
    jmp     r10
Coroutine_LongJmp_Unchecked ENDP
_TEXT ENDS
END