Input: xmm0 = x, xmm1 = y
Output: xmm0 = hypot(x, y);
hypot_ps:
push 0x1F80
; Save MXCSR
stmxcsr [rsp+4]
; Reset MXCSR
ldmxcsr [rsp]
mov eax, 1.0
pcmpeqd xmm2, xmm2
psrld xmm2, 1
movd xmm3, eax
andps xmm0, xmm2
andps xmm1, xmm2
movaps xmm2, xmm0
maxps xmm0, xmm1
minps xmm1, xmm2
pxor xmm2, xmm2
pshufd xmm3, xmm3, 0
cmpneqps xmm2, xmm0
divps xmm1, xmm0
pand xmm1, xmm2
mulps xmm1, xmm1
addps xmm1, xmm3
sqrtps xmm1, xmm1
mulps xmm0, xmm1
; Restore MXCSR
ldmxcsr [rsp+4]
pop rax
ret
hypot_pd:
push 0x1F80
; Save MXCSR
stmxcsr [rsp+4]
; Reset MXCSR
ldmxcsr [rsp]
mov rax, 1.0
pcmpeqd xmm2, xmm2
psrlq xmm2, 1
movq xmm3, rax
andpd xmm0, xmm2
andpd xmm1, xmm2
movapd xmm2, xmm0
maxpd xmm0, xmm1
minpd xmm1, xmm2
pxor xmm2, xmm2
pshufd xmm3, xmm3, 0x44
cmpneqpd xmm2, xmm0
divpd xmm1, xmm0
pand xmm1, xmm2
mulpd xmm1, xmm1
addpd xmm1, xmm3
sqrtpd xmm1, xmm1
mulpd xmm0, xmm1
; Restore MXCSR
ldmxcsr [rsp+4]
pop rax
ret