
;-----------------------------------------------------------------------
; avxpatan - two-argument arctangent in degrees, single precision.
; Target: 32-bit code (absolute addressing), AVX + FMA3 required.
; In:     xmm0[0] = y, xmm1[0] = x
; Out:    xmm0[0] = angle in degrees: arctgD(y/x), plus 180 (x<0, y>=0)
;                   or minus 180 (x<0, y<0); "<0" means sign bit set
; Clobb:  eax, ecx, edx, xmm1, xmm2, xmm3, xmm4, flags
; Data:   @ct20 (coefficient table, read with aligned 16-byte loads),
;         @ct3  (quadrant offsets)
;
; With t = y/x and s = t^2 the arctangent is evaluated as a ratio of two
; cubic polynomials in s, both computed at once in lanes 0 (Q) and 1 (P):
;   s <= 1 :  arctgD(t) = t*P(s)/Q(s)
;   s >  1 :  arctgD(t) = 90*sgn(t) - P'(s)/(t*Q'(s))
; where P', Q' use the same coefficients in reversed order.
;
; Every scalar operation uses the VEX encoding, so the routine never
; switches between legacy SSE and AVX instructions; mixing the two can
; incur transition penalties when the caller left upper YMM state dirty.
;-----------------------------------------------------------------------
align 64
avxpatan:
    vmovmskps ecx,xmm0              ; ecx bit 0 = sign bit of y (bits 1..3: other lanes)
    vmovmskps eax,xmm1              ; eax bit 0 = sign bit of x (bits 1..3: other lanes)
    vdivss xmm0,xmm0,xmm1           ; xmm0 = t = y/x, the arctangent argument
    and ecx,1                       ; keep only the sign of y
    and eax,1                       ; keep only the sign of x
    bts ecx,eax                     ; x>=0: ecx=1;  x<0: ecx=2 (y>=0) or 3 (y<0)
    ; --- arctgD(t), inlined ---
    vshufps xmm1,xmm0,xmm0,0        ; xmm1 = t in all four lanes
    mov edx,@ct20                   ; edx = address of the constants table
    vmulps xmm2,xmm1,xmm1           ; xmm2 = s = t^2 in all lanes
    vmovaps xmm3,[edx+16]           ; xmm3 = a0*a2 # a0*c2 : a0(deg) # a0
    vmulps xmm4,xmm2,xmm2           ; xmm4 = s^2 in all lanes
    vucomiss xmm2,[edx+36]          ; compare s with 1.0
    ja arctg_big                    ; |t|>1 (not taken when s is NaN)
    vfmadd231ps xmm3,xmm2,[edx]     ; xmm3 += s*[edx]: a3*s+a2 # c3*s+c2 : a1*s+1 # c1*s+1 (times a0)
    vmovhlps xmm1,xmm1,xmm3         ; xmm1 lanes 1:0 = a3*s+a2 # c3*s+c2
    vfmadd231ps xmm3,xmm4,xmm1      ; xmm3 += s^2*xmm1: lane 1 = P(s), lane 0 = Q(s)
    vmovshdup xmm2,xmm3             ; xmm2[0] = P; xmm3[0] = Q
    vdivss xmm2,xmm2,xmm3           ; xmm2 = P/Q
    vmulss xmm0,xmm0,xmm2           ; xmm0 = t*P/Q = arctgD(t)
    vaddss xmm0,xmm0,[@ct3-4+4*ecx] ; quadrant correction: -0.0, +180 or -180
    ret
arctg_big:                          ; |t|>1: use 90*sgn(t) - arctgD(1/t)
    vfmadd213ps xmm3,xmm2,[edx]     ; xmm3 = s*xmm3+[edx]: a2*s+a3 # c2*s+c3 : s+a1 # s+c1 (times a0)
    vmovmskpd eax,xmm1              ; eax=3 if t has its sign bit set, otherwise 0
    vmovhlps xmm0,xmm0,xmm3         ; xmm0 lanes 1:0 = a2*s+a3 # c2*s+c3
    vfmadd213ps xmm3,xmm4,xmm0      ; xmm3 = s^2*xmm3+xmm0: lane 1 = P'(s), lane 0 = Q'(s)
    vmovss xmm0,[edx+4*eax+32]      ; xmm0 = 90.0 (eax=0) or -90.0 (eax=3)
    vcomiss xmm2,[edx+40]           ; compare s with the saturation threshold
    jnb arctg_end                   ; s >= threshold: result is already +-90
    vmovshdup xmm4,xmm3             ; xmm4[0] = P'; xmm3[0] = Q'
    vmulss xmm1,xmm1,xmm3           ; xmm1 = t*Q'
    vdivss xmm4,xmm4,xmm1           ; xmm4 = P'/(t*Q')
    vsubss xmm0,xmm0,xmm4           ; xmm0 = 90*sgn(t) - P'/(t*Q')
arctg_end:
    vaddss xmm0,xmm0,[@ct3-4+4*ecx] ; quadrant correction: -0.0, +180 or -180
    ret
align 16
; Quadrant offsets, read by avxpatan as [@ct3-4+4*ecx] with ecx = 1, 2, 3.
; The offset for ecx=1 is -0.0, so adding it leaves the sign of a zero
; result untouched.
@ct3  dd -0.0, 180.0, -180.0
; Degrees-to-radians factor (not referenced by the code shown here).
; NOTE(review): pi/180 = 0.0174532925...; this literal is about 2e-7
; (relative) smaller - confirm whether that is intentional.
      dd 0.017453288889
; Coefficient table for the arctangent rational approximation. It starts
; 16 bytes after @ct3 and is therefore 16-byte aligned. Each row below is
; 16 bytes; avxpatan loads the first two rows as 4-lane vectors and reads
; the last row as scalars. In rows +0 and +16 the even entries build the
; denominator Q and the odd entries build the numerator P (in degrees).
; +0 : a0*c1 (c1=a1+b0), a0*a1 (deg), a0*c3 (c3=a3+b2), a0*a3 (deg)
@ct20 dd 1.92582580E-14, 8.54345240E-13, 6.88789034E-16, 7.05811633E-15
; +16: a0, a0 (deg), a0*c2 (c2=a2+b1), a0*a2 (deg)
      dd 1.30413631E-14, 7.47215061E-13, 7.66305964E-15, 2.20697728E-13
; +32: 90.0; +36: 1.0 (limit for the small-argument branch);
; +40: threshold of t^2 above which arctgD(t) = 90*sgn(t); +44: -90.0
      dd 90.0, 1.0, 2.25592738E14, -90.0
              


           