; Constants table used by arctg2: fourteen single-precision values (4 bytes each).
; The number in brackets is the byte offset from @ct2. arctg2 addresses the table
; with hard-coded offsets, so the order and count of the entries must not change.
;   [0..12]  one packed operand ([edx]):    a0*c1, a0*a1, a0*c3, a0*a3
;   [16..28] one packed operand ([edx+16]): a0,    a0,    a0*c2, a0*a2
;   [32..52] scalar constants; each negative copy lies 12 bytes (3 entries) after
;            its positive counterpart, so the index eax=3 selects the negative one.
; Within each pair the c-coefficient (denominator Q) comes first, the a-coefficient
; (numerator P) second. Every coefficient carries the common factor a0, which
; cancels when P is divided by Q.
@ct2 dd 6.28740248E-17,\          ; [0]  a0*(a1+b0)=a0*c1
    4.86816205E-17,\             ; [4]  a0*a1
    2.24874633E-18,\            ; [8]  a0*(a3+b2)=a0*c3
    4.02179944E-19,\            ; [12] a0*a3
    4.25772129E-17,\            ; [16] a0 (leading/constant term of Q)
    4.25772129E-17,\            ; [20] a0 (leading/constant term of P)
    2.50182216E-17,\            ; [24] a0*(a2+b1)=a0*c2
    1.25756219E-17,\            ; [28] a0*a2
    0.0707963258,\              ; [32] (pi-3)/2, selected when x>0
    1.5,\                       ; [36] 3/2, selected when x>0; (pi-3)/2+3/2 = pi/2
    1.0,\                       ; [40] 1, the bound on y=x^2 that selects the |x|>1 path
    -0.0707963258,\             ; [44] -(pi-3)/2, selected when x<0
    -1.5,\                      ; [48] -3/2, selected when x<0
    3.95889818E15              ; [52] Threshold of y=x^2 at or above which pi/2*sgn(x) is returned directly

;-----------------------------------------------------------------------
; arctg2 - single-precision arctangent by a rational approximation
;          arctg(x) ~ x*P(y)/Q(y), y = x^2, with cubic P and Q
; In:    xmm0 (low lane) = x
; Out:   xmm0 (low lane) = arctg(x); the upper lanes are not meaningful
; Clobb: eax (|x|>1 path only), edx, xmm1-xmm4, flags
; Needs: AVX and FMA3; 32-bit addressing (table base in edx)
; Paths: |x|<=1            -> x*P(y)/Q(y)
;        1<|x|<threshold   -> pi/2*sgn(x) - arctg(1/x), evaluated with the
;                             coefficient order reversed so no 1/x is needed
;        x^2>=threshold    -> pi/2*sgn(x)
; Notation in the comments: lanes are listed high to low as
;        lane3 # lane2 : lane1 # lane0; "~" means "up to the common factor a0";
;        lane 0 carries Q (c-coefficients), lane 1 carries P (a-coefficients).
; A NaN input compares unordered, so `ja` is not taken and the NaN
; propagates through the |x|<=1 path.
; NOTE(review): the a0 scaling presumably keeps y^3 and x*Q inside the float
; range for all |x| below the threshold - confirm against the derivation.
;-----------------------------------------------------------------------
arctg2:
    vshufps xmm1,xmm0,xmm0,0    ; xmm1 = x # x : x # x
    mov edx,       @ct2         ; edx contains the address of constants table
    vmulps xmm2,xmm1,xmm1       ; xmm2 = x^2 # x^2 : x^2 # x^2  (y = x^2)
    vmovups xmm3,[edx+16]       ; xmm3 = a0*a2 # a0*c2 : a0 # a0
    vmulps xmm4,xmm2,xmm2       ; xmm4 = y^2 # y^2 : y^2 # y^2
    vucomiss xmm2,[edx+40]      ; Compare y=x^2 to 1
    ja arctg_big2                ; Jump if |x|>1
    ; |x|<=1: Q and P are assembled as (upper pair)*y^2 + (lower pair)
    vfmadd231ps xmm3,xmm2,[edx] ; xmm3 ~ a3*y+a2 # c3*y+c2 : a1*y+1 # c1*y+1
    vmovhlps xmm1,xmm1,xmm3     ; xmm1 (low half) ~ a3*y+a2 # c3*y+c2
    vfmadd231ps xmm3,xmm4,xmm1  ; xmm3 (low half) ~ a3*y^3+a2*y^2+a1*y+1 # c3*y^3+c2*y^2+c1*y+1
    vmovshdup xmm2,xmm3         ; low lanes: xmm2 = P; xmm3 = Q
    vdivss xmm2,xmm2,xmm3       ; xmm2 = P/Q (the factor a0 cancels)
    vmulss xmm0,xmm0,xmm2       ; xmm0 = x*P/Q = arctg(x)
    ret                         ; Return
      arctg_big2:                ; When |x|>1 use formula pi/2*sgn(x)-arctg(1/x)
    ; Same coefficients as above but in reversed order (polynomials in 1/y multiplied by y^3)
    vfmadd213ps xmm3,xmm2,[edx] ; xmm3 ~ a2*y+a3 # c2*y+c3 : y+a1 # y+c1
    vmovmskpd eax,xmm1          ; eax=3 if the sign bit of x is set (x<0), otherwise eax=0
    vmovhlps xmm0,xmm0,xmm3     ; xmm0 (low half) ~ a2*y+a3 # c2*y+c3
    vfmadd213ps xmm3,xmm4,xmm0  ; xmm3 (low half) ~ y^3+a1*y^2+a2*y+a3 # y^3+c1*y^2+c2*y+c3
    vmovss xmm0,[edx+4*eax+32]  ; xmm0 = (pi-3)/2*sgn(x): eax=0 reads [edx+32], eax=3 reads [edx+44]
    vucomiss xmm2,[edx+52]      ; Compare y=x^2 to threshold value
    jnb arctg_end3               ; The data is already in xmm0, if |x|>=62919776
    vmovshdup xmm4,xmm3         ; low lanes: xmm4 = P; xmm3 = Q
    vmulss xmm1,xmm1,xmm3       ; xmm1 = x*Q
    vfmsub132ss xmm0,xmm4,xmm1  ; xmm0 = (pi-3)/2*|x|*Q-P  (sgn(x)*x = |x|)
    vdivss xmm0,xmm0,xmm1       ; xmm0 = (pi-3)/2*sgn(x)-P/(x*Q)
      arctg_end3:                ; Add to result 3/2*sgn(x): eax=0 reads [edx+36], eax=3 reads [edx+48]
    vaddss xmm0,xmm0,[edx+4*eax+36] ; xmm0 = pi/2*sgn(x)-P/(x*Q)
    ret                          ; Return