format ELF64 executable 3
entry _start
;-------------------------------------------------------------------------------
segment readable executable
;-------------------------------------------------------------------------------
; NAME:         logss
; DESC:         Fast approximate natural logarithm of a single float.
;               The input is clamped from below to the smallest positive
;               normal float, then split into exponent e and mantissa m in
;               [1,2). With t = 2*(m-1)/(m+1) and w = t*t the result is
;                   t + t*w*P(w)/Q(w) + e*g_log_c0
;               where P and Q are quadratics in w (g_log_p0..2, g_log_q0..2).
;               Both divisions use rcpss, so precision is limited to that of
;               the reciprocal approximation.
;               NOTE(review): only a quadratic Q is evaluated here; if the
;               coefficients were taken from a table that assumes a monic
;               cubic denominator, the w^3 term is missing - confirm intent.
; IN:           xmm0.x      function argument
; OUT:          xmm0.x      function result (upper lanes are not meaningful)
; CLOBBERS:     edx, xmm1, xmm2, xmm4, xmm5, xmm6, xmm7, flags
;-------------------------------------------------------------------------------
align 16
logss:
                ; --- clamp the argument and extract the unbiased exponent ---
                maxss       xmm0,[g_min_norm_pos]       ; x = max(x, min normal)
                movd        edx,xmm0                    ; edx = raw bits of x
                shr         edx,23                      ; drop the mantissa bits
                sub         edx,0x7f                    ; edx = e (bias removed)
                ; --- rebuild the mantissa as a float m in [1,2) ---
                movss       xmm1,[g_1_0]                ; xmm1 = (1.0,0,0,0)
                andps       xmm0,dqword [g_inv_mant_mask] ; clear exponent field
                orps        xmm0,xmm1                   ; insert exponent of 1.0
                ; --- t = 2*(m-1)/(m+1), w = t*t ---
                movaps      xmm4,xmm0
                addss       xmm4,xmm1                   ; m + 1
                rcpss       xmm4,xmm4                   ; ~1/(m + 1)
                subss       xmm0,xmm1                   ; m - 1
                mulss       xmm0,xmm4                   ; (m - 1)/(m + 1)
                addss       xmm0,xmm0                   ; t
                movaps      xmm2,xmm0                   ; xmm2 = t (kept for later)
                mulss       xmm0,xmm0                   ; xmm0 = w
                ; --- P(w) = (p0*w + p1)*w + p2 ---
                movss       xmm4,[g_log_p0]
                mulss       xmm4,xmm0
                movss       xmm5,[g_log_p1]
                addss       xmm4,xmm5
                mulss       xmm4,xmm0
                movss       xmm5,[g_log_p2]
                addss       xmm4,xmm5                   ; xmm4 = P(w)
                ; --- Q(w) = (q0*w + q1)*w + q2, then its reciprocal ---
                movss       xmm6,[g_log_q0]
                mulss       xmm6,xmm0
                movss       xmm7,[g_log_q1]
                addss       xmm6,xmm7
                mulss       xmm6,xmm0
                movss       xmm7,[g_log_q2]
                addss       xmm6,xmm7                   ; xmm6 = Q(w)
                rcpss       xmm6,xmm6                   ; xmm6 = ~1/Q(w)
                ; --- mantissa part: t + t*w*P(w)/Q(w) ---
                mulss       xmm0,xmm4                   ; w*P
                mulss       xmm0,xmm6                   ; w*P/Q
                mulss       xmm0,xmm2                   ; t*w*P/Q
                addss       xmm0,xmm2                   ; + t
                ; --- exponent part: e*g_log_c0 ---
                movss       xmm5,[g_log_c0]
                cvtsi2ss    xmm1,edx                    ; xmm1.x = (float)e
                mulss       xmm1,xmm5
                addss       xmm0,xmm1                   ; final result
                ret
;-------------------------------------------------------------------------------
; NAME:         main
; DESC:         Renders a SIZE x SIZE distance-estimate image of the Mandelbrot
;               set into an anonymous memory mapping (4 bytes per pixel) and
;               writes the 18-byte g_tga_head followed by the pixel data to
;               the file named by g_tga_name.
; IN:           nothing
; OUT:          Returns normally on success. If mmap, creat or write fails the
;               process is terminated with exit status 1.
; CLOBBERS:     rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm2, xmm4-xmm9,
;               xmm13-xmm15, flags. rbx, r12 and r13 are saved and restored.
;-------------------------------------------------------------------------------
SYS_WRITE       =           1
SYS_CLOSE       =           3
SYS_MMAP        =           9
SYS_EXIT        =           60
SYS_CREAT       =           85
MAX_ERRNO       =           4095            ; raw syscalls return -1..-4095 on error
align 16
main:
imgptr          equ         rbp-8
                push        rbp
                mov         rbp,rsp
                sub         rsp,32          ; imgptr + 3 saved registers
                mov         [rbp-16],rbx    ; callee-saved registers used below
                mov         [rbp-24],r12
                mov         [rbp-32],r13
                ; alloc memory for the image
                mov         eax,SYS_MMAP
                xor         edi,edi         ; addr
                mov         esi,SIZE*SIZE*4 ; length
                mov         edx,0x1+0x2     ; PROT_READ | PROT_WRITE
                mov         r10d,0x02+0x20  ; MAP_PRIVATE | MAP_ANONYMOUS
                mov         r8,-1           ; fd
                xor         r9d,r9d         ; offset
                syscall
                cmp         rax,-MAX_ERRNO  ; unsigned compare catches -errno
                jae         .Fail
                mov         [imgptr],rax
                mov         rbx,rax         ; rbx = address of the current pixel
                ; begin loops
                xor         r13d,r13d       ; .LoopY index
.LoopY:
                xor         r12d,r12d       ; .LoopX index
.LoopX:
                ; compute c = ((x,y)/size - 0.5) * 2 / zoom + center
                cvtsi2ss    xmm0,r12d
                cvtsi2ss    xmm1,r13d
                shufps      xmm0,xmm1,00000000b       ; (x,x,y,y)
                shufps      xmm0,xmm0,00001000b       ; (x,y,x,x)
                divps       xmm0,dqword [g_size]
                subps       xmm0,dqword [g_0_5]
                addps       xmm0,xmm0
                movaps      xmm13,xmm0
                divps       xmm13,dqword [g_zoom]
                addps       xmm13,dqword [g_center]   ; c = xmm13
                ; z = (0.0,0.0) dz = (1.0,0.0)
                xorps       xmm14,xmm14               ; z = xmm14
                xorps       xmm15,xmm15
                movss       xmm15,[g_1_0]             ; dz = xmm15
                mov         ecx,[g_bailout]           ; ecx = iterations left
.LoopBailout:
                ; dz = 2.0 * z * dz + (1.0,0.0)
                movaps      xmm0,xmm14
                movaps      xmm1,xmm15
                shufps      xmm0,xmm0,01000100b       ; (zx,zy,zx,zy)
                shufps      xmm1,xmm1,00010100b       ; (dx,dy,dy,dx)
                mulps       xmm0,xmm1
                xorps       xmm0,dqword [g_inv_y_sign] ; negate the zy*dy product
                movaps      xmm1,xmm0
                shufps      xmm0,xmm0,00001000b       ; (zx*dx, zx*dy, ..)
                shufps      xmm1,xmm1,00001101b       ; (-zy*dy, zy*dx, ..)
                addps       xmm0,xmm1                 ; complex product z*dz
                addps       xmm0,xmm0
                addss       xmm0,[g_1_0]
                movaps      xmm15,xmm0
                ; z = z * z + c
                movaps      xmm0,xmm14
                movaps      xmm1,xmm0
                shufps      xmm0,xmm0,00000100b       ; (zx,zy,zx,zx)
                shufps      xmm1,xmm1,01010100b       ; (zx,zy,zy,zy)
                mulps       xmm0,xmm1
                xorps       xmm0,dqword [g_inv_y_sign] ; negate the zy*zy product
                movaps      xmm1,xmm0
                shufps      xmm0,xmm0,00001000b       ; (zx*zx, zx*zy, ..)
                shufps      xmm1,xmm1,00001101b       ; (-zy*zy, zx*zy, ..)
                addps       xmm0,xmm1                 ; complex square z*z
                addps       xmm0,xmm13
                movaps      xmm14,xmm0
                ; compute dot(z,z) in lane 0
                mulps       xmm0,xmm0
                movaps      xmm1,xmm0
                shufps      xmm1,xmm1,01010101b
                addps       xmm0,xmm1
                ; if dot(z,z) > g_z_max break .LoopBailout
                ucomiss     xmm0,[g_z_max]
                ja          .NotInSet
                sub         ecx,1
                jnz         .LoopBailout
                xorps       xmm0,xmm0   ; distance is zero
                jmp         .InSet
.NotInSet:
                ; shade = sqrt(sqrt(0.5*sqrt(dot(z,z)/dot(dz,dz))*log(dot(z,z))*zoom))
                ;         * brightness; only lane 0 matters until the broadcast
                movaps      xmm8,xmm0   ; keep dot(z,z); logss overwrites xmm0
                call        logss
                movaps      xmm9,xmm0   ; log(dot(z,z))
                movaps      xmm1,xmm15
                mulps       xmm1,xmm1
                movaps      xmm2,xmm1
                shufps      xmm2,xmm2,01010101b
                addps       xmm1,xmm2   ; dot(dz,dz)
                divps       xmm8,xmm1   ; dot(z,z) / dot(dz,dz)
                sqrtps      xmm0,xmm8
                mulps       xmm0,dqword [g_0_5]
                mulps       xmm0,xmm9
                mulps       xmm0,dqword [g_zoom]
                sqrtps      xmm0,xmm0
                sqrtps      xmm0,xmm0
                shufps      xmm0,xmm0,00000000b       ; broadcast lane 0
                mulps       xmm0,dqword [g_brightness]
.InSet:
                ; convert to bytes, saturating to [0,255]: the scaled value can
                ; exceed 255 and would otherwise wrap when only the low byte is
                ; stored. NaN/overflow convert to 0x80000000 and saturate to 0.
                mulps       xmm0,dqword [g_255_0]
                cvttps2dq   xmm0,xmm0
                packssdw    xmm0,xmm0   ; dwords -> signed words
                packuswb    xmm0,xmm0   ; words  -> unsigned bytes
                movd        eax,xmm0    ; bytes 0..2 = colour channels
                or          eax,0xff000000            ; byte 3 = 255
                mov         [rbx],eax
                ; advance pixel pointer
                add         rbx,4
                ; continue .LoopX
                inc         r12d
                cmp         r12d,SIZE
                jne         .LoopX
                ; continue .LoopY
                inc         r13d
                cmp         r13d,SIZE
                jne         .LoopY
                ; create TGA file
                mov         eax,SYS_CREAT
                mov         rdi,g_tga_name
                mov         esi,110000000b  ; mode 0600 (rw for owner)
                syscall
                cmp         rax,-MAX_ERRNO
                jae         .Fail
                mov         rbx,rax         ; rbx = file descriptor
                ; write header (a short write of 18 bytes is treated as failure)
                mov         eax,SYS_WRITE
                mov         rdi,rbx
                mov         rsi,g_tga_head
                mov         edx,18
                syscall
                cmp         rax,18
                jne         .Fail
                ; write pixel data, retrying until everything has been written
                mov         r12,[imgptr]    ; r12 = next byte to write
                mov         r13d,SIZE*SIZE*4 ; r13 = bytes remaining
.WritePixels:
                mov         eax,SYS_WRITE
                mov         rdi,rbx
                mov         rsi,r12
                mov         rdx,r13
                syscall
                cmp         rax,-MAX_ERRNO
                jae         .Fail
                test        rax,rax
                jz          .Fail           ; no progress: avoid spinning forever
                add         r12,rax
                sub         r13,rax
                jnz         .WritePixels
                ; close the file
                mov         eax,SYS_CLOSE
                mov         rdi,rbx
                syscall
                ; restore callee-saved registers and return
                mov         rbx,[rbp-16]
                mov         r12,[rbp-24]
                mov         r13,[rbp-32]
                leave
                ret
.Fail:
                ; a system call failed: terminate with a non-zero status
                mov         eax,SYS_EXIT
                mov         edi,1
                syscall
                restore     imgptr
;-------------------------------------------------------------------------------
; NAME:         _start
; DESC:         ELF entry point (see the `entry` directive). Runs main and then
;               ends the process through the exit system call with status 0.
;               Never returns.
;-------------------------------------------------------------------------------
_start:
                call        main
                ; terminate process
                xor         edi,edi         ; exit status = 0
                mov         eax,60          ; sys_exit
                syscall
;-------------------------------------------------------------------------------
segment readable
;-------------------------------------------------------------------------------
; Read-only data. The layout is order-sensitive: everything after `align 16`
; is a multiple of 16 bytes long, which keeps each table aligned for the
; `dqword` memory operands used by the packed SSE instructions in the code.
;-------------------------------------------------------------------------------
align 1
; Output file name (zero-terminated), passed to the file-creating syscall.
g_tga_name      db          'mandelbrot.tga',0
; 18-byte TGA header: no image ID, no colour map, image type 2 (uncompressed
; true-colour), 9 zero bytes (colour-map spec and x/y origin), then width and
; height as 16-bit little-endian values, 32 bits per pixel, descriptor 0.
g_tga_head      db          0,0,2,9 dup 0
                db          (SIZE and 0x00ff),(SIZE and 0xff00) shr 8
                db          (SIZE and 0x00ff),(SIZE and 0xff00) shr 8,32,0
align 4
; Scalar constants used by logss.
g_min_norm_pos  dd          0x00800000 ; bit pattern of the smallest positive normal float
g_log_p0        dd          -0.789580278884799154124 ; numerator P: p0*w^2 + p1*w + p2
g_log_p1        dd          16.3866645699558079767
g_log_p2        dd          -64.1409952958715622951
g_log_q0        dd          -35.6722798256324312549 ; denominator Q: q0*w^2 + q1*w + q2
g_log_q1        dd          312.093766372244180303
g_log_q2        dd          -769.691943550460008604
g_log_c0        dd          0.693147180559945 ; ln(2), multiplied by the exponent
g_z_max         dd          100.0 ; escape threshold compared against dot(z,z)

g_bailout       dd          1000 ; maximum iterations per pixel

align 16
; Image edge length in pixels; also used to build the TGA header above.
SIZE=800
g_size          dd          4 dup 800.0 ; float divisor for pixel coordinates; keep equal to SIZE
g_center        dd          -0.761574,-0.0847596,0.0,0.0 ; view centre (x, y) added to c
g_zoom          dd          4 dup 1000.0 ; view magnification
g_brightness    dd          4 dup 1.2 ; final scale applied to the shade

g_0_0           dd          4 dup 0.0 ; not referenced by the code shown in this file
g_0_5           dd          4 dup 0.5
g_1_0           dd          4 dup 1.0
g_255_0         dd          4 dup 255.0 ; scale from unit range to byte range
g_inv_mant_mask dd          4 dup (not 0x7f800000) ; clears the float exponent field, keeps sign and mantissa
g_inv_y_sign    dd          0x0,0x80000000,0x0,0x0 ; xor mask that flips the sign of lane 1 only
;-------------------------------------------------------------------------------
