format Flat on "vitamin.exe"
entry Start32
stack 8k
use64
;
; Execution starts at Start32, which is assembled as 32-bit code and far-jumps
; to the 64-bit entry point; everything else in the file is 64-bit code.
;
; Selector values used by the program (only Code64 is referenced in this file).
Code16 = 90h ; 16-bit compatibility mode code selector
Code32 = 38h ; 32-bit compatibility mode code selector
Data32 = 30h ; 32-bit compatibility mode data selector
Code64 = 28h ; 64-bit code selector
BufferSize = 32k ; transfer buffer; WriteToFile sends at most this much per DOS call
;
; Redefines `int` for the rest of the file: every software interrupt written
; as `int N` is assembled on vector N+80h (so `int 21h` becomes int 0A1h and
; `int 31h` becomes int 0B1h).
macro int Vector
{
        int     Vector+80h
}
;
struc RMCS ;real mode call structure
{
        ; Register image handed to DosInt (50 bytes). Each 32-bit register is
        ; followed by virtual overlays naming its 16-bit and, where needed,
        ; 8-bit parts; the overlays occupy no extra space.
        .rEDI           dd ?            ;+0
        virtual at .rEDI
                .rDI    dw ?
        end virtual
        .rESI           dd ?            ;+4
        virtual at .rESI
                .rSI    dw ?
        end virtual
        .rEBP           dd ?            ;+8
        virtual at .rEBP
                .rBP    dw ?
        end virtual
        .Reserve        dd ?            ;+12
        .rEBX           dd ?            ;+16
        virtual at .rEBX                ; 16-bit view
                .rBX    dw ?
        end virtual
        virtual at .rEBX                ; 8-bit views (low byte first)
                .rBL    db ?
                .rBH    db ?
        end virtual
        .rEDX           dd ?            ;+20
        virtual at .rEDX
                .rDX    dw ?
        end virtual
        virtual at .rEDX
                .rDL    db ?
                .rDH    db ?
        end virtual
        .rECX           dd ?            ;+24
        virtual at .rECX
                .rCX    dw ?
        end virtual
        virtual at .rECX
                .rCL    db ?
                .rCH    db ?
        end virtual
        .rEAX           dd ?            ;+28
        virtual at .rEAX
                .rAX    dw ?
        end virtual
        virtual at .rEAX
                .rAL    db ?
                .rAH    db ?
        end virtual
        ; flags and segment registers
        .rFL            dw ?            ;+32
        .rES            dw ?            ;+34
        .rDS            dw ?            ;+36
        .rFS            dw ?            ;+38
        .rGS            dw ?            ;+40
        ; far pointers, offset word first, segment word second
        .rCSIP          dd ?            ;+42
        virtual at .rCSIP
                .rIP    dw ?
                .rCS    dw ?
        end virtual
        .rSSSP          dd ?            ;+46
        virtual at .rSSSP
                .rSP    dw ?
                .rSS    dw ?
        end virtual
}
;
; Instantiate the RMCS structure at address 0 inside a virtual block: nothing
; is emitted, and the resulting RMCS.* labels are plain field offsets that the
; code uses as [rdi+RMCS.field].
virtual at 0
RMCS RMCS
end virtual
;
;-------------------------------------------------------------------------------
; NAME: WriteToFile
; DESC: Writes a memory block to an open file. The data is copied into the
;       transfer buffer and sent to DOS in chunks of at most BufferSize bytes.
; IN: rcx number of bytes to write
; IN: rdx address of the data
; IN: rbx file handle (forwarded to DOS by DosIntWithBuffer)
; OUT: rax total number of bytes DOS reported as written
; OUT: CF set if a DOS call failed; writing stops at the failing chunk and
;      rax holds the total of the chunks written before it
; All registers other than rax and flags are preserved.
;-------------------------------------------------------------------------------
WriteToFile:
        push    r8 rcx rdx rsi rdi rbp
        xor     r8d,r8d                 ; r8 = bytes written so far (CF is cleared too)
        jrcxz   .End                    ; zero-length request: nothing to do
        mov     rbp,rcx                 ; rbp = bytes still to write
        mov     rsi,rdx                 ; rsi = read cursor in the caller's data
.Loop:
        mov     ecx,BufferSize          ; assume a full chunk
        sub     rbp,rcx
        jnc     .Write                  ; at least BufferSize bytes were pending
        add     rbp,rcx                 ; fewer pending: undo the subtraction ...
        mov     ecx,ebp                 ; ... and send exactly the remainder
        xor     ebp,ebp                 ; this is the final chunk
.Write:
        push    rcx                     ; keep the chunk size for the DOS call
        mov     edi,[LinBuff]           ; rdi = linear address of the transfer buffer
        shr     rcx,3
        rep movsq                       ; copy the whole qwords
        mov     cl,[rsp]                ; rcx is 0 after rep, so reloading CL is enough
        and     cl,111b
        rep movsb                       ; copy the 0..7 trailing bytes
        pop     rcx                     ; rcx = chunk size
        mov     ah,40h                  ; DOS function 40h: write to file
        call    DosIntWithBufferZero    ; rax = returned AX, CF = returned carry
        jc      .End                    ; failed: AX is an error code, not a byte count
        add     r8,rax
        test    rbp,rbp                 ; anything left? (leaves CF clear)
        jnz     .Loop
.End:
        mov     rax,r8                  ; mov/pop do not touch CF, so it reaches the caller
        pop     rbp rdi rsi rdx rcx r8
        ret
;-------------------------------------------------------------------------------
; NAME: DosIntWithBufferZero / DosIntWithBuffer
; DESC: Fills the Regs call structure and issues the call through DosInt with
;       DS and ES set to the transfer buffer segment.
;       DosIntWithBufferZero first forces edx to 0 (start of the buffer).
; IN: ah function number
; IN: ebx,ecx,edx values stored into the EBX/ECX/EDX fields of Regs
; OUT: eax zero-extended AX field of Regs after the call
; OUT: CF bit 0 of the flags field of Regs after the call
; rbx, rcx and rdi are preserved; DosIntWithBufferZero leaves edx = 0.
;-------------------------------------------------------------------------------
DosIntWithBufferZero:
        xor     edx,edx
DosIntWithBuffer:
        push    rbx rcx rdi
        lea     edi,[Regs]
        ; general registers first ...
        mov     [rdi+RMCS.rEBX],ebx
        mov     [rdi+RMCS.rECX],ecx
        mov     [rdi+RMCS.rEDX],edx
        mov     [rdi+RMCS.rAH],ah       ; must be stored before eax is reused below
        ; ... then both data segments = transfer buffer segment
        mov     eax,[SegBuff]
        mov     [rdi+RMCS.rES],ax
        mov     [rdi+RMCS.rDS],ax
        call    DosInt
        movzx   eax,[rdi+RMCS.rAX]      ; result register
        bt      dword [rdi+RMCS.rFL],0  ; CF = carry bit of the returned flags
        pop     rdi rcx rbx
        ret
;-------------------------------------------------------------------------------
; NAME: DosInt
; DESC: Issues `int 31h` (remapped by the int macro) with ax = 0300h, bl = 21h,
;       bh = 0 and ecx = 0 on the call structure at rdi. Before the call the
;       SS:SP field is zeroed and the flags field is set to 1, so the carry
;       bit reads as set unless the call clears it.
;       NOTE(review): this matches the DPMI "simulate real mode interrupt"
;       register layout - confirm against the extender documentation.
; IN: rdi address of an RMCS structure
; USES: ax, bx, rcx (zeroed), flags
;-------------------------------------------------------------------------------
DosInt:
        xor     ecx,ecx
        mov     [rdi+RMCS.rSSSP],ecx    ; SS:SP = 0
        mov     [rdi+RMCS.rFL],1        ; flags image = carry set
        mov     ax,0300h
        mov     bx,21h                  ; bl = interrupt 21h, bh = 0
        int     31h
        ret
; 32-bit entry stub (named by `entry Start32`): a single far jump through the
; 64-bit code selector transfers control to Start64.
Start32:
use32
jmp Code64:Start64
; switch the assembler back to 64-bit code for the rest of the file
use64
;-------------------------------------------------------------------------------
; NAME: XORWOW
; DESC: Pseudo random number generator. Advances the state words
;       g_xorwow_x/y/z/w/v and the counter g_xorwow_d by one step.
; OUT: eax [0;2^32-1]
; USES: edx (left holding t), edi (left holding v ^ (v << 4)), flags
;-------------------------------------------------------------------------------
macro XORWOW {
        ; t = x ^ (x >> 2)
        mov     eax,[g_xorwow_x]
        mov     edx,eax
        shr     edx,2
        xor     edx,eax                 ; edx = t
        ; shift the state down one slot: x = y, y = z, z = w, w = v
        mov     eax,[g_xorwow_y]
        mov     [g_xorwow_x],eax
        mov     eax,[g_xorwow_z]
        mov     [g_xorwow_y],eax
        mov     eax,[g_xorwow_w]
        mov     [g_xorwow_z],eax
        mov     eax,[g_xorwow_v]        ; eax = old v
        mov     [g_xorwow_w],eax
        ; v = (v ^ (v << 4)) ^ (t ^ (t << 1))
        mov     edi,eax
        shl     edi,4
        xor     edi,eax                 ; edi = v ^ (v << 4)
        mov     eax,edx
        add     eax,eax                 ; eax = t << 1
        xor     eax,edx                 ; eax = t ^ (t << 1)
        xor     eax,edi
        mov     [g_xorwow_v],eax        ; store new v
        ; result = (d += 362437) + v
        add     [g_xorwow_d],362437
        add     eax,[g_xorwow_d]
}
;-------------------------------------------------------------------------------
; NAME: RANDOM
; DESC: Draws one XORWOW value, converts it as a signed 32-bit integer and
;       scales it by g_rand_scale (1.0 / 2^32).
; OUT: xmm0.x nominally [-0.5;0.5)
;      NOTE(review): the int-to-float conversion rounds, so inputs next to
;      2^31 may come out as exactly 0.5 - confirm if the open bound matters.
; USES: eax, edx, edi, flags (through XORWOW)
;-------------------------------------------------------------------------------
macro RANDOM {
        XORWOW                                  ; eax = next 32-bit value
        cvtsi2ss xmm0,eax                       ; read as signed: [-2^31;2^31)
        mulss   xmm0,dword [g_rand_scale]       ; scale down by 2^32
}
;-------------------------------------------------------------------------------
; NAME: GenerateSequence
; DESC: Iterates c = c*c + c0 starting from c = 0 and stores every new point
;       as a pair of floats (re, im) in the array.
; IN: xmm0.x re (c0.x)
; IN: xmm1.x im (c0.y)
; IN: rdi array size in points (only edi is compared)
; IN: rsi pointer to the allocated array (8 bytes per point)
; OUT: rax index of the first stored point with re*re + im*im > g_max_dist,
;      or 0 when no point exceeds the limit within the array size
;      (0 is also returned when the very first point exceeds it)
; USES: xmm2-xmm7, flags
;-------------------------------------------------------------------------------
even 16
GenerateSequence:
        xorps   xmm4,xmm4               ; xmm4 = c.x = 0
        xorps   xmm5,xmm5               ; xmm5 = c.y = 0
        xor     eax,eax                 ; eax = point index
.Loop:
        ; xmm6 = cn.x = (c.x * c.x - c.y * c.y) + c0.x
        movaps  xmm6,xmm4
        mulss   xmm6,xmm4
        movaps  xmm3,xmm5
        mulss   xmm3,xmm5
        subss   xmm6,xmm3
        addss   xmm6,xmm0
        ; xmm7 = cn.y = (c.x * c.y) * 2 + c0.y
        movaps  xmm7,xmm4
        mulss   xmm7,xmm5
        addss   xmm7,xmm7
        addss   xmm7,xmm1
        ; the point is stored before the escape test, so the escaping point
        ; itself is written at index eax
        movss   dword [rsi+rax*8+0],xmm6
        movss   dword [rsi+rax*8+4],xmm7
        ; escaped when cn.x * cn.x + cn.y * cn.y > g_max_dist
        movaps  xmm2,xmm6
        mulss   xmm2,xmm6
        movaps  xmm3,xmm7
        mulss   xmm3,xmm7
        addss   xmm2,xmm3
        ucomiss xmm2,[g_max_dist]
        ja      .Escaped                ; return the current index
        ; c = cn, next point
        movaps  xmm4,xmm6
        movaps  xmm5,xmm7
        inc     eax
        cmp     eax,edi
        jb      .Loop
        ; array exhausted without escaping
        xor     eax,eax
.Escaped:
        ret
;-------------------------------------------------------------------------------
; NAME: AllocateMemory
; DESC: Hands out the block that starts at [MemoryStart], advances
;       [MemoryStart] by the requested size and fills the block with zeros.
;       There is no check against the end of the reserved area.
; IN: rdi size in bytes
; OUT: rax memory address
; USES: rcx (0 on return), rdi (end of the block on return), flags
;-------------------------------------------------------------------------------
even 16
AllocateMemory:
        mov     rax,[MemoryStart]       ; rax = address of the new block
        push    rdi                     ; keep the size    -> [rsp+8] below
        push    rax                     ; keep the address -> [rsp]
        add     [MemoryStart],rdi       ; bump the allocation pointer
        mov     rcx,rdi
        mov     rdi,rax                 ; rdi = fill cursor
        shr     rcx,3
        xor     eax,eax
        rep stosq                       ; zero the whole qwords
        mov     ecx,[rsp+8]
        and     ecx,111b
        rep stosb                       ; zero the 0..7 trailing bytes
        pop     rax                     ; return the block address
        lea     rsp,[rsp+8]             ; drop the saved size without touching flags
        ret
;-------------------------------------------------------------------------------
; NAME: Main
; DESC: Program main function.
;       1. Allocates the sequence buffer (SEQ_SIZE points of 8 bytes) and the
;          hit-count image (IMG_SIZE*IMG_SIZE dwords).
;       2. ITERATIONS * 1000000 times: draws a random c0, runs
;          GenerateSequence on it and, when a non-zero length is returned,
;          increments the image cell addressed by each returned point.
;       3. Finds the largest and smallest cell value, creates the TGA file,
;          writes the header and then every cell as a grey pixel (3 bytes).
; USES: rax,rbx,rcx,rdx,rsi,rdi,r8,r9,r12-r15,xmm0-xmm7 (none are saved here)
;-------------------------------------------------------------------------------
align 16
Main:
; Local variables, addressed relative to the frame pointer set up below.
ImgPtr equ rbp-08
SeqPtr equ rbp-16
Pixel equ rbp-24
push rbp
mov rbp,rsp
sub rsp,128 ; room for the locals (only the three slots above are used)
; alloc mem for the sequence
mov edi,SEQ_SIZE*8
call AllocateMemory
mov [SeqPtr],rax
; alloc mem for the image
mov edi,IMG_SIZE*IMG_SIZE*4
call AllocateMemory
mov [ImgPtr],rax
; begin loops
xor r13d,r13d ; .LoopIterations counter
.LoopIterations:
xor r12d,r12d ; .LoopOneMillion counter
.LoopOneMillion:
; c0.y: first random value scaled by g_range, kept in xmm1
RANDOM
mulss xmm0,[g_range]
movaps xmm1,xmm0
; c0.x: second random value scaled by g_range, left in xmm0
RANDOM
mulss xmm0,[g_range]
mov edi,SEQ_SIZE
mov rsi,[SeqPtr]
call GenerateSequence; eax = n sequence size
test eax,eax
jz .LoopSequenceEnd ; n = 0: nothing to plot for this c0
xor ecx,ecx ; ecx = i = 0 loop counter
mov r9,[SeqPtr] ; r9 = sequence base address
mov r8,[ImgPtr] ; r8 = image base address
; loop-invariant mapping constants (GenerateSequence overwrote xmm2-xmm7)
movss xmm2,[g_img_size]
movaps xmm3,xmm2
mulss xmm3,[g_0_5] ; xmm3 = (g_img_size)/2
movss xmm4,[g_zoom]
mulss xmm4,xmm2 ; xmm4 = g_zoom * g_img_size
movss xmm5,[g_offsetx]; xmm5 = g_offsetx
movss xmm6,[g_offsety]; xmm6 = g_offsety
.LoopSequence:
cmp ecx,eax ; loop while i < n
je .LoopSequenceEnd
movd xmm0,[r9+rcx*8+0] ; load re
movd xmm1,[r9+rcx*8+4] ; load im
addss xmm0,xmm5 ; xmm0 = re+g_offsetx
addss xmm1,xmm6 ; xmm1 = im+g_offsety
mulss xmm0,xmm4 ; xmm0 = (re+g_offsetx)*g_img_size*g_zoom
mulss xmm1,xmm4 ; xmm1 = (im+g_offsety)*g_img_size*g_zoom
addss xmm0,xmm3 ; xmm0 = (re+g_offsetx)*g_img_size*g_zoom+g_img_size/2
addss xmm1,xmm3 ; xmm1 = (im+g_offsety)*g_img_size*g_zoom+g_img_size/2
; cvtss2si rounds according to MXCSR rather than truncating
cvtss2si edi,xmm0 ; edi = x = int(xmm0.x)
cvtss2si esi,xmm1 ; esi = y = int(xmm1.x)
; skip the point unless 0 <= x < IMG_SIZE and 0 <= y < IMG_SIZE
or edi,edi
jl @F
cmp edi,IMG_SIZE
jge @F
or esi,esi
jl @F
cmp esi,IMG_SIZE
jge @F
imul esi,esi,IMG_SIZE
add esi,edi ; esi = y*IMG_SIZE + x
inc dword [r8+rsi*4] ; count one more hit in that cell
@@:
inc ecx
jmp .LoopSequence
.LoopSequenceEnd:
; continue .LoopOneMillion
inc r12d
cmp r12d,1000000
jb .LoopOneMillion
; continue .LoopIterations
inc r13d
cmp r13d,ITERATIONS
jb .LoopIterations
; find max value
mov r8,[ImgPtr] ; r8 = image base address
xor r12d,r12d ; r12d = max_val = 0
xor eax,eax ; eax = i = loop counter
.LoopMax:
cmp [r8+rax*4],r12d
cmova r12d,[r8+rax*4] ; unsigned: take the cell when it is above max_val
inc eax
cmp eax,IMG_SIZE*IMG_SIZE
jb .LoopMax
; find min value
mov r13d,r12d ; r13d = min_val = max_val
xor eax,eax ; eax = i = loop counter
.LoopMin:
cmp [r8+rax*4],r13d
cmovb r13d,[r8+rax*4] ; unsigned: take the cell when it is below min_val
inc eax
cmp eax,IMG_SIZE*IMG_SIZE
jb .LoopMin
; create TGA file
; (`int 21h` is assembled as int 0A1h by the int macro at the top of the file)
mov ah,3Ch
lea edx,[g_tga_name]
xor ecx,ecx
int 21h
; NOTE(review): the result is not checked for failure before it is used
xchg ebx,eax ; ebx = value returned in eax, used as the file handle from here on
; write TGA header
lea edx,[g_tga_head]
mov ecx,18
call WriteToFile
; write image pixels
mov byte [Pixel+3],255 ; 4th byte of the pixel slot; never sent, only 3 bytes are written
mov r14,[ImgPtr] ; r14 = image base address
xor r15d,r15d ; r15d = i = loop counter
cvtsi2ss xmm0,r12d ; load max_value
cvtsi2ss xmm1,r13d ; load min_value
movaps xmm2,xmm0
subss xmm2,xmm1 ; xmm2 = r = max_value - min_value
.LoopWrite:
mov eax,[r14+r15*4] ; eax = image_value
sub eax,r13d ; eax = image_value - min_value
cvtsi2ss xmm0,eax ; xmm0 = float(image_value - min_value)
addss xmm0,xmm0 ; xmm0 = 2.0f * float(image_value - min_value)
divss xmm0,xmm2 ; xmm0 = 2.0f * float(image_value - min_value) / r
minss xmm0,[g_1_0] ; clamp to 1.0
maxss xmm0,[g_0_0] ; clamp to 0.0
mulss xmm0,[g_255_0] ; convert to 0 - 255
cvtss2si eax,xmm0
mov [Pixel+0],al ; store B component
mov [Pixel+1],al ; store G component
mov [Pixel+2],al ; store R component
; write pixel data
; NOTE(review): the 32-bit lea assumes the stack lies below 4 GB - confirm
lea edx,[Pixel]
mov ecx,4-1 ; 3 bytes per pixel: B, G, R
call WriteToFile
; continue .LoopWrite
inc r15d
cmp r15d,IMG_SIZE*IMG_SIZE
jb .LoopWrite
; close file (ebx still holds the handle: WriteToFile does not modify rbx)
mov ah,3Eh
int 21h
mov rsp,rbp
pop rbp
ret
; drop the symbolic names of the locals so they do not leak past Main
restore ImgPtr,SeqPtr,Pixel
;-------------------------------------------------------------------------------
; NAME: Start64
; DESC: Program entry point. Reached from Start32 through the far jump into
;       64-bit code. Records the exit address and the transfer buffer
;       description, runs Main and leaves through the exit address.
;-------------------------------------------------------------------------------
Start64:
push 0
; NOTE(review): presumably a service of the loader/extender; the code below
; relies on r8 and rcx holding values after it - confirm the exact contract.
syscall
mov [ExitAddr],r8 ; address jumped to when the program finishes
mov [BufferVar],rcx ; one 8-byte store: low dword -> LinBuff, high dword -> SegBuff
call Main
xor al,al ; al = 0 (presumably the exit code - confirm)
jmp [ExitAddr]
;-------------------------------------------------------------------------------
even 1
; Name of the output file, zero-terminated.
g_tga_name db 'picture.tga',0
; 18-byte TGA header (Main writes exactly 18 bytes starting here):
; id length 0, no colour map, image type 2 (uncompressed true-colour),
; 9 zero bytes (colour map specification and x/y origin),
; width and height = IMG_SIZE as little-endian words,
; 24 bits per pixel (32-8), image descriptor 0.
g_tga_head db 0,0,2,9 dup 0
db (IMG_SIZE and 0x00FF),(IMG_SIZE and 0xFF00) shr 8
db (IMG_SIZE and 0x00FF),(IMG_SIZE and 0xFF00) shr 8,32-8,0
even 4
; XORWOW generator state: five state words and the additive counter d.
g_xorwow_x dd 123456789
g_xorwow_y dd 362436069
g_xorwow_z dd 521288629
g_xorwow_w dd 88675123
g_xorwow_v dd 5783321
g_xorwow_d dd 6615241
g_rand_scale dd 2.3283064e-10; 1.0 / 2^32
; Assembly-time parameters.
IMG_SIZE = 800 ; image width and height in pixels
SEQ_SIZE = 50 ; maximum number of points generated per sequence
ITERATIONS = 100 ;1000 ; outer loop count; each iteration draws 1000000 samples
; Float parameters read by Main and GenerateSequence.
g_img_size dd 800.0 ; float copy of IMG_SIZE - keep the two in sync by hand
g_offsetx dd 0.5 ; added to re before scaling
g_offsety dd 0.0 ; added to im before scaling
g_zoom dd 0.4 ; multiplied by g_img_size to form the plot scale
g_max_dist dd 10.0 ; limit for re*re + im*im in GenerateSequence
g_range dd 4.2 ; RANDOM output is multiplied by this to form c0
g_0_5 dd 0.5
g_0_0 dd 0.0
g_1_0 dd 1.0
g_255_0 dd 255.0
;-------------------------------------------------------------------------------
; Next free address handed out by AllocateMemory; starts at the MemStrt pool.
MemoryStart dq MemStrt
; Address Start64 jumps to after Main returns (filled in by Start64).
ExitAddr dq ?
;
; Start64 stores one qword at BufferVar, so these two dwords must stay adjacent
; and in this order.
BufferVar:
LinBuff dd ? ;transfer buffer *linear address
SegBuff dd ? ;transfer buffer segment address
;
; Call structure instance filled by DosIntWithBuffer and passed to DosInt.
Regs RMCS
;-------------------------------------------------------------------------------
even 16
; Pool served by AllocateMemory: sized for exactly the two allocations made by
; Main (sequence buffer + image).
MemStrt rb SEQ_SIZE*8+IMG_SIZE*IMG_SIZE*4
;-------------------------------------------------------------------------------