Hello! Please help me rewrite some code.
Here is the part of the code that I am rewriting right now:
{ Links the object file Root16Viz.obj and declares Root16Viz as a routine whose
  implementation lives outside this Pascal unit (external), called with the
  register calling convention.
  Parameters as declared here:
    IterDat        - open array of Single (input values)
    min, max, step - Single scalars
    size           - integer
    pal            - open array of TCol (TCol is declared elsewhere)
    buffer         - open array of byte, passed as an out parameter
  NOTE(review): the assembly listing for this routine names its inputs as extrn
  symbols and calls the palette "palette", while this declaration passes them as
  arguments and calls it "pal" - confirm how the object file is meant to receive
  its arguments.
  NOTE(review): an external declaration has no Pascal body, so the begin/end
  block that follows presumably belongs to the routine being replaced - confirm. }
{$L Root16Viz.obj}
procedure Root16Viz(IterDat: array of Single; min, max, step: Single; size:integer; pal:array of TCol; out buffer:array of byte); register; external;
{ Routine body: converts IterDat values to palette colours, four values per loop
  pass. Each value goes through four nested square roots, is offset by the same
  root of min, scaled, rounded to a palette index, and the b, g, r bytes of that
  palette entry are written to buffer.
  The routine header and the declarations of index, x, maxx, maxy, stepSSE,
  minSSE, tmpSSE, ColorIndexSSE and Fill1SSE are not part of this excerpt. }
begin
{ index is the write position in buffer, x the read position in IterDat. }
index := 0;
x := 0;
{ Scale factor: reciprocal of (four-fold sqrt of max minus four-fold sqrt of min) / 4095.
  Fill1SSE is defined elsewhere - presumably it copies field a into the other
  fields of the record; TODO confirm.
  NOTE(review): when max equals min the divisor is zero. }
stepSSE.a:=1/((sqrt(sqrt(sqrt(sqrt(max))))-sqrt(sqrt(sqrt(sqrt(min)))))/4095.0); Fill1SSE(stepSSE);
{ Offset: four-fold sqrt of min. }
minSSE.a:=sqrt(sqrt(sqrt(sqrt(min)))); Fill1SSE(minSSE);
{ Process maxx*maxy elements, advancing x by 4 each pass.
  NOTE(review): each pass touches IterDat[x] to IterDat[x+3]; if maxx*maxy is not
  a multiple of 4 the last pass goes past that count - confirm the arrays are
  padded. }
While x<maxx*maxy do
begin
{ Disabled (commented-out) scalar loads follow. }
{tmpSSE.a := (IterDat[x]);
tmpSSE.b := (IterDat[x-1]);
tmpSSE.c := (IterDat[x-2]);
tmpSSE.d := (IterDat[x-3]); }
{ SSE part: eax = IterDat + x*4, i.e. the address of IterDat[x]. Four packed
  singles are loaded from there, sqrtps is applied four times, minSSE is
  subtracted, the result is multiplied by stepSSE and stored into tmpSSE. }
asm
mov eax, IterDat
mov edx, [x]
shl edx, 2
add eax, edx
movups xmm0, [eax]
sqrtps xmm0, xmm0
sqrtps xmm0, xmm0
sqrtps xmm0, xmm0
sqrtps xmm0, xmm0
movups xmm1, [minSSE]
subps xmm0, xmm1
movups xmm1, [stepSSE]
mulps xmm0, xmm1
movups [tmpSSE], xmm0
end;
{ Round each scaled value to an integer palette index. }
ColorIndexSSE.a:=round(tmpSSE.a);
ColorIndexSSE.b:=round(tmpSSE.b);
ColorIndexSSE.c:=round(tmpSSE.c);
ColorIndexSSE.d:=round(tmpSSE.d);
{ Indices above 4095 are replaced by 0.
  NOTE(review): there is no matching check for negative indices, which the
  subtraction above can produce when a value is below min - confirm this cannot
  happen for the caller's data. }
if (ColorIndexSSE.a>4095) then ColorIndexSSE.a:=0;
if (ColorIndexSSE.b>4095) then ColorIndexSSE.b:=0;
if (ColorIndexSSE.c>4095) then ColorIndexSSE.c:=0;
if (ColorIndexSSE.d>4095) then ColorIndexSSE.d:=0;
{ Source values less than or equal to 1 also map to palette entry 0. }
if (IterDat[x]<=1) then ColorIndexSSE.a:=0;
if (IterDat[x+1]<=1) then ColorIndexSSE.b:=0;
if (IterDat[x+2]<=1) then ColorIndexSSE.c:=0;
if (IterDat[x+3]<=1) then ColorIndexSSE.d:=0;
{ Write b, g, r of each selected palette entry. Every value occupies 4 bytes in
  buffer; the 4th byte of each group (index+3, +7, +11, +15) is not written. }
buffer[index] := pal[ColorIndexSSE.a].b;
buffer[index+1] := pal[ColorIndexSSE.a].g;
buffer[index+2] := pal[ColorIndexSSE.a].r;
buffer[index+4] := pal[ColorIndexSSE.b].b;
buffer[index+5] := pal[ColorIndexSSE.b].g;
buffer[index+6] := pal[ColorIndexSSE.b].r;
buffer[index+8] := pal[ColorIndexSSE.c].b;
buffer[index+9] := pal[ColorIndexSSE.c].g;
buffer[index+10] := pal[ColorIndexSSE.c].r;
buffer[index+12] := pal[ColorIndexSSE.d].b;
buffer[index+13] := pal[ColorIndexSSE.d].g;
buffer[index+14] := pal[ColorIndexSSE.d].r;
{ Advance by 4 output groups (16 bytes) and 4 input values. }
inc(index, 16);
inc(x,4);
end;
end;
Here is the MS COFF source file:
; Unfinished AVX version of Root16Viz, assembled as an MS COFF object.
; What the visible code does: loads 8 packed singles from [eax], applies
; vsqrtps four times, combines the result with ymm1/ymm2, then runs a
; compare/mask sequence. Nothing in this listing stores to the output buffer.
format MS COFF
public Root16Viz
; The routine's inputs are referenced as external symbols.
; NOTE(review): confirm these symbols really exist at link time; a Pascal
; "register; external;" declaration passes arguments in registers / on the
; stack rather than as linker symbols.
extrn IterDat
extrn min
extrn max
extrn step
extrn size
extrn palette
extrn buffer
; sz: one reserved dword, receives size-4 below.
sz rd 1
; Eight-lane single-precision constants 1.0 and 0.0.
ones dd 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
zeroes dd 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
Root16Viz:
; sz = size - 4
; NOTE(review): ebx and edi are overwritten without being saved; the Delphi
; register convention expects callees to preserve them - confirm.
mov edi, [size]
sub edi, 4
mov [sz], edi
; ecx and edi are cleared; neither is changed again in the visible code.
mov ecx, 0
mov edi, 0
; eax = dword stored at IterDat, edx = dword at buffer, ebx = dword at palette.
; edx is not used anywhere below.
mov eax, [IterDat]
mov edx, [buffer]
mov ebx, [palette]
; Load 32 bytes starting at min and at step.
vmovups ymm1, [min]
vmovups ymm2, [step]
; NOTE(review): vshufps works per 128-bit lane, so imm8 = 0 replicates element 0
; of EACH lane; the upper lane is filled from the 5th dword of the 32-byte load,
; not from min/step. vbroadcastss is presumably what was intended - confirm.
vshufps ymm1, ymm1, ymm1, 0 ; replicate min
; NOTE(review): the second source here is ymm1, so elements 2 and 3 of each lane
; come from ymm1 (min), not from step.
vshufps ymm2, ymm2, ymm1, 0 ; and step across the whole register
fx: ; loop
; Load 8 singles from [eax] and take the square root four times.
; NOTE(review): eax is never advanced inside the loop.
vmovups ymm0, [eax]
vsqrtps ymm0, ymm0
vsqrtps ymm0, ymm0
vsqrtps ymm0, ymm0
vsqrtps ymm0, ymm0
; NOTE(review): as written this computes ymm0 = ymm1 - ymm0; the original
; comment claimed ymm0 - ymm1, so the source operands look swapped.
vsubps ymm0, ymm1, ymm0 ; actual result: ymm0 = ymm1 - ymm0
vmulps ymm0, ymm2, ymm0 ;ymm0 = ymm0 * ymm2
; Replaces ymm0 with a per-lane mask: all ones where the value equals 1.0.
; NOTE(review): this overwrites the scaled values computed above.
vcmpeqps ymm0, ymm0, yword[ones]
; NOTE(review): original comment said "ymm1 has 1.0 mask", but the compare
; result was written to ymm0; ymm1 still holds the min vector here.
vpandn ymm0, ymm1, ymm0 ; ymm0 = (NOT ymm1) AND ymm0
; NOTE(review): ANDing with all-zero bits clears ymm1, which destroys the min
; vector for every later pass of the loop.
vpand ymm1, ymm1, yword[zeroes] ; ymm1 = ymm1 AND 0
vpor ymm0, ymm0, ymm1
; NOTE(review): vmovupb does not appear to be a valid x86 mnemonic - confirm
; what load was intended here.
vmovupb ymm1, [ebx]
; NOTE(review): sz without brackets is the address of sz, not its contents,
; and ecx is never incremented in this loop - confirm the intended counter.
cmp ecx, sz ; if ecx<=size-4
jne fx ; then keep looping!
; Return and release 8 bytes of stack arguments.
ret 8
Please help me rewrite the last part, which works with the buffer! What is the fastest way to load and compare the parameters?