flat assembler
Message board for the users of flat assembler.

Index > Main > Help me to rewrite (SSE code) last part with buffer

Author
Thread Post new topic Reply to topic
SeryZone



Joined: 20 Dec 2013
Posts: 38
Location: Ukraine, Kryviy Rih
SeryZone 08 Jun 2014, 17:09
Hello! Please help me rewrite this code!

Here is the part of the code that I am rewriting now:

Code:

// Links the object file Root16Viz.obj and declares Root16Viz as an external
// routine using the `register` calling convention.
// NOTE(review): the assembly listing in this post reads its inputs through
// `extrn` symbols rather than as procedure parameters - confirm that the
// object file and this declaration agree on how arguments are passed.
{$L Root16Viz.obj}
procedure Root16Viz(IterDat: array of Single; min, max, step: Single; size:integer; pal:array of TCol; out buffer:array of byte); register; external;
 

// Reference (Pascal + inline SSE) version of the colouring loop: for every
// group of four values in IterDat it computes a palette index from the 16th
// root of the value and writes the palette entry's b, g, r bytes to buffer.
// The enclosing procedure header and the declarations of x, index, maxx,
// maxy, tmpSSE, minSSE, stepSSE and ColorIndexSSE are not part of this excerpt.
begin
        index := 0;
        x := 0;
        // Scale factor: 4095 divided by (max^(1/16) - min^(1/16)); four nested
        // sqrt calls give the 16th root.
        // NOTE(review): Fill1SSE is not shown here; presumably it copies field
        // .a into the remaining lanes of the record - confirm.
        stepSSE.a:=1/((sqrt(sqrt(sqrt(sqrt(max))))-sqrt(sqrt(sqrt(sqrt(min)))))/4095.0);   Fill1SSE(stepSSE);
        // Offset: 16th root of min.
        minSSE.a:=sqrt(sqrt(sqrt(sqrt(min))));                       Fill1SSE(minSSE);

        // Processes four source values per iteration (x advances by 4 below).
        // NOTE(review): if maxx*maxy is not a multiple of 4, the last iteration
        // touches IterDat[x+1..x+3] past that bound - confirm the data is padded.
        While x<maxx*maxy do
         begin
            // Disabled scalar load, kept from an earlier version. It indexes
            // x-1..x-3, whereas the active code below uses x..x+3.
            {tmpSSE.a := (IterDat[x]);
            tmpSSE.b := (IterDat[x-1]);
            tmpSSE.c := (IterDat[x-2]);
            tmpSSE.d := (IterDat[x-3]); }
            // Inline SSE: load four singles starting at IterDat + x*4 bytes
            // (shl edx, 2), apply sqrtps four times (16th root), subtract
            // minSSE, multiply by stepSSE, and store the result in tmpSSE.
            asm
              mov    eax, IterDat
              mov    edx,  [x]
              shl    edx,  2
              add    eax,  edx
              movups xmm0, [eax]
              sqrtps xmm0, xmm0
              sqrtps xmm0, xmm0
              sqrtps xmm0, xmm0
              sqrtps xmm0, xmm0
              movups xmm1, [minSSE]
              subps  xmm0, xmm1
              movups xmm1, [stepSSE]
              mulps  xmm0, xmm1
              movups [tmpSSE], xmm0
            end;
            // Round each scaled value to an integer palette index.
            ColorIndexSSE.a:=round(tmpSSE.a);
            ColorIndexSSE.b:=round(tmpSSE.b);
            ColorIndexSSE.c:=round(tmpSSE.c);
            ColorIndexSSE.d:=round(tmpSSE.d);
            // Indices above 4095 fall back to entry 0.
            // NOTE(review): there is no matching check for negative indices -
            // confirm that inputs below min cannot occur.
            if (ColorIndexSSE.a>4095) then ColorIndexSSE.a:=0;
            if (ColorIndexSSE.b>4095) then ColorIndexSSE.b:=0;
            if (ColorIndexSSE.c>4095) then ColorIndexSSE.c:=0;
            if (ColorIndexSSE.d>4095) then ColorIndexSSE.d:=0;
            // Source values <= 1 are also forced to entry 0.
            if (IterDat[x]<=1)   then ColorIndexSSE.a:=0;
            if (IterDat[x+1]<=1) then ColorIndexSSE.b:=0;
            if (IterDat[x+2]<=1) then ColorIndexSSE.c:=0;
            if (IterDat[x+3]<=1) then ColorIndexSSE.d:=0;
            // Write b, g, r for each of the four pixels. Each pixel occupies
            // four bytes in buffer; the fourth byte (index+3, +7, +11, +15)
            // is not written here.
            buffer[index]     :=  pal[ColorIndexSSE.a].b;
            buffer[index+1]   :=  pal[ColorIndexSSE.a].g;
            buffer[index+2]   :=  pal[ColorIndexSSE.a].r;
            buffer[index+4]   :=  pal[ColorIndexSSE.b].b;
            buffer[index+5]   :=  pal[ColorIndexSSE.b].g;
            buffer[index+6]   :=  pal[ColorIndexSSE.b].r;
            buffer[index+8]   :=  pal[ColorIndexSSE.c].b;
            buffer[index+9]   :=  pal[ColorIndexSSE.c].g;
            buffer[index+10]  :=  pal[ColorIndexSSE.c].r;
            buffer[index+12]  :=  pal[ColorIndexSSE.d].b;
            buffer[index+13]  :=  pal[ColorIndexSSE.d].g;
            buffer[index+14]  :=  pal[ColorIndexSSE.d].r;
            // Advance by 4 pixels: 16 output bytes, 4 source values.
            inc(index, 16);
            inc(x,4);
          end;
      end;
    


Here is the MS COFF source file:

Code:
; Output an MS COFF object file (to be linked into the host program).
format MS COFF

; Entry point exported to the linker.
public Root16Viz

; Symbols expected to be resolved by the linker; the routine reads its inputs
; through them.
; NOTE(review): the Delphi declaration in this post passes these values as
; procedure parameters, which normally do not exist as linker symbols -
; confirm how the host program actually provides them.
extrn IterDat
extrn min
extrn max
extrn step
extrn size
extrn palette
extrn buffer


; sz: one reserved dword used by Root16Viz to hold its loop bound.
sz rd 1
; Eight packed single-precision constants each (one full ymm register).
ones dd  1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
zeroes dd  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

; Root16Viz - AVX loop that turns source floats into scaled 16th-root values,
; eight floats per pass.
;
; Inputs are read from the extrn symbols declared at the top of this listing:
;   [IterDat]            pointer to the source single-precision values
;   [size]               number of source values (signed dword)
;   [min], [step]        scalar singles, broadcast to all eight lanes
;   [buffer], [palette]  pointers, kept in edx / ebx for the output stage
; NOTE(review): the Pascal reference in this post precomputes min^(1/16) and
; 4095/(max^(1/16)-min^(1/16)) before use; this routine takes [min] and [step]
; as given - confirm the caller stores the precomputed values there.
;
; Per pass: ymm0 = (IterDat[i..i+7]^(1/16) - min) * step, then lanes that
; compare equal to 1.0 are forced to 0.0.
;
; Only whole groups of eight values are processed; a tail of size mod 8 values
; is left untouched.
;
; Registers: eax = source pointer, edx = buffer pointer, ebx = palette pointer
; (saved and restored), ecx = element index, ymm0-ymm3 scratch.
Root16Viz:
  push ebx                        ; ebx is callee-saved in Delphi's register convention
  mov  ecx, [size]
  sub  ecx, 8                     ; a ymm register holds 8 singles, so the last
  mov  [sz], ecx                  ; valid group starts at index size-8
  test ecx, ecx
  js   .done                      ; fewer than 8 values: nothing to do
  xor  ecx, ecx
  mov  eax, [IterDat]
  mov  edx, [buffer]
  mov  ebx, [palette]
  ; Replicate the scalars across all 8 lanes. vbroadcastss reads exactly one
  ; dword; a 32-byte vmovups plus vshufps would read past the scalar and only
  ; splat within each 128-bit lane.
  vbroadcastss ymm1, dword [min]
  vbroadcastss ymm2, dword [step]
  .next8:                         ; loop over groups of 8 values
    vmovups     ymm0, [eax]
    vsqrtps     ymm0, ymm0        ; four square roots in a row = 16th root
    vsqrtps     ymm0, ymm0
    vsqrtps     ymm0, ymm0
    vsqrtps     ymm0, ymm0
    vsubps      ymm0, ymm0, ymm1  ; ymm0 = ymm0 - ymm1 (first source minus second)
    vmulps      ymm0, ymm0, ymm2  ; ymm0 = ymm0 * ymm2

    ; Build the "== 1.0" mask in ymm3 so that neither the data (ymm0) nor the
    ; broadcast min (ymm1) is overwritten.
    vcmpeqps    ymm3, ymm0, yword [ones]
    ; ymm0 = (not ymm3) and ymm0: lanes equal to 1.0 become 0.0, others are
    ; kept. AND-ing the mask with zeroes and OR-ing it back in would add
    ; nothing, so those steps are omitted. vandnps is the AVX form; the
    ; integer vpand/vpandn/vpor on ymm registers require AVX2.
    ; NOTE(review): the Pascal reference zeroes the index where the raw
    ; IterDat value is <= 1, not where the scaled value == 1.0 - confirm
    ; which condition is intended.
    vandnps     ymm0, ymm3, ymm0

    ; TODO: output stage (unfinished in the original, which had the
    ; non-existent instruction "vmovupb ymm1, [ebx]" here). Following the
    ; Pascal reference, what remains is: convert ymm0 to integers, replace
    ; out-of-range indices with 0, fetch the palette entry for each index via
    ; ebx and store its b, g, r bytes at [edx]. The TCol record layout is not
    ; shown in this post, so the lookup is not implemented here.

    add         eax, 32           ; 8 singles consumed
    add         edx, 32           ; the Pascal reference uses 4 output bytes per pixel
    add         ecx, 8
    cmp         ecx, [sz]         ; compare with the stored bound, not its address
  jle .next8                      ; loop while ecx <= size-8

  .done:
  vzeroupper                      ; avoid AVX/SSE transition stalls in the caller
  pop  ebx
; NOTE(review): "ret 8" is kept from the original; it must match the number of
; stack-argument bytes the caller pushes - confirm against the Delphi side.
ret 8


Please help me rewrite the last part, which writes to the buffer! What is the fastest way to load and compare the parameters?
Post 08 Jun 2014, 17:09
View user's profile Send private message Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  


< Last Thread | Next Thread >
Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum


Copyright © 1999-2025, Tomasz Grysztar. Also on GitHub, YouTube.

Website powered by rwasa.