flat assembler
Message board for the users of flat assembler.

Index > Main > Bresenham's line algorithm

Author
Thread Post new topic Reply to topic
klavs.pr



Joined: 20 Apr 2010
Posts: 20
Location: Latvia
klavs.pr 22 Aug 2010, 16:39
Can anyone tell me whats wrong with this code? It is drawing a line but that line is not correct. Sometimes it is not drawing anything.

Algorithm: http://en.wikipedia.org/wiki/Bresenham%27s_line_algorithm#Optimization

Thanks in advance.
Klavs Prieditis.

Code:
proc dbg_draw_line x0:WORD, y0:WORD, x1:WORD, y1:WORD, color:DWORD
      locals
        steep db 0 ; true
        deltax dw ?
        deltay dw ?
        error dw ?
        ystep dw ?
        y dw ?
        x dw ?
        tmp1 dw ?
       ; tmp2 dw ?
        div2 db 2
      endl

        mov ax, [x1]          ; ax = x1
        sub ax, [x0]          ; ax = x1-x0

        cmp ax, 0             ; if ax
        jae a1                ;   >= 0 goto a1
        neg ax                ; else ax = -ax
      a1:
        mov [tmp1], ax        ; tmp1 = x1-x0

        mov ax, [y1]          ; ax = y1
        sub ax, [y0]          ; ax = y1-y0

        cmp ax, 0             ; if ax
        jae a2                ;   >= 0 goto a2
        neg ax                ; else ax = -ax
      a2:

        cmp ax, [tmp1]        ; if y1-y0
        jae true              ;   >= x1-x0 goto true
        mov [steep], -1       ; else steep = false
    true:

        cmp [steep], -1       ; if steep
        je l1                 ;   == false goto l1

        mov ax, [x0]          ; ax = x0
        xchg ax, [y0]         ; y0 = x0; ax = y0
        mov [x0], ax          ; x0 = ax

        mov ax, [x1]          ; ax = x1
        xchg ax, [y1]         ; y1 = x1; ax = y1
        mov [x1], ax          ; x1 = y1

      l1:
        mov ax, [x1]          ; ax = x1
        cmp [x0], ax          ; if x0
        jbe l2                ;   <= x1 goto l2

        ;mov ax, [x0]
        ;xcng ax, [x1]
        ;mov [x0], ax          ;swap
        xchg ax, [x0]         ; x0 = x1; ax = x0
        mov [x1], ax          ; x1 = x0

        mov ax, [y0]          ; ax = y0
        xchg ax, [y1]         ; y1 = y0; ax = y1
        mov [y0], ax          ; y0 = y1

      l2:
        mov ax, [y1]          ; ax = y1
        sub ax, [y0]          ; ax = y1-y0
        cmp ax, 0             ; if ax
        jae a3                ;   >= 0 goto a3
        neg ax                ; else ax = -ax
      a3:
        mov [deltay], ax      ; deltay = abs(y1-y0)

        mov ax, [x1]          ; ax = x1
        sub ax, [x0]          ; ax = x1-x0
        mov [deltax], ax      ; deltax = x1-x0

        ;mov ax, [deltax]
        div byte [div2]       ; ax = deltax/2
        ;and ax, 0x00FF
        mov byte [error], al  ; error = deltax/2

        push [y0]             ; stack = y0
        pop [y]               ; y = y0

        mov ax, [y0]          ; ax = y0
        cmp ax, [y1]          ; if y0
        jae l3                ;   >= y1 goto l3
        mov [ystep], 1        ; else ystep = 1
        jmp l4                ; goto l4
      l3:
        mov [ystep], -1       ; ystep = -1
      l4:
        push [x0]             ; stack = x0
        pop [x]               ; x = x0
      l5:
        cmp [steep], -1       ; if steep
        je l6                 ;   == false goto l6
        stdcall dbg_put_pixel, dword[y], dword[x], [color] ; else plot(y,x)
        jmp l7                ; goto l7
      l6:
        stdcall dbg_put_pixel, dword[x], dword[y], [color] ; plot (x,y)
      l7:
        mov ax, [error]       ; ax = error
        sub ax, [deltay]      ; ax = error - deltay
        mov [error], ax       ; error = error - deltay

        cmp ax, 0             ; if error
        jae l8                ;   >= 0 goto l8
        mov ax, [ystep]       ; ax = ystep
        add [y], ax           ; y = y + ystep

        mov ax, [deltax]      ; ax = deltax
        add [error], ax       ; ax = deltax+error
      l8:
        mov ax, [x]           ; ax = x
        cmp ax, [x1]          ; if x
        inc [x]               ; /x++
        jb l5                 ;    < x1 goto l5
        ret
endp

    
Post 22 Aug 2010, 16:39
View user's profile Send private message Reply with quote
DJ Mauretto



Joined: 14 Mar 2007
Posts: 464
Location: Rome,Italy
DJ Mauretto 22 Aug 2010, 16:56
Some time ago i wrote a line algorithm...[code]


This tool test the algorithm...
Wink

_________________
Nil Volentibus Arduum Razz


Last edited by DJ Mauretto on 06 Dec 2011, 08:06; edited 1 time in total
Post 22 Aug 2010, 16:56
View user's profile Send private message Reply with quote
luke77



Joined: 14 May 2010
Posts: 18
luke77 22 Aug 2010, 20:16
;-------------------------------------------------2002 AK---------
; line.asm Draw a line using Bresenham's algorithmn
;
;

.model small
.stack 200h ; 512 byte stack

.data

vid_mode db 0
start_x dw 0
end_x dw 319
start_y dw 0
end_y dw 199
color db 2

diagonal_y_increment dw ?
diagonal_x_increment dw ?
short_distance dw ?
straight_x_increment dw ?
straight_y_increment dw ?
straight_count dw ?
diagonal_count dw ?

prescence db ' è---é '

.code

start:
mov ax,@data
mov ds,ax

mov ah,0fh ; determine current video mode
int 10h
mov vid_mode,al ; move mode value into buffer

;set screen mode
mov ah,0
mov al,4 ; orig. value = 4
int 10h

mov cx,1
mov dx,1

mov di,end_y
sub di,start_y
jge keep_y
neg dx
neg di
keep_y:
mov diagonal_y_increment,dx
mov si,end_x
sub si,start_x
jge keep_x
neg cx
neg si
keep_x:
mov diagonal_y_increment,cx
cmp si,di
jge horz_seg
mov cx,0
xchg si,di
jmp save_values
horz_seg:
mov dx,0
save_values:
mov short_distance,di
mov straight_x_increment,cx
mov straight_y_increment,dx

mov ax,short_distance
shl ax,1
mov straight_count,ax
sub ax,si
mov bx,ax
sub ax,si
mov diagonal_count,ax

mov cx,start_x
mov dx,start_y
inc si
mov al,color

mainloop:
dec si
jz line_finished
mov ah,12
int 10h
cmp bx,0
jge diagonal_line
add cx,straight_x_increment
add dx,straight_y_increment
add bx,straight_count

jmp mainloop

diagonal_line:

add cx,diagonal_x_increment
add dx,diagonal_y_increment
add bx,diagonal_count
jmp mainloop

line_finished:

mov ah,0 ; function no. for read
int 1ah ; get the time of day count
add dx,36 ; add « second(9) delay to low word
mov bx,dx ; store end of delay value in bx
repeat:
int 1ah
cmp dx,bx
jne repeat

mov ah,0 ; restore original video mode
mov al,vid_mode ;
int 10h


exit:
mov ax,4c00h
int 21h

end start

Code:
    
Post 22 Aug 2010, 20:16
View user's profile Send private message Reply with quote
Treant



Joined: 09 Oct 2009
Posts: 16
Location: Russia
Treant 23 Aug 2010, 04:00
My version:
Code:
;all 8 registers saved
; print point with certain color on screen
;  in a stack:
;   sp
;   sp+4 - x1
;   sp+8 - y1
;   sp+12- x2
;   sp+16- y2
;   sp+20- 32-bit ARGB color
line:
pusha
add esp, 00000020h ; 8 registers 8*4=32=20h bytes
 pop ebp ; return point
 pop eax ; x1
 pop ebx ; y1
 pop esi ; x2
 pop edi ; y2
 pop [color] ; color
 sub esp, 00000038h ; 8 registes + 5 function parametres + return point = 8*4+5*4+4=56=38h bytes
  ; clear error
  mov [error], 00000000h
 ; calculate line's width and height
 ; dxx=|x1-x2|
  mov ecx, eax
  sub ecx, esi ; ecx = x1-x2
  ; in ecx number without sign
  mov edx, ecx ; save ecx in edx
  neg ecx ; invert number sign
  cmovl ecx, edx ; if ecx<edx then place edx in ecx
  mov [dxx], ecx ; save |x1-x2| in [dxx]
 ; dyy=|y1-y2|
  mov ecx, ebx
  sub ecx, edi ; ecx = y1-y2
  ; in ecx number without sign
  mov edx, ecx ; save ecx in edx
  neg ecx ; invert number sign
  cmovl ecx, edx ; if ecx<edx then place edx in ecx 
  mov [dyy], ecx ; save |y1-y2| in [dyy]
 ; select painting direction
  cmp [dxx], ecx ; |x1-x2|?|y1-y2|
  jge h_line ; if |x1-x2|>=|y1-y2|
   jmp v_line ; if |y1-y2|>|x1-x2|
  h_line: ; begin paint
   ; painting always from left to right
   cmp eax, esi
   jg swap_h; if x1>x2 then interchange  x and y coordinates
    jmp start_paint_h
   swap_h: ; x=y y=x
    xchg eax, esi
    xchg ebx, edi
   start_paint_h:
    mov ecx, [dxx] ; ecx=|x1-x2|
    mov [y], ebx ; y=y0
    mov [x], eax ; x=x0
    ; start pixel
    push [color] ; color
    push [y] ;y
    push [x] ;x
    call put_pixel
    loop_paint_h:
     inc [x]
     push [color] ; color
     push [y] ;y
     push [x] ;x
     call put_pixel
     mov edx, [dyy]
     add edx, [error] ; error=error+dyy
     mov [error], edx
     imul edx, 00000002h ; edx=error*2
     cmp edx, [dxx] ; if [error]*2>=[dxx]
      jge inc_y
      jmp next_step_h
      inc_y:
       inc [y]
       mov edx, [error]
       sub edx, [dxx]
       mov [error], edx ; [error]=[error]-[dxx]
      next_step_h:
     loop loop_paint_h ; print next point
     jmp exit_line
  v_line: ; begin paint
  ; painting always from left to right
   cmp ebx, edi
   jg swap_v; if y1>y2 then interchange  x and y coordinates
    jmp start_paint_v
   swap_v: ; x=y y=x
    xchg eax, esi
    xchg ebx, edi
   start_paint_v:
    mov ecx, [dyy] ; ecx=|y1-y2|
    mov [y], ebx ; y=y0
    mov [x], eax ; x=x0
    ; start pixel
    push [color] ; color
    push [y] ;y
    push [x] ;x
    call put_pixel
    loop_paint_v:
     inc [y]
     push [color] ; color
     push [y] ;y
     push [x] ;x
     call put_pixel
     mov edx, [dxx]
     add edx, [error] ; error=error+dxx
     mov [error], edx
     imul edx, 00000002h ; edx=error*2
     cmp edx, [dyy] ; if [error]*2>=[dxx]
      jge inc_x
      jmp next_step_v
      inc_x:
       inc [x]
       mov edx, [error]
       sub edx, [dyy]
       mov [error], edx ; [error]=[error]-[dxx]
      next_step_v:
     loop loop_paint_v ; print next point
 exit_line:
popa
retn 14h

error dd 0
dxx   dd 0
dyy   dd 0
x     dd 0
y     dd 0
color dd 0
    
Post 23 Aug 2010, 04:00
View user's profile Send private message Reply with quote
Madis731



Joined: 25 Sep 2003
Posts: 2139
Location: Estonia
Madis731 23 Aug 2010, 19:25
@klavs.pr you probably have a flag set in reverse somewhere. That is the most common error with Bresenham's.

First try changing all your above/below flags to greater/less because you're dealing with signed numbers. Your abs() function would start working then.
Line 117 is correct (jb l5 ;< x1 goto l5) - don't change that Wink

Code:
;@Line 33 - change
        jae true              ;   >= x1-x0 goto true
;to
        jg  true              ;   > x1-x0 goto true (like in the C-source on wiki)

;@Line 77 - optimization
        div byte [div2]       ; ax = deltax/2
;to
        shr ax,1              ; ax = deltax/2

;@Line 79 - optimization
        mov byte [error], al  ; error = deltax/2
;to
        mov [error], ax       ; error = deltax/2
    


after some more optimizations you should get:
Code:
proc dbg_draw_line x0:WORD, y0:WORD, x1:WORD, y1:WORD, color:DWORD
      locals
        steep db 0 ; true
        deltax dw ?
        deltay dw ?
        error dw ?
        ystep dw ?
        y dw ?
        x dw ?
        tmp1 dw ?
       ; tmp2 dw ?
;       div2 db 2 - you don't need slow division to divide by 2
      endl

        mov ax, [x1]          ; ax = x1
        sub ax, [x0]          ; ax = x1-x0

        cmp ax, 0             ; if ax
        jge a1                ;   >= 0 goto a1
        neg ax                ; else ax = -ax
      a1:
        mov [tmp1], ax        ; tmp1 = x1-x0

        mov ax, [y1]          ; ax = y1
        sub ax, [y0]          ; ax = y1-y0

        cmp ax, 0             ; if ax
        jge a2                ;   >= 0 goto a2
        neg ax                ; else ax = -ax
      a2:

        cmp ax, [tmp1]        ; if y1-y0
        jg  true              ;   >= x1-x0 goto true
        mov [steep], -1       ; else steep = false
    true:

        cmp [steep], -1       ; if steep
        je l1                 ;   == false goto l1

        mov ax, [x0]          ; ax = x0
        xchg ax, [y0]         ; y0 = x0; ax = y0
        mov [x0], ax          ; x0 = ax

        mov ax, [x1]          ; ax = x1
        xchg ax, [y1]         ; y1 = x1; ax = y1
        mov [x1], ax          ; x1 = y1

      l1:
        mov ax, [x1]          ; ax = x1
        cmp [x0], ax          ; if x0
        jle l2                ;   <= x1 goto l2

        ;mov ax, [x0]
        ;xcng ax, [x1]
        ;mov [x0], ax          ;swap
        xchg ax, [x0]         ; x0 = x1; ax = x0
        mov [x1], ax          ; x1 = x0

        mov ax, [y0]          ; ax = y0
        xchg ax, [y1]         ; y1 = y0; ax = y1
        mov [y0], ax          ; y0 = y1

      l2:
        mov ax, [y1]          ; ax = y1
        sub ax, [y0]          ; ax = y1-y0
        cmp ax, 0             ; if ax
        jge a3                ;   >= 0 goto a3
        neg ax                ; else ax = -ax
      a3:
        mov [deltay], ax      ; deltay = abs(y1-y0)

        mov ax, [x1]          ; ax = x1
        sub ax, [x0]          ; ax = x1-x0
        mov [deltax], ax      ; deltax = x1-x0

        ;mov ax, [deltax]
        shr ax,1              ; ax = deltax/2
        ;and ax, 0x00FF
        mov [error],ax        ; error = deltax/2

        push [y0]             ; stack = y0
        pop [y]               ; y = y0

        mov ax, [y0]          ; ax = y0
        cmp ax, [y1]          ; if y0
        jge l3                ;   >= y1 goto l3
        mov [ystep], 1        ; else ystep = 1
        jmp l4                ; goto l4
      l3:
        mov [ystep], -1       ; ystep = -1
      l4:
        push [x0]             ; stack = x0
        pop [x]               ; x = x0
      l5:
        cmp [steep], -1       ; if steep
        je l6                 ;   == false goto l6
        stdcall dbg_put_pixel, dword[y], dword[x], [color] ; else plot(y,x)
        jmp l7                ; goto l7
      l6:
        stdcall dbg_put_pixel, dword[x], dword[y], [color] ; plot (x,y)
      l7:
        mov ax, [error]       ; ax = error
        sub ax, [deltay]      ; ax = error - deltay
        mov [error], ax       ; error = error - deltay

        cmp ax, 0             ; if error
        jge l8                ;   >= 0 goto l8
        mov ax, [ystep]       ; ax = ystep
        add [y], ax           ; y = y + ystep

        mov ax, [deltax]      ; ax = deltax
        add [error], ax       ; ax = deltax+error
      l8:
        mov ax, [x]           ; ax = x
        cmp ax, [x1]          ; if x
        inc [x]               ; /x++
        jb l5                 ;    < x1 goto l5
        ret
endp
    


I love how you only used 1 register Smile
Post 23 Aug 2010, 19:25
View user's profile Send private message Visit poster's website Yahoo Messenger MSN Messenger Reply with quote
klavs.pr



Joined: 20 Apr 2010
Posts: 20
Location: Latvia
klavs.pr 24 Aug 2010, 20:49
Thanks Madis731! Now it is finally working.
About that one register - I do not know where have I read it, but one wise man has said: first make your program work, than make it work fast. Now that my program is working, I will try to make it run faster. Wink
Post 24 Aug 2010, 20:49
View user's profile Send private message Reply with quote
bitshifter



Joined: 04 Dec 2007
Posts: 796
Location: Massachusetts, USA
bitshifter 04 Sep 2010, 03:37
I made this DDA line algorithm in C/C++ a while back.
My goal was to do as little calculations as possible.
Note that i traded size for speed in this way.
Code:
///////////////////////////////////////////////////////////////////////////////
// Renders a line from point a to point b.
// The pixel at point b is rendered.
// No clipping is performed.
//
//   315      0       45
//     \      |      /
//      \  8  |  1  /
//       \    |    /
//        \   |   /
//         \  |  /
//       7  \ | /  2
//           \|/
// 270 ------(*)------ 90
//           /|\
//       6  / | \  3
//         /  |  \
//        /   |   \
//       /    |    \
//      /  5  |  4  \
//     /      |      \
//   225     180     135
//
// NOTE: All rendering loops were adjust as to always increment the minor value.
//       This is required for float to integer conversions to be straight-forward.
//
///////////////////////////////////////////////////////////////////////////////

#define TRACE_SECTOR(n) printf("TRACE: SECTOR(%i)\n",(n))

void DrawLine(
   long x1,
   long y1,
   long x2,
   long y2,
   long color)
{
   // if going right -> possible sectors are [1,2,3,4]
   if(x1 < x2)
   {
      long dx = x2 - x1;

      // if going down -> possible sectors are [3,4]
      if(y1 < y2)
      {
         long dy = y2 - y1;

         // if more vert (SECTOR 4)
         if(dx < dy)
         {
            TRACE_SECTOR(4);

            float m = float(dx) / float(dy);
            float x = float(x1);

            for(long y = y1; y <= y2; y++)
            {
               SetPixel(long(x),y,color);
               x += m;
            }
         }
         // else more horz (SECTOR 3)
         else
         {
            TRACE_SECTOR(3);

            float m = float(dy) / float(dx);
            float y = float(y1);

            for(long x = x1; x <= x2; x++)
            {
               SetPixel(x,long(y),color);
               y += m;
            }
         }
      }
      // else going up -> possible sectors are [1,2]
      else
      {
         long dy = y1 - y2;

         // if more vert (SECTOR 1)
         if(dx < dy)
         {
            TRACE_SECTOR(1);

            float m = float(dx) / float(dy);
            float x = float(x1);

            for(long y = y1; y >= y2; y--)
            {
               SetPixel(long(x),y,color);
               x += m;
            }
         }
         // else more horz (SECTOR 2)
         else
         {
            TRACE_SECTOR(2);

            float m = float(dy) / float(dx);
            float y = float(y2);

            for(long x = x2; x >= x1; x--)
            {
               SetPixel(x,long(y),color);
               y += m;
            }
         }
      }
   }
   // else going left -> possible sectors are [5,6,7,8]
   else
   {
      long dx = x1 - x2;

      // if going down -> possible sectors are [5,6]
      if(y1 < y2)
      {
         long dy = y2 - y1;

         // if more vert (SECTOR 5)
         if(dx < dy)
         {
            TRACE_SECTOR(5);

            float m = float(dx) / float(dy);
            float x = float(x2);

            for(long y = y2; y >= y1; y--)
            {
               SetPixel(long(x),y,color);
               x += m;
            }
         }
         // else more horz (SECTOR 6)
         else
         {
            TRACE_SECTOR(6);

            float m = float(dy) / float(dx);
            float y = float(y1);

            for(long x = x1; x >= x2; x--)
            {
               SetPixel(x,long(y),color);
               y += m;
            }
         }
      }
      // else going up -> possible sectors are [7,8]
      else
      {
         long dy = y1 - y2;

         // if more vert (SECTOR 8)
         if(dx < dy)
         {
            TRACE_SECTOR(8);

            float m = float(dx) / float(dy);
            float x = float(x2);

            for(long y = y2; y <= y1; y++)
            {
               SetPixel(long(x),y,color);
               x += m;
            }
         }
         // else more horz (SECTOR 7)
         else
         {
            TRACE_SECTOR(7);

            float m = (float)dy / (float)dx;
            float y = (float)y2;

            for(long x = x2; x <= x1; x++)
            {
               SetPixel(x,long(y),color);
               y += m;
            }
         }
      }
   }
}
    

I think its pretty fast.
The slowest part is converting float into integer into float.
By dissasembling HLL compiler output you will see why.
Maybe to have your own little routine for this conversion...
Code:
float_into_long:
fld dword[fvalue]
fistp dword[ivalue]
    

I think a good mixture of GPR and FPU would be fastest.
Any comments or constructive criticism is greatly appreciated...
Post 04 Sep 2010, 03:37
View user's profile Send private message Reply with quote
bitRAKE



Joined: 21 Jul 2003
Posts: 4073
Location: vpcmpistri
bitRAKE 04 Sep 2010, 05:20
The optimal inner loop is:
Code:
inc .x
stc
adc .y_low,.slope_frac
adc .y,0

SetPixel .x,.y,.color    
Of course, these four instructions can be integrated into the pixel plotting routine, but multiple versions are required for different line directions. Basically, one coordinate always advances one and the other has a chance of advancing when the fraction overflows. STC is required to allow both coordinates to advance every iteration - the 45 degree line. Only integer instructions are needed.

Chapter 35, Michael Abrash's classic Graphics Programming Black Book
http://www.gamedev.net/reference/articles/article1698.asp
http://www.edepot.com/algorithm.html
Post 04 Sep 2010, 05:20
View user's profile Send private message Visit poster's website Reply with quote
bitshifter



Joined: 04 Dec 2007
Posts: 796
Location: Massachusetts, USA
bitshifter 07 Sep 2010, 00:26
Ahh, the Black Book Smile
In chapter 37 we have run slice length lines...
Very efficient at drawing long'ish lines.
So i ported some code from the book to FASM...
It runs in .COM enviroment where INT 20h is recognized.
It also uses some BIOS keyboard and video interrupts.
I tested on WinXP but i think it will run on anything earlier.
Also there are ways to optimize (see chapter 37 for details)
Code:
SCREEN_WIDTH            equ 320
SCREEN_SEGMENT          equ 0a000h

org 0100h
use16

        mov     ax,13h
        int     10h

        mov     cx,0ffh
        loop    $

        cld

        ;left border
        push    04h     ;color
        push    199     ;y2
        push    0       ;x2
        push    0       ;y1
        push    0       ;x1
        call    vga_320x200_line
        ;right border
        push    05h     ;color
        push    199     ;y2
        push    319     ;x2
        push    0       ;y1
        push    319     ;x1
        call    vga_320x200_line
        ;top border
        push    06h     ;color
        push    0       ;y2
        push    319     ;x2
        push    0       ;y1
        push    0       ;x1
        call    vga_320x200_line
        ;bottom border
        push    07h     ;color
        push    199     ;y2
        push    319     ;x2
        push    199     ;y1
        push    0       ;x1
        call    vga_320x200_line

        mov     ah,00h
        int     16h

        mov     ax,03h
        int     10h

        mov     cx,0ffh
        loop    $

        mov     ax,0
        int     20h
        jmp     $

;============================================================================
; void near stdcall
; vga_320x200_line(
;       unsigned short XStart,
;       unsigned short YStart,
;       unsigned short XEnd,
;       unsigned short YEnd,
;       unsigned short Color)
;
; Draws a colored line between the specified endpoints.
;============================================================================

vga_320x200_line:

        push    bp                      ;preserve caller's stack frame
        mov     bp,sp                   ;point to our stack frame

; Proc parameters.
label .XStart word at bp+4              ;X start coordinate of line
label .YStart word at bp+6              ;Y start coordinate of line
label .XEnd   word at bp+8              ;X end coordinate of line
label .YEnd   word at bp+10             ;Y end coordinate of line
label .Color  byte at bp+12             ;color in which to draw line

        sub     sp,8                    ; Allocate space for local variables.

; Local variables.
label .AdjUp     word at bp-2           ;error term adjust up on each advance
label .AdjDown   word at bp-4           ;error term adjust down when error term turns over
label .WholeStep word at bp-6           ;minimum run length
label .XAdvance  word at bp-8           ;1 or -1, for direction in which X advances

        push    si                      ;preserve C register variables
        push    di
        push    ds                      ;preserve caller's DS
; We'll draw top to bottom, to reduce the number of cases we have to handle,
; and to make lines between the same endpoints always draw the same pixels.
        mov     ax,[.YStart]
        cmp     ax,[.YEnd]
        jle     .LineIsTopToBottom
; Swap endpoints.
        xchg    [.YEnd],ax
        mov     [.YStart],ax
        mov     bx,[.XStart]
        xchg    [.XEnd],bx
        mov     [.XStart],bx
.LineIsTopToBottom:
; Point DI to the first pixel to draw.
        mov     dx,SCREEN_WIDTH
        mul     dx                      ;YStart * SCREEN_WIDTH
        mov     si,[.XStart]
        mov     di,si
        add     di,ax                   ;DI = YStart * SCREEN_WIDTH + XStart
                                        ; = offset of initial pixel
; Figure out how far we're going vertically (guaranteed to be positive).
        mov     cx,[.YEnd]
        sub     cx,[.YStart]            ;CX = YDelta
; Figure out whether we're going left or right, and how far we're going
; horizontally. In the process, special-case vertical lines, for speed and
; to avoid nasty boundary conditions and division by 0.
        mov     dx,[.XEnd]
        sub     dx,si                   ;XDelta
        jnz     .NotVerticalLine        ;XDelta == 0 means vertical line

; Special-case code for vertical lines.
        mov     ax,SCREEN_SEGMENT
        mov     ds,ax                   ;point DS:DI to the first byte to draw
        mov     al,[.Color]
.VerticalLineLoop:
        mov     [di],al
        add     di,SCREEN_WIDTH
        dec     cx
        jns     .VerticalLineLoop
        jmp     .Done

; Special-case code for horizontal lines.
align 2
.IsHorizontalLine:
        mov     ax,SCREEN_SEGMENT
        mov     es,ax                                   ;point ES:DI to the first byte to draw
        mov     al,[.Color]
        mov     ah,al                           ;duplicate in high byte for word access
        and     bx,bx                                   ;left to right?
        jns     .DirSet                                  ;yes
        sub     di,dx                                   ;currently right to left, point to left end so we
                                        ; can go left to right (avoids unpleasantness with
                                        ; right to left REP STOSW)
.DirSet:
        mov     cx,dx
        inc     cx                              ;# of pixels to draw
        shr     cx,1                                    ;# of words to draw
        rep     stosw                                   ;do as many words as possible
        adc     cx,cx
        rep     stosb                                   ;do the odd byte, if there is one
        jmp     .Done

; Special-case code for diagonal lines.
align 2
.IsDiagonalLine:
        mov     ax,SCREEN_SEGMENT
        mov     ds,ax                                   ;point DS:DI to the first byte to draw
        mov     al,[.Color]
        add     bx,SCREEN_WIDTH                         ;advance distance from one pixel to next
.DLoop:
        mov     [di],al
        add     di,bx
        dec     cx
        jns     .DLoop
        jmp     .Done

align 2
.NotVerticalLine:
        mov     bx,1                            ;assume left to right, so XAdvance = 1
                                                ;***leaves flags unchanged***
        jns     .LeftToRight                     ;left to right, all set
        neg     bx                                      ;right to left, so XAdvance = -1
        neg     dx                                      ;|XDelta|
.LeftToRight:
; Special-case horizontal lines.
        and     cx,cx                                   ;YDelta == 0?
        jz      .IsHorizontalLine ;yes
; Special-case diagonal lines.
        cmp     cx,dx   ;YDelta == XDelta?
        jz      .IsDiagonalLine  ;yes
; Determine whether the line is X or Y major, and handle accordingly.
        cmp     dx,cx
        jae     .XMajor
        jmp     .YMajor

; X-major (more horizontal than vertical) line.
align 2
.XMajor:
        mov     ax,SCREEN_SEGMENT
        mov     es,ax                                   ;point ES:DI to the first byte to draw
        and     bx,bx                   ;left to right?
        jns     .DFSet                   ;yes, CLD is already set
        std                             ;right to left, so draw backwards
.DFSet:
        mov     ax,dx                   ;XDelta
        sub     dx,dx                   ;prepare for division
        div     cx                              ;AX = XDelta/YDelta
                                                ; (minimum # of pixels in a run in this line)
                                                ;DX = XDelta % YDelta
        mov     bx,dx                           ;error term adjust each time Y steps by 1;
        add     bx,bx                           ; used to tell when one extra pixel should be
        mov     [.AdjUp],bx                   ; drawn as part of a run, to account for
                                                ; fractional steps along the X axis per
                                                ; 1-pixel steps along Y
        mov     si,cx                           ;error term adjust when the error term turns
        add     si,si                           ; over, used to factor out the X step made at
        mov     [.AdjDown],si                 ; that time
; Initial error term; reflects an initial step of 0.5 along the Y axis.
        sub     dx,si                           ;(XDelta % YDelta) - (YDelta * 2)
                                                ;DX = initial error term
; The initial and last runs are partial, because Y advances only 0.5 for
; these runs, rather than 1. Divide one full run, plus the initial pixel,
; between the initial and last runs.
        mov     si,cx                           ;SI = YDelta
        mov     cx,ax                           ;whole step (minimum run length)
        shr     cx,1
        inc     cx                              ;initial pixel count = (whole step / 2) + 1;
                                                ; (may be adjusted later). This is also the
                                                ; final run pixel count
        push    cx                              ;remember final run pixel count for later
; If the basic run length is even and there's no fractional advance, we have
; one pixel that could go to either the initial or last partial run, which
; we'll arbitrarily allocate to the last run.
; If there is an odd number of pixels per run, we have one pixel that can't
; be allocated to either the initial or last partial run, so we'll add 0.5 to
; the error term so this pixel will be handled by the normal full-run loop.
        add     dx,si                           ;assume odd length, add YDelta to error term
                                                ; (add 0.5 of a pixel to the error term)
        test    al,1                            ;is run length even?
        jnz     .XMajorAdjustDone                ;no, already did work for odd case, all set
        sub     dx,si                           ;length is even, undo odd stuff we just did
        and     bx,bx                           ;is the adjust up equal to 0?
        jnz     .XMajorAdjustDone                ;no (don't need to check for odd length,
                                        ; because of the above test)
        dec     cx                              ;both conditions met; make initial run 1
                                                ; shorter
.XMajorAdjustDone:
        mov     [.WholeStep],ax       ;whole step (minimum run length)
        mov     al,[.Color]                   ;AL = drawing color
; Draw the first, partial run of pixels.
        rep     stosb                           ;draw the final run
        add     di,SCREEN_WIDTH                 ;advance along the minor axis (Y)
; Draw all full runs.
        cmp     si,1                            ;are there more than 2 scans, so there are
                                                ; some full runs? (SI = # scans - 1)
        jna     .XMajorDrawLast                  ;no, no full runs
        dec     dx                              ;adjust error term by -1 so we can use
                                                ; carry test
        shr     si,1                            ;convert from scan to scan-pair count
        jnc     .XMajorFullRunsOddEntry          ;if there is an odd number of scans,
                                                ; do the odd scan now
.XMajorFullRunsLoop:
        mov     cx,[.WholeStep]      ;run is at least this long
        add     dx,bx                           ;advance the error term and add an extra
        jnc     .XMajorNoExtra                   ; pixel if the error term so indicates
        inc     cx                              ;one extra pixel in run
        sub     dx,[.AdjDown]                 ;reset the error term
.XMajorNoExtra:
        rep     stosb                       ;draw this scan line's run
        add     di,SCREEN_WIDTH                 ;advance along the minor axis (Y)
.XMajorFullRunsOddEntry:                         ;enter loop here if there is an odd number
                                                ; of full runs
        mov     cx,[.WholeStep]       ;run is at least this long
        add     dx,bx                           ;advance the error term and add an extra
        jnc     .XMajorNoExtra2                  ; pixel if the error term so indicates
        inc     cx                              ;one extra pixel in run
        sub     dx,[.AdjDown]                 ;reset the error term
.XMajorNoExtra2:
        rep     stosb                       ;draw this scan line's run
        add     di,SCREEN_WIDTH                 ;advance along the minor axis (Y)

        dec     si
        jnz     .XMajorFullRunsLoop
; Draw the final run of pixels.
.XMajorDrawLast:
        pop     cx                              ;get back the final run pixel length
        rep     stosb                           ;draw the final run

        cld                                     ;restore normal direction flag
        jmp     .Done

; Y-major (more vertical than horizontal) line.
align 2
.YMajor:
        mov     [.XAdvance],bx                ;remember which way X advances
        mov     ax,SCREEN_SEGMENT
        mov     ds,ax                                   ;point DS:DI to the first byte to draw
        mov     ax,cx                           ;YDelta
        mov     cx,dx                           ;XDelta
        sub     dx,dx                           ;prepare for division
        div     cx                              ;AX = YDelta/XDelta
                                                ; (minimum # of pixels in a run in this line)
                                                ;DX = YDelta % XDelta
        mov     bx,dx                           ;error term adjust each time X steps by 1;
        add     bx,bx                           ; used to tell when one extra pixel should be
        mov     [.AdjUp],bx                   ; drawn as part of a run, to account for
                                                ; fractional steps along the Y axis per
                                                ; 1-pixel steps along X
        mov     si,cx                           ;error term adjust when the error term turns
        add     si,si                           ; over, used to factor out the Y step made at
        mov     [.AdjDown],si                 ; that time

; Initial error term; reflects an initial step of 0.5 along the X axis.
        sub     dx,si                           ;(YDelta % XDelta) - (XDelta * 2)
                                                ;DX = initial error term
; The initial and last runs are partial, because X advances only 0.5 for
; these runs, rather than 1. Divide one full run, plus the initial pixel,
; between the initial and last runs.
        mov     si,cx                           ;SI = XDelta
        mov     cx,ax                           ;whole step (minimum run length)
        shr     cx,1
        inc     cx                              ;initial pixel count = (whole step / 2) + 1;
                                                ; (may be adjusted later)
        push    cx                              ;remember final run pixel count for later

; If the basic run length is even and there's no fractional advance, we have
; one pixel that could go to either the initial or last partial run, which
; we'll arbitrarily allocate to the last run.
; If there is an odd number of pixels per run, we have one pixel that can't
; be allocated to either the initial or last partial run, so we'll add 0.5 to
; the error term so this pixel will be handled by the normal full-run loop.
        add     dx,si                           ;assume odd length, add XDelta to error term
        test    al,1                            ;is run length even?
        jnz     .YMajorAdjustDone                ;no, already did work for odd case, all set
        sub     dx,si                           ;length is even, undo odd stuff we just did
        and     bx,bx                           ;is the adjust up equal to 0?
        jnz     .YMajorAdjustDone                ;no (don't need to check for odd length,
                 ; because of the above test)
        dec     cx                              ;both conditions met; make initial run 1
                                                ; shorter
.YMajorAdjustDone:
        mov     [.WholeStep],ax       ;whole step (minimum run length)
        mov     al,[.Color]           ;AL = drawing color
        mov     bx,[.XAdvance]        ;which way X advances
; Draw the first, partial run of pixels.
.YMajorFirstLoop:
        mov     [di],al                         ;draw the pixel
        add     di,SCREEN_WIDTH                 ;advance along the major axis (Y)
        dec     cx
        jnz     .YMajorFirstLoop
        add     di,bx                           ;advance along the minor axis (X)
; Draw all full runs.
        cmp     si,1                            ;# of full runs. Are there more than 2
                                                ; columns, so there are some full runs?
                                                ; (SI = # columns - 1)
        jna     .YMajorDrawLast                  ;no, no full runs
        dec     dx                              ;adjust error term by -1 so we can use
                                                ; carry test
        shr     si,1                            ;convert from column to column-pair count
        jnc     .YMajorFullRunsOddEntry          ;if there is an odd number of
                                                ; columns, do the odd column now
.YMajorFullRunsLoop:
        mov     cx,[.WholeStep]       ;run is at least this long
        add     dx,[.AdjUp]                   ;advance the error term and add an extra
        jnc     .YMajorNoExtra                   ; pixel if the error term so indicates
        inc     cx                              ;one extra pixel in run
        sub     dx,[.AdjDown]                 ;reset the error term
.YMajorNoExtra:
                                                ;draw the run
.YMajorRunLoop:
        mov     [di],al                         ;draw the pixel
        add     di,SCREEN_WIDTH                 ;advance along the major axis (Y)
        dec     cx
        jnz     .YMajorRunLoop
        add     di,bx                           ;advance along the minor axis (X)
.YMajorFullRunsOddEntry:                         ;enter loop here if there is an odd number
                                                ; of full runs
        mov     cx,[.WholeStep]       ;run is at least this long
        add     dx,[.AdjUp]                  ;advance the error term and add an extra
        jnc     .YMajorNoExtra2                  ; pixel if the error term so indicates
        inc     cx                              ;one extra pixel in run
        sub     dx,[.AdjDown]                 ;reset the error term
.YMajorNoExtra2:
                                                ;draw the run
.YMajorRunLoop2:
        mov     [di],al                         ;draw the pixel
        add     di,SCREEN_WIDTH                 ;advance along the major axis (Y)
        dec     cx
        jnz     .YMajorRunLoop2
        add     di,bx                           ;advance along the minor axis (X)

        dec     si
        jnz     .YMajorFullRunsLoop
; Draw the final run of pixels.
.YMajorDrawLast:
        pop     cx                              ;get back the final run pixel length
.YMajorLastLoop:
        mov     [di],al                         ;draw the pixel
        add     di,SCREEN_WIDTH                 ;advance along the major axis (Y)
        dec     cx
        jnz     .YMajorLastLoop
.Done:
        pop     ds                                  ;restore caller's DS
        pop     di
        pop     si                                  ;restore C register variables
        mov     sp,bp                       ;deallocate local variables
        pop     bp                                  ;restore caller's stack frame
        retn    2*5
    

_________________
Coding a 3D game engine with fasm is like trying to eat an elephant,
you just have to keep focused and take it one 'byte' at a time.
Post 07 Sep 2010, 00:26
View user's profile Send private message Reply with quote
bitRAKE



Joined: 21 Jul 2003
Posts: 4073
Location: vpcmpistri
bitRAKE 07 Sep 2010, 07:04
Po-Han Lin claims his algorithm is the fastest due to no branching. (The inner loop I posted above is for his type of algorithm.) In general I would have to agree, but now-a-day displays are high resolution or we also want anti-aliasing. Seriously doubt he was the first to concoct the method in 2005.
Post 07 Sep 2010, 07:04
View user's profile Send private message Visit poster's website Reply with quote
Madis731



Joined: 25 Sep 2003
Posts: 2139
Location: Estonia
Madis731 07 Sep 2010, 08:17
I take it with skepticism: "Pentium III 450Mhz, Visual C++ 6.0"

Pentium III was fast, but with the emergence of Core 2 class CPUs, many things have changed.
-branch predition is better
-branch target buffer entries have quadrupled since then
-micro/macro-op fusion (especially with jumps)

PIII had a misprediction penalty of 10-20 clocks.
While with P4 they made it worse - 24+ - they got back to 15 with Core 2.
IMUL latency has come down from 4 to 3.

BUT... I will bite and try it sometimes. Maybe my 64-bit Bresenham implementation doesn't beat EFLA.

UPDATE:
I'm starting to get interested:
http://www.simppa.fi/blog/extremely-fast-line-algorithm-as3-optimized/
http://jacksondunstan.com/articles/506
hmm..

UPDATE2:
Is it okay if I used this trick instead:
Code:
        mov     [shortLen],r9d  ;int shortLen=y2-y;
        sub     [shortLen],edx
        sbb     eax,eax
        mov     [longLen],r8d   ;int longLen=x2-x;
        sub     [longLen],ecx
        sbb     ebx,ebx

        xor     eax,[shortLen]  ;if (abs(shortLen)>abs(longLen)) {
        xor     ebx,[longLen]
        cmp     eax,ebx
        jle     @f
        ;{ ... }
      @@:
    
?
Post 07 Sep 2010, 08:17
View user's profile Send private message Visit poster's website Yahoo Messenger MSN Messenger Reply with quote
bitshifter



Joined: 04 Dec 2007
Posts: 796
Location: Massachusetts, USA
bitshifter 08 Sep 2010, 05:36
EFLA doesnt look so fast to me...
It uses 2 abs calls and does some swapping.
It shouldnt be very hard for my optimizing friend Madis to kill it. Smile

*ramble*
Sad news, my Pentium 4 has died a slow death.
It took my Agner Fog PMC port along with it.
I wont be timing much code for a while...
Post 08 Sep 2010, 05:36
View user's profile Send private message Reply with quote
Madis731



Joined: 25 Sep 2003
Posts: 2139
Location: Estonia
Madis731 08 Sep 2010, 06:25
On topic:
I cannot get rid of the division. And its killing the performance on every line shorter than 100px. DIV only takes 4 clocks, but the results lag 14-23 clocks. By that time Bresenham's would have had drawn at least 14 pixels...by WHICH time Intel's branch prediction would have predicted the sequence of jump-no jump. I think it will be faster with high-level code, where abs(), putpixel() and xchg() take up so much time that you don't notice one DIV there Smile {and I'm only being sarcastic}

Off topic:
@bitshifter: I guess you need to look on the bright side and realize that your next CPU will be at least of class Core i3 (~$115). I guess P4 did cost you much more and the performance was so-so.
SSE4.1: ROUNDxx, DPPx for 3D calculations, BLEND, PTEST for bitwizardry
SSE4.2: PCMPxSTRx, CRC32, POPCNT etc.
http://ark.intel.com/Product.aspx?id=48140
Post 08 Sep 2010, 06:25
View user's profile Send private message Visit poster's website Yahoo Messenger MSN Messenger Reply with quote
edfed



Joined: 20 Feb 2006
Posts: 4353
Location: Now
edfed 09 Sep 2010, 09:19
when using a putpixel sub for th line algo, there is a singular trick...

X and Y are incremented (decremented) by 1 or 0.
of course it is faster to put the pixel in the loop, then, increment Y by BPSL (Bytes Per Scan Line), and X by BPP (Byte Per Pixel).

the annoying poin is that X86 don't have 2bits micro registers, able to contain a signed value between -1 and 1.

then, just to have the Xincrement and Yincrement, you should use an entire registers (16, 32 or 64 bits) just to increment or decrement.

i have counted the number of datas needed for a line computation with besenham algo:

Line max lengh
Accumulator for iterative division.
X current
Y current
Delta Min
Delta Max
X increment 1 (-1, 0, 1)
Y increment 1 (//)
X increment 2 (//)
Y increment 2 (//)
Color (or value)
in total, 11 registers are needed to compute the dots.


X (or X1)
Y (or Y1)
Xlengh (or X2)
Ylengh (or Y2)
Color (or value)
and 5 registers to pass parameters to line function.

when using putpixel for the line, you can easyly use the same algo to interpolate datas in an array. for example, create an audio ramp between two samples. or whatever application you want.

because a line is not always a geometric thing.

for those who don't understand how works the besenham algo:

delta min and delta max are picked from Xlengh and Ylengh
Xlengh and Ylengh are picked from X1-X2 and Y1-Y2.
encode the line parameters with X,Y,XL,YL is faster because we don't need to compute X1-X2 and Y1-Y2.

ones we know XL and YL, we need to set the Xincrements and Yincrements to 1.

the test for sign of XL and YL will give the sign to Xinc and Yinc, followed by abs(Xl) and abs(Yl).
compare abs(XL) and abs(YL) will give deltamin and deltamax. and will set the Xinc1 & 2, and Yinc1 & 2 to 0 or |1|.

ones we have deltas and incs sets, we can compute the line.

set line max lengh to delta max.
set accumulator to deltamax/2
set Xcurrent to X (or X1)
set Ycurrent to Y (or Y1)


enter the main loop:

decrement line max lengh
if negative, then end

plot a dot using Xcurrent and Ycurrent

Xcurrent + Xinc1
Ycurrent + Yinc1
accumulator + delta min
compare accumulator with deltamax
if lower, then, main loop

secondary loop:
Xcurrent + Xinc2
Ycurrent + Yinc2
accumulator - deltamax
main loop.

end:
ret

that's maybe why it can be good to decompose the algo in 8 cadrans.
then, it will avoid the need of Xinc1 & 2, and Yinc1 & 2.
Post 09 Sep 2010, 09:19
View user's profile Send private message Visit poster's website Reply with quote
Madis731



Joined: 25 Sep 2003
Posts: 2139
Location: Estonia
Madis731 09 Sep 2010, 12:47
I put the EFLA topic in another thread:
http://board.flatassembler.net/topic.php?p=118753
Post 09 Sep 2010, 12:47
View user's profile Send private message Visit poster's website Yahoo Messenger MSN Messenger Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  


< Last Thread | Next Thread >
Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum


Copyright © 1999-2025, Tomasz Grysztar. Also on GitHub, YouTube.

Website powered by rwasa.