flat assembler
Message board for the users of flat assembler. Index > Main > Bresenham's line algorithm
Author
 Thread  klavs.pr

Joined: 20 Apr 2010
Posts: 20
Location: Latvia
klavs.pr
Can anyone tell me whats wrong with this code? It is drawing a line but that line is not correct. Sometimes it is not drawing anything.

Algorithm: http://en.wikipedia.org/wiki/Bresenham%27s_line_algorithm#Optimization

Klavs Prieditis.

Code:
```proc dbg_draw_line x0:WORD, y0:WORD, x1:WORD, y1:WORD, color:DWORD
locals
steep db 0 ; true
deltax dw ?
deltay dw ?
error dw ?
ystep dw ?
y dw ?
x dw ?
tmp1 dw ?
; tmp2 dw ?
div2 db 2
endl

mov ax, [x1]          ; ax = x1
sub ax, [x0]          ; ax = x1-x0

cmp ax, 0             ; if ax
jae a1                ;   >= 0 goto a1
neg ax                ; else ax = -ax
a1:
mov [tmp1], ax        ; tmp1 = x1-x0

mov ax, [y1]          ; ax = y1
sub ax, [y0]          ; ax = y1-y0

cmp ax, 0             ; if ax
jae a2                ;   >= 0 goto a2
neg ax                ; else ax = -ax
a2:

cmp ax, [tmp1]        ; if y1-y0
jae true              ;   >= x1-x0 goto true
mov [steep], -1       ; else steep = false
true:

cmp [steep], -1       ; if steep
je l1                 ;   == false goto l1

mov ax, [x0]          ; ax = x0
xchg ax, [y0]         ; y0 = x0; ax = y0
mov [x0], ax          ; x0 = ax

mov ax, [x1]          ; ax = x1
xchg ax, [y1]         ; y1 = x1; ax = y1
mov [x1], ax          ; x1 = y1

l1:
mov ax, [x1]          ; ax = x1
cmp [x0], ax          ; if x0
jbe l2                ;   <= x1 goto l2

;mov ax, [x0]
;xcng ax, [x1]
;mov [x0], ax          ;swap
xchg ax, [x0]         ; x0 = x1; ax = x0
mov [x1], ax          ; x1 = x0

mov ax, [y0]          ; ax = y0
xchg ax, [y1]         ; y1 = y0; ax = y1
mov [y0], ax          ; y0 = y1

l2:
mov ax, [y1]          ; ax = y1
sub ax, [y0]          ; ax = y1-y0
cmp ax, 0             ; if ax
jae a3                ;   >= 0 goto a3
neg ax                ; else ax = -ax
a3:
mov [deltay], ax      ; deltay = abs(y1-y0)

mov ax, [x1]          ; ax = x1
sub ax, [x0]          ; ax = x1-x0
mov [deltax], ax      ; deltax = x1-x0

;mov ax, [deltax]
div byte [div2]       ; ax = deltax/2
;and ax, 0x00FF
mov byte [error], al  ; error = deltax/2

push [y0]             ; stack = y0
pop [y]               ; y = y0

mov ax, [y0]          ; ax = y0
cmp ax, [y1]          ; if y0
jae l3                ;   >= y1 goto l3
mov [ystep], 1        ; else ystep = 1
jmp l4                ; goto l4
l3:
mov [ystep], -1       ; ystep = -1
l4:
push [x0]             ; stack = x0
pop [x]               ; x = x0
l5:
cmp [steep], -1       ; if steep
je l6                 ;   == false goto l6
stdcall dbg_put_pixel, dword[y], dword[x], [color] ; else plot(y,x)
jmp l7                ; goto l7
l6:
stdcall dbg_put_pixel, dword[x], dword[y], [color] ; plot (x,y)
l7:
mov ax, [error]       ; ax = error
sub ax, [deltay]      ; ax = error - deltay
mov [error], ax       ; error = error - deltay

cmp ax, 0             ; if error
jae l8                ;   >= 0 goto l8
mov ax, [ystep]       ; ax = ystep
add [y], ax           ; y = y + ystep

mov ax, [deltax]      ; ax = deltax
add [error], ax       ; ax = deltax+error
l8:
mov ax, [x]           ; ax = x
cmp ax, [x1]          ; if x
inc [x]               ; /x++
jb l5                 ;    < x1 goto l5
ret
endp

``` 22 Aug 2010, 16:39  DJ Mauretto

Joined: 14 Mar 2007
Posts: 464
Location: Rome,Italy
DJ Mauretto
Some time ago i wrote a line algorithm...[code]

This tool test the algorithm... _________________
Nil Volentibus Arduum Last edited by DJ Mauretto on 06 Dec 2011, 08:06; edited 1 time in total 22 Aug 2010, 16:56  luke77

Joined: 14 May 2010
Posts: 18
luke77
;-------------------------------------------------2002 AK---------
; line.asm Draw a line using Bresenham's algorithmn
;
;

.model small
.stack 200h ; 512 byte stack

.data

vid_mode db 0
start_x dw 0
end_x dw 319
start_y dw 0
end_y dw 199
color db 2

diagonal_y_increment dw ?
diagonal_x_increment dw ?
short_distance dw ?
straight_x_increment dw ?
straight_y_increment dw ?
straight_count dw ?
diagonal_count dw ?

prescence db ' è---é '

.code

start:
mov ax,@data
mov ds,ax

mov ah,0fh ; determine current video mode
int 10h
mov vid_mode,al ; move mode value into buffer

;set screen mode
mov ah,0
mov al,4 ; orig. value = 4
int 10h

mov cx,1
mov dx,1

mov di,end_y
sub di,start_y
jge keep_y
neg dx
neg di
keep_y:
mov diagonal_y_increment,dx
mov si,end_x
sub si,start_x
jge keep_x
neg cx
neg si
keep_x:
mov diagonal_y_increment,cx
cmp si,di
jge horz_seg
mov cx,0
xchg si,di
jmp save_values
horz_seg:
mov dx,0
save_values:
mov short_distance,di
mov straight_x_increment,cx
mov straight_y_increment,dx

mov ax,short_distance
shl ax,1
mov straight_count,ax
sub ax,si
mov bx,ax
sub ax,si
mov diagonal_count,ax

mov cx,start_x
mov dx,start_y
inc si
mov al,color

mainloop:
dec si
jz line_finished
mov ah,12
int 10h
cmp bx,0
jge diagonal_line

jmp mainloop

diagonal_line:

jmp mainloop

line_finished:

mov ah,0 ; function no. for read
int 1ah ; get the time of day count
mov bx,dx ; store end of delay value in bx
repeat:
int 1ah
cmp dx,bx
jne repeat

mov ah,0 ; restore original video mode
mov al,vid_mode ;
int 10h

exit:
mov ax,4c00h
int 21h

end start

Code:
`    ` 22 Aug 2010, 20:16  Treant

Joined: 09 Oct 2009
Posts: 16
Location: Russia
Treant
My version:
Code:
```;all 8 registers saved
; print point with certain color on screen
;  in a stack:
;   sp
;   sp+4 - x1
;   sp+8 - y1
;   sp+12- x2
;   sp+16- y2
;   sp+20- 32-bit ARGB color
line:
pusha
add esp, 00000020h ; 8 registers 8*4=32=20h bytes
pop ebp ; return point
pop eax ; x1
pop ebx ; y1
pop esi ; x2
pop edi ; y2
pop [color] ; color
sub esp, 00000038h ; 8 registes + 5 function parametres + return point = 8*4+5*4+4=56=38h bytes
; clear error
mov [error], 00000000h
; calculate line's width and height
; dxx=|x1-x2|
mov ecx, eax
sub ecx, esi ; ecx = x1-x2
; in ecx number without sign
mov edx, ecx ; save ecx in edx
neg ecx ; invert number sign
cmovl ecx, edx ; if ecx<edx then place edx in ecx
mov [dxx], ecx ; save |x1-x2| in [dxx]
; dyy=|y1-y2|
mov ecx, ebx
sub ecx, edi ; ecx = y1-y2
; in ecx number without sign
mov edx, ecx ; save ecx in edx
neg ecx ; invert number sign
cmovl ecx, edx ; if ecx<edx then place edx in ecx
mov [dyy], ecx ; save |y1-y2| in [dyy]
; select painting direction
cmp [dxx], ecx ; |x1-x2|?|y1-y2|
jge h_line ; if |x1-x2|>=|y1-y2|
jmp v_line ; if |y1-y2|>|x1-x2|
h_line: ; begin paint
; painting always from left to right
cmp eax, esi
jg swap_h; if x1>x2 then interchange  x and y coordinates
jmp start_paint_h
swap_h: ; x=y y=x
xchg eax, esi
xchg ebx, edi
start_paint_h:
mov ecx, [dxx] ; ecx=|x1-x2|
mov [y], ebx ; y=y0
mov [x], eax ; x=x0
; start pixel
push [color] ; color
push [y] ;y
push [x] ;x
call put_pixel
loop_paint_h:
inc [x]
push [color] ; color
push [y] ;y
push [x] ;x
call put_pixel
mov edx, [dyy]
mov [error], edx
imul edx, 00000002h ; edx=error*2
cmp edx, [dxx] ; if [error]*2>=[dxx]
jge inc_y
jmp next_step_h
inc_y:
inc [y]
mov edx, [error]
sub edx, [dxx]
mov [error], edx ; [error]=[error]-[dxx]
next_step_h:
loop loop_paint_h ; print next point
jmp exit_line
v_line: ; begin paint
; painting always from left to right
cmp ebx, edi
jg swap_v; if y1>y2 then interchange  x and y coordinates
jmp start_paint_v
swap_v: ; x=y y=x
xchg eax, esi
xchg ebx, edi
start_paint_v:
mov ecx, [dyy] ; ecx=|y1-y2|
mov [y], ebx ; y=y0
mov [x], eax ; x=x0
; start pixel
push [color] ; color
push [y] ;y
push [x] ;x
call put_pixel
loop_paint_v:
inc [y]
push [color] ; color
push [y] ;y
push [x] ;x
call put_pixel
mov edx, [dxx]
mov [error], edx
imul edx, 00000002h ; edx=error*2
cmp edx, [dyy] ; if [error]*2>=[dxx]
jge inc_x
jmp next_step_v
inc_x:
inc [x]
mov edx, [error]
sub edx, [dyy]
mov [error], edx ; [error]=[error]-[dxx]
next_step_v:
loop loop_paint_v ; print next point
exit_line:
popa
retn 14h

error dd 0
dxx   dd 0
dyy   dd 0
x     dd 0
y     dd 0
color dd 0
``` 23 Aug 2010, 04:00  Madis731 Joined: 25 Sep 2003 Posts: 2139 Location: Estonia Madis731 @klavs.pr you probably have a flag set in reverse somewhere. That is the most common error with Bresenham's. First try changing all your above/below flags to greater/less because you're dealing with signed numbers. Your abs() function would start working then. Line 117 is correct (jb l5 ;< x1 goto l5) - don't change that Code:```;@Line 33 - change jae true ; >= x1-x0 goto true ;to jg true ; > x1-x0 goto true (like in the C-source on wiki) ;@Line 77 - optimization div byte [div2] ; ax = deltax/2 ;to shr ax,1 ; ax = deltax/2 ;@Line 79 - optimization mov byte [error], al ; error = deltax/2 ;to mov [error], ax ; error = deltax/2 ``` after some more optimizations you should get: Code:```proc dbg_draw_line x0:WORD, y0:WORD, x1:WORD, y1:WORD, color:DWORD locals steep db 0 ; true deltax dw ? deltay dw ? error dw ? ystep dw ? y dw ? x dw ? tmp1 dw ? ; tmp2 dw ? ; div2 db 2 - you don't need slow division to divide by 2 endl mov ax, [x1] ; ax = x1 sub ax, [x0] ; ax = x1-x0 cmp ax, 0 ; if ax jge a1 ; >= 0 goto a1 neg ax ; else ax = -ax a1: mov [tmp1], ax ; tmp1 = x1-x0 mov ax, [y1] ; ax = y1 sub ax, [y0] ; ax = y1-y0 cmp ax, 0 ; if ax jge a2 ; >= 0 goto a2 neg ax ; else ax = -ax a2: cmp ax, [tmp1] ; if y1-y0 jg true ; >= x1-x0 goto true mov [steep], -1 ; else steep = false true: cmp [steep], -1 ; if steep je l1 ; == false goto l1 mov ax, [x0] ; ax = x0 xchg ax, [y0] ; y0 = x0; ax = y0 mov [x0], ax ; x0 = ax mov ax, [x1] ; ax = x1 xchg ax, [y1] ; y1 = x1; ax = y1 mov [x1], ax ; x1 = y1 l1: mov ax, [x1] ; ax = x1 cmp [x0], ax ; if x0 jle l2 ; <= x1 goto l2 ;mov ax, [x0] ;xcng ax, [x1] ;mov [x0], ax ;swap xchg ax, [x0] ; x0 = x1; ax = x0 mov [x1], ax ; x1 = x0 mov ax, [y0] ; ax = y0 xchg ax, [y1] ; y1 = y0; ax = y1 mov [y0], ax ; y0 = y1 l2: mov ax, [y1] ; ax = y1 sub ax, [y0] ; ax = y1-y0 cmp ax, 0 ; if ax jge a3 ; >= 0 goto a3 neg ax ; else ax = -ax a3: mov [deltay], ax ; deltay = abs(y1-y0) mov ax, [x1] ; ax = x1 sub ax, [x0] ; ax = x1-x0 mov [deltax], ax ; deltax = x1-x0 ;mov ax, [deltax] shr ax,1 ; ax = deltax/2 ;and ax, 0x00FF mov [error],ax ; error = deltax/2 push [y0] ; stack = y0 pop [y] ; y = y0 mov ax, [y0] ; ax = y0 cmp ax, [y1] ; if y0 jge l3 ; >= y1 goto l3 mov [ystep], 1 ; else ystep = 1 jmp l4 ; goto l4 l3: mov [ystep], -1 ; ystep = -1 l4: push [x0] ; stack = x0 pop [x] ; x = x0 l5: cmp [steep], -1 ; if steep je l6 ; == false goto l6 stdcall dbg_put_pixel, dword[y], dword[x], [color] ; else plot(y,x) jmp l7 ; goto l7 l6: stdcall dbg_put_pixel, dword[x], dword[y], [color] ; plot (x,y) l7: mov ax, [error] ; ax = error sub ax, [deltay] ; ax = error - deltay mov [error], ax ; error = error - deltay cmp ax, 0 ; if error jge l8 ; >= 0 goto l8 mov ax, [ystep] ; ax = ystep add [y], ax ; y = y + ystep mov ax, [deltax] ; ax = deltax add [error], ax ; ax = deltax+error l8: mov ax, [x] ; ax = x cmp ax, [x1] ; if x inc [x] ; /x++ jb l5 ; < x1 goto l5 ret endp ``` I love how you only used 1 register  23 Aug 2010, 19:25
klavs.pr

Joined: 20 Apr 2010
Posts: 20
Location: Latvia
klavs.pr
Thanks Madis731! Now it is finally working.
About that one register - I do not know where have I read it, but one wise man has said: first make your program work, than make it work fast. Now that my program is working, I will try to make it run faster.  24 Aug 2010, 20:49  bitshifter

Joined: 04 Dec 2007
Posts: 786
Location: Massachusetts, USA
bitshifter
I made this DDA line algorithm in C/C++ a while back.
My goal was to do as little calculations as possible.
Note that i traded size for speed in this way.
Code:
```///////////////////////////////////////////////////////////////////////////////
// Renders a line from point a to point b.
// The pixel at point b is rendered.
// No clipping is performed.
//
//   315      0       45
//     \      |      /
//      \  8  |  1  /
//       \    |    /
//        \   |   /
//         \  |  /
//       7  \ | /  2
//           \|/
// 270 ------(*)------ 90
//           /|\
//       6  / | \  3
//         /  |  \
//        /   |   \
//       /    |    \
//      /  5  |  4  \
//     /      |      \
//   225     180     135
//
// NOTE: All rendering loops were adjust as to always increment the minor value.
//       This is required for float to integer conversions to be straight-forward.
//
///////////////////////////////////////////////////////////////////////////////

#define TRACE_SECTOR(n) printf("TRACE: SECTOR(%i)\n",(n))

void DrawLine(
long x1,
long y1,
long x2,
long y2,
long color)
{
// if going right -> possible sectors are [1,2,3,4]
if(x1 < x2)
{
long dx = x2 - x1;

// if going down -> possible sectors are [3,4]
if(y1 < y2)
{
long dy = y2 - y1;

// if more vert (SECTOR 4)
if(dx < dy)
{
TRACE_SECTOR(4);

float m = float(dx) / float(dy);
float x = float(x1);

for(long y = y1; y <= y2; y++)
{
SetPixel(long(x),y,color);
x += m;
}
}
// else more horz (SECTOR 3)
else
{
TRACE_SECTOR(3);

float m = float(dy) / float(dx);
float y = float(y1);

for(long x = x1; x <= x2; x++)
{
SetPixel(x,long(y),color);
y += m;
}
}
}
// else going up -> possible sectors are [1,2]
else
{
long dy = y1 - y2;

// if more vert (SECTOR 1)
if(dx < dy)
{
TRACE_SECTOR(1);

float m = float(dx) / float(dy);
float x = float(x1);

for(long y = y1; y >= y2; y--)
{
SetPixel(long(x),y,color);
x += m;
}
}
// else more horz (SECTOR 2)
else
{
TRACE_SECTOR(2);

float m = float(dy) / float(dx);
float y = float(y2);

for(long x = x2; x >= x1; x--)
{
SetPixel(x,long(y),color);
y += m;
}
}
}
}
// else going left -> possible sectors are [5,6,7,8]
else
{
long dx = x1 - x2;

// if going down -> possible sectors are [5,6]
if(y1 < y2)
{
long dy = y2 - y1;

// if more vert (SECTOR 5)
if(dx < dy)
{
TRACE_SECTOR(5);

float m = float(dx) / float(dy);
float x = float(x2);

for(long y = y2; y >= y1; y--)
{
SetPixel(long(x),y,color);
x += m;
}
}
// else more horz (SECTOR 6)
else
{
TRACE_SECTOR(6);

float m = float(dy) / float(dx);
float y = float(y1);

for(long x = x1; x >= x2; x--)
{
SetPixel(x,long(y),color);
y += m;
}
}
}
// else going up -> possible sectors are [7,8]
else
{
long dy = y1 - y2;

// if more vert (SECTOR 8)
if(dx < dy)
{
TRACE_SECTOR(8);

float m = float(dx) / float(dy);
float x = float(x2);

for(long y = y2; y <= y1; y++)
{
SetPixel(long(x),y,color);
x += m;
}
}
// else more horz (SECTOR 7)
else
{
TRACE_SECTOR(7);

float m = (float)dy / (float)dx;
float y = (float)y2;

for(long x = x2; x <= x1; x++)
{
SetPixel(x,long(y),color);
y += m;
}
}
}
}
}
```

I think its pretty fast.
The slowest part is converting float into integer into float.
By dissasembling HLL compiler output you will see why.
Maybe to have your own little routine for this conversion...
Code:
```float_into_long:
fld dword[fvalue]
fistp dword[ivalue]
```

I think a good mixture of GPR and FPU would be fastest.
Any comments or constructive criticism is greatly appreciated... 04 Sep 2010, 03:37  bitRAKE Joined: 21 Jul 2003 Posts: 3282 Location: vpcmipstrm bitRAKE The optimal inner loop is: Code:```inc .x stc adc .y_low,.slope_frac adc .y,0 SetPixel .x,.y,.color ```Of course, these four instructions can be integrated into the pixel plotting routine, but multiple versions are required for different line directions. Basically, one coordinate always advances one and the other has a chance of advancing when the fraction overflows. STC is required to allow both coordinates to advance every iteration - the 45 degree line. Only integer instructions are needed. Chapter 35, Michael Abrash's classic Graphics Programming Black Book http://www.gamedev.net/reference/articles/article1698.asp http://www.edepot.com/algorithm.html 04 Sep 2010, 05:20
bitshifter

Joined: 04 Dec 2007
Posts: 786
Location: Massachusetts, USA
bitshifter
Ahh, the Black Book In chapter 37 we have run slice length lines...
Very efficient at drawing long'ish lines.
So i ported some code from the book to FASM...
It runs in .COM enviroment where INT 20h is recognized.
It also uses some BIOS keyboard and video interrupts.
I tested on WinXP but i think it will run on anything earlier.
Also there are ways to optimize (see chapter 37 for details)
Code:
```SCREEN_WIDTH            equ 320
SCREEN_SEGMENT          equ 0a000h

org 0100h
use16

mov     ax,13h
int     10h

mov     cx,0ffh
loop    \$

cld

;left border
push    04h     ;color
push    199     ;y2
push    0       ;x2
push    0       ;y1
push    0       ;x1
call    vga_320x200_line
;right border
push    05h     ;color
push    199     ;y2
push    319     ;x2
push    0       ;y1
push    319     ;x1
call    vga_320x200_line
;top border
push    06h     ;color
push    0       ;y2
push    319     ;x2
push    0       ;y1
push    0       ;x1
call    vga_320x200_line
;bottom border
push    07h     ;color
push    199     ;y2
push    319     ;x2
push    199     ;y1
push    0       ;x1
call    vga_320x200_line

mov     ah,00h
int     16h

mov     ax,03h
int     10h

mov     cx,0ffh
loop    \$

mov     ax,0
int     20h
jmp     \$

;============================================================================
; void near stdcall
; vga_320x200_line(
;       unsigned short XStart,
;       unsigned short YStart,
;       unsigned short XEnd,
;       unsigned short YEnd,
;       unsigned short Color)
;
; Draws a colored line between the specified endpoints.
;============================================================================

vga_320x200_line:

push    bp                      ;preserve caller's stack frame
mov     bp,sp                   ;point to our stack frame

; Proc parameters.
label .XStart word at bp+4              ;X start coordinate of line
label .YStart word at bp+6              ;Y start coordinate of line
label .XEnd   word at bp+8              ;X end coordinate of line
label .YEnd   word at bp+10             ;Y end coordinate of line
label .Color  byte at bp+12             ;color in which to draw line

sub     sp,8                    ; Allocate space for local variables.

; Local variables.
label .AdjDown   word at bp-4           ;error term adjust down when error term turns over
label .WholeStep word at bp-6           ;minimum run length
label .XAdvance  word at bp-8           ;1 or -1, for direction in which X advances

push    si                      ;preserve C register variables
push    di
push    ds                      ;preserve caller's DS
; We'll draw top to bottom, to reduce the number of cases we have to handle,
; and to make lines between the same endpoints always draw the same pixels.
mov     ax,[.YStart]
cmp     ax,[.YEnd]
jle     .LineIsTopToBottom
; Swap endpoints.
xchg    [.YEnd],ax
mov     [.YStart],ax
mov     bx,[.XStart]
xchg    [.XEnd],bx
mov     [.XStart],bx
.LineIsTopToBottom:
; Point DI to the first pixel to draw.
mov     dx,SCREEN_WIDTH
mul     dx                      ;YStart * SCREEN_WIDTH
mov     si,[.XStart]
mov     di,si
add     di,ax                   ;DI = YStart * SCREEN_WIDTH + XStart
; = offset of initial pixel
; Figure out how far we're going vertically (guaranteed to be positive).
mov     cx,[.YEnd]
sub     cx,[.YStart]            ;CX = YDelta
; Figure out whether we're going left or right, and how far we're going
; horizontally. In the process, special-case vertical lines, for speed and
; to avoid nasty boundary conditions and division by 0.
mov     dx,[.XEnd]
sub     dx,si                   ;XDelta
jnz     .NotVerticalLine        ;XDelta == 0 means vertical line

; Special-case code for vertical lines.
mov     ax,SCREEN_SEGMENT
mov     ds,ax                   ;point DS:DI to the first byte to draw
mov     al,[.Color]
.VerticalLineLoop:
mov     [di],al
dec     cx
jns     .VerticalLineLoop
jmp     .Done

; Special-case code for horizontal lines.
align 2
.IsHorizontalLine:
mov     ax,SCREEN_SEGMENT
mov     es,ax                                   ;point ES:DI to the first byte to draw
mov     al,[.Color]
mov     ah,al                           ;duplicate in high byte for word access
and     bx,bx                                   ;left to right?
jns     .DirSet                                  ;yes
sub     di,dx                                   ;currently right to left, point to left end so we
; can go left to right (avoids unpleasantness with
; right to left REP STOSW)
.DirSet:
mov     cx,dx
inc     cx                              ;# of pixels to draw
shr     cx,1                                    ;# of words to draw
rep     stosw                                   ;do as many words as possible
rep     stosb                                   ;do the odd byte, if there is one
jmp     .Done

; Special-case code for diagonal lines.
align 2
.IsDiagonalLine:
mov     ax,SCREEN_SEGMENT
mov     ds,ax                                   ;point DS:DI to the first byte to draw
mov     al,[.Color]
.DLoop:
mov     [di],al
dec     cx
jns     .DLoop
jmp     .Done

align 2
.NotVerticalLine:
mov     bx,1                            ;assume left to right, so XAdvance = 1
;***leaves flags unchanged***
jns     .LeftToRight                     ;left to right, all set
neg     bx                                      ;right to left, so XAdvance = -1
neg     dx                                      ;|XDelta|
.LeftToRight:
; Special-case horizontal lines.
and     cx,cx                                   ;YDelta == 0?
jz      .IsHorizontalLine ;yes
; Special-case diagonal lines.
cmp     cx,dx   ;YDelta == XDelta?
jz      .IsDiagonalLine  ;yes
; Determine whether the line is X or Y major, and handle accordingly.
cmp     dx,cx
jae     .XMajor
jmp     .YMajor

; X-major (more horizontal than vertical) line.
align 2
.XMajor:
mov     ax,SCREEN_SEGMENT
mov     es,ax                                   ;point ES:DI to the first byte to draw
and     bx,bx                   ;left to right?
jns     .DFSet                   ;yes, CLD is already set
std                             ;right to left, so draw backwards
.DFSet:
mov     ax,dx                   ;XDelta
sub     dx,dx                   ;prepare for division
div     cx                              ;AX = XDelta/YDelta
; (minimum # of pixels in a run in this line)
;DX = XDelta % YDelta
mov     bx,dx                           ;error term adjust each time Y steps by 1;
add     bx,bx                           ; used to tell when one extra pixel should be
mov     [.AdjUp],bx                   ; drawn as part of a run, to account for
; fractional steps along the X axis per
; 1-pixel steps along Y
mov     si,cx                           ;error term adjust when the error term turns
add     si,si                           ; over, used to factor out the X step made at
; Initial error term; reflects an initial step of 0.5 along the Y axis.
sub     dx,si                           ;(XDelta % YDelta) - (YDelta * 2)
;DX = initial error term
; The initial and last runs are partial, because Y advances only 0.5 for
; these runs, rather than 1. Divide one full run, plus the initial pixel,
; between the initial and last runs.
mov     si,cx                           ;SI = YDelta
mov     cx,ax                           ;whole step (minimum run length)
shr     cx,1
inc     cx                              ;initial pixel count = (whole step / 2) + 1;
; (may be adjusted later). This is also the
; final run pixel count
push    cx                              ;remember final run pixel count for later
; If the basic run length is even and there's no fractional advance, we have
; one pixel that could go to either the initial or last partial run, which
; we'll arbitrarily allocate to the last run.
; If there is an odd number of pixels per run, we have one pixel that can't
; be allocated to either the initial or last partial run, so we'll add 0.5 to
; the error term so this pixel will be handled by the normal full-run loop.
; (add 0.5 of a pixel to the error term)
test    al,1                            ;is run length even?
sub     dx,si                           ;length is even, undo odd stuff we just did
and     bx,bx                           ;is the adjust up equal to 0?
jnz     .XMajorAdjustDone                ;no (don't need to check for odd length,
; because of the above test)
dec     cx                              ;both conditions met; make initial run 1
; shorter
mov     [.WholeStep],ax       ;whole step (minimum run length)
mov     al,[.Color]                   ;AL = drawing color
; Draw the first, partial run of pixels.
rep     stosb                           ;draw the final run
; Draw all full runs.
cmp     si,1                            ;are there more than 2 scans, so there are
; some full runs? (SI = # scans - 1)
jna     .XMajorDrawLast                  ;no, no full runs
dec     dx                              ;adjust error term by -1 so we can use
; carry test
shr     si,1                            ;convert from scan to scan-pair count
jnc     .XMajorFullRunsOddEntry          ;if there is an odd number of scans,
; do the odd scan now
.XMajorFullRunsLoop:
mov     cx,[.WholeStep]      ;run is at least this long
jnc     .XMajorNoExtra                   ; pixel if the error term so indicates
inc     cx                              ;one extra pixel in run
sub     dx,[.AdjDown]                 ;reset the error term
.XMajorNoExtra:
rep     stosb                       ;draw this scan line's run
.XMajorFullRunsOddEntry:                         ;enter loop here if there is an odd number
; of full runs
mov     cx,[.WholeStep]       ;run is at least this long
jnc     .XMajorNoExtra2                  ; pixel if the error term so indicates
inc     cx                              ;one extra pixel in run
sub     dx,[.AdjDown]                 ;reset the error term
.XMajorNoExtra2:
rep     stosb                       ;draw this scan line's run

dec     si
jnz     .XMajorFullRunsLoop
; Draw the final run of pixels.
.XMajorDrawLast:
pop     cx                              ;get back the final run pixel length
rep     stosb                           ;draw the final run

cld                                     ;restore normal direction flag
jmp     .Done

; Y-major (more vertical than horizontal) line.
align 2
.YMajor:
mov     ax,SCREEN_SEGMENT
mov     ds,ax                                   ;point DS:DI to the first byte to draw
mov     ax,cx                           ;YDelta
mov     cx,dx                           ;XDelta
sub     dx,dx                           ;prepare for division
div     cx                              ;AX = YDelta/XDelta
; (minimum # of pixels in a run in this line)
;DX = YDelta % XDelta
mov     bx,dx                           ;error term adjust each time X steps by 1;
add     bx,bx                           ; used to tell when one extra pixel should be
mov     [.AdjUp],bx                   ; drawn as part of a run, to account for
; fractional steps along the Y axis per
; 1-pixel steps along X
mov     si,cx                           ;error term adjust when the error term turns
add     si,si                           ; over, used to factor out the Y step made at

; Initial error term; reflects an initial step of 0.5 along the X axis.
sub     dx,si                           ;(YDelta % XDelta) - (XDelta * 2)
;DX = initial error term
; The initial and last runs are partial, because X advances only 0.5 for
; these runs, rather than 1. Divide one full run, plus the initial pixel,
; between the initial and last runs.
mov     si,cx                           ;SI = XDelta
mov     cx,ax                           ;whole step (minimum run length)
shr     cx,1
inc     cx                              ;initial pixel count = (whole step / 2) + 1;
push    cx                              ;remember final run pixel count for later

; If the basic run length is even and there's no fractional advance, we have
; one pixel that could go to either the initial or last partial run, which
; we'll arbitrarily allocate to the last run.
; If there is an odd number of pixels per run, we have one pixel that can't
; be allocated to either the initial or last partial run, so we'll add 0.5 to
; the error term so this pixel will be handled by the normal full-run loop.
test    al,1                            ;is run length even?
sub     dx,si                           ;length is even, undo odd stuff we just did
and     bx,bx                           ;is the adjust up equal to 0?
jnz     .YMajorAdjustDone                ;no (don't need to check for odd length,
; because of the above test)
dec     cx                              ;both conditions met; make initial run 1
; shorter
mov     [.WholeStep],ax       ;whole step (minimum run length)
mov     al,[.Color]           ;AL = drawing color
; Draw the first, partial run of pixels.
.YMajorFirstLoop:
mov     [di],al                         ;draw the pixel
dec     cx
jnz     .YMajorFirstLoop
; Draw all full runs.
cmp     si,1                            ;# of full runs. Are there more than 2
; columns, so there are some full runs?
; (SI = # columns - 1)
jna     .YMajorDrawLast                  ;no, no full runs
dec     dx                              ;adjust error term by -1 so we can use
; carry test
shr     si,1                            ;convert from column to column-pair count
jnc     .YMajorFullRunsOddEntry          ;if there is an odd number of
; columns, do the odd column now
.YMajorFullRunsLoop:
mov     cx,[.WholeStep]       ;run is at least this long
jnc     .YMajorNoExtra                   ; pixel if the error term so indicates
inc     cx                              ;one extra pixel in run
sub     dx,[.AdjDown]                 ;reset the error term
.YMajorNoExtra:
;draw the run
.YMajorRunLoop:
mov     [di],al                         ;draw the pixel
dec     cx
jnz     .YMajorRunLoop
.YMajorFullRunsOddEntry:                         ;enter loop here if there is an odd number
; of full runs
mov     cx,[.WholeStep]       ;run is at least this long
jnc     .YMajorNoExtra2                  ; pixel if the error term so indicates
inc     cx                              ;one extra pixel in run
sub     dx,[.AdjDown]                 ;reset the error term
.YMajorNoExtra2:
;draw the run
.YMajorRunLoop2:
mov     [di],al                         ;draw the pixel
dec     cx
jnz     .YMajorRunLoop2

dec     si
jnz     .YMajorFullRunsLoop
; Draw the final run of pixels.
.YMajorDrawLast:
pop     cx                              ;get back the final run pixel length
.YMajorLastLoop:
mov     [di],al                         ;draw the pixel
dec     cx
jnz     .YMajorLastLoop
.Done:
pop     ds                                  ;restore caller's DS
pop     di
pop     si                                  ;restore C register variables
mov     sp,bp                       ;deallocate local variables
pop     bp                                  ;restore caller's stack frame
retn    2*5
```

_________________
Coding a 3D game engine with fasm is like trying to eat an elephant,
you just have to keep focused and take it one 'byte' at a time. 07 Sep 2010, 00:26  bitRAKE Joined: 21 Jul 2003 Posts: 3282 Location: vpcmipstrm bitRAKE Po-Han Lin claims his algorithm is the fastest due to no branching. (The inner loop I posted above is for his type of algorithm.) In general I would have to agree, but now-a-day displays are high resolution or we also want anti-aliasing. Seriously doubt he was the first to concoct the method in 2005. 07 Sep 2010, 07:04
 Madis731 Joined: 25 Sep 2003 Posts: 2139 Location: Estonia Madis731 I take it with skepticism: "Pentium III 450Mhz, Visual C++ 6.0" Pentium III was fast, but with the emergence of Core 2 class CPUs, many things have changed. -branch predition is better -branch target buffer entries have quadrupled since then -micro/macro-op fusion (especially with jumps) PIII had a misprediction penalty of 10-20 clocks. While with P4 they made it worse - 24+ - they got back to 15 with Core 2. IMUL latency has come down from 4 to 3. BUT... I will bite and try it sometimes. Maybe my 64-bit Bresenham implementation doesn't beat EFLA. UPDATE: I'm starting to get interested: http://www.simppa.fi/blog/extremely-fast-line-algorithm-as3-optimized/ http://jacksondunstan.com/articles/506 hmm.. UPDATE2: Is it okay if I used this trick instead: Code:``` mov [shortLen],r9d ;int shortLen=y2-y; sub [shortLen],edx sbb eax,eax mov [longLen],r8d ;int longLen=x2-x; sub [longLen],ecx sbb ebx,ebx xor eax,[shortLen] ;if (abs(shortLen)>abs(longLen)) { xor ebx,[longLen] cmp eax,ebx jle @f ;{ ... } @@: ```? 07 Sep 2010, 08:17
bitshifter

Joined: 04 Dec 2007
Posts: 786
Location: Massachusetts, USA
bitshifter
EFLA doesnt look so fast to me...
It uses 2 abs calls and does some swapping.
It shouldnt be very hard for my optimizing friend Madis to kill it. *ramble*
Sad news, my Pentium 4 has died a slow death.
It took my Agner Fog PMC port along with it.
I wont be timing much code for a while... 08 Sep 2010, 05:36  Madis731 Joined: 25 Sep 2003 Posts: 2139 Location: Estonia Madis731 On topic: I cannot get rid of the division. And its killing the performance on every line shorter than 100px. DIV only takes 4 clocks, but the results lag 14-23 clocks. By that time Bresenham's would have had drawn at least 14 pixels...by WHICH time Intel's branch prediction would have predicted the sequence of jump-no jump. I think it will be faster with high-level code, where abs(), putpixel() and xchg() take up so much time that you don't notice one DIV there {and I'm only being sarcastic} Off topic: @bitshifter: I guess you need to look on the bright side and realize that your next CPU will be at least of class Core i3 (~\$115). I guess P4 did cost you much more and the performance was so-so. SSE4.1: ROUNDxx, DPPx for 3D calculations, BLEND, PTEST for bitwizardry SSE4.2: PCMPxSTRx, CRC32, POPCNT etc. http://ark.intel.com/Product.aspx?id=48140 08 Sep 2010, 06:25
 edfed Joined: 20 Feb 2006 Posts: 4252 Location: Now edfed when using a putpixel sub for th line algo, there is a singular trick... X and Y are incremented (decremented) by 1 or 0. of course it is faster to put the pixel in the loop, then, increment Y by BPSL (Bytes Per Scan Line), and X by BPP (Byte Per Pixel). the annoying poin is that X86 don't have 2bits micro registers, able to contain a signed value between -1 and 1. then, just to have the Xincrement and Yincrement, you should use an entire registers (16, 32 or 64 bits) just to increment or decrement. i have counted the number of datas needed for a line computation with besenham algo: Line max lengh Accumulator for iterative division. X current Y current Delta Min Delta Max X increment 1 (-1, 0, 1) Y increment 1 (//) X increment 2 (//) Y increment 2 (//) Color (or value) in total, 11 registers are needed to compute the dots. X (or X1) Y (or Y1) Xlengh (or X2) Ylengh (or Y2) Color (or value) and 5 registers to pass parameters to line function. when using putpixel for the line, you can easyly use the same algo to interpolate datas in an array. for example, create an audio ramp between two samples. or whatever application you want. because a line is not always a geometric thing. for those who don't understand how works the besenham algo: delta min and delta max are picked from Xlengh and Ylengh Xlengh and Ylengh are picked from X1-X2 and Y1-Y2. encode the line parameters with X,Y,XL,YL is faster because we don't need to compute X1-X2 and Y1-Y2. ones we know XL and YL, we need to set the Xincrements and Yincrements to 1. the test for sign of XL and YL will give the sign to Xinc and Yinc, followed by abs(Xl) and abs(Yl). compare abs(XL) and abs(YL) will give deltamin and deltamax. and will set the Xinc1 & 2, and Yinc1 & 2 to 0 or |1|. ones we have deltas and incs sets, we can compute the line. set line max lengh to delta max. set accumulator to deltamax/2 set Xcurrent to X (or X1) set Ycurrent to Y (or Y1) enter the main loop: decrement line max lengh if negative, then end plot a dot using Xcurrent and Ycurrent Xcurrent + Xinc1 Ycurrent + Yinc1 accumulator + delta min compare accumulator with deltamax if lower, then, main loop secondary loop: Xcurrent + Xinc2 Ycurrent + Yinc2 accumulator - deltamax main loop. end: ret that's maybe why it can be good to decompose the algo in 8 cadrans. then, it will avoid the need of Xinc1 & 2, and Yinc1 & 2. _________________ fool gitlab fasmstuff gitlab foolstuff 09 Sep 2010, 09:19
 Madis731 Joined: 25 Sep 2003 Posts: 2139 Location: Estonia Madis731 I put the EFLA topic in another thread: http://board.flatassembler.net/topic.php?p=118753 09 Sep 2010, 12:47
 Display posts from previous: All Posts1 Day7 Days2 Weeks1 Month3 Months6 Months1 Year Oldest FirstNewest First  Jump to: Select a forum Official----------------AssemblyPeripheria General----------------MainTutorials and ExamplesDOSWindowsLinuxUnixMenuetOS Specific----------------MacroinstructionsOS ConstructionIDE DevelopmentProjects and IdeasNon-x86 architecturesHigh Level LanguagesProgramming Language DesignCompiler Internals Other----------------FeedbackHeapTest Area

Forum Rules:
 You cannot post new topics in this forumYou cannot reply to topics in this forumYou cannot edit your posts in this forumYou cannot delete your posts in this forumYou cannot vote in polls in this forumYou cannot attach files in this forumYou can download files in this forum