Problem with qsort

Index > Linux > Problem with qsort

Author

Thread

Ton

Joined: 06 Jan 2005
Posts: 22

Ton 15 Jan 2009, 11:18

I wanted to use the qsort example from
http://board.flatassembler.net/topic.php?t=3048

At the time I had problem with the same qsort, and actually still have, If
used against a small array it works nicely. However the example presented
won't run. I get a 'Segmentation fault'. I added some print lines,
and see that the sorting is actually working, but suddenly the problem
pops up when eip=0x8. This happens in the .qc5 section of the qsort call.

If I run the program next is displayed

lo hi x a[x]
0 f 7 6
A a[]=3 1 4 1 5 3 2 5 5 3 6 8 9 7 9 9
0x24 0x6 0x1 0x28 0x804808b 0x8048141 0x24 0x0 0x804807c 0x3c 0x0

meaning

lo=0
hi=15
x=7
a[7]=6
A means quicksort is invoked from .qc4
B means quicksort is invoked from .qc5
the array is printed
the stack is printed

$ ./prog

lo hi x a[x]
a[]=3 1 4 1 5 9 2 6 5 3 5 8 9 7 9 3
0x10 0x804832a 0x1 0x10 0x804808b 0x804807c 0x3c 0x0 0x1 0xbfa13737 0x0
0 f 7 6
A a[]=3 1 4 1 5 3 2 5 5 3 6 8 9 7 9 9
0x24 0x6 0x1 0x28 0x804808b 0x8048141 0x24 0x0 0x804807c 0x3c 0x0
0 9 4 5
A a[]=3 1 4 1 3 3 2 5 5 5 6 8 9 7 9 9
0x1c 0x5 0x1 0x20 0x804808b 0x8048141 0x1c 0x0 0x8048141 0x24 0x0
0 7 3 1
B a[]=1 1 4 3 3 3 2 5 5 5 6 8 9 7 9 9
0x0 0x1 0x1 0x8 0x804808b 0x804816c 0x1c 0x8 0x8048141 0x1c 0x0
2 7 4 3
A a[]=1 1 2 3 3 3 4 5 5 5 6 8 9 7 9 9
0xc 0x3 0x1 0x14 0x804808b 0x8048141 0xc 0x8 0x804816c 0x1c 0x8
2 3 2 2
B a[]=1 1 2 3 3 3 4 5 5 5 6 8 9 7 9 9
0xc 0x2 0x1 0xc 0x804808b 0x804816c 0x1c 0xc 0x804816c 0x1c 0x8
3 7 5 3
B a[]=1 1 2 3 3 3 4 5 5 5 6 8 9 7 9 9
0xc 0x3 0x1 0x14 0x804808b 0x804816c 0x1c 0x14 0x804816c 0x1c 0xc
5 7 6 4

If running the problem in ald next is shown: eip=0x8

Program received signal SIGSEGV (Segmentation fault)
Location: 0x00000008
eax = 0x0000001C ebx = 0x00000001 ecx = 0x00000004 edx = 0x00000014
esp = 0xBFE343B0 ebp = 0x00000000 esi = 0x00000000 edi = 0x080482D6
ds = 0x007B es = 0x007B fs = 0x0000 gs = 0x0000
ss = 0x007B cs = 0x0073 eip = 0x00000008 eflags = 0x00010246

Flags: PF ZF IF RF

Error disassembling next instruction (address: 0x00000008)

My problem is that I can't figure out what is the cuase of the problem.
Is somebody willing to have a look as well, and steer me into the right
direction? Attached a minimal example. Many thanks.

Best Regards,
Ton

format elf executable

entry main

main:

mov ecx, string_address ; address of string
mov edx, [string_address.length] ; length of string
mov ebx, 1 ; stdout
mov eax, 4 ; sys_write
int $80

push [lo] [hi]
call quicksort

jmp exit

;--------------------------------

quicksort: ; void quicksort (int[] a, int lo, int hi)
;;; begin debug
call prarr
call prstack
push eax
mov eax, [esp+12]
shr eax, 2
call dot ; lo
mov eax, [esp+8]
shr eax, 2
call dot ; hi
pop eax
;;; end debug

; eax = i=lo
; ecx = x
; edx = j=hi

mov eax,[esp+8] ;{
mov edx,[esp+4] ;// lo is the lower index, hi is the upper index
lea ecx,[eax+edx] ;// of the region of array a that is to be sorted
shr ecx,3 ; int i=lo, j=hi, h;
shl ecx,2

;;; begin debug
push eax
mov eax, ecx
shr eax, 2
call dot ; x
pop eax
;;; end debug

mov ecx,[a+ecx] ; int x=a[(lo+hi)/2];

;;; begin debug
push eax
mov eax, ecx
call dot ; a[x]
mov eax, 10
call emit ; cr
pop eax
;;; end debug

.qc1: ;
cmp [a+eax],ecx ; // partition
jnc .qc2 ; do
add eax,4
jmp .qc1 ; {
.qc2: ; while (a[i]<x) i++; // .qc1
cmp ecx,[a+edx] ; while (a[j]>x) j--; // .qc2
jnc .qc3 ; if (i<=j) // .qc3
sub edx,4 ; {
jmp .qc2 ; h=a[i]; a[i]=a[j]; a[j]=h;
.qc3: ; i++; j--;
cmp edx,eax ; }
jc .qc4 ; } while (i<=j);
push [a+eax] [a+edx] ;
pop [a+eax] [a+edx] ; // recursion
add eax,4 ; if (lo<j) quicksort(a, lo, j); // .qc4
sub edx,4 ; if (i<hi) quicksort(a, i, hi); // .qc5
cmp edx,eax ;} // .qc6
jnc .qc1
.qc4:
cmp [esp+8],edx
jnc .qc5
push dword [esp+8]
push edx

;;; debug
push eax
mov eax, 'A'
call emit
mov eax, ' '
call emit
pop eax
;;; end debug

call quicksort
pop edx
pop dword [esp+12]
.qc5:
cmp eax,[esp+4]
jnc .qc6
push eax
push dword [esp+8]

;;; debug
push eax
mov eax, 'B'
call emit
mov eax, ' '
call emit
pop eax
;;; end debug

call quicksort
pop dword [esp+12]
pop eax
.qc6:
ret

align 4
a dd 3,1,4,1,5,9,2,6,5,3,5,8,9,7,9,3 ;16
lo dd 0
hi dd 15*4

;--------------------------------

prstack: ; print the stack
push eax
push ebx
push ecx
push edx
mov ebx,$10 ; \
mov [base],ebx ; configure dot
xor edx,edx ; /

xor edx,edx
.a: mov eax,'0'
call emit
mov eax,'x'
call emit
mov eax,[esp+edx]
call dot
add edx,4
cmp edx,40
jna .a

mov eax, 10
call emit
pop edx
pop ecx
pop ebx
pop eax
ret

;--------------------------------

prarr: ; print the array
push eax
push ebx
push ecx
push edx
mov eax,'a'
call emit
mov eax,'['
call emit
mov eax,']'
call emit
mov eax,'='
call emit
mov ebx,10 ; \
mov [base],ebx ; configure dot
xor edx,edx

.a:mov eax,[a+edx]
call dot
add edx,4
cmp edx,[hi]
jna .a

mov eax, 10
call emit
pop edx
pop ecx
pop ebx
pop eax
ret

;--------------------------------

dot: ; print a number
; eax = number, ebx = base, edi = buffer
push eax
push ebx
push ecx
push edx
mov ebx, [base]
cmp ebx,10
jne udot
mov edi,buffer
or eax,eax ; negative?
jns udot
push eax
mov al,'-' ; eax is altered
stosb ; only al is stored
mov edx, 1
mov ecx, buffer
mov ebx, 1 ; stdout
mov eax, 4 ; sys_write
int 80h ; Linux syscall
pop eax
neg eax
udot: ; print a unsigned number
; eax = number, ebx = base, edi = buffer
mov ebx, [base]
mov edi,buffer
xor ecx,ecx
.new:
xor edx,edx
div ebx
push edx ; push remainder
inc ecx ; loop cnt, see below
test eax,eax
jnz .new
mov edx,ecx ; for type above
.loop:
pop eax
add al,30h
cmp al,'9'
jng .ok
add al,27h ; lowercases
.ok:
stosb ; mov edi,[al], and inc edi, inc edx
loop .loop ; loop bails out on eci=0
mov al,32 ; terminate with space
inc edx
stosb
type:
; edx = count
mov ecx, buffer
mov ebx, 1 ; stdout
mov eax, 4 ; sys_write
int 80h ; Linux syscall
pop edx
pop ecx
pop ebx
pop eax
ret

buffer rb 33 ; 32 + 1
base rd 1

;--------------------------------

emit: ; print one character
push eax
push ebx
push ecx
push edx
mov [char], eax
mov edx, 1 ; count
mov ecx, char
mov ebx, 1 ; stdout
mov eax, 4 ; sys_write
int 80h ; Linux syscall
pop edx
pop ecx
pop ebx
pop eax
ret

char dd 0

;--------------------------------

exit:
xor ebx,ebx ; exit 0
mov eax, 1
int 80h

;--------------------------------

string_address db 'lo hi x a[x]',10
.length dd $ - string_address

[url][/url]

Description:	Program with debug lines demonstrating the issue	Download
Filename:	prog.asm
Filesize:	5.78 KB
Downloaded:	894 Time(s)

15 Jan 2009, 11:18

Endre

Joined: 29 Dec 2003
Posts: 215
Location: Budapest, Hungary

Endre 15 Jan 2009, 15:35

In quicksort function you assume lo and hi variables to reside at esp + 4/8 and esp + 8/12. But when calling to quicksort again and again and recursively esp will change (think of return address). That is esp + 4/8 and esp + 8/12 will change as well. So when reading at those addresses you won't read lo or hi as you would expect but something else. Additionally you have a good chance to overwrite return addresses of the previous recursive calls when popping into [esp + 8] or [esp + 12] from the top of the stack.

Otherwise there is one more problem I found. At the beginning after calling to quicksort you forgot to reset esp:

Code:

push [lo] [hi]
call quicksort
add  esp, 8 ; <--- add this to reset esp

15 Jan 2009, 15:35

Ton

Joined: 06 Jan 2005
Posts: 22

Ton 15 Jan 2009, 18:33

Thanks Endre, for your reply. I checked the stack again, and indeed there are 2, 3, or 4 values. I have to think this over if this is solvable.

15 Jan 2009, 18:33

Ton

Joined: 06 Jan 2005
Posts: 22

Ton 17 Jan 2009, 16:09

I found the cause. It should be

Code:

    
    push dword [esp+8]
    push edx
    call quicksort
    pop  edx
    pop  dword [esp+8]

rather then:

Code:

    
    push dword [esp+8]
    push edx
    call quicksort
    pop  edx
    pop  dword [esp+12]   ; note the 12

Attached the correct code with all debug lines.

Below the correct quicksort:

Code:

quicksort:                  ; void quicksort (int[] a, int lo, int hi)

; eax = i=lo
; ecx = x
; edx = j=hi

    mov eax,[esp+8]         ;{
    mov edx,[esp+4]         ;//  lo is the lower index, hi is the upper index
    lea ecx,[eax+edx]       ;//  of the region of array a that is to be sorted
    shr ecx,3               ;    int i=lo, j=hi, h;
    shl ecx,2
    mov ecx,[a+ecx]         ;    int x=a[(lo+hi)/2];
  .qc1:                     ;
    cmp [a+eax],ecx         ;    //  partition
    jnc  .qc2               ;    do
    add eax,4
    jmp .qc1                ;    {
  .qc2:                     ;        while (a[i]<x) i++;      // .qc1
    cmp ecx,[a+edx]         ;        while (a[j]>x) j--;      // .qc2
    jnc .qc3                ;        if (i<=j)                // .qc3
    sub edx,4               ;        {
    jmp .qc2                ;            h=a[i]; a[i]=a[j]; a[j]=h;
  .qc3:                     ;            i++; j--;
    cmp edx,eax             ;        }
    jc  .qc4                ;    } while (i<=j);             
    push [a+eax] [a+edx]    ;
    pop  [a+eax] [a+edx]    ;    //  recursion
    add eax,4               ;    if (lo<j) quicksort(a, lo, j);  // .qc4
    sub edx,4               ;    if (i<hi) quicksort(a, i, hi);  // .qc5
    cmp edx,eax             ;}                                   // .qc6
    jnc .qc1
  .qc4:
    cmp  [esp+8],edx
    jnc  .qc5
    push dword [esp+8]
    push edx  
    call quicksort
    pop  edx
    pop  dword [esp+8]
  .qc5:
    cmp  eax,[esp+4]
    jnc  .qc6
    push eax
    push dword [esp+8]
    call quicksort
    pop  dword [esp+8]
    pop  eax
  .qc6:
    ret

align 4
a  dd 3,1,4,1,5,9,2,6,5,3,5,8,9,7,9,3   ;16
lo dd 0
hi dd 15*4

Description:	Working quicksort (with debug lines)	Download
Filename:	prog.asm
Filesize:	5.88 KB
Downloaded:	910 Time(s)

17 Jan 2009, 16:09

Ton

Joined: 06 Jan 2005
Posts: 22

Ton 21 Jan 2009, 17:35

The previous example only dealt postive numbers. Here an improved version:

Code:

format ELF

section '.text' executable

public _start
public quicksort
public a
public lo
public hi

_start:

   push [lo] [hi]
   call quicksort
   add  esp, 8

   xor ebx,ebx  ; exit 0
   mov eax, 1
   int 80h

quicksort:
    mov eax,[esp+8]
    mov edx,[esp+4]
    lea ecx,[eax+edx]
    shr ecx,3
    shl ecx,2
    mov ecx,[a+ecx]

  .a:
  @@:
    cmp [a+eax],ecx
    jge @f
    add eax,4
    jmp @b
  @@:
    cmp ecx,[a+edx]
    jge @f
    sub edx,4
    jmp @b
  @@:
    cmp eax,edx
    jg @f
    push [a+eax] [a+edx]
    pop [a+eax] [a+edx]
    add eax,4
    sub edx,4
  @@:
    cmp eax,edx
    jle .a
    cmp [esp+8],edx
    jge  @f
    push dword [esp+8]
    push edx
    call quicksort
    pop  edx
    pop  dword [esp+8]
  @@:
    cmp  eax,[esp+4]
    jge  @f
    push eax
    push dword [esp+8]
    call quicksort
    pop  dword [esp+8]
    pop  eax
  @@:
    ret

align 4
a  dd 3,-1,4,1,5,-9,2,6,5,3,5,8,9,7,9,3   ;16
lo dd 0
hi dd 15*4

Code:

fasm prog.asm
ld -o prog prog.o

21 Jan 2009, 17:35

r22

Joined: 27 Dec 2004
Posts: 805

r22 22 Jan 2009, 17:20

RadixSort is the fastest for 32-bit integers.

http://board.flatassembler.net/topic.php?t=5081

22 Jan 2009, 17:20

revolution
When all else fails, read the source

Joined: 24 Aug 2004
Posts: 20700
Location: In your JS exploiting you and your system

revolution 23 Jan 2009, 00:33

r22 wrote:

RadixSort is the fastest for 32-bit integers.

http://board.flatassembler.net/topic.php?t=5081

It depends upon many factors as to what is the fastest sorting method to use. I think that a blanket statement saying ANY particular sort method is fastest cannot be supported unless the exact details of the requirement are known. I have always found qsort to be a very good general sort method but in certain situations it can be very slow. Radixsort is very good in some special situations but not very good for more general problems.

23 Jan 2009, 00:33

r22

Joined: 27 Dec 2004
Posts: 805

r22 23 Jan 2009, 14:49

revolution wrote:

r22 wrote:
RadixSort is the fastest for 32-bit integers.

http://board.flatassembler.net/topic.php?t=5081
It depends upon many factors as to what is the fastest sorting method to use. I think that a blanket statement saying ANY particular sort method is fastest cannot be supported unless the exact details of the requirement are known. I have always found qsort to be a very good general sort method but in certain situations it can be very slow. Radixsort is very good in some special situations but not very good for more general problems.

By that logic, you wouldn't be able to safely say "SSE instructions are the fastest for summing floating point numbers"

Everything 'depends on many factors' but questioning a generally true statement because of that is ludicrous. How much proof do you require before you can accept a statement as "true enough".

Q1: What's the GENERAL case for using an optimized or complex (better than bubble or insertion sort) sorting algorithm?
A1: Sorting MANY UNsorted (possibly random) values.

Q2: What's the GENERAL case for supporting the use of RadixSort?
A2: Many UNsorted (possibly random) 32bit integers.

Q3: Which algorithm has the better AVERAGE/GENERAL case?
A3: RadixSort O(n), while QSort O(n log(n)), so RadixSort.

Q4: Is there empirical evidence that supports using one algorithm over the other for 32bit integers?
A4: Yes, the board topic linked above shows timings of QSort implementations vs RadixSort for unsigned 32bit integers.

I think saying "RadixSort is faster for 32bit integers" should be generally accepted, whether it's 100% true in every case and corner case is irrelevant to its accuracy as a statement.

"The sky is blue"
What about clouds and fog and other planets?

"There are 365 days in a year"
What about leap days/seconds 365.24...?

"Light is the fastest phenomena"
What about frequencies and media, is it in a vocuum or not.

"Cheetah are the fastest animals"
What about a guy in a rocket, he's basically an animal and he's faster than a cheetah? What if the cheetah has a broken leg?

HOWEVER debating the semantics of the word 'fastest' and the 'depends on many factors' argument is entertaining in its own right Very Happy

23 Jan 2009, 14:49

revolution
When all else fails, read the source

Joined: 24 Aug 2004
Posts: 20700
Location: In your JS exploiting you and your system

revolution 23 Jan 2009, 15:04

The big O notation does not take into account the implementation details, just the complexity. This notation is insufficient to give a speed estimate of any particular implementation or case of application.

Example: If I need to sort two 32bit values I can do a single comparison and swap if they are in the wrong order, a bubble sort. Using some other more advanced sort method will usually involve some form of overhead with the variable setup or memory initialisation etc. before applying the sort algorithm. Both radix sort and qsort will be slower than bubble sort in this case.

Another-example: If I need to sort 100 32bit values but the values are expected to be in almost sorted order then again a simple bubble sort may well turn out to be superior in speed simply because of the case it applies to.

A-third-example: If I need to sort 1000 values but no expected order is known (random ordering). Now we expect a bubble sort to be rather slow and decide to use either qsort or radix sort. This is where we have to test to see which is faster. Radix sort requires more overhead and setup and after doing such things may turn out to be slower than qsort with the smaller overhead. These fixed time overheads may overwhelm the algorithmic advantage for smallish value sets.

A-fourth-example: If I need to sort 1000000 values. Now we may find the algorithmic advantage of radix sort is enough to overcome the extra overheads and will beat all other methods.

It is situation dependant. No algorithm can be best in all cases.

[edit]
Oh, I forgot to mention that the general case is meant in the context of the app that the sort algo is used within. Some programs may only ever require sorting no more than 50 values at a time, others may require sorting millions. So even the general case is situation dependant.

A good example of how the Big O notation thing can be misleading is the AKS primality proving algorithm. It "should" be the fastest method available if one looks at the Big O complexity given. But in a practical situation it is very slow because of the implementation details that it requires. The break even point is not known and would probably be in the trillions+ of digits, or higher, range.

23 Jan 2009, 15:04

< Last Thread | Next Thread >

Forum Rules:

You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum