flat assembler
Message board for the users of flat assembler.
Index
> Linux > Problem with qsort |
Author |
|
Ton 15 Jan 2009, 11:18
I wanted to use the qsort example from
http://board.flatassembler.net/topic.php?t=3048 At the time I had problem with the same qsort, and actually still have, If used against a small array it works nicely. However the example presented won't run. I get a 'Segmentation fault'. I added some print lines, and see that the sorting is actually working, but suddenly the problem pops up when eip=0x8. This happens in the .qc5 section of the qsort call. If I run the program next is displayed lo hi x a[x] 0 f 7 6 A a[]=3 1 4 1 5 3 2 5 5 3 6 8 9 7 9 9 0x24 0x6 0x1 0x28 0x804808b 0x8048141 0x24 0x0 0x804807c 0x3c 0x0 meaning lo=0 hi=15 x=7 a[7]=6 A means quicksort is invoked from .qc4 B means quicksort is invoked from .qc5 the array is printed the stack is printed $ ./prog lo hi x a[x] a[]=3 1 4 1 5 9 2 6 5 3 5 8 9 7 9 3 0x10 0x804832a 0x1 0x10 0x804808b 0x804807c 0x3c 0x0 0x1 0xbfa13737 0x0 0 f 7 6 A a[]=3 1 4 1 5 3 2 5 5 3 6 8 9 7 9 9 0x24 0x6 0x1 0x28 0x804808b 0x8048141 0x24 0x0 0x804807c 0x3c 0x0 0 9 4 5 A a[]=3 1 4 1 3 3 2 5 5 5 6 8 9 7 9 9 0x1c 0x5 0x1 0x20 0x804808b 0x8048141 0x1c 0x0 0x8048141 0x24 0x0 0 7 3 1 B a[]=1 1 4 3 3 3 2 5 5 5 6 8 9 7 9 9 0x0 0x1 0x1 0x8 0x804808b 0x804816c 0x1c 0x8 0x8048141 0x1c 0x0 2 7 4 3 A a[]=1 1 2 3 3 3 4 5 5 5 6 8 9 7 9 9 0xc 0x3 0x1 0x14 0x804808b 0x8048141 0xc 0x8 0x804816c 0x1c 0x8 2 3 2 2 B a[]=1 1 2 3 3 3 4 5 5 5 6 8 9 7 9 9 0xc 0x2 0x1 0xc 0x804808b 0x804816c 0x1c 0xc 0x804816c 0x1c 0x8 3 7 5 3 B a[]=1 1 2 3 3 3 4 5 5 5 6 8 9 7 9 9 0xc 0x3 0x1 0x14 0x804808b 0x804816c 0x1c 0x14 0x804816c 0x1c 0xc 5 7 6 4 If running the problem in ald next is shown: eip=0x8 Program received signal SIGSEGV (Segmentation fault) Location: 0x00000008 eax = 0x0000001C ebx = 0x00000001 ecx = 0x00000004 edx = 0x00000014 esp = 0xBFE343B0 ebp = 0x00000000 esi = 0x00000000 edi = 0x080482D6 ds = 0x007B es = 0x007B fs = 0x0000 gs = 0x0000 ss = 0x007B cs = 0x0073 eip = 0x00000008 eflags = 0x00010246 Flags: PF ZF IF RF Error disassembling next instruction (address: 0x00000008) My problem is that I can't figure out what is the cuase of the problem. Is somebody willing to have a look as well, and steer me into the right direction? Attached a minimal example. Many thanks. Best Regards, Ton format elf executable entry main main: mov ecx, string_address ; address of string mov edx, [string_address.length] ; length of string mov ebx, 1 ; stdout mov eax, 4 ; sys_write int $80 push [lo] [hi] call quicksort jmp exit ;-------------------------------- quicksort: ; void quicksort (int[] a, int lo, int hi) ;;; begin debug call prarr call prstack push eax mov eax, [esp+12] shr eax, 2 call dot ; lo mov eax, [esp+8] shr eax, 2 call dot ; hi pop eax ;;; end debug ; eax = i=lo ; ecx = x ; edx = j=hi mov eax,[esp+8] ;{ mov edx,[esp+4] ;// lo is the lower index, hi is the upper index lea ecx,[eax+edx] ;// of the region of array a that is to be sorted shr ecx,3 ; int i=lo, j=hi, h; shl ecx,2 ;;; begin debug push eax mov eax, ecx shr eax, 2 call dot ; x pop eax ;;; end debug mov ecx,[a+ecx] ; int x=a[(lo+hi)/2]; ;;; begin debug push eax mov eax, ecx call dot ; a[x] mov eax, 10 call emit ; cr pop eax ;;; end debug .qc1: ; cmp [a+eax],ecx ; // partition jnc .qc2 ; do add eax,4 jmp .qc1 ; { .qc2: ; while (a[i]<x) i++; // .qc1 cmp ecx,[a+edx] ; while (a[j]>x) j--; // .qc2 jnc .qc3 ; if (i<=j) // .qc3 sub edx,4 ; { jmp .qc2 ; h=a[i]; a[i]=a[j]; a[j]=h; .qc3: ; i++; j--; cmp edx,eax ; } jc .qc4 ; } while (i<=j); push [a+eax] [a+edx] ; pop [a+eax] [a+edx] ; // recursion add eax,4 ; if (lo<j) quicksort(a, lo, j); // .qc4 sub edx,4 ; if (i<hi) quicksort(a, i, hi); // .qc5 cmp edx,eax ;} // .qc6 jnc .qc1 .qc4: cmp [esp+8],edx jnc .qc5 push dword [esp+8] push edx ;;; debug push eax mov eax, 'A' call emit mov eax, ' ' call emit pop eax ;;; end debug call quicksort pop edx pop dword [esp+12] .qc5: cmp eax,[esp+4] jnc .qc6 push eax push dword [esp+8] ;;; debug push eax mov eax, 'B' call emit mov eax, ' ' call emit pop eax ;;; end debug call quicksort pop dword [esp+12] pop eax .qc6: ret align 4 a dd 3,1,4,1,5,9,2,6,5,3,5,8,9,7,9,3 ;16 lo dd 0 hi dd 15*4 ;-------------------------------- prstack: ; print the stack push eax push ebx push ecx push edx mov ebx,$10 ; \ mov [base],ebx ; configure dot xor edx,edx ; / xor edx,edx .a: mov eax,'0' call emit mov eax,'x' call emit mov eax,[esp+edx] call dot add edx,4 cmp edx,40 jna .a mov eax, 10 call emit pop edx pop ecx pop ebx pop eax ret ;-------------------------------- prarr: ; print the array push eax push ebx push ecx push edx mov eax,'a' call emit mov eax,'[' call emit mov eax,']' call emit mov eax,'=' call emit mov ebx,10 ; \ mov [base],ebx ; configure dot xor edx,edx .a:mov eax,[a+edx] call dot add edx,4 cmp edx,[hi] jna .a mov eax, 10 call emit pop edx pop ecx pop ebx pop eax ret ;-------------------------------- dot: ; print a number ; eax = number, ebx = base, edi = buffer push eax push ebx push ecx push edx mov ebx, [base] cmp ebx,10 jne udot mov edi,buffer or eax,eax ; negative? jns udot push eax mov al,'-' ; eax is altered stosb ; only al is stored mov edx, 1 mov ecx, buffer mov ebx, 1 ; stdout mov eax, 4 ; sys_write int 80h ; Linux syscall pop eax neg eax udot: ; print a unsigned number ; eax = number, ebx = base, edi = buffer mov ebx, [base] mov edi,buffer xor ecx,ecx .new: xor edx,edx div ebx push edx ; push remainder inc ecx ; loop cnt, see below test eax,eax jnz .new mov edx,ecx ; for type above .loop: pop eax add al,30h cmp al,'9' jng .ok add al,27h ; lowercases .ok: stosb ; mov edi,[al], and inc edi, inc edx loop .loop ; loop bails out on eci=0 mov al,32 ; terminate with space inc edx stosb type: ; edx = count mov ecx, buffer mov ebx, 1 ; stdout mov eax, 4 ; sys_write int 80h ; Linux syscall pop edx pop ecx pop ebx pop eax ret buffer rb 33 ; 32 + 1 base rd 1 ;-------------------------------- emit: ; print one character push eax push ebx push ecx push edx mov [char], eax mov edx, 1 ; count mov ecx, char mov ebx, 1 ; stdout mov eax, 4 ; sys_write int 80h ; Linux syscall pop edx pop ecx pop ebx pop eax ret char dd 0 ;-------------------------------- exit: xor ebx,ebx ; exit 0 mov eax, 1 int 80h ;-------------------------------- string_address db 'lo hi x a[x]',10 .length dd $ - string_address [url][/url]
|
|||||||||||
15 Jan 2009, 11:18 |
|
Ton 15 Jan 2009, 18:33
Thanks Endre, for your reply. I checked the stack again, and indeed there are 2, 3, or 4 values. I have to think this over if this is solvable.
|
|||
15 Jan 2009, 18:33 |
|
Ton 17 Jan 2009, 16:09
I found the cause. It should be
Code: push dword [esp+8] push edx call quicksort pop edx pop dword [esp+8] rather then: Code: push dword [esp+8] push edx call quicksort pop edx pop dword [esp+12] ; note the 12 Attached the correct code with all debug lines. Below the correct quicksort: Code: quicksort: ; void quicksort (int[] a, int lo, int hi) ; eax = i=lo ; ecx = x ; edx = j=hi mov eax,[esp+8] ;{ mov edx,[esp+4] ;// lo is the lower index, hi is the upper index lea ecx,[eax+edx] ;// of the region of array a that is to be sorted shr ecx,3 ; int i=lo, j=hi, h; shl ecx,2 mov ecx,[a+ecx] ; int x=a[(lo+hi)/2]; .qc1: ; cmp [a+eax],ecx ; // partition jnc .qc2 ; do add eax,4 jmp .qc1 ; { .qc2: ; while (a[i]<x) i++; // .qc1 cmp ecx,[a+edx] ; while (a[j]>x) j--; // .qc2 jnc .qc3 ; if (i<=j) // .qc3 sub edx,4 ; { jmp .qc2 ; h=a[i]; a[i]=a[j]; a[j]=h; .qc3: ; i++; j--; cmp edx,eax ; } jc .qc4 ; } while (i<=j); push [a+eax] [a+edx] ; pop [a+eax] [a+edx] ; // recursion add eax,4 ; if (lo<j) quicksort(a, lo, j); // .qc4 sub edx,4 ; if (i<hi) quicksort(a, i, hi); // .qc5 cmp edx,eax ;} // .qc6 jnc .qc1 .qc4: cmp [esp+8],edx jnc .qc5 push dword [esp+8] push edx call quicksort pop edx pop dword [esp+8] .qc5: cmp eax,[esp+4] jnc .qc6 push eax push dword [esp+8] call quicksort pop dword [esp+8] pop eax .qc6: ret align 4 a dd 3,1,4,1,5,9,2,6,5,3,5,8,9,7,9,3 ;16 lo dd 0 hi dd 15*4
|
|||||||||||
17 Jan 2009, 16:09 |
|
Ton 21 Jan 2009, 17:35
The previous example only dealt postive numbers. Here an improved version:
Code: format ELF section '.text' executable public _start public quicksort public a public lo public hi _start: push [lo] [hi] call quicksort add esp, 8 xor ebx,ebx ; exit 0 mov eax, 1 int 80h quicksort: mov eax,[esp+8] mov edx,[esp+4] lea ecx,[eax+edx] shr ecx,3 shl ecx,2 mov ecx,[a+ecx] .a: @@: cmp [a+eax],ecx jge @f add eax,4 jmp @b @@: cmp ecx,[a+edx] jge @f sub edx,4 jmp @b @@: cmp eax,edx jg @f push [a+eax] [a+edx] pop [a+eax] [a+edx] add eax,4 sub edx,4 @@: cmp eax,edx jle .a cmp [esp+8],edx jge @f push dword [esp+8] push edx call quicksort pop edx pop dword [esp+8] @@: cmp eax,[esp+4] jge @f push eax push dword [esp+8] call quicksort pop dword [esp+8] pop eax @@: ret align 4 a dd 3,-1,4,1,5,-9,2,6,5,3,5,8,9,7,9,3 ;16 lo dd 0 hi dd 15*4 Code:
fasm prog.asm
ld -o prog prog.o
|
|||
21 Jan 2009, 17:35 |
|
r22 22 Jan 2009, 17:20
|
|||
22 Jan 2009, 17:20 |
|
revolution 23 Jan 2009, 00:33
r22 wrote: RadixSort is the fastest for 32-bit integers. |
|||
23 Jan 2009, 00:33 |
|
r22 23 Jan 2009, 14:49
revolution wrote:
By that logic, you wouldn't be able to safely say "SSE instructions are the fastest for summing floating point numbers" Everything 'depends on many factors' but questioning a generally true statement because of that is ludicrous. How much proof do you require before you can accept a statement as "true enough". Q1: What's the GENERAL case for using an optimized or complex (better than bubble or insertion sort) sorting algorithm? A1: Sorting MANY UNsorted (possibly random) values. Q2: What's the GENERAL case for supporting the use of RadixSort? A2: Many UNsorted (possibly random) 32bit integers. Q3: Which algorithm has the better AVERAGE/GENERAL case? A3: RadixSort O(n), while QSort O(n log(n)), so RadixSort. Q4: Is there empirical evidence that supports using one algorithm over the other for 32bit integers? A4: Yes, the board topic linked above shows timings of QSort implementations vs RadixSort for unsigned 32bit integers. I think saying "RadixSort is faster for 32bit integers" should be generally accepted, whether it's 100% true in every case and corner case is irrelevant to its accuracy as a statement. "The sky is blue" What about clouds and fog and other planets? "There are 365 days in a year" What about leap days/seconds 365.24...? "Light is the fastest phenomena" What about frequencies and media, is it in a vocuum or not. "Cheetah are the fastest animals" What about a guy in a rocket, he's basically an animal and he's faster than a cheetah? What if the cheetah has a broken leg? HOWEVER debating the semantics of the word 'fastest' and the 'depends on many factors' argument is entertaining in its own right |
|||
23 Jan 2009, 14:49 |
|
revolution 23 Jan 2009, 15:04
The big O notation does not take into account the implementation details, just the complexity. This notation is insufficient to give a speed estimate of any particular implementation or case of application.
Example: If I need to sort two 32bit values I can do a single comparison and swap if they are in the wrong order, a bubble sort. Using some other more advanced sort method will usually involve some form of overhead with the variable setup or memory initialisation etc. before applying the sort algorithm. Both radix sort and qsort will be slower than bubble sort in this case. Another-example: If I need to sort 100 32bit values but the values are expected to be in almost sorted order then again a simple bubble sort may well turn out to be superior in speed simply because of the case it applies to. A-third-example: If I need to sort 1000 values but no expected order is known (random ordering). Now we expect a bubble sort to be rather slow and decide to use either qsort or radix sort. This is where we have to test to see which is faster. Radix sort requires more overhead and setup and after doing such things may turn out to be slower than qsort with the smaller overhead. These fixed time overheads may overwhelm the algorithmic advantage for smallish value sets. A-fourth-example: If I need to sort 1000000 values. Now we may find the algorithmic advantage of radix sort is enough to overcome the extra overheads and will beat all other methods. It is situation dependant. No algorithm can be best in all cases. [edit] Oh, I forgot to mention that the general case is meant in the context of the app that the sort algo is used within. Some programs may only ever require sorting no more than 50 values at a time, others may require sorting millions. So even the general case is situation dependant. A good example of how the Big O notation thing can be misleading is the AKS primality proving algorithm. It "should" be the fastest method available if one looks at the Big O complexity given. But in a practical situation it is very slow because of the implementation details that it requires. The break even point is not known and would probably be in the trillions+ of digits, or higher, range. |
|||
23 Jan 2009, 15:04 |
|
< Last Thread | Next Thread > |
Forum Rules:
|
Copyright © 1999-2024, Tomasz Grysztar. Also on GitHub, YouTube.
Website powered by rwasa.