flat assembler
Message board for the users of flat assembler.

 Index > Main > how DIV works? Goto page Previous  1, 2
Author
 Thread
revolution
When all else fails, read the source

Joined: 24 Aug 2004
Posts: 20212
Location: In your JS exploiting you and your system
revolution 22 Feb 2010, 10:37
LSD = Least Significant Digit

The meaning of digit could be decimal or any other base you choose up to 0xFFFFFFFF (2^32-1)
22 Feb 2010, 10:37
Picnic

Joined: 05 May 2007
Posts: 1388
Location: Piraeus, Greece
Picnic 23 Feb 2010, 19:09
serfasm wrote:
How can divide a dqword(128bit) by a dword?

Another sample showing arbitrary-precision division can be found in the topic "your thoughts on division..."
23 Feb 2010, 19:09
edemko

Joined: 18 Jul 2009
Posts: 549
edemko 24 Feb 2010, 11:39
Thanks for the help guys:
Code:
```
; edx:dqword[multiplier] = dqword[multiplier] * dword InstantMultiplier.
; Unsigned multiply of the 128-bit value at [multiplier] (least significant
; dword first) by a 32-bit factor. The low 128 bits of the product replace the
; value in memory, the bits above them are returned in edx.
; eax, ebx and edi are preserved; flags are modified.
proc muldq; multiplier:dword, InstantMultiplier:dword
        xchg    edi,[esp+4]             ; edi = multiplier pointer, the slot keeps the caller's edi
        push    eax ebx                 ; InstantMultiplier is found at [esp+16] from here on

        ; dword 0: nothing to carry in yet
        mov     eax,[edi]
        mul     dword[esp+16]
        mov     [edi],eax
        mov     ebx,edx                 ; ebx = carry into the next dword

        ; dword 1
        mov     eax,[edi+4]
        mul     dword[esp+16]
        add     eax,ebx
        adc     edx,0                   ; high half of a 32x32 product is at most 2^32-2, cannot wrap
        mov     [edi+4],eax
        mov     ebx,edx

        ; dword 2
        mov     eax,[edi+8]
        mul     dword[esp+16]
        add     eax,ebx
        adc     edx,0
        mov     [edi+8],eax
        mov     ebx,edx

        ; dword 3, ebx ends up holding bits 128..159 of the product
        mov     eax,[edi+12]
        mul     dword[esp+16]
        add     eax,ebx
        adc     edx,0
        mov     [edi+12],eax
        mov     ebx,edx

        neg     ebx
        neg     ebx                     ; ebx unchanged, CF = 1 when ebx is nonzero
        mov     edx,ebx

        pop     ebx eax
        mov     edi,[esp+4]             ; give edi back to the caller
        ret     8
endp

; dqword[to] = dqword[to] + dqword[from].
; Both operands are 128-bit values stored least significant dword first.
; On return CF holds the carry out of bit 127 (1 = the sum did not fit);
; the other arithmetic flags are destroyed. eax, esi and edi are preserved.
; [to] and [from] may be the same address (the value is doubled).
proc adddq; to:dword, from:dword
xchg    edi,[esp+4]             ; edi = to, the slot keeps the caller's edi
xchg    esi,[esp+8]             ; esi = from, the slot keeps the caller's esi
push    eax

; One add/adc chain: the carry travels through all four dwords and the
; carry of the topmost adc is the carry of the whole 128-bit addition.
; mov does not touch the flags, so loading between the adc steps is safe.
mov     eax,[esi]
add     [edi],eax
mov     eax,[esi+4]
adc     [edi+4],eax
mov     eax,[esi+8]
adc     [edi+8],eax
mov     eax,[esi+12]
adc     [edi+12],eax

pop     eax                     ; pop/mov/ret leave CF untouched for the caller
mov     esi,[esp+8]
mov     edi,[esp+4]
ret     8
endp

; dqword[value] = neg dqword[value].
; Two's complement negation of the 128-bit value at [value] (least significant
; dword first), computed as not(value - 1). eax is preserved, flags are destroyed.
proc negdq; value:dword
        push    eax
        mov     eax,[esp+8]             ; eax = value pointer (argument sits above the saved eax)

        ; "not" leaves the flags alone, so the borrow still ripples from one
        ; sbb to the next even with the complements placed in between
        sub     dword[eax],1
        not     dword[eax]
        sbb     dword[eax+4],0
        not     dword[eax+4]
        sbb     dword[eax+8],0
        not     dword[eax+8]
        sbb     dword[eax+12],0
        not     dword[eax+12]

        pop     eax
        ret     4
endp

; dqword[minuend] = dqword[minuend] - dqword[subtrahend].
; Both operands are 128-bit values stored least significant dword first.
; The subtrahend is only read, never written, and the two pointers may be
; equal (the result is then 0).
; On return CF holds the borrow out of bit 127; the other arithmetic flags are
; destroyed. eax, esi and edi are preserved.
proc subdq ;minuend:dword, subtrahend:dword
xchg    edi,[esp+4]             ; edi = minuend, the slot keeps the caller's edi
xchg    esi,[esp+8]             ; esi = subtrahend, the slot keeps the caller's esi
push    eax

; One sub/sbb chain; mov does not touch the flags, so the borrow survives
; the loads between the steps.
mov     eax,[esi]
sub     [edi],eax
mov     eax,[esi+4]
sbb     [edi+4],eax
mov     eax,[esi+8]
sbb     [edi+8],eax
mov     eax,[esi+12]
sbb     [edi+12],eax

pop     eax
mov     esi,[esp+8]
mov     edi,[esp+4]
ret     8
endp

; dqword[dividend] = dqword[dividend] div dword InstantDivisor.
; Unsigned division of the 128-bit value at [dividend] (least significant
; dword first) by a 32-bit divisor. The quotient replaces the value in memory
; and the remainder is returned in edx. eax and edi are preserved, flags are
; destroyed.
proc divdq; dividend:dword, InstantDivisor:dword
        xchg    edi,[esp+4]             ; edi = dividend pointer, the slot keeps the caller's edi
        push    eax                     ; InstantDivisor is found at [esp+12] from here on

        ; Schoolbook long division from the most significant dword down:
        ; the remainder of each step (edx) becomes the high half of the next
        ; 64-bit dividend edx:eax.
        xor     edx,edx
        irp offs, 12,8,4,0
        {
        mov     eax,[edi+offs]
        div     dword[esp+12]
        mov     [edi+offs],eax
        }

        pop     eax
        mov     edi,[esp+4]             ; give edi back to the caller
        ret     8
endp

; [buf41byte] = ascii dqword[value].
; Writes a sign character ('+' or '-'), 39 decimal digits with leading zeros
; and a terminating zero byte: 41 bytes in total. '-' is produced only when
; the low byte of signed? is nonzero and bit 127 of the value is set; the
; magnitude is then printed. The value at [value] itself is not modified.
; All registers and the flags are preserved.
proc dq2a; value:dword, signed?:dword, buf41byte:dword
        xchg    esi,[esp+4]             ; esi = value pointer
        xchg    eax,[esp+8]             ; al  = signed? request
        xchg    edi,[esp+12]            ; edi = output buffer
        push    ecx edx
        pushfd

        ; working copy on the stack, pushed high dword first so that
        ; [esp] is the least significant dword
        push    dword[esi+12]
        push    dword[esi+8]
        push    dword[esi+4]
        push    dword[esi]

        test    al,al
        jz      .plus                   ; unsigned conversion requested
        test    byte[esp+15],80h        ; top bit of the copy
        jz      .plus
        stdcall negdq,esp               ; print the magnitude of a negative value
        mov     al,'-'
        jmp     .sign_ready
.plus:  mov     al,'+'
.sign_ready:
        cld
        stosb                           ; buf[0] = sign, edi -> buf[1]

        mov     byte[edi+39],0          ; buf[40] = terminator
        add     edi,38                  ; edi -> buf[39], the last digit position
        std                             ; digits come out least significant first, fill backwards
        mov     esi,esp
        mov     ecx,39
.digit: stdcall divdq,esi,10            ; copy = copy div 10, edx = next digit
        mov     al,dl
        add     al,'0'
        stosb
        loop    .digit
        add     esp,16                  ; drop the working copy

        popfd                           ; also restores the caller's direction flag
        pop     edx ecx
        mov     esi,[esp+4]
        mov     eax,[esp+8]
        mov     edi,[esp+12]
        ret     12
endp

; dqword[dest] = dqword ascii[a]; use as many ' ' as you want.
; aActiveCount is ascii string length excluding #0(etc) character.
; The text is scanned from its last character towards the first. Accepted
; characters are ' ' (ignored), decimal digits (at most 39), a single '+', and
; a single '-' when bit 0 of signed? is set.
; Result: CF = 0 on success, CF = 1 on failure (empty string, no digit, bad
; character, second sign, too many digits, value out of range). All other
; flags and all registers are returned as they were on entry.
; When aActiveCount is nonzero [dest] is cleared first, so after a failure it
; holds a partial sum, not the original contents.
proc a2dq; a:dword, aActiveCount:dword, dest:dword, signed?:dword
stc                             ; the saved flags image starts out as "failed"
pushfd
xchg    ecx,[esp+12]            ; ecx = aActiveCount
test    ecx,ecx
jz      ._exit
xchg    esi,[esp+8]             ; esi = a
xchg    edi,[esp+16]            ; edi = dest
xchg    ebx,[esp+20]            ; ebx = signed?
shl     ebx,31                  ; bit 31 = signed request, bh = sign seen (0 = none), bl = digit count
push    eax edx ebp
cld
xor     eax,eax
stosd                           ; dest = 0
stosd
stosd
stosd
sub     edi,16
sub     esp,16                  ; 16-byte scratch dqword for the current digit term
std
lea     esi,[esi+ecx-1]         ; start at the last character
.load:lodsb
cmp     al,' '
je      .continue
cmp     al,'+'
jne     .not_plus
.but_minus:
xchg    bh,al                   ; remember the sign, al = previously seen sign
test    al,al
jnz     .exit                   ; more than one sign character
jmp     .continue
.not_plus:
cmp     al,'-'
jne     .not_minus
bt      ebx,31
jnc     .exit                   ; '-' is not allowed in unsigned mode
jmp     .but_minus
.not_minus:
sub     al,'0'
cmp     al,9
ja      .exit                   ; not a decimal digit
cmp     bl,39
je      .exit                   ; a 40th digit cannot fit
call    .byte2dq                ; scratch dqword = digit value
test    bl,bl
jz      .add                    ; units digit needs no scaling
mov     ebp,esp
push    ecx
movzx   ecx,bl
.mul: stdcall muldq,ebp,10      ; scratch = digit * 10^bl
loop    .mul
pop     ecx
; Only the last multiplication can exceed 128 bits (9*10^37 < 2^128 but
; d*10^38 >= 2^128 for d >= 4); muldq reports the excess in edx.
test    edx,edx
jnz     .exit
.add: stdcall adddq,edi,esp
jc      .exit                   ; adddq signalled a carry: the sum does not fit
inc     bl
.continue:
loop    .load
test    bl,bl
jz      .exit                   ; no digit found

bt      ebx,31
jnc     .drop_cf                ; unsigned: any 128-bit value is fine
rcr     ecx,1                   ; ecx was 0 and CF = 1 -> ecx = 80000000h
test    [edi+12],ecx
jnz     .positive_overflow?     ; magnitude >= 2^127
cmp     bh,'-'
jne     .drop_cf
stdcall negdq,edi
jmp     .drop_cf
.positive_overflow?:
cmp     bh,'-'
jne     .exit                   ; +2^127 or more cannot be represented
cmp     [edi+12],ecx            ; with '-' only exactly 2^127 is allowed, and
jne     .exit                   ; its bit pattern already is -2^127
shl     ecx,1                   ; ecx = 0
cmp     [edi+8],ecx
jnz     .exit
cmp     [edi+4],ecx
jnz     .exit
cmp     [edi],ecx
jnz     .exit

.drop_cf:
dec     byte[esp+28]            ; saved flags image: bit 0 (CF) was set by stc, dec clears just that bit
.exit:add esp,16
pop     ebp edx eax
mov     esi,[esp+8]
mov     edi,[esp+16]
mov     ebx,[esp+20]
._exit:
mov     ecx,[esp+12]
popfd                           ; also restores the caller's direction flag
ret     16
; Replaces the 16-byte scratch area on top of the stack with the zero-extended
; byte in al. Called with "call", returns through ebp because the return
; address has to be moved out of the way of the rebuilt scratch area.
.byte2dq:
pop     ebp
add     esp,16
xor     edx,edx
push    edx
push    edx
push    edx
mov     dl,al
push    edx
jmp     ebp
endp


```
24 Feb 2010, 11:39
revolution
When all else fails, read the source

Joined: 24 Aug 2004
Posts: 20212
Location: In your JS exploiting you and your system
revolution 24 Feb 2010, 13:32
serfasm: A dqword (16 byte) x dqword (16 byte) multiply would have a 32 byte result size. It requires 16 (4*4) cross multiplies. Do you want to try and code it?

Also, you can make the adder more efficient:
Code:
```; result = first + second, 128 bits held in edx:ecx:ebx:eax (eax = lowest dword)
; mov does not change the flags, so the carry survives the loads between the adc steps
mov eax,[first+0]
add eax,[second+0]              ; lowest dword starts the carry chain
mov ebx,[first+4]
adc ebx,[second+4]
mov ecx,[first+8]
adc ecx,[second+8]
mov edx,[first+12]
adc edx,[second+12]             ; CF = carry out of the 128-bit sum
; store only after every source dword has been read
mov [result+0],eax
mov [result+4],ebx
mov [result+8],ecx
mov [result+12],edx    ```
Or if the result is the same as the first source:
Code:
```; first = first + second; all of second is read before first is written
mov edx,[second+12]
mov ecx,[second+8]
mov ebx,[second+4]
mov eax,[second+0]
add [first+0],eax               ; lowest dword starts the carry chain
adc [first+4],ebx
adc [first+8],ecx
adc [first+12],edx    ```
24 Feb 2010, 13:32
edemko

Joined: 18 Jul 2009
Posts: 549
edemko 26 Feb 2010, 07:56
revolution, I'll look through your proposal a bit later: optimizing is of interest to me. Below is a dq2a that generates a more readable number by placing a space (0x20) between every group of three digits:

Code:
```; [buf53bytes] = ascii dqword[value].
; Writes a sign character ('+' or '-'), 39 decimal digits with leading zeros
; in 13 groups of three separated by a space, and a terminating zero byte:
; 1 + 39 + 12 + 1 = 53 bytes. '-' is produced only when the low byte of
; signed? is nonzero and bit 127 of the value is set; the magnitude is then
; printed. The value at [value] itself is not modified.
; All registers and the flags are preserved.
proc dq2a; value:dword, signed?:dword, buf53bytes:dword
        xchg    esi,[esp+4]             ; esi = value pointer
        xchg    eax,[esp+8]             ; al  = signed? request
        xchg    edi,[esp+12]            ; edi = output buffer
        push    ebx ecx edx
        pushfd

        ; working copy on the stack, pushed high dword first so that
        ; [esp] is the least significant dword
        push    dword[esi+12]
        push    dword[esi+8]
        push    dword[esi+4]
        push    dword[esi]

        test    al,al
        jz      .plus                   ; unsigned conversion requested
        test    byte[esp+15],80h        ; top bit of the copy
        jz      .plus
        stdcall negdq,esp               ; print the magnitude of a negative value
        mov     al,'-'
        jmp     .sign_ready
.plus:  mov     al,'+'
.sign_ready:
        cld
        stosb                           ; buf[0] = sign, edi -> buf[1]

        mov     byte[edi+39+12],0       ; buf[52] = terminator
        add     edi,39+12-1             ; edi -> buf[51], the last character position
        std                             ; characters are produced right to left
        mov     esi,esp
        mov     ecx,39+12               ; 2^128 < 10^39 -> 39 digits, plus 12 separators
        mov     bl,39+12-3              ; countdown value at which the next space is due
.next:  cmp     cl,bl
        je      .separator
        stdcall divdq,esi,10            ; copy = copy div 10, edx = next digit
        lea     eax,[edx+'0']
        jmp     .store
.separator:
        sub     bl,1+3                  ; one space plus three digits until the next one
        mov     al,' '
.store: stosb
        loop    .next
        add     esp,16                  ; drop the working copy

        popfd                           ; also restores the caller's direction flag
        pop     edx ecx ebx
        mov     esi,[esp+4]
        mov     eax,[esp+8]
        mov     edi,[esp+12]
        ret     12
endp
```
26 Feb 2010, 07:56
edemko

Joined: 18 Jul 2009
Posts: 549
edemko 26 Feb 2010, 08:03
revolution wrote:

A dqword (16 byte) x dqword (16 byte) multiply would have a 32 byte result size. It requires 16 (4*4) cross multiplies. Do you want to try and code it?

simd?

revolution:
Code:
```; dqword[to] = dqword[to] + dqword[from].
; Both operands are 128-bit values stored least significant dword first.
; On return CF holds the carry out of bit 127; the other arithmetic flags are
; destroyed. All registers are preserved.
; Only one scratch register is used and nothing but the sum is ever written to
; [to], so [from] may be the same address as [to] (the value is doubled).
proc adddq; to:dword, from:dword
xchg    edi,[esp+4]             ; edi = to, the slot keeps the caller's edi
xchg    esi,[esp+8]             ; esi = from, the slot keeps the caller's esi
push    eax
; mov does not touch the flags, so the carry survives the loads in between
mov     eax,[esi]
add     [edi],eax
mov     eax,[esi+4]
adc     [edi+4],eax
mov     eax,[esi+8]
adc     [edi+8],eax
mov     eax,[esi+12]
adc     [edi+12],eax
pop     eax                     ; pop/mov/ret leave CF untouched for the caller
mov     esi,[esp+8]
mov     edi,[esp+4]
ret     8
endp     ```

edit
There is also an algorithm for numbers of unlimited length (arbitrary-length chains of digits).
It is partly in Russian; contact me whenever you need a translation, or just use http://translate.google.com, ok?
file download link at wasm.ru:
http://wasm.ru/forum/attachment.php?item=4023
source code:
http://wasm.ru/forum/viewtopic.php?id=37619, post #9
good luck

 Description: dq test functions Download Filename: dq.rar Filesize: 2.36 KB Downloaded: 444 Time(s)

26 Feb 2010, 08:03
 Display posts from previous: All Posts1 Day7 Days2 Weeks1 Month3 Months6 Months1 Year Oldest FirstNewest First

 Jump to: Select a forum Official----------------AssemblyPeripheria General----------------MainTutorials and ExamplesDOSWindowsLinuxUnixMenuetOS Specific----------------MacroinstructionsOS ConstructionIDE DevelopmentProjects and IdeasNon-x86 architecturesHigh Level LanguagesProgramming Language DesignCompiler Internals Other----------------FeedbackHeapTest Area
Goto page Previous  1, 2

Forum Rules:
 You cannot post new topics in this forumYou cannot reply to topics in this forumYou cannot edit your posts in this forumYou cannot delete your posts in this forumYou cannot vote in polls in this forumYou cannot attach files in this forumYou can download files in this forum

Copyright © 1999-2024, Tomasz Grysztar. Also on GitHub, YouTube.

Website powered by rwasa.