revolution
LSD = Least Significant Digit

The meaning of digit could be decimal or any other base you choose up to 0xFFFFFFFF (2^32-1)
22 Feb 2010, 10:37
serfasm wrote:
How can divide a dqword(128bit) by a dword?

Another sample, showing arbitrary-precision division, can be found in the topic "your thoughts on division..."
23 Feb 2010, 19:09
Thanks for the help guys:
Code:
```
; edx:dqword[multiplier] = dqword[multiplier] * dword InstantMultiplier.
; Unsigned 128-bit x 32-bit multiplication, performed in place.
; In:    [esp+4] = multiplier        - pointer to the dqword (four little-endian dwords)
;        [esp+8] = InstantMultiplier - 32-bit unsigned factor
; Out:   dqword[multiplier] = low 128 bits of the product
;        edx = bits 128..159 of the product (the overflow dword)
;        CF  = 1 if edx <> 0, otherwise 0
; Saved: eax, ebx, edi. All other flags destroyed.
proc muldq; multiplier:dword, InstantMultiplier:dword
xchg    edi,[esp+4]             ; edi = multiplier, caller's edi parked in the argument slot
push    eax ebx                 ; InstantMultiplier is now found at [esp+16]

; Walk the limbs from least to most significant. ebx carries the high half
; of each partial product into the next limb. "adc edx,0" cannot overflow:
; the high half of a 32x32 product is at most 0xFFFFFFFE.
mov     eax,[edi]
mul     dword[esp+16]
mov     [edi],eax
mov     ebx,edx

mov     eax,[edi+4]
mul     dword[esp+16]
add     eax,ebx
adc     edx,0
mov     [edi+4],eax
mov     ebx,edx

mov     eax,[edi+8]
mul     dword[esp+16]
add     eax,ebx
adc     edx,0
mov     [edi+8],eax
mov     ebx,edx

mov     eax,[edi+12]
mul     dword[esp+16]
add     eax,ebx
adc     edx,0
mov     [edi+12],eax

; Double negation leaves edx unchanged and sets CF exactly when edx <> 0.
neg     edx
neg     edx

pop     ebx eax                 ; pop/mov/ret do not modify flags, CF survives
mov     edi,[esp+4]
ret     8
endp

; dqword[to] = dqword[to] + dqword[from].
; 128-bit addition, performed in place on [to].
; In:    [esp+4] = to   - pointer to the destination dqword
;        [esp+8] = from - pointer to the dqword that is added
; Out:   CF = carry out of bit 127; other flags destroyed.
; Saved: eax, esi, edi.
; NOTE(review): the "proc" line was missing from this listing; the name adddq
; is reconstructed from the naming of the sibling procs - confirm against callers.
proc adddq; to:dword, from:dword
xchg    edi,[esp+4]             ; edi = to, caller's edi parked in the argument slot
xchg    esi,[esp+8]             ; esi = from, caller's esi parked in the argument slot
push    eax

; mov does not touch the flags, so the carry chain runs through all four limbs.
mov     eax,[esi]
add     [edi],eax
mov     eax,[esi+4]
adc     [edi+4],eax
mov     eax,[esi+8]
adc     [edi+8],eax
mov     eax,[esi+12]
adc     [edi+12],eax

pop     eax
mov     esi,[esp+8]
mov     edi,[esp+4]
ret     8
endp

; dqword[value] = neg dqword[value].
; Two's-complement negation in place, computed as not(value - 1).
; In:    [esp+4] = value - pointer to the dqword
; Saved: eax. flags destroyed.
proc negdq; value:dword
xchg    eax,[esp+4]             ; eax = value, caller's eax parked in the argument slot

; Every limb is complemented as soon as the borrow has passed through it.
; "not" leaves the flags alone, so the sub/sbb borrow chain is undisturbed.
sub     dword[eax],1
not     dword[eax]
sbb     dword[eax+4],0
not     dword[eax+4]
sbb     dword[eax+8],0
not     dword[eax+8]
sbb     dword[eax+12],0
not     dword[eax+12]

mov     eax,[esp+4]             ; restore caller's eax
ret     4
endp

; dqword[minuend] = dqword[minuend] - dqword[subtrahend].
; 128-bit subtraction, performed in place on [minuend] with a sub/sbb chain.
; The subtrahend is only read, so both pointers may refer to the same dqword
; (the result is then zero).
; In:    [esp+4] = minuend    - pointer to the dqword that receives the difference
;        [esp+8] = subtrahend - pointer to the dqword that is subtracted
; Out:   CF = borrow out of bit 127; other flags destroyed.
; Saved: eax, esi, edi.
proc subdq ;minuend:dword, subtrahend:dword
xchg    edi,[esp+4]             ; edi = minuend, caller's edi parked in the argument slot
xchg    esi,[esp+8]             ; esi = subtrahend, caller's esi parked in the argument slot
push    eax

; mov does not touch the flags, so the borrow chain runs through all four limbs.
mov     eax,[esi]
sub     [edi],eax
mov     eax,[esi+4]
sbb     [edi+4],eax
mov     eax,[esi+8]
sbb     [edi+8],eax
mov     eax,[esi+12]
sbb     [edi+12],eax

pop     eax
mov     esi,[esp+8]
mov     edi,[esp+4]
ret     8
endp

; dqword[dividend] = dqword[dividend] div dword InstantDivisor.
; Unsigned 128-bit by 32-bit long division, performed in place.
; In:    [esp+4] = dividend       - pointer to the dqword
;        [esp+8] = InstantDivisor - 32-bit unsigned divisor
; Out:   dqword[dividend] = quotient, edx = remainder.
; Saved: eax, ecx, edi. flags destroyed.
proc divdq; dividend:dword, InstantDivisor:dword
xchg    edi,[esp+4]             ; edi = dividend, caller's edi parked in the argument slot
push    eax ecx                 ; InstantDivisor is now found at [esp+16]

mov     ecx,3                   ; limb index, most significant limb first
xor     edx,edx                 ; nothing carried into the top limb
.limb:
mov     eax,[edi+ecx*4]
div     dword[esp+16]           ; edx:eax / divisor; remainder feeds the next lower limb
mov     [edi+ecx*4],eax
dec     ecx
jns     .limb                   ; continue until limb 0 has been processed

pop     ecx eax
mov     edi,[esp+4]
ret     8
endp

; [buf41byte] = ascii dqword[value].
; Writes a sign character, 39 zero-padded decimal digits and a terminating 0
; byte (41 bytes in total) to the buffer.
; In:    [esp+4]  = value     - pointer to the dqword (not modified, a stack copy is used)
;        [esp+8]  = signed?   - low byte 0: treat value as unsigned;
;                               non-zero: treat bit 127 as the sign
;        [esp+12] = buf41byte - pointer to a buffer of at least 41 bytes
; Saved: all registers and flags.
proc dq2a; value:dword, signed?:dword, buf41byte:dword
xchg    esi,[esp+4]             ; esi = value
xchg    eax,[esp+8]             ; eax = signed?
xchg    edi,[esp+12]            ; edi = buf41byte
push    ecx edx
pushfd

push    dword[esi+12]           ; working copy of the value, low dword ends up at [esp]
push    dword[esi+8]
push    dword[esi+4]
push    dword[esi]
cmp     al,0
mov     al,'+'                  ; mov keeps the flags of the cmp intact
je      .pos
test    byte[esp+15],10000000b  ; bit 127 of the copy
jz      .pos
mov     al,'-'
stdcall negdq,esp               ; work on the magnitude
.pos: cld
stosb                           ; buf[0] = sign, edi = buf+1
add     edi,39                  ; edi = buf+40, the terminator position
std                             ; from here on the buffer is filled backwards
mov     ecx,39
mov     al,ch                   ; ch = 0
stosb                           ; buf[40] = 0, edi = buf+39
mov     esi,esp
.get: stdcall divdq,esi,10      ; edx = next least significant decimal digit
lea     eax,[edx+'0']
stosb
loop    .get

add     esp,16                  ; release the working copy before restoring state
popfd                           ; also restores the caller's direction flag
pop     edx ecx
mov     esi,[esp+4]
mov     eax,[esp+8]
mov     edi,[esp+12]
ret     12
endp

; dqword[dest] = dqword ascii[a]; use as many ' ' as you want.
; aActiveCount is ascii string length excluding #0(etc) character.
; flags saved.
; The saved flags image is created with CF set (stc before pushfd); only the
; .drop_cf path clears that bit again, so after popfd CF=0 is produced on the
; .drop_cf path and CF=1 on the empty-string path through ._exit.
; NOTE(review): this listing looks incomplete - the label .exit is jumped to
; but never defined, no instruction loads a character into al, nothing jumps
; back to handle the next character, and the 16 bytes reserved with
; "sub esp,16" plus the 16 bytes pushed by .byte2dq are not released in the
; code shown. Recover the missing lines before assembling.
proc a2dq; a:dword, aActiveCount:dword, dest:dword, signed?:dword
stc
pushfd                          ; arguments are now 4 bytes further up the stack
xchg    ecx,[esp+12]            ; ecx = aActiveCount
test    ecx,ecx
jz      ._exit                  ; empty string: leave with CF still set in the saved flags
xchg    esi,[esp+8]             ; esi = a
xchg    edi,[esp+16]            ; edi = dest
xchg    ebx,[esp+20]            ; ebx = signed?
shl     ebx,31                  ; bit 0 of signed? moves to bit 31; bl and bh become 0
push    eax edx ebp
; Zero the 16 bytes at dest and point edi back at its start.
cld
xor     eax,eax
stosd
stosd
stosd
stosd
sub     edi,16
sub     esp,16                  ; reserve 16 bytes of stack
std
lea     esi,[esi+ecx-1]         ; esi = address of the last character
; NOTE(review): al is still 0 from the xor above; presumably a character load
; (e.g. lodsb under a loop label) belongs here - confirm against the original.
cmp     al,' '
je      .continue
cmp     al,'+'
jne     .not_plus
.but_minus:
xchg    bh,al                   ; bh = sign character, al = previous content of bh
test    al,al
jnz     .exit                   ; bh was already occupied
jmp     .continue
.not_plus:
cmp     al,'-'
jne     .not_minus
bt      ebx,31                  ; '-' is only accepted when the signed? bit is set
jnc     .exit
jmp     .but_minus
.not_minus:
sub     al,'0'
cmp     al,9
ja      .exit                   ; not a decimal digit
cmp     bl,39                   ; bl is incremented once per digit below
je      .exit
call    .byte2dq                ; leaves the digit as a 16-byte value on the stack
; NOTE(review): the result of this test is not consumed by the code shown; with
; bl = 0 the loop below would start from ecx = 0 - a conditional jump seems to be missing.
test    bl,bl
mov     ebp,esp                 ; ebp -> the 16-byte digit value
push    ecx
movzx   ecx,bl
.mul: stdcall muldq,ebp,10      ; repeated bl times
loop    .mul
pop     ecx
jc      .exit                   ; CF as left by the last muldq (loop and pop keep flags)
inc     bl
.continue:
test    bl,bl
jz      .exit                   ; bl = 0

bt      ebx,31
jnc     .drop_cf                ; signed? bit clear: no sign handling
rcr     ecx,1                   ; CF is 1 here, so bit 31 of ecx gets set
                                ; NOTE(review): presumably ecx is 0 at this point,
                                ; giving the mask 0x80000000 - confirm
test    [edi+12],ecx
jnz     .positive_overflow?
cmp     bh,'-'
jne     .drop_cf
stdcall negdq,edi
jmp     .drop_cf
.positive_overflow?:
cmp     bh,'-'
jne     .exit
cmp     [edi+12],ecx
jne     .exit
shl     ecx,1
cmp     [edi+8],ecx
jnz     .exit
cmp     [edi+4],ecx
jnz     .exit
cmp     [edi],ecx
jnz     .exit

.drop_cf:
; 16 reserved bytes + 3 saved registers = 28: this is the low byte of the
; flags image stored by pushfd. Its bit 0 (CF) was set by stc, so dec clears it.
dec     byte[esp+28]
; NOTE(review): in the code shown esp still points at the 16 reserved bytes here.
pop     ebp edx eax
mov     esi,[esp+8]
mov     edi,[esp+16]
mov     ebx,[esp+20]
._exit:
mov     ecx,[esp+12]
popfd
ret     16
; Local helper: replaces its own return address on the stack with the 16-byte
; value 0:0:0:al (edx is zeroed, then dl = al) and continues at the caller via ebp.
.byte2dq:
pop     ebp
xor     edx,edx
push    edx
push    edx
push    edx
mov     dl,al
push    edx
jmp     ebp
endp

```
24 Feb 2010, 11:39
serfasm: A dqword (16 byte) x dqword (16 byte) multiply would have a 32 byte result size. It requires 16 (4*4) cross multiplies. Do you want to try and code it?

Also, you can make the adder more efficient:
Code:
```
; result = first + second (128-bit); first, second and result are placeholder addresses.
mov eax,[first+0]
mov ebx,[first+4]
mov ecx,[first+8]
mov edx,[first+12]
add eax,[second+0]      ; lowest dword starts the carry chain
adc ebx,[second+4]
adc ecx,[second+8]
adc edx,[second+12]
mov [result+0],eax
mov [result+4],ebx
mov [result+8],ecx
mov [result+12],edx
```
Or if the result is the same as the first source:
Code:
```
; first = first + second (128-bit); first and second are placeholder addresses.
mov eax,[second+0]
mov ebx,[second+4]
mov ecx,[second+8]
mov edx,[second+12]
add [first+0],eax       ; lowest dword starts the carry chain
adc [first+4],ebx
adc [first+8],ecx
adc [first+12],edx
```
24 Feb 2010, 13:32
revolution, I'll look through your proposal a bit later; optimizing is of interest to me. Below is a dq2a that generates a more readable number by placing a space (0x20) between every group of three digits:

Code:
```; [buf53bytes] = ascii dqword[value].
; flags saved.
; Writes a sign character, 39 zero-padded decimal digits with a space between
; every group of three digits (12 spaces) and a terminating 0 byte: 53 bytes.
; In:    [esp+4]  = value      - pointer to the dqword (not modified, a stack copy is used)
;        [esp+8]  = signed?    - low byte 0: treat value as unsigned;
;                                non-zero: treat bit 127 as the sign
;        [esp+12] = buf53bytes - pointer to a buffer of at least 53 bytes
; Saved: all registers and flags.
proc dq2a; value:dword, signed?:dword, buf53bytes:dword
xchg    esi,[esp+4]             ; esi = value
xchg    eax,[esp+8]             ; eax = signed?
xchg    edi,[esp+12]            ; edi = buf53bytes
push    ebx ecx edx
pushfd

push    dword[esi+12]           ; working copy of the value, low dword ends up at [esp]
push    dword[esi+8]
push    dword[esi+4]
push    dword[esi]
cmp     al,0
mov     al,'+'                  ; mov keeps the flags of the cmp intact
je      .pos
test    byte[esp+15],10000000b  ; bit 127 of the copy
jz      .pos
mov     al,'-'
stdcall negdq,esp               ; work on the magnitude
.pos: cld
stosb                           ; buf[0] = sign, edi = buf+1
add     edi,39+12               ; edi = buf+52, the terminator position
std                             ; from here on the buffer is filled backwards
mov     ecx,39+12                ;2^128=10^x; x=log(10;2)*128~38 -> 39 chars + 12 spaces between every triple
mov     al,ch                   ; ch = 0
stosb                           ; buf[52] = 0, edi = buf+51
mov     esi,esp
mov     bx,' ' shl 8 or 39+12-3 ; bh = separator, bl = countdown value of the next separator
.get: cmp    cl,bl
jne     .not_3rd
sub     bl,1+3                  ; next separator follows after three more digits
mov     al,bh
jmp     .3rd
.not_3rd:
stdcall divdq,esi,10            ; edx = next least significant decimal digit
lea     eax,[edx+'0']
.3rd:
stosb
loop    .get

add     esp,16                  ; release the working copy before restoring state
popfd                           ; also restores the caller's direction flag
pop     edx ecx ebx
mov     esi,[esp+4]
mov     eax,[esp+8]
mov     edi,[esp+12]
ret     12
endp
```
26 Feb 2010, 07:56
revolution wrote:

A dqword (16 byte) x dqword (16 byte) multiply would have a 32 byte result size. It requires 16 (4*4) cross multiplies. Do you want to try and code it?

simd?

revolution:
Code:
```
; dqword[to] = dqword[to] + dqword[from].
; Register-only variant: the four working registers are swapped with the
; destination, the source is added to them, and they are swapped back.
; In:    [esp+4] = to, [esp+8] = from (pointers to dqwords; they must not refer
;        to the same dqword, because [to] temporarily holds the caller's registers)
; Out:   CF = carry out of bit 127; other flags destroyed.
; Saved: eax, ebx, ecx, edx, edi.
; NOTE(review): the "proc" line was missing from this listing; the name adddq
; is reconstructed from the naming of the sibling procs - confirm.
proc adddq; to:dword, from:dword
xchg    edi,[esp+4]             ; edi = to, caller's edi parked in the argument slot
call    .revo                   ; registers = [to], [to] = caller's registers
xchg    edi,[esp+8]             ; edi = from, "to" parked in the second argument slot
add     eax,[edi]
adc     ebx,[edi+4]
adc     ecx,[edi+8]
adc     edx,[edi+12]
mov     edi,[esp+8]             ; edi = to again
call    .revo                   ; [to] = sum, caller's registers restored
mov     edi,[esp+4]
ret     8
; Swap eax, ebx, ecx, edx with the dqword at [edi]; flags are not modified.
.revo:xchg    eax,[edi]
xchg    ebx,[edi+4]
xchg    ecx,[edi+8]
xchg    edx,[edi+12]
ret     0
endp
```

edit
There is also an algorithm for numbers of unlimited length (chains of dwords).
It is partly in Russian; contact me whenever you need a translation, or just use http://translate.google.com:
http://wasm.ru/forum/attachment.php?item=4023
source code:
http://wasm.ru/forum/viewtopic.php?id=37619, post #9
good luck

26 Feb 2010, 08:03
