When all else fails, read the source

revolution 22 Feb 2010, 10:37
LSD = Least Significant Digit

The meaning of digit could be decimal or any other base you choose up to 0xFFFFFFFF (2^32-1)
Picnic 23 Feb 2010, 19:09
serfasm wrote:
How can I divide a dqword (128-bit) by a dword?

Another sample showing arbitrary-precision division can be found in the topic "your thoughts on division...".
edemko 24 Feb 2010, 11:39
Thanks for the help guys:

; edx:dqword[multiplier] = dqword[multiplier] * dword InstantMultiplier.
; multiplier        - pointer to a 128-bit value stored as 4 dwords, least
;                     significant first; overwritten with the low 128 bits.
; InstantMultiplier - unsigned 32-bit factor, passed by value.
; Returns edx = the 32 bits of the product above bit 127.
; Multiplication is unsigned; flags are destroyed (the closing neg/neg pair
; leaves CF set exactly when edx is non-zero). eax, ebx, edi are preserved.
proc muldq; multiplier:dword, InstantMultiplier:dword
        xchg    edi,[esp+4]             ; edi = multiplier, caller's edi parked in the argument slot
        push    eax ebx
        xor     ebx,ebx                 ; ebx = dword carried into the next limb

; Walk the limbs from least to most significant. limb*factor + carry
; cannot exceed 64 bits, so one add/adc pair per limb absorbs the carry.
irp offs, 0,4,8,12
{
        mov     eax,[edi+offs]
        mul     dword[esp+16]           ; InstantMultiplier: +8 on entry, +16 after the two pushes
        add     eax,ebx
        adc     edx,0
        mov     [edi+offs],eax
        mov     ebx,edx
}

        neg     ebx
        neg     ebx                     ; value unchanged, CF = (ebx <> 0)
        mov     edx,ebx

        pop     ebx eax
        mov     edi,[esp+4]
        ret     8

; dqword[to] = dqword[to] + dqword[from].
; to, from - pointers to 128-bit values stored as 4 dwords, least significant
;            first. Both may point at the same value (to = to + to).
; CF = carry out of bit 127, so a caller can test it with jc; the remaining
; flags are destroyed. eax, esi, edi are preserved.
proc adddq; to:dword, from:dword
        xchg    edi,[esp+4]             ; edi = to, caller's edi parked in the argument slot
        xchg    esi,[esp+8]             ; esi = from, caller's esi parked likewise
        push    eax

; A single add/adc chain. mov does not touch the flags, so the carry moves
; straight from limb to limb and the CF left by the last adc is the real
; carry out of the whole addition. Each source limb is read before the
; matching destination limb is written, which keeps to == from correct.
        mov     eax,[esi]
        add     [edi],eax
        mov     eax,[esi+4]
        adc     [edi+4],eax
        mov     eax,[esi+8]
        adc     [edi+8],eax
        mov     eax,[esi+12]
        adc     [edi+12],eax

        pop     eax                     ; pop/mov/ret leave CF intact
        mov     esi,[esp+8]
        mov     edi,[esp+4]
        ret     8

; dqword[value] = neg dqword[value].
; value - pointer to a 128-bit number stored as 4 dwords, least significant
;         first; negated in place (two's complement).
; flags destroyed, eax preserved.
proc negdq; value:dword
        xchg    eax,[esp+4]             ; eax = value, caller's eax parked in the argument slot

; -x = not (x - 1): subtract one across all four limbs, then invert them.
        sub     dword[eax],1
irp offs, 4,8,12 { sbb dword[eax+offs],0 }
irp offs, 0,4,8,12 { not dword[eax+offs] }

        mov     eax,[esp+4]
        ret     4

; dqword[minuend] = dqword[minuend] - dqword[subtrahend].
; minuend, subtrahend - pointers to 128-bit values stored as 4 dwords, least
;                       significant first. The subtrahend is only read, and
;                       both pointers may refer to the same value (result 0).
; CF = borrow out of bit 127; the remaining flags are destroyed.
; eax, esi, edi are preserved.
proc subdq ;minuend:dword, subtrahend:dword
        xchg    edi,[esp+4]             ; edi = minuend, caller's edi parked in the argument slot
        xchg    esi,[esp+8]             ; esi = subtrahend, caller's esi parked likewise
        push    eax

; Direct sub/sbb chain instead of negate-add-negate: the subtrahend is never
; written, and each of its limbs is read before the matching minuend limb
; is stored, so identical pointers are handled correctly.
        mov     eax,[esi]
        sub     [edi],eax
        mov     eax,[esi+4]
        sbb     [edi+4],eax
        mov     eax,[esi+8]
        sbb     [edi+8],eax
        mov     eax,[esi+12]
        sbb     [edi+12],eax

        pop     eax                     ; pop/mov/ret leave CF intact
        mov     esi,[esp+8]
        mov     edi,[esp+4]
        ret     8

; dqword[dividend] = dqword[dividend] div dword InstantDivisor.
; dividend       - pointer to a 128-bit value stored as 4 dwords, least
;                  significant first; replaced by the quotient.
; InstantDivisor - unsigned 32-bit divisor, passed by value.
; Returns edx = remainder. Division is unsigned, flags destroyed,
; eax and edi preserved.
proc divdq; dividend:dword, InstantDivisor:dword
        xchg    edi,[esp+4]             ; edi = dividend, caller's edi parked in the argument slot
        push    eax

; Schoolbook long division from the top limb down: the remainder of each
; step stays in edx and becomes the high half of the next edx:eax dividend.
        xor     edx,edx
irp offs, 12,8,4,0
{
        mov     eax,[edi+offs]
        div     dword[esp+12]           ; InstantDivisor: +8 on entry, +12 after the push
        mov     [edi+offs],eax
}

        pop     eax
        mov     edi,[esp+4]
        ret     8

; [buf41byte] = ascii dqword[value].
; value     - pointer to a 128-bit number stored as 4 dwords, least significant
;             first. It is not modified: the conversion works on a stack copy.
; signed?   - low byte 0: print as unsigned ('+' sign); otherwise bit 127 of
;             the value selects '-' (and the copy is negated) or '+'.
; buf41byte - receives 41 bytes: the sign character, 39 decimal digits with
;             leading zeros, and a terminating 0.
; flags saved (pushfd/popfd around the body), all registers preserved.
proc dq2a; value:dword, signed?:dword, buf41byte:dword
        xchg    esi,[esp+4]             ; esi = value
        xchg    eax,[esp+8]             ; al  = signed?
        xchg    edi,[esp+12]            ; edi = buf41byte
        push    ecx edx
        pushfd                          ; honours "flags saved"; also restores DF on exit

        push    dword[esi+12]           ; working copy of the value at [esp..esp+15]
        push    dword[esi+8]
        push    dword[esi+4]
        push    dword[esi]
        cmp     al,0
        mov     al,'+'                  ; mov keeps the flags of the cmp above
        je      .pos
        test    byte[esp+15],10000000b  ; bit 127 of the copy
        jz      .pos
        mov     al,'-'
        stdcall negdq,esp               ; print the magnitude
  .pos: cld
        stosb                           ; buf[0] = sign
        mov     ecx,39                  ; 2^128 < 10^39 -> 39 digits
        add     edi,ecx                 ; edi -> buf[40]
        mov     al,ch                   ; al = 0
        std                             ; fill the buffer backwards from here on
        stosb                           ; buf[40] = terminator, edi -> buf[39]
        mov     esi,esp
  .get: stdcall divdq,esi,10            ; copy = copy div 10, edx = next digit
        lea     eax,[edx+'0']
        stosb                           ; least significant digit goes last in the buffer
        loop    .get
        add     esp,16                  ; drop the working copy

        popfd
        pop     edx ecx
        mov     esi,[esp+4]
        mov     eax,[esp+8]
        mov     edi,[esp+12]
        ret     12

; dqword[dest] = dqword ascii[a]; use as many ' ' as you want.
; aActiveCount is ascii string length excluding #0(etc) character.
; flags saved.
;
; NOTE(review): this listing looks incomplete. The labels ._exit, .continue,
; .not_plus, .not_minus, .but_minus, .load, .drop_cf, .positive_overflow? and
; .byte2dq are jumped to or called but never defined below, and no instruction
; that loads a character into al is present. The comments describe only what
; the visible instructions do; confirm against the original source before use.
proc a2dq; a:dword, aActiveCount:dword, dest:dword, signed?:dword
; NOTE(review): every argument offset below is 4 higher than the declared
; parameter order gives with only the return address on the stack
; (a=+4, aActiveCount=+8, dest=+12, signed?=+16) - presumably one dword
; (a flags image?) is pushed before this point; TODO confirm.
      xchg    ecx,[esp+12]
        test    ecx,ecx
; zero count: leave without touching anything else
     jz      ._exit
      xchg    esi,[esp+8]
 xchg    edi,[esp+16]
        xchg    ebx,[esp+20]
; bit 0 of the loaded flag moves to bit 31 of ebx; bl and bh become 0.
; Below, bl counts accepted digits and bh remembers a sign character.
        shl     ebx,31
      push    eax edx ebp
 xor     eax,eax
; NOTE(review): presumably dest is zeroed by stores that advance edi by 16
; before this rewind; those stores are not in the listing - TODO confirm.
       sub     edi,16
; 16 bytes of stack scratch for one digit expanded to a dqword
      sub     esp,16
; esi -> last character of the string: the text is consumed from its end
 lea     esi,[esi+ecx-1]
; --- character dispatch (al is expected to hold the current character) ---
        cmp     al,' '
    je      .continue
   cmp     al,'+'
    jne     .not_plus
; remember the sign in bh; a previously stored non-zero bh means a second sign -> exit
      xchg    bh,al
       test    al,al
       jnz     .exit
       jmp     .continue
       cmp     al,'-'
    jne     .not_minus
; '-' is accepted only when bit 31 of ebx (the signed? flag) is set
  bt      ebx,31
      jnc     .exit
       jmp     .but_minus
; anything else must be a decimal digit
     sub     al,'0'
    cmp     al,9
        ja      .exit
; at most 39 digits are accepted
       cmp     bl,39
       je      .exit
       call    .byte2dq
; first digit (bl = 0) is added as is; later digits are scaled by 10^bl first
    test    bl,bl
       jz      .add
        mov     ebp,esp
     push    ecx
 movzx   ecx,bl
  .mul: stdcall muldq,ebp,10
  loop    .mul
        pop     ecx
; dest += digit * 10^bl; relies on adddq reporting overflow through CF
  .add: stdcall adddq,edi,esp
    jc      .exit
       inc     bl
      loop    .load
; no digit seen at all -> exit
       test    bl,bl
       jz      .exit

; unsigned conversion: nothing more to check
   bt      ebx,31
      jnc     .drop_cf
; ecx is 0 after the loop and CF is 1 here, so ecx becomes 80000000h
    rcr     ecx,1
; bit 127 set in the magnitude: only acceptable for '-' with exactly 2^127
       test    [edi+12],ecx
        jnz     .positive_overflow?
 cmp     bh,'-'
    jne     .drop_cf
    stdcall negdq,edi
   jmp     .drop_cf
      cmp     bh,'-'
    jne     .exit
       cmp     [edi+12],ecx
        jne     .exit
; ecx = 0: the three lower dwords must all be zero
       shl     ecx,1
       cmp     [edi+8],ecx
 jnz     .exit
       cmp     [edi+4],ecx
 jnz     .exit
       cmp     [edi],ecx
   jnz     .exit

; NOTE(review): [esp+28] is the dword just above the three saved registers and
; the 16-byte scratch area - presumably a saved flags image whose bit 0 (CF)
; was set beforehand, so this dec clears it to signal success; TODO confirm.
        dec     byte[esp+28]
  .exit:add esp,16
      pop     ebp edx eax
 mov     esi,[esp+8]
 mov     edi,[esp+16]
        mov     ebx,[esp+20]
       mov     ecx,[esp+12]
       ret     16
; Helper reached by call (presumably the missing .byte2dq label): takes its
; return address into ebp, discards the 16-byte scratch, pushes 0,0,0 and the
; zero-extended al as a fresh dqword at [esp], then returns through ebp.
       pop     ebp
 add     esp,16
      xor     edx,edx
     push    edx
 push    edx
 push    edx
 mov     dl,al
       push    edx
 jmp     ebp

revolution 24 Feb 2010, 13:32
serfasm: A dqword (16 byte) x dqword (16 byte) multiply would have a 32 byte result size. It requires 16 (4*4) cross multiplies. Do you want to try and code it?

Also, you can make the adder more efficient:
; 128-bit add: result = first + second, each operand being 4 dwords with the
; least significant at offset 0. eax..edx hold the sum afterwards.
mov eax,[first+0]
mov ebx,[first+4]
mov ecx,[first+8]
mov edx,[first+12]
; one add followed by three adc: the carry ripples upward through CF
add eax,[second+0]
adc ebx,[second+4]
adc ecx,[second+8]
adc edx,[second+12]
; mov does not change the flags, so CF still holds the carry out of the top dword
mov [result+0],eax
mov [result+4],ebx
mov [result+8],ecx
mov [result+12],edx    
Or if the result is the same as the first source:
; 128-bit add in place: first = first + second (4 dwords each, least
; significant at offset 0). All of second is loaded before first is written.
mov eax,[second+0]
mov ebx,[second+4]
mov ecx,[second+8]
mov edx,[second+12]
; add/adc directly into memory; CF after the last adc is the carry out
add [first+0],eax
adc [first+4],ebx
adc [first+8],ecx
adc [first+12],edx    
edemko 26 Feb 2010, 07:56
revolution, I'll look through your proposal a bit later: optimizing is of interest to me. Below is a dq2a that generates a more readable number by placing a space (0x20) between every group of three digits:

; [buf53bytes] = ascii dqword[value].
; value      - pointer to a 128-bit number stored as 4 dwords, least
;              significant first. It is not modified: a stack copy is used.
; signed?    - low byte 0: print as unsigned ('+' sign); otherwise bit 127 of
;              the value selects '-' (and the copy is negated) or '+'.
; buf53bytes - receives 53 bytes: the sign character, 39 decimal digits with
;              leading zeros written as 13 groups of three separated by
;              12 spaces, and a terminating 0.
; flags saved (pushfd/popfd around the body), all registers preserved.
proc dq2a; value:dword, signed?:dword, buf53bytes:dword
        xchg    esi,[esp+4]             ; esi = value
        xchg    eax,[esp+8]             ; al  = signed?
        xchg    edi,[esp+12]            ; edi = buf53bytes
        push    ebx ecx edx
        pushfd                          ; honours "flags saved"; also restores DF on exit

        push    dword[esi+12]           ; working copy of the value at [esp..esp+15]
        push    dword[esi+8]
        push    dword[esi+4]
        push    dword[esi]
        cmp     al,0
        mov     al,'+'                  ; mov keeps the flags of the cmp above
        je      .pos
        test    byte[esp+15],10000000b  ; bit 127 of the copy
        jz      .pos
        mov     al,'-'
        stdcall negdq,esp               ; print the magnitude
  .pos: cld
        stosb                           ; buf[0] = sign
        mov     ecx,39+12               ;2^128=10^x; x=log(10;2)*128~38 -> 39 chars + 12 spaces between every triple
        add     edi,ecx                 ; edi -> buf[52]
        mov     al,ch                   ; al = 0
        std                             ; fill the buffer backwards from here on
        stosb                           ; buf[52] = terminator, edi -> buf[51]
        mov     esi,esp
        mov     bx,' ' shl 8 or 39+12-3 ; bh = separator, bl = count value (48) of the first separator
  .get: cmp     cl,bl
        jne     .not_3rd
        sub     bl,1+3                  ; next separator: three digits plus itself further on
        mov     al,bh
        jmp     .3rd
  .not_3rd:
        stdcall divdq,esi,10            ; copy = copy div 10, edx = next digit
        lea     eax,[edx+'0']
  .3rd: stosb                           ; store either the digit or the separator
        loop    .get
        add     esp,16                  ; drop the working copy

        popfd
        pop     edx ecx ebx
        mov     esi,[esp+4]
        mov     eax,[esp+8]
        mov     edi,[esp+12]
        ret     12
edemko 26 Feb 2010, 08:03
revolution wrote:

A dqword (16 byte) x dqword (16 byte) multiply would have a 32 byte result size. It requires 16 (4*4) cross multiplies. Do you want to try and code it?


; dqword[to] = dqword[to] + dqword[from].
; to, from - pointers to 128-bit values stored as 4 dwords, least significant
;            first. Both may point at the same value (to = to + to).
; CF = carry out of bit 127, remaining flags destroyed.
; All registers are preserved.
proc adddq; to:dword, from:dword
        xchg    edi,[esp+4]             ; edi = to, caller's edi parked in the argument slot
        xchg    esi,[esp+8]             ; esi = from, caller's esi parked likewise
        push    eax ebx ecx edx         ; scratch registers live on the stack, not inside [to]

; Load the whole source first, then add/adc straight into the destination.
; Nothing is parked in [to], so the sum stays correct when to == from.
        mov     eax,[esi]
        mov     ebx,[esi+4]
        mov     ecx,[esi+8]
        mov     edx,[esi+12]
        add     [edi],eax
        adc     [edi+4],ebx
        adc     [edi+8],ecx
        adc     [edi+12],edx

        pop     edx ecx ebx eax         ; pop/mov/ret leave CF intact
        mov     esi,[esp+8]
        mov     edi,[esp+4]
        ret     8

There is also an algorithm for numbers of unlimited length (arbitrarily long chains of digits).
It is partly in Russian; contact me whenever you need a translation, or just use http://translate.google.com, ok?
file download link at wasm.ru:
source code:
http://wasm.ru/forum/viewtopic.php?id=37619, post #9
good luck

