format pe gui 4.0
include 'win32ax.inc'
section '' code executable import readable writeable
library kernel32,'kernel32.dll',\
user32,'user32.dll'
include 'api\kernel32.inc'
include 'api\user32.inc'
; Test driver: takes the 80-bit constant `a`, runs POW10_STEPS tg_div steps on
; its mantissa and loads the result on the FPU stack, then runs POW10_STEPS
; tg_mul steps and loads that result as well, so both can be compared in a
; debugger.
POW10_STEPS = 19                        ; number of tg_div / tg_mul steps (any power of 10)
a dt 3333333333333333333.0              ; 80-bit test value
entry $
        movzx   esi,word[a+8]           ; esi = top word of a (head: sign + exponent)
        and     esi,$7fff               ; positive head: clear the sign bit
        mov     edx,dword[a+4]          ; mantissa, high dword
        mov     ebx,dword[a+0]          ; mantissa, low dword
        xor     eax,eax                 ; extra lowest dword starts at zero
repeat POW10_STEPS
        call    tg_div                  ; edx:ebx:eax = normalized (mantissa * 0.1)
        add     esi,ecx                 ; apply the exponent delta
end repeat
        push    esi edx ebx             ; build a tbyte image: ebx, edx, low word of esi
        fld     tbyte[esp]              ; first result (eax is not part of the image)
        add     esp,3*4
repeat POW10_STEPS
        call    tg_mul                  ; edx:ebx:eax = normalized (mantissa * 10.0)
        add     esi,ecx                 ; apply the exponent delta
end repeat
        push    esi edx ebx
        fld     tbyte[esp]              ; second result; the first one is now st1
        add     esp,3*4
        ; Debugger stop: check st1 (after the tg_div steps) and st0 (after the
        ; tg_mul steps). int3 is the breakpoint instruction; the former hlt is
        ; privileged and raises a fault in user mode that cannot be resumed,
        ; which made the ret below unreachable.
        int3
        ret 0
; tg_div / tg_mul
; in:  edx:ebx:eax = normalized mantissa
; out: edx:ebx:eax = normalized (mantissa * 0.1)  for tg_div
;                    normalized (mantissa * 10.0) for tg_mul
;      ecx   = exponent delta
;      flags = ?
proc tg_div
; Multiplies the normalized mantissa in edx:ebx:eax (edx most significant)
; by 0.1 and renormalizes it.
;   out: edx:ebx:eax = renormalized product (truncated, no rounding)
;        ecx         = exponent delta, -3 or -4
;        flags       = whatever the final sub leaves
; The factor used is $cccccccc'cccccccc'cccccccd, i.e. 0.8 as a fraction;
; 0.1 = 0.8 * 2^-3, which is where the -3 comes from.
        ; operand 2: the mantissa in the top three dwords of a 128-bit value
        push    edx
        push    ebx
        push    eax
        push    0
        mov     eax,esp                 ; eax -> mantissa operand
        ; operand 1: $cccccccc'cccccccc'cccccccd'00000000
        push    $cccc'cccc
        push    $cccc'cccc
        push    $cccc'cccd
        push    0
        mov     ebx,esp                 ; ebx -> constant operand
        push    eax                     ; m2
        push    ebx                     ; m1
        call    _128mul128              ; low half -> [ebx], high half -> [eax]
        add     esp,4*4                 ; throw away the low 128 bits
        pop     ecx                     ; high half: edx:ebx:eax:ecx
        pop     eax
        pop     ebx
        pop     edx
        test    edx,edx
        js      .top_set                ; top bit already set: no shift needed
        add     ecx,ecx                 ; shift edx:ebx:eax left by one bit,
        adc     eax,eax                 ; pulling in bit 31 of ecx
        adc     ebx,ebx
        adc     edx,edx
        or      ecx,-1                  ; one extra step down in the exponent
        jmp     .bias
.top_set:
        xor     ecx,ecx
.bias:
        sub     ecx,3                   ; ecx = -3, or -4 after a shift
        ret 0
endp
;fasm internals simulants, test version
proc tg_mul
; Multiplies the normalized mantissa in edx:ebx:eax (edx most significant)
; by 10.0 and renormalizes it.
;   out: edx:ebx:eax = renormalized product (truncated, no rounding)
;        ecx         = exponent delta, 4 or 3
;        flags       = whatever the final add leaves
; The factor used is $a0000000'00000000'00000000'00000000, i.e. 0.625 as a
; fraction; 10 = 0.625 * 2^4, which is where the 4 comes from.
        ; operand 2: the mantissa in the top three dwords of a 128-bit value
        push    edx
        push    ebx
        push    eax
        push    0
        mov     eax,esp                 ; eax -> mantissa operand
        ; operand 1: only the top dword is non-zero
        push    $a000'0000
        push    0
        push    0
        push    0
        mov     ebx,esp                 ; ebx -> constant operand
        push    eax                     ; m2
        push    ebx                     ; m1
        call    _128mul128              ; low half -> [ebx], high half -> [eax]
        add     esp,4*4                 ; throw away the low 128 bits
        pop     ecx                     ; high half: edx:ebx:eax:ecx
        pop     eax
        pop     ebx
        pop     edx
        test    edx,edx
        js      .top_set                ; top bit already set: no shift needed
        add     ecx,ecx                 ; shift edx:ebx:eax left by one bit,
        adc     eax,eax                 ; pulling in bit 31 of ecx
        adc     ebx,ebx
        adc     edx,edx
        or      ecx,-1                  ; one step less in the exponent
        jmp     .bias
.top_set:
        xor     ecx,ecx
.bias:
        add     ecx,4                   ; ecx = 4, or 3 after a shift
        ret 0
endp
;/*
; [m2:m1] = [m2]*[m1].
; flags <-?
; low 128 bits stored at <m1> then high 128 bits stored at <m2>
; there will be fun when buffers overlap :)
;*/
proc _128mul128 ; m1,m2
; 128 x 128 -> 256 bit unsigned multiply of the two buffers whose addresses
; were pushed by the caller (m2 first, then m1).
;   out: [m1] = low 128 bits of the product, [m2] = high 128 bits
;        all general registers are restored; flags come from the last adc
; Both operands are read completely before anything is stored; if the buffers
; overlap, the two result halves overwrite each other.
; The arguments are not declared to the proc macro, so the frame is set up by
; hand and the two argument dwords are released by the final ret 8.
; NOTE(review): confirm that `ret` with an operand inside a proc body
; assembles to a near return with this version of the macro package.
.m1  = 2*4                              ; [ebp+.m1] = address of m1
.m2  = 3*4                              ; [ebp+.m2] = address of m2
.acc = -14*4                            ; [ebp+.acc] = dword 0 of the 256-bit sum
        push    ebp
        mov     ebp,esp
        push    edi
        mov     edi,[ebp+.m1]
        push    esi
        mov     esi,[ebp+.m2]
        push    edx                     ; saved for the caller
        push    ecx
        push    ebx
        push    eax
        ; The product is assembled from four 64x64 partial products:
        ;   m1.lo*m2.lo            (I)
        ; + m1.lo*m2.hi shl 64     (II)
        ; + m1.hi*m2.lo shl 64     (III)
        ; + m1.hi*m2.hi shl 128    (IV)

        ; I: initializes the accumulator, upper four dwords zero
        mov     eax,[edi]
        mov     ebx,[edi+4]
        mov     ecx,[esi]
        mov     edx,[esi+4]
        call    _64mul64
        push    0
        push    0
        push    0
        push    0
        push    edx
        push    ecx
        push    ebx
        push    eax                     ; esp = ebp+.acc from here on

        ; II: added starting at dword 2
        mov     eax,[edi]
        mov     ebx,[edi+4]
        mov     ecx,[esi+8]
        mov     edx,[esi+12]
        call    _64mul64
        add     [ebp+.acc+2*4],eax
        adc     [ebp+.acc+3*4],ebx
        adc     [ebp+.acc+4*4],ecx
        adc     [ebp+.acc+5*4],edx
        adc     dword[ebp+.acc+6*4],0

        ; III: added starting at dword 2
        mov     eax,[edi+8]
        mov     ebx,[edi+12]
        mov     ecx,[esi]
        mov     edx,[esi+4]
        call    _64mul64
        add     [ebp+.acc+2*4],eax
        adc     [ebp+.acc+3*4],ebx
        adc     [ebp+.acc+4*4],ecx
        adc     [ebp+.acc+5*4],edx
        adc     dword[ebp+.acc+6*4],0

        ; IV: added starting at dword 4
        mov     eax,[edi+8]
        mov     ebx,[edi+12]
        mov     ecx,[esi+8]
        mov     edx,[esi+12]
        call    _64mul64
        add     [ebp+.acc+4*4],eax
        adc     [ebp+.acc+5*4],ebx
        adc     [ebp+.acc+6*4],ecx
        adc     [ebp+.acc+7*4],edx

        ; unload the accumulator: dwords 0..3 -> m1, dwords 4..7 -> m2
        pop     dword[edi]
        pop     dword[edi+4]
        pop     dword[edi+8]
        pop     dword[edi+12]
        pop     dword[esi]
        pop     dword[esi+4]
        pop     dword[esi+8]
        pop     dword[esi+12]
        pop     eax
        pop     ebx
        pop     ecx
        pop     edx
        pop     esi
        pop     edi
        leave
        ret 8 ;2010_08_20
endp
; edx:ecx:ebx:eax <- edx:ecx * ebx:eax = (edx*ebx:eax) shl 32 + ecx*ebx:eax
; flags <- result.edx + 0
;
; $ffffffff'ffffffff * $ffffffff'ffffffff = $ffffffff'ffffffff * $1'00000000'00000000 - $ffffffff'ffffffff = $ffffffff'ffffffff'00000000'00000000 - $ffffffff'ffffffff = $ffffffff'fffffffe'00000000'00000001 = 128bit
proc _64mul64
; 64 x 64 -> 128 bit unsigned multiply, built from two _64mul32 calls.
;   in : edx:ecx = first factor, ebx:eax = second factor
;   out: edx:ecx:ebx:eax = product
;        flags = from the final adc into edx
        push    ecx                     ; [esp+8] = low dword of the first factor
        push    ebx                     ; [esp+4] = second factor, high dword
        push    eax                     ; [esp]   = second factor, low dword
        mov     ecx,edx
        call    _64mul32                ; ecx:ebx:eax = edx * ebx:eax
        ; Park that 96-bit partial product in the three stack slots and
        ; reload the saved inputs; edx is free to serve as scratch here.
        mov     edx,[esp+8]
        mov     [esp+8],ecx
        mov     ecx,edx
        mov     edx,[esp+4]
        mov     [esp+4],ebx
        mov     ebx,edx
        mov     edx,[esp]
        mov     [esp],eax
        mov     eax,edx
        call    _64mul32                ; ecx:ebx:eax = ecx * ebx:eax
        ; add the parked partial product, shifted up by 32 bits
        add     ebx,[esp]
        adc     ecx,[esp+4]
        mov     edx,[esp+8]             ; mov leaves the carry untouched
        adc     edx,0
        lea     esp,[esp+3*4]           ; release the slots without touching flags
        ret 0
endp
; ecx:ebx:eax <- ecx * ebx:eax = (ecx*ebx) shl 32 + ecx*eax
; edx <- high dword of ecx*eax (left over from the second mul)
; flags <- result.ecx + 0
;
; $ffffffff'ffffffff * $ffffffff = $ffffffff'ffffffff * $1'00000000 - $ffffffff'ffffffff = $ffffffff'ffffffff'00000000 - $ffffffff'ffffffff = $fffffffe'ffffffff'00000001 = 96bit
proc _64mul32
; 64 x 32 -> 96 bit unsigned multiply.
;   in : ecx = 32-bit factor, ebx:eax = 64-bit factor
;   out: ecx:ebx:eax = product
;        edx   = high dword of (ecx * eax), already added into ebx
;        flags = from the final adc into ecx
        push    eax                     ; keep the low dword for the second mul
        mov     eax,ebx
        mul     ecx                     ; edx:eax = ebx * ecx
        mov     ebx,eax                 ; middle dword, before the carry-in
        pop     eax
        push    edx                     ; top dword, before the carry-in
        mul     ecx                     ; edx:eax = eax * ecx
        pop     ecx
        add     ebx,edx
        adc     ecx,0
        ret 0
endp