format PE console 4.0
entry start
include '%fasmpath%\include\win32a.inc'
; Code section: readable and executable only. No instruction in this program
; stores into the code section (every variable is declared in the '.idata'
; section), so the section is deliberately not marked writeable.
section '.code' code readable executable

; Fixed-point reciprocals used by bin2ascii to avoid div:
;   2^32/1e5 = 42949.67296 decimal = a7c5.ac471b478423 hex
;   2^32/1e4 = 429496.7296 decimal = 68db8.bac710cb296 hex
n_from equ 1234567890                   ; first number printed
n_to equ 1234567899                     ; last number printed (inclusive)
magic1 equ 0a7c5ac47h                   ; hex digits of 1/100000, truncated
magic2 equ 068db8badh                   ; hex digits of 1/10000, last digit rounded up
;-----------------------------------------------------------------------
; start - program entry point.
;
; Prints every integer from n_from to n_to (inclusive) to standard output,
; one per line, as 10 decimal digits (leading zeros kept) followed by
; CR/LF. Output goes through buffered_write and is flushed once at the end.
;
; Regs:  esi = number currently being printed. It is saved around
;              buffered_write, which needs esi as its source pointer.
;        edi = destination handed to bin2ascii (always `result`).
; Exit:  returns to the caller of the entry point with eax = 0. The value
;        in eax at that point is what the process reports as its exit
;        status, so it is set explicitly instead of leaving WriteFile's
;        result there (which would report 1 on success).
;-----------------------------------------------------------------------
start:
        invoke  GetStdHandle, STD_OUTPUT_HANDLE
        mov     [OutHandle],eax
        mov     esi,n_from
.next_num:
        mov     edi,result
        call    bin2ascii               ; result[0..9] = decimal digits of esi
        push    esi                     ; keep the counter; esi becomes the source
        mov     esi,result
        mov     ecx,12                  ; 10 digits + CR + LF (the trailing '$' is not sent)
        call    buffered_write
        pop     esi
        ; Test before incrementing so that n_to = 0FFFFFFFFh cannot wrap
        ; esi back to 0 and loop forever.
        cmp     esi,n_to
        jae     .done
        inc     esi
        jmp     .next_num
.done:
        call    buffer_flush
        xor     eax,eax                 ; exit status 0 = success
        ret
;-----------------------------------------------------------------------
; bin2ascii - convert an unsigned 32-bit number to 10 ASCII decimal
;             digits without using the div instruction.
;
; In:    esi = number to convert
;        edi = destination; 10 bytes are stored, leading zeros included
; Out:   edi = address of the last digit stored (destination + 9)
;        ecx = 0
; Clobb: eax, edx, flags (esi is only read)
;
; Method
;   A 32-bit number is at most 4294967295, i.e. 10 decimal digits. It is
;   split into a high group of five digits (number / 100000) and a low
;   group of five digits (the remainder), e.g. 4294967295 -> 42949 and
;   67295. Each group is then turned into a fixed-point fraction whose
;   integer part is one decimal digit; every multiplication by 10 shifts
;   the next decimal digit into the integer part.
;
;   Dividing by 100000 is done by multiplying with the reciprocal
;   1/100000 = 0.0000a7c5ac471b478423h. magic1 holds the digits a7c5ac47,
;   so after the mul the hexadecimal point sits 48 bits up:
;       edx:eax = qqqq.rrrr:rrrrrrrr   (q = quotient, r = fraction)
;   Those eight digits alone are not precise enough. One more nibble,
;   rounded up to 2 (reciprocal 0.0000a7c5ac472h), is sufficient according
;   to the original author's verification. That extra nibble contributes
;   number * 0.2h = number / 8, which is added as number shr 3.
;-----------------------------------------------------------------------
FRACTION_MASK = 0FFFFFFFh               ; 28 fraction bits below the digit nibble

bin2ascii:
        ; ecx = number / 8, the contribution of the extra reciprocal nibble.
        ; Computed first: mul does not touch ecx.
        mov     ecx,esi
        shr     ecx,3

        ; edx:eax = number * (1/100000), point between bit 47 and bit 48.
        mov     eax,magic1
        mul     esi
        add     eax,ecx
        adc     edx,0

        ; Split qqqq.rrrr:rrrrrrrr into
        ;   edx = qqqq.0000h  (quotient, point in the middle of the register)
        ;   eax = 0.rrrrrrrh  (top 7 fraction nibbles, point above bit 27)
        shrd    eax,edx,20              ; eax = q:rrrrrrr (lowest quotient nibble on top)
        and     edx,0FFFF0000h          ; keep only the quotient
        inc     eax                     ; round up for the 4 fraction nibbles shifted out
        and     eax,FRACTION_MASK       ; drop the quotient nibble
        push    eax                     ; low group waits on the stack

        ; High group: the quotient is at most 42949, so dividing it by 10000
        ; leaves a single digit in front of the point. Multiply qqqq.0000h
        ; by 1/10000 = 0.000068db8badh; the result lands in edx as q.rrrrrrrh.
        mov     eax,magic2
        mul     edx
        inc     edx                     ; round up

        ; The top nibble of edx is the first decimal digit; store it and
        ; keep the 28-bit fraction in eax for the remaining digits.
        mov     eax,edx
        shr     edx,28
        and     eax,FRACTION_MASK
        add     dl,'0'
        mov     [edi],dl

        mov     ecx,4                   ; four more digits from the high group
        call    .emit_digits

        pop     eax                     ; fraction of the low group
        mov     ecx,5                   ; five digits from the low group
        call    .emit_digits
        ret

; .emit_digits - turn a 28-bit fraction into decimal digits.
; In:    eax = fraction (point above bit 27), ecx = digit count (> 0),
;        edi = address of the previously stored digit
; Out:   edi = address of the last digit stored, ecx = 0,
;        eax = fraction left over
; Clobb: edx, flags
.emit_digits:
        inc     edi
        lea     eax,[eax+eax*4]         ; fraction * 5
        add     eax,eax                 ; ... * 2 => fraction * 10
        mov     edx,eax
        shr     edx,28                  ; integer part = next decimal digit
        and     eax,FRACTION_MASK       ; keep the fraction for the next round
        add     dl,'0'
        mov     [edi],dl
        dec     ecx
        jnz     .emit_digits
        ret
;-----------------------------------------------------------------------
; buffered_write - queue bytes for output, flushing when the buffer fills.
;
; In:    esi = address of the bytes to write
;        ecx = number of bytes
; Out:   esi = advanced past the bytes consumed
;        ecx = 0
; Clobb: eax, edi, flags; also edx whenever WriteFile is called (a flush
;        or a write-through), since that API may change eax/ecx/edx.
;
; [buff_ptr] holds the number of bytes currently pending in `buffer`
; (an offset, not an address). The capacity is taken from the labels
; (buff_end - buffer), so it follows the buffer declaration.
;
; Three cases:
;   - the bytes fit behind the pending data: append them;
;   - they fit only into an empty buffer: flush, then store them at the
;     start of the buffer;
;   - they exceed the whole buffer: flush (to keep output in order) and
;     hand them directly to WriteFile. Copying them into the buffer
;     would write past buff_end.
; The result of WriteFile is not checked.
;-----------------------------------------------------------------------
buffered_write:
        cmp     ecx,buff_end-buffer
        ja      .oversize               ; can never fit, not even after a flush
        mov     edi,[buff_ptr]
        lea     eax,[edi+ecx]           ; eax = pending byte count after appending
        cmp     eax,buff_end-buffer
        ja      .make_room
        mov     [buff_ptr],eax
        add     edi,buffer              ; offset -> address of the first free byte
        rep movsb
        ret

.make_room:
        push    ecx                     ; the flush calls WriteFile, which may change ecx
        call    buffer_flush
        pop     ecx
        mov     edi,buffer
        mov     [buff_ptr],ecx          ; buffer now holds exactly this request
        rep movsb
        ret

.oversize:
        push    ecx                     ; byte count, needed again after the API calls
        call    buffer_flush            ; earlier output must reach the handle first
        mov     ecx,[esp]
        invoke  WriteFile, [OutHandle], esi, ecx, nwriten, 0
        pop     ecx
        add     esi,ecx                 ; same exit state as the rep movsb paths:
        xor     ecx,ecx                 ; source consumed, count exhausted
        ret
;-----------------------------------------------------------------------
; buffer_flush - send the pending contents of `buffer` to [OutHandle].
;
; In:    [buff_ptr] = number of pending bytes
; Out:   [buff_ptr] = 0
; Notes: returns immediately, calling nothing, when no bytes are pending.
;        Otherwise WriteFile is called once; its result is left in eax
;        and is not checked here. The byte count it reports is stored
;        in [nwriten].
;-----------------------------------------------------------------------
buffer_flush:
        cmp     dword [buff_ptr],0
        jne     .write_pending
        ret                             ; nothing buffered

.write_pending:
        invoke  WriteFile, [OutHandle], buffer, [buff_ptr], nwriten, 0
        mov     dword [buff_ptr],0      ; buffer is empty again
        ret
; Import table and program variables share this one read/write section.
section '.idata' import data readable writeable
; Imports: the two KERNEL32 functions called through `invoke` in the code.
library kernel,'KERNEL32.DLL'
import kernel,\
GetStdHandle,'GetStdHandle',\
WriteFile,'WriteFile'
align 4
; Passed to WriteFile as its fourth argument (the bytes-written output
; variable); the program never reads it back.
nwriten dd 0
; Handle returned by GetStdHandle(STD_OUTPUT_HANDLE), stored by start.
OutHandle dd 0
; One output line: 10 digit bytes filled in by bin2ascii ...
result rb 10
; ... followed by CR, LF and a '$'. start sends only 12 bytes per line,
; so the '$' is never written.
db 13,10,'$'
align 4
; Number of bytes currently pending in `buffer`. Despite the name this is
; an offset from `buffer`, not an address (buffered_write adds `buffer` to it).
buff_ptr dd 0
align 16
; Output staging area filled by buffered_write and emptied by buffer_flush.
buffer rb 2048
; Marks the first byte past `buffer`.
buff_end: