
include '%fasminc%/win32ax.inc'

; regcall proc, arg1, arg2, arg3, arg4
;
; Calls `proc` with up to four dword arguments passed in registers:
;	arg1 -> eax, arg2 -> edx, arg3 -> ecx, arg4 -> ebx
; Arguments are loaded left to right with plain `mov`, so an argument that
; reads a register already overwritten by an earlier one sees the new value.
; More than four arguments is an assembly-time error.
;
; The leading `common` matters: without it the emptiness test would sit in
; the default block, which FASM repeats once per argument, and the macro
; would assemble only because `end if` happened to be repeated equally often.
macro regcall proc, [arg]
{
	common
	  argcount = 0
	  if ~ arg eq
	forward
	    argcount = argcount + 1
	    if argcount = 1
	       mov eax, arg
	    else if argcount = 2
	       mov edx, arg
	    else if argcount = 3
	       mov ecx, arg
	    else if argcount = 4
	       mov ebx, arg
	    else
	       display 'regcall: at most 4 register arguments are supported', 13, 10
	       err
	    end if
	common
	  end if
	  call	  proc

}

; stdcall proc, arg1, arg2, ...
;
; Pushes every argument as a dword and calls `proc`; the callee is expected
; to release the arguments itself (`ret n`).
;
; NOTE: arguments are pushed LEFT TO RIGHT (`forward`), so the last argument
; listed ends up at [esp+4] inside the callee. The procedures in this file
; are written for that layout, so the order must not be changed here.
;
; The leading `common` makes the emptiness test run once for the whole
; argument list; in the default block it would be repeated per argument.
macro stdcall proc, [arg]
{
	common
	  if ~ arg eq
	forward
	    pushd arg
	common
	  end if
	  call	  proc

}

.data
    ; 64-bit performance-counter readings (low dword first):
    ; S receives the start value, F the finish value and then the difference.
    S	dd 0, 0
    F	dd 0, 0

    ; Loop counter for the timed loops.
    I	dd 0
    ; 64-bit operands handed to the Int64Add* routines (low dword first).
    T1	dd 0, 0
    T2	dd 0, 0

; Ten-byte digit buffer filled by UIntToStr. The routine writes no
; terminator, so the zero bytes that follow MUST stay directly behind it.
__buf	rb	10
zero	db	30 dup (0)

.code

; UIntToStr - format an unsigned 32-bit value as decimal ASCII.
; In:	 eax = value
;	 edx = address of a 10-byte buffer
; Out:	 eax = address of the most significant digit inside the buffer
; Clobb: ecx, edx, flags (edi is preserved)
; The whole buffer is first filled with '0'; digits are then stored from the
; last byte backwards. No terminating zero is written - the caller has to
; make sure one follows the buffer.
UIntToStr:
	push	edi
	mov	edi, edx

	; Prefill all ten positions with '0' (4 + 4 + 2 bytes).
	mov	dword [edi], '0000'
	mov	dword [edi+4], '0000'
	mov	word [edi+8], '00'

	add	edi, 10 		; one past the last digit slot
	mov	ecx, 10 		; divisor stays constant for the loop
    .next_digit:
	xor	edx, edx		; edx:eax = value, zero-extended for div
	div	ecx			; eax = quotient, edx = remainder (0..9)
	add	dl, '0'
	dec	edi
	mov	[edi], dl
	test	eax, eax		; more digits while the quotient is non-zero
	jnz	.next_digit

	mov	eax, edi		; first digit written

	pop	edi
	ret

; ShowUInt - display the unsigned value in eax as decimal text.
; Formats the number into __buf with UIntToStr, then continues in
; ShowMessage with eax pointing at the first digit.
ShowUInt:
	mov	edx, __buf		; edx = address of the digit buffer
	call	UIntToStr
	jmp	ShowMessage		; tail call; ShowMessage returns to our caller

; ShowMessage - show a message box with an OK button.
; In:  eax = address of the zero-terminated text to display
; The box is titled "CallConv" and uses HWND_DESKTOP as its owner window.
; eax on return is whatever MessageBox left there.
ShowMessage:
	invoke	MessageBox,HWND_DESKTOP,eax,"CallConv",MB_OK
	ret

; register, pass-by-value
; In:  edx:eax = first 64-bit addend  (eax = low dword, edx = high dword)
;      ebx:ecx = second 64-bit addend (ecx = low dword, ebx = high dword)
; Out: edx:eax = 64-bit sum
Int64Add1:
	add	eax, ecx	; low dwords; carry out goes to CF
	adc	edx, ebx	; high dwords plus that carry
	ret

; register, pass-by-reference
; In:  eax = address of the 64-bit source operand (low dword first)
;      edx = address of the 64-bit destination operand (low dword first)
; Out: the destination holds destination + source; eax and edx are unchanged
; Clobbers ecx and the flags.
Int64Add2:
	mov	ecx, [eax]
	add	[edx], ecx	; low dwords; carry out goes to CF
	mov	ecx, [eax+4]	; mov leaves CF untouched
	adc	[edx+4], ecx	; high dwords plus that carry
	ret

; stack, pass-by-value
; In (stack, released by the callee):
;	[esp+4]  = low	dword of the first addend
;	[esp+8]  = high dword of the first addend
;	[esp+12] = low	dword of the second addend
;	[esp+16] = high dword of the second addend
; Out:	 edx:eax = 64-bit sum
; Clobb: flags
; Two 64-bit values occupy 16 bytes of stack, hence `ret 16`. The sum is
; returned in registers because the argument slots cease to exist on return.
Int64Add3:
	mov	eax, [esp+4]
	mov	edx, [esp+8]
	add	eax, [esp+12]	; low dwords; carry out goes to CF
	adc	edx, [esp+16]	; high dwords plus that carry
	ret	16

; stack, pass-by-reference
; In (stack, released by the callee, 8 bytes):
;	[esp+4] = address of the 64-bit source operand
;	[esp+8] = address of the 64-bit destination operand, updated in place
; Clobb: eax, ecx, edx, flags
Int64Add4:
	mov	eax, [esp+4]
	mov	edx, [esp+8]

	mov	ecx, [eax]
	add	[edx], ecx	; low dwords; carry out goes to CF
	mov	ecx, [eax+4]	; mov leaves CF untouched
	adc	[edx+4], ecx	; high dwords plus that carry
	ret	8

; Number of calls made in each timed loop.
ITERATIONS = 1000000

; BeginTiming - give up the rest of the current time slice, then store the
; current performance-counter value in S.
BeginTiming:
	invoke	Sleep, 0
	invoke	QueryPerformanceCounter, S
	ret

; EndTiming - store the current performance-counter value in F, replace it
; with the 64-bit difference F - S, and display the low dword of that
; difference in a message box.
EndTiming:
	invoke	QueryPerformanceCounter, F

	mov	eax, [S]
	sub	[F], eax
	mov	eax, [S+4]
	sbb	[F+4], eax

	mov	eax, [F]
	call	ShowUInt
	ret

; Program entry point: times ITERATIONS calls of each Int64Add* variant and
; reports the elapsed counter ticks of each run. Never returns (ExitProcess),
; so ebx, which the register-based call overwrites, is not preserved.
start:
	; 1) register, pass-by-value
	call	BeginTiming
	mov	[I], ITERATIONS
.loop1: regcall Int64Add1, [T1], [T1+4], [T2], [T2+4]
	dec	[I]
	jnz	.loop1
	call	EndTiming

	; 2) register, pass-by-reference
	call	BeginTiming
	mov	[I], ITERATIONS
.loop2: regcall Int64Add2, T1, T2
	dec	[I]
	jnz	.loop2
	call	EndTiming

	; 3) stack, pass-by-value
	; This file's stdcall macro pushes left to right, so the last argument
	; listed lands at [esp+4]: low dwords must follow their high dwords.
	call	BeginTiming
	mov	[I], ITERATIONS
.loop3: stdcall Int64Add3, [T2+4], [T2], [T1+4], [T1]
	dec	[I]
	jnz	.loop3
	call	EndTiming

	; 4) stack, pass-by-reference
	call	BeginTiming
	mov	[I], ITERATIONS
.loop4: stdcall Int64Add4, T1, T2
	dec	[I]
	jnz	.loop4
	call	EndTiming
.exit:
	invoke	ExitProcess, 0
.end start