flat assembler
Message board for the users of flat assembler.

Index > Tutorials and Examples > Extended Division

Author
Thread Post new topic Reply to topic
QQ2976501934



Joined: 06 Jul 2021
Posts: 3
QQ2976501934 23 Oct 2021, 14:18
32-bit x86 assembly:
Code:
ExtDivision:
        ; Unsigned multi-word division (32-bit x86, stack arguments, plain
        ; `ret` so the caller removes the arguments).
        ;   Result    = Divisible / Divisor
        ;   Remainder = Divisible mod Divisor
        ; Every number is an array of 32-bit words, least significant word
        ; first; each cn* value is a count of words.
        ;
        ; ptDivisible at ebp+8
        ; cnDivisible at ebp+12
        ; ptDivisor at ebp+16
        ; cnDivisor at ebp+20
        ; ptResult at ebp+24
        ; cnResult at ebp+28
        ; ptRemainder at ebp+32
        ; cnRemainder at ebp+36
        ;
        ; Returns eax = 1 on success, eax = 0 when the divisor is zero or
        ; empty (outputs are left untouched in that case).
        ; Results that do not fit are truncated to their low words; unused
        ; high words of both output buffers are zero-filled.
        ; Preserves ebx, esi, edi, ebp.  Modifies eax, ecx, edx, flags.
        ; The argument slots at ebp+8 .. ebp+20 are reused as scratch.
        ; (cnDivisible + cnDivisor + 1) words are allocated on the stack for
        ; the normalized copies of both operands.
        ; NOTE(review): the stack allocation is not probed or bounded; very
        ; long operands may need a heap buffer instead - confirm for your OS.
        push ebp
        mov ebp, esp
        push edi
        push esi
        push ebx
        sub esp, 8                      ; locals: [ebp-16], [ebp-20]
        mov esi, [ebp+16]               ; esi = ptDivisor
        mov ebx, [ebp+20]               ; ebx = cnDivisor
        xor eax, eax                    ; return value of the failure exit
        test ebx, ebx
        jz .L14                         ; empty divisor: fail, never touch ptDivisor[-1]
.L1:
        ; Drop zero words from the top of the divisor.
        mov eax, [esi+ebx*4-4]
        bsr edx, eax                    ; edx = highest set bit, ZF = 1 when eax = 0
        jnz .L2
        dec ebx
        jnz .L1
        jmp .L14                        ; divisor is zero: eax = 0 here
.L2:
        ; Drop zero words from the top of the dividend.
        mov edi, [ebp+8]
        xor eax, eax                    ; also sets ZF, so cnDivisible = 0 ends in .L16
        mov ecx, [ebp+12]
        lea edi, [edi+ecx*4-4]
        std
        repz scasd                      ; walk downwards while the words are zero
        cld
        jz .L16                         ; dividend is zero
        inc ecx                         ; ecx = significant dividend words
        mov [ebp+12], ecx               ; from here on: stripped cnDivisible
        mov [ebp+20], ebx               ; from here on: stripped cnDivisor
        mov ecx, 31
        xor ecx, edx                    ; ecx = leading zero bits of the top divisor word
        ; Scratch layout at esp: (cnDivisible+1) dividend words followed by
        ; cnDivisor divisor words.
        mov edx, [ebp+12]
        lea edx, [edx*4]
        lea eax, [edx+ebx*4+4]
        sub esp, eax
        lea edx, [esp+edx+4]
        mov [ebp-16], edx               ; [ebp-16] = divisor scratch (temporarily)
        mov [ebp-20], ecx               ; [ebp-20] = normalization shift
        ; Copy the dividend shifted left by cl; word cnDivisible receives
        ; the bits shifted out of the top word.
        mov edi, [ebp+8]
        mov ebx, [ebp+12]
        xor eax, eax
.L3:
        mov edx, [edi+ebx*4-4]
        shld eax, edx, cl
        mov [esp+ebx*4], eax
        mov eax, edx
        dec ebx
        jnz .L3
        shl eax, cl
        mov [esp], eax
        ; Copy the divisor shifted left by cl, so its top bit becomes set.
        mov edi, [ebp-16]
        mov ebx, [ebp+20]
        mov eax, [esi+ebx*4-4]
        dec ebx
        jz .L5
.L4:
        mov edx, [esi+ebx*4-4]
        shld eax, edx, cl
        mov [edi+ebx*4], eax
        mov eax, edx
        dec ebx
        jnz .L4
.L5:
        shl eax, cl
        mov [edi], eax
        mov [ebp+8], esp                ; ptDivisible slot -> normalized dividend
        mov [ebp+16], edi               ; ptDivisor slot   -> normalized divisor
        mov eax, [ebp+12]
        mov ebx, [ebp+20]
        sub eax, ebx
        jc .L15                         ; fewer dividend words than divisor words
        mov [ebp-16], eax               ; [ebp-16] = index of the current quotient word
        ; ebx = floor((2^32 - (top+1)) * 2^32 / (top+1)) for the top divisor
        ; word, or 0 when top = 0FFFFFFFFh.  ExtGetQuotient reads it from
        ; the stack to form its trial quotients.
        mov ebx, [edi+ebx*4-4]
        inc ebx
        jz .L6
        xor eax, eax
        xor edx, edx
        sub edx, ebx
        div ebx
        mov ebx, eax
.L6:
        ; Zero the result words from the top quotient index upwards.
        mov edx, [ebp-16]
        mov ecx, [ebp+28]
        sub ecx, edx
        jbe .L7
        xor eax, eax
        mov edi, [ebp+24]
        lea edi, [edi+edx*4]
        rep stosd
.L7:
        ; One quotient word per pass, most significant first.
        ; edx = index of the quotient word being produced.
        mov esi, [ebp+16]
        mov edi, [ebp+8]
        lea edi, [edi+edx*4]            ; window of cnDivisor+1 dividend words
        mov ecx, [ebp+20]
        call ExtGetQuotient             ; eax = quotient word, window reduced in place
        mov ecx, [ebp-16]
        cmp ecx, [ebp+28]
        jae .L8                         ; the result buffer has no slot for this word
        mov edi, [ebp+24]
        mov [edi+ecx*4], eax
.L8:
        mov edx, [ebp-16]
        sub edx, 1                      ; sub, not dec: CF reports the step below index 0
        mov [ebp-16], edx
        jnc .L7
.L9:
        ; Store the remainder: shift the reduced dividend scratch right by
        ; the normalization count.  [ebp+20] holds the number of scratch
        ; words that carry remainder data.
        mov eax, [ebp+36]
        test eax, eax
        jz .L13
        mov esi, [ebp+8]
        mov edi, [ebp+32]
        mov ecx, [ebp-20]
        push ebp                        ; ebp serves as the loop limit below
        mov ebx, 1
        mov edx, [esi]
        mov ebp, [ebp+20]
        cmp eax, ebp
        jae .L10
        mov ebp, eax                    ; limit = min(cnRemainder, remainder words)
.L10:
        cmp ebx, ebp
        je .L12
.L11:
        mov eax, [esi+ebx*4]
        shrd edx, eax, cl
        mov [edi+ebx*4-4], edx
        mov edx, eax
        inc ebx
        cmp ebx, ebp
        jne .L11
.L12:
        ; The last stored word still takes its high bits from the next
        ; scratch word (which always exists and is zero when the whole
        ; remainder was copied), so a truncated remainder keeps exact low words.
        mov eax, [esi+ebx*4]
        shrd edx, eax, cl
        mov [edi+ebx*4-4], edx
        pop ebp
        mov ecx, [ebp+36]
        sub ecx, ebx
        jbe .L13
        xor eax, eax                    ; zero the unused high remainder words
        lea edi, [edi+ebx*4]
        rep stosd
.L13:
        mov eax, 1
.L14:
        lea esp, [ebp-12]               ; release scratch and locals
        pop ebx
        pop esi
        pop edi
        pop ebp
        ret
.L15:
        ; Dividend is shorter than the divisor: the quotient is zero and the
        ; remainder is the dividend itself.  Only cnDivisible scratch words
        ; (plus the overflow word above them) belong to the dividend, so the
        ; remainder copy must be limited to them.
        mov eax, [ebp+12]
        mov [ebp+20], eax
        xor eax, eax
        mov edi, [ebp+24]
        mov ecx, [ebp+28]
        rep stosd
        jmp .L9
.L16:
        ; Dividend is zero: quotient and remainder are both zero.
        xor eax, eax
        mov edi, [ebp+24]
        mov ecx, [ebp+28]
        rep stosd
        mov edi, [ebp+32]
        mov ecx, [ebp+36]
        rep stosd
        jmp .L13

ExtGetQuotient:
        ; Produces one quotient word by repeatedly subtracting multiples of
        ; the divisor from a window of the running remainder.
        ; In:   esi -> divisor words (low word first)
        ;       edi -> window of ecx+1 words (low word first), modified in place
        ;       ecx =  number of divisor words
        ;       ebx =  multiplier used to refine the trial quotient
        ;              (read back from the stack at [esp+12])
        ; Out:  eax =  quotient word
        ; Saves and restores ebx and ebp; modifies ecx, edx and flags.
        ; Stack frame after the pushes:
        ;   [esp]    divisor word count
        ;   [esp+4]  quotient accumulator (starts at 0)
        ;   [esp+8]  saved ebp
        ;   [esp+12] saved ebx (the multiplier)
        push ebx
        push ebp
        push 0
        push ecx
.L21:
        ; Phase 1: while the top window word is non-zero, subtract
        ; trial * divisor from the window and add trial to the accumulator.
        mov ebp, [edi+ecx*4]            ; ebp = top word of the window
        test ebp, ebp
        jz .L24
        mov eax, [esp+12]
        mul ebp
        add ebp, edx                    ; trial = top + high half of (multiplier * top)
.L22:
        ; (label is not referenced anywhere in this listing)
        xor ebx, ebx                    ; ebx = carry word of the multiply-subtract
        xor ecx, ecx                    ; ecx = word index
.L23:
        mov eax, [esi+ecx*4]
        mul ebp                         ; edx:eax = divisor word * trial
        add eax, ebx                    ; add carry from the previous word
        adc edx, 0
        sub [edi+ecx*4], eax
        adc edx, 0                      ; fold the borrow into the next carry
        mov ebx, edx
        inc ecx
        cmp ecx, [esp]
        jb .L23
        sub [edi+ecx*4], edx            ; apply the final carry to the top word
        add [esp+4], ebp                ; accumulate the trial quotient
        jmp .L21                        ; ecx = word count again at this point
.L24:
        ; Phase 2: top word is zero.  Compare the low ecx words with the
        ; divisor and subtract the divisor while window >= divisor.
        mov eax, [esp+4]                ; eax = accumulated quotient
        mov ebx, ecx                    ; ebx = divisor word count
.L25:
        ; Compare from the most significant word downwards.
        sub ecx, 1
        jc .L26                         ; every word equal: window == divisor
        mov edx, [edi+ecx*4]
        cmp edx, [esi+ecx*4]
        je .L25
        jb .L28                         ; window < divisor: finished
.L26:
        xor ecx, ecx
        xor edx, edx                    ; edx = 0 / -1 keeps the borrow between words
.L27:
        add edx, edx                    ; CF = saved borrow (set when edx = -1)
        mov edx, [esi+ecx*4]
        sbb [edi+ecx*4], edx
        sbb edx, edx                    ; edx = -borrow
        inc ecx
        cmp ebx, ecx
        jnz .L27
        inc eax                         ; one more divisor subtracted
        jmp .L25                        ; ecx = word count: compare again from the top
.L28:
        add esp, 8                      ; drop word count and accumulator
        pop ebp
        pop ebx
        ret
    

64-bit x86 assembly:
Code:
ExtDivision:
        ; Unsigned multi-word division (64-bit x86).  The first four
        ; arguments arrive in rcx, rdx, r8, r9 and are stored into the
        ; caller-provided home slots; the rest are read from the stack.
        ;   Result    = Divisible / Divisor
        ;   Remainder = Divisible mod Divisor
        ; Every number is an array of 64-bit words, least significant word
        ; first; each cn* value is a count of words.
        ;
        ; ptDivisible at rbp+16
        ; cnDivisible at rbp+24
        ; ptDivisor at rbp+32
        ; cnDivisor at rbp+40
        ; ptResult at rbp+48
        ; cnResult at rbp+56
        ; ptRemainder at rbp+64
        ; cnRemainder at rbp+72
        ;
        ; Returns rax = 1 on success, rax = 0 when the divisor is zero or
        ; empty (outputs are left untouched in that case).
        ; Results that do not fit are truncated to their low words; unused
        ; high words of both output buffers are zero-filled.
        ; Preserves rbx, rsi, rdi, rbp.  Modifies rax, rcx, rdx, r10, flags.
        ; The home slots at rbp+16 .. rbp+40 are reused as scratch.
        ; (cnDivisible + cnDivisor + 1) words are allocated on the stack for
        ; the normalized copies of both operands.
        ; NOTE(review): the stack allocation is not probed or bounded; very
        ; long operands may need a heap buffer instead - confirm for your OS.
        mov [rsp+8], rcx
        mov [rsp+16], rdx
        mov [rsp+24], r8
        mov [rsp+32], r9
        push rbp
        mov rbp, rsp
        push rdi
        push rsi
        push rbx
        sub rsp, 16                     ; locals: [rbp-32], [rbp-40]
        mov rsi, r8                     ; rsi = ptDivisor
        mov rbx, r9                     ; rbx = cnDivisor
        xor eax, eax                    ; return value of the failure exit
        test rbx, rbx
        jz .L14                         ; empty divisor: fail, never touch ptDivisor[-1]
.L1:
        ; Drop zero words from the top of the divisor.
        mov rax, [rsi+rbx*8-8]
        bsr rdx, rax                    ; rdx = highest set bit, ZF = 1 when rax = 0
        jnz .L2
        dec rbx
        jnz .L1
        jmp .L14                        ; divisor is zero: rax = 0 here
.L2:
        ; Drop zero words from the top of the dividend.
        mov rdi, [rbp+16]
        xor eax, eax                    ; also sets ZF, so cnDivisible = 0 ends in .L16
        mov rcx, [rbp+24]
        lea rdi, [rdi+rcx*8-8]
        std
        repz scasq                      ; walk downwards while the words are zero
        cld
        jz .L16                         ; dividend is zero
        inc rcx                         ; rcx = significant dividend words
        mov [rbp+24], rcx               ; from here on: stripped cnDivisible
        mov [rbp+40], rbx               ; from here on: stripped cnDivisor
        mov ecx, 63
        xor ecx, edx                    ; rcx = leading zero bits of the top divisor word
        ; Scratch layout at rsp: (cnDivisible+1) dividend words followed by
        ; cnDivisor divisor words.
        mov rdx, [rbp+24]
        lea rdx, [rdx*8]
        lea rax, [rdx+rbx*8+8]
        sub rsp, rax
        lea rdx, [rsp+rdx+8]
        mov [rbp-32], rdx               ; [rbp-32] = divisor scratch (temporarily)
        mov [rbp-40], rcx               ; [rbp-40] = normalization shift
        ; Copy the dividend shifted left by cl; word cnDivisible receives
        ; the bits shifted out of the top word.
        mov rdi, [rbp+16]
        mov rbx, [rbp+24]
        xor eax, eax
.L3:
        mov rdx, [rdi+rbx*8-8]
        shld rax, rdx, cl
        mov [rsp+rbx*8], rax
        mov rax, rdx
        dec rbx
        jnz .L3
        shl rax, cl
        mov [rsp], rax
        ; Copy the divisor shifted left by cl, so its top bit becomes set.
        mov rdi, [rbp-32]
        mov rbx, [rbp+40]
        mov rax, [rsi+rbx*8-8]
        dec rbx
        jz .L5
.L4:
        mov rdx, [rsi+rbx*8-8]
        shld rax, rdx, cl
        mov [rdi+rbx*8], rax
        mov rax, rdx
        dec rbx
        jnz .L4
.L5:
        shl rax, cl
        mov [rdi], rax
        mov [rbp+16], rsp               ; ptDivisible slot -> normalized dividend
        mov [rbp+32], rdi               ; ptDivisor slot   -> normalized divisor
        mov rax, [rbp+24]
        mov rbx, [rbp+40]
        sub rax, rbx
        jc .L15                         ; fewer dividend words than divisor words
        mov [rbp-32], rax               ; [rbp-32] = index of the current quotient word
        ; rbx = floor((2^64 - (top+1)) * 2^64 / (top+1)) for the top divisor
        ; word, or 0 when top is all ones.  ExtGetQuotient reads it from
        ; the stack to form its trial quotients.
        mov rbx, [rdi+rbx*8-8]
        inc rbx
        jz .L6
        xor eax, eax
        xor edx, edx
        sub rdx, rbx
        div rbx
        mov rbx, rax
.L6:
        ; Zero the result words from the top quotient index upwards.
        mov rdx, [rbp-32]
        mov rcx, [rbp+56]
        sub rcx, rdx
        jbe .L7
        xor eax, eax
        mov rdi, [rbp+48]
        lea rdi, [rdi+rdx*8]
        rep stosq
.L7:
        ; One quotient word per pass, most significant first.
        ; rdx = index of the quotient word being produced.
        mov rsi, [rbp+32]
        mov rdi, [rbp+16]
        lea rdi, [rdi+rdx*8]            ; window of cnDivisor+1 dividend words
        mov rcx, [rbp+40]
        call ExtGetQuotient             ; rax = quotient word, window reduced in place
        mov rcx, [rbp-32]
        cmp rcx, [rbp+56]
        jae .L8                         ; the result buffer has no slot for this word
        mov rdi, [rbp+48]
        mov [rdi+rcx*8], rax
.L8:
        mov rdx, [rbp-32]
        sub rdx, 1                      ; sub, not dec: CF reports the step below index 0
        mov [rbp-32], rdx
        jnc .L7
.L9:
        ; Store the remainder: shift the reduced dividend scratch right by
        ; the normalization count.  [rbp+40] holds the number of scratch
        ; words that carry remainder data.
        mov rax, [rbp+72]
        test rax, rax
        jz .L13
        mov rsi, [rbp+16]
        mov rdi, [rbp+64]
        mov rcx, [rbp-40]
        mov ebx, 1
        mov rdx, [rsi]                  ; full 64-bit address (not [esi])
        mov r10, [rbp+40]
        cmp rax, r10
        jae .L10
        mov r10, rax                    ; limit = min(cnRemainder, remainder words)
.L10:
        cmp rbx, r10
        je .L12
.L11:
        mov rax, [rsi+rbx*8]
        shrd rdx, rax, cl
        mov [rdi+rbx*8-8], rdx
        mov rdx, rax
        inc rbx
        cmp rbx, r10
        jne .L11
.L12:
        ; The last stored word still takes its high bits from the next
        ; scratch word (which always exists and is zero when the whole
        ; remainder was copied), so a truncated remainder keeps exact low words.
        mov rax, [rsi+rbx*8]
        shrd rdx, rax, cl
        mov [rdi+rbx*8-8], rdx
        mov rcx, [rbp+72]
        sub rcx, rbx
        jbe .L13
        xor eax, eax                    ; zero the unused high remainder words
        lea rdi, [rdi+rbx*8]
        rep stosq
.L13:
        mov eax, 1
.L14:
        lea rsp, [rbp-24]               ; release scratch and locals
        pop rbx
        pop rsi
        pop rdi
        pop rbp
        ret
.L15:
        ; Dividend is shorter than the divisor: the quotient is zero and the
        ; remainder is the dividend itself.  Only cnDivisible scratch words
        ; (plus the overflow word above them) belong to the dividend, so the
        ; remainder copy must be limited to them.
        mov rax, [rbp+24]
        mov [rbp+40], rax
        xor eax, eax
        mov rdi, [rbp+48]
        mov rcx, [rbp+56]
        rep stosq
        jmp .L9
.L16:
        ; Dividend is zero: quotient and remainder are both zero.
        ; rbp-relative (not ebp-relative) addressing keeps the full pointer.
        xor eax, eax
        mov rdi, [rbp+48]
        mov rcx, [rbp+56]
        rep stosq
        mov rdi, [rbp+64]
        mov rcx, [rbp+72]
        rep stosq
        jmp .L13

ExtGetQuotient:
        ; Produces one quotient word by repeatedly subtracting multiples of
        ; the divisor from a window of the running remainder.
        ; In:   rsi -> divisor words (low word first)
        ;       rdi -> window of rcx+1 words (low word first), modified in place
        ;       rcx =  number of divisor words
        ;       rbx =  multiplier used to refine the trial quotient
        ;              (read back from the stack at [rsp+16])
        ; Out:  rax =  quotient word
        ; Saves and restores rbx; modifies rcx, rdx, r10 and flags.
        ; Stack frame after the pushes:
        ;   [rsp]    divisor word count
        ;   [rsp+8]  quotient accumulator (starts at 0)
        ;   [rsp+16] saved rbx (the multiplier)
        push rbx
        push 0
        push rcx
.L21:
        ; Phase 1: while the top window word is non-zero, subtract
        ; trial * divisor from the window and add trial to the accumulator.
        mov r10, [rdi+rcx*8]            ; r10 = top word of the window
        test r10, r10
        jz .L24
        mov rax, [rsp+16]
        mul r10
        add r10, rdx                    ; trial = top + high half of (multiplier * top)
.L22:
        ; (label is not referenced anywhere in this listing)
        xor ebx, ebx                    ; rbx = carry word of the multiply-subtract
        xor ecx, ecx                    ; rcx = word index
.L23:
        mov rax, [rsi+rcx*8]
        mul r10                         ; rdx:rax = divisor word * trial
        add rax, rbx                    ; add carry from the previous word
        adc rdx, 0
        sub [rdi+rcx*8], rax
        adc rdx, 0                      ; fold the borrow into the next carry
        mov rbx, rdx
        inc rcx
        cmp rcx, [rsp]
        jb .L23
        sub [rdi+rcx*8], rdx            ; apply the final carry to the top word
        add [rsp+8], r10                ; accumulate the trial quotient
        jmp .L21                        ; rcx = word count again at this point
.L24:
        ; Phase 2: top word is zero.  Compare the low rcx words with the
        ; divisor and subtract the divisor while window >= divisor.
        mov rax, [rsp+8]                ; rax = accumulated quotient
        mov rbx, rcx                    ; rbx = divisor word count
.L25:
        ; Compare from the most significant word downwards.
        sub rcx, 1
        jc .L26                         ; every word equal: window == divisor
        mov rdx, [rdi+rcx*8]
        cmp rdx, [rsi+rcx*8]
        je .L25
        jb .L28                         ; window < divisor: finished
.L26:
        xor ecx, ecx
        xor edx, edx                    ; rdx = 0 / -1 keeps the borrow between words
.L27:
        add rdx, rdx                    ; CF = saved borrow (set when rdx = -1)
        mov rdx, [rsi+rcx*8]
        sbb [rdi+rcx*8], rdx
        sbb rdx, rdx                    ; rdx = -borrow
        inc rcx
        cmp rbx, rcx
        jnz .L27
        inc rax                         ; one more divisor subtracted
        jmp .L25                        ; rcx = word count: compare again from the top
.L28:
        add rsp, 16                     ; drop word count and accumulator
        pop rbx
        ret
    


Last edited by QQ2976501934 on 18 Dec 2021, 11:21; edited 7 times in total
Post 23 Oct 2021, 14:18
View user's profile Send private message Reply with quote
macomics



Joined: 26 Jan 2021
Posts: 1040
Location: Russia
macomics 23 Oct 2021, 15:23
The parameter names could be declared as symbols for clarity. A C compiler does not care how or where the arguments are placed; here you have written out the mapping between the formal parameters and their actual stack locations by hand, in comments.
Code:
label ptDivisible_or_Divisor at ebp+8
label cnDivisible_or_Divisor at ebp+12
label ptAnything at ebp+16
label cnAnything at ebp+20
label ptResult_or_Remainder at ebp+24
label cnResult_or_Remainder at ebp+28
label ptSomething at ebp+32
label dwSomething at ebp+36    
With cdecl this is not obvious, because nothing in the function itself shows how many arguments it expects to receive on the stack.
Post 23 Oct 2021, 15:23
View user's profile Send private message Reply with quote
revolution
When all else fails, read the source


Joined: 24 Aug 2004
Posts: 20445
Location: In your JS exploiting you and your system
revolution 23 Oct 2021, 19:55
I think also that function name could be made clearer to state what it actually does.

Divide_int64_by_int64?
Divide_uint64_by_uint64?
Divide_double64_by_double64?

Or is it some other combination?


Last edited by revolution on 23 Oct 2021, 20:30; edited 1 time in total
Post 23 Oct 2021, 19:55
View user's profile Send private message Visit poster's website Reply with quote
macomics



Joined: 26 Jan 2021
Posts: 1040
Location: Russia
macomics 23 Oct 2021, 20:25
revolution wrote:
I think also that function name could be made clearer to state what it actually does.

Divide_int64_by_int64?
Divide_uint64_by_uint64?
Divide_double64_by_double64?

Or is it some other combination?
It's just easier with this:
Code:
Divide_IntInfiniteLength_by_IntInfiniteLength    
Moreover, the division is implemented by means of subtraction.
Post 23 Oct 2021, 20:25
View user's profile Send private message Reply with quote
revolution
When all else fails, read the source


Joined: 24 Aug 2004
Posts: 20445
Location: In your JS exploiting you and your system
revolution 23 Oct 2021, 21:14
Perhaps:

Divide_IntArbitraryLength_by_IntArbitraryLength

Infinite length numbers take quite a while to process. Razz
Post 23 Oct 2021, 21:14
View user's profile Send private message Visit poster's website Reply with quote
macomics



Joined: 26 Jan 2021
Posts: 1040
Location: Russia
macomics 23 Oct 2021, 21:33
revolution wrote:
Perhaps:

Divide_IntArbitraryLength_by_IntArbitraryLength

Infinite length numbers take quite a while to process. Razz
If we take into account the dimension of the variable specifying the length, then it turns out: Divide_IntX_by_IntX where X = (2^37 - 32)-bit. In most practical cases, you are talking about a deterministic discrete system. Therefore, I apply the concepts of infinity in conjunction with the possible limitations of the current system. For example, in a 16-bit system, values greater than or equal to 65536 are infinity because they cannot be achieved within a 16-bit system. But in an ideal system, infinity will be impossible to achieve. Then it will be possible to talk either about hanging, or about infinitely long execution. Very Happy
Post 23 Oct 2021, 21:33
View user's profile Send private message Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  


< Last Thread | Next Thread >
Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum


Copyright © 1999-2025, Tomasz Grysztar. Also on GitHub, YouTube.

Website powered by rwasa.