chris03_dev
Joined: 24 Oct 2020
Posts: 3
|
Greetings. This is a bit of a newbie question, as I have just been learning floating-point assembly... SSE in particular.
I am currently using FASM as a compiler backend, and currently testing for Linux. I am currently stumped, as this code should have worked perfectly fine, but instead caused an infinite loop:
.c0_l:
; x /= y
pxor xmm0, xmm0
pxor xmm1, xmm1
movss xmm1, dword [rbp-16]
movss xmm0, dword [rbp-8]
divss xmm0, xmm1
movss [rbp-8], xmm0
; hello:5 += 1
inc [hello+5]
; hello:5 >= 127
movzx eax, byte [hello+5]
cmp eax, 127
setae al
and al, 1
movzx eax, al
cmp eax, 0
je .c1_0
; hello:5 = 32
mov [hello+5], byte 32
.c1_0:
.c1e:
mov rsi, hello
mov rdx, 13
mov rax, 1
mov rdi, 1
syscall
; x >= 50
pxor xmm0, xmm0
movss xmm0, dword [rbp-8]
ucomiss xmm0, dword [__F4]
setge al
and al, 1
movzx eax, al
cmp eax, 1
jge .c0_l
Subsequently, I have also inserted some system calls for string output in order to verify that the program is indeed having an infinite loop.
Just in case the problem is somewhere else within the program, I shall also provide the whole source code, down below:
format elf64 executable
segment writeable
align 128
__F0 dd 2500.000000
align 128
__F1 dd 100.000000
align 128
__F2 dd 5.000000
align 128
__F3 dd 5.000000
align 128
__F4 dd 50.000000
align 128
__F5 dd 5.000000
align 128
__F6 dd 0.000000
align 128
__F7 dd 5.000000
align 128
__F8 dd 7.000000
align 64
hello db 5 dup ("Hello",'"'," World",10,"")
world db "World",10
ptr3 rq 1
segment executable
align 128
include 'rv64.asm'
strlen:
push rbp
mov rbp, rsp
sub rsp, 20
; len = rbp - 12
; float = rbp - 20
; len = 0
mov [rbp-12], word 0
; strlen
mov rax, strlen
; strlen:5:2[1, 2]
; 1
mov eax, 1
mov rdi, rax
; 2
mov eax, 2
mov rsi, rax
mov rax, [strlen+40]
mov rax, [rax+16]
call rax
; len~:4:2[2]:2[3]{u1}:7
; 2
mov eax, 2
mov rdi, rax
mov rax, [rbp-12]
mov rax, [rax+32]
mov rax, [rax+16]
call rax
push rax
; 3
mov eax, 3
mov rdi, rax
pop rax
mov rax, [rax+8]
call rax
mov eax, dword [rbp-12]
mov rdx, rax
movzx eax, byte [rdx+7]
add rsp, 20
pop rbp
ret
entry main
main:
push rbp
mov rbp, rsp
sub rsp, 80
; x = rbp - 8
; y = rbp - 16
; z = rbp - 24
; float = rbp - 36
; ptr1 = rbp - 48
; ptr2 = rbp - 60
; ptr3 = rbp - 72
; ptr1 = ptr2
lea rcx, [rbp-60]
mov [rbp-48], rcx
; ptr2 = ptr3
lea rcx, [rbp-72]
mov [rbp-60], rcx
; ptr3 = hello + 0
mov r10, hello
add r10, 0
mov [rbp-72], r10
; x *= ptr3:1
pxor xmm0, xmm0
mov rcx, [rbp-72]
movzx ecx, byte [rcx+1]
cvtsi2ss xmm1, rcx
; ptr1 + 1
lea rax, [rbp-48]
inc rax
; ptr2~ *= 1
mov rdx, [rbp-60]
imul eax, [rdx], 1
mov [rdx], al
; x = 2500
pxor xmm0, xmm0
movss xmm0, dword [__F0]
movss [rbp-8], xmm0
; y = 100
pxor xmm0, xmm0
movss xmm0, dword [__F1]
movss [rbp-16], xmm0
; z = 5
pxor xmm0, xmm0
movss xmm0, dword [__F2]
movss [rbp-24], xmm0
; hello:1 = 'a'
mov [hello+1], byte 'a'
; ptr3:2 = 'b'
mov rdx, [rbp-72]
mov [rdx+2], byte 'b'
; ptr2~:3 = 'c'
mov rdx, [rbp-60]
mov rdx, [rdx]
mov [rdx+3], byte 'c'
; ptr1~~:4 = 'd'
mov rdx, [rbp-48]
mov rdx, [rdx]
mov rdx, [rdx]
mov [rdx+4], byte 'd'
; ptr1~~:4 -= 32
mov rdx, [rbp-48]
mov rdx, [rdx]
mov rdx, [rdx]
sub [rdx+4], byte 32
; float = x * 5
pxor xmm2, xmm2
movss xmm2, dword [rbp-8]
mulss xmm2, dword [__F3]
pxor xmm0, xmm0
movsd [rbp-36], xmm0
; strlen[1 + 5, 2, 3]{i4} >> 5
mov [rsp], rax
; 1 + 5
mov eax, 6
mov rdi, rax
; 2
mov eax, 2
mov rsi, rax
; 3
mov eax, 3
mov rdx, rax
mov rax, [rsp]
.c0_l:
; x /= y
pxor xmm0, xmm0
pxor xmm1, xmm1
movss xmm1, dword [rbp-16]
movss xmm0, dword [rbp-8]
divss xmm0, xmm1
movss [rbp-8], xmm0
; hello:5 += 1
inc [hello+5]
; hello:5 >= 127
movzx eax, byte [hello+5]
cmp eax, 127
setae al
and al, 1
movzx eax, al
cmp eax, 0
je .c1_0
; hello:5 = 32
mov [hello+5], byte 32
.c1_0:
.c1e:
mov rsi, hello
mov rdx, 13
mov rax, 1
mov rdi, 1
syscall
; x >= 50
pxor xmm0, xmm0
movss xmm0, dword [rbp-8]
ucomiss xmm0, dword [__F4]
setge al
and al, 1
movzx eax, al
cmp eax, 1
jge .c0_l
; x >= 5
pxor xmm0, xmm0
movss xmm0, dword [rbp-8]
ucomiss xmm0, dword [__F5]
setge al
and al, 1
movzx eax, al
cmp eax, 0
je .c2_0
mov rsi, hello
mov rdx, 13
jmp .c2e
.c2_0:
; x > 0
pxor xmm0, xmm0
movss xmm0, dword [rbp-8]
ucomiss xmm0, dword [__F6]
setg al
and al, 1
movzx eax, al
cmp eax, 0
je .c2_1
mov rsi, world
mov rdx, 5
jmp .c2e
.c2_1:
mov rsi, world
mov rdx, 3
.c2e:
mov rax, 1
mov rdi, 1
syscall
; x * (z + 5) * 7 * z
pxor xmm2, xmm2
movss xmm2, dword [rbp-24]
addss xmm2, [__F7]
pxor xmm0, xmm0
movss xmm0, dword [rbp-8]
mulss xmm0, dword [__F8]
pxor xmm1, xmm1
movss xmm1, dword [rbp-24]
; 0
mov eax, 0
add rsp, 80
pop rbp
mov rdi, rax
mov rax, 60
syscall
Thank you for your assistance.
_________________ Every rule has an exception...
Never trust deep learning algorithms to make the final decisions.
|