; This program toggles the UTF-8 byte order mark (BOM) of a text file:
; it removes the BOM if the file starts with one and adds one otherwise.
; $ fasm bomtoggle.asm bomtoggle
; Usage: $ ./bomtoggle <file.utf8>

; @(#) $Id: bomtoggle.asm,v 1.3 2024/12/30 19:14:41 user Exp $

format ELF64 executable 3 ;fasm writes the ELF64 executable itself; NOTE(review): 3 presumably selects the Linux OS ABI - confirm in the fasm manual
use64
entry start ;execution begins at the start label

macro writeznl string*, stream*
{
;Write the NUL-terminated string at address `string`, followed by a single
;newline, to the file descriptor `stream` (1 is stdout, 2 is stderr).
;Clobbers rax, rcx, rdx, rsi, rdi, r11 and the flags.
;The results of the write syscalls are not checked.
    local .scan, .emit, .newline
;FIND LENGTH
    mov rsi, string ;address of string
    mov rdx, rsi
.scan:
    cmp byte [rdx], 0
    je .emit
    inc rdx
    jmp .scan
.emit:
    sub rdx, rsi ;length
    jz .newline ;empty string: only the newline is written
;PRINT STRING
    mov eax, 1 ;write syscall
    mov edi, stream
    syscall
;ADD A NEWLINE:
.newline:
    push 10 ;full 8-byte stack slot holding '\n', keeps rsp 8-byte aligned
    mov eax, 1 ;write syscall
    mov edi, stream ;same stream as the string, so stderr messages stay intact
    mov rsi, rsp ;address
    mov edx, 1 ;length
    syscall
    add rsp, 8
}

macro exit status*
{
;Terminate the process with exit code `status`; control does not return.
    mov eax, 60 ;exit syscall
    mov edi, status
    syscall
}

segment readable writeable
filename_length = 512 ;capacity of each filename buffer, terminating NUL included
buffer_length = 256 ;size of the copy buffer in bytes
whatis: db 'bomtoggle - Add or remove an UTF-8 Byte Order Mark to or from the beginning of a text file', 0
usage: db 'Usage: bomtoggle <file>', 0
mksstr: db '_XXXXXX', 0 ;suffix of the temporary file name; asm_mkstemp replaces the X's
bom: db 0xEF, 0xBB, 0xBF ;the byte order mark U+FEFF encoded as UTF-8
hasbom: db 'File already has a byte order mark, removing it.', 0
hasnobom: db 'Adding a byte order mark to the file', 0
errmsg1: db 'STAT ERROR', 0
errmsg2: db 'ARGUMENT NOT REGULAR FILE ERROR', 0
errmsg3: db 'MKSTEMP ERROR', 0
errmsg4: db 'SYS_OPEN ERROR', 0
errmsg5: db 'SYS_LSEEK ERROR', 0
errmsg6: db 'SYS_WRITE ERROR', 0
errmsg7: db 'SHORT WRITE ERROR', 0
errmsg8: db 'SYS_CLOSE ERROR', 0
errmsg9: db 'SYS_FCHMOD ERROR', 0
errmsg11: db 'SYS_RENAME ERROR', 0
errmsg12: db 'SYS_READ ERROR', 0
errmsg13: db 'FILENAME TOO LONG ERROR', 0
struct_stat: rb 144 ;filled in by the stat syscall
old_filename: rb filename_length ;copy of argv[1]
new_filename: rb filename_length ;argv[1] with the mksstr suffix, X's randomised
buffer: rb buffer_length
filedes_input: rd 1 ;32-bit file descriptors: always load and store them as dwords
filedes_output: rd 1
rand: rd 1 ;random bits used by asm_mkstemp
nread: rq 1 ;byte count returned by the most recent read in the copy loop

segment readable executable
;-----------------------------------------------------------------------
; Program entry point.
; Flow: check that argv[1] is a regular file, create a temporary file next
; to it, copy the contents into the temporary file with the BOM removed
; (if the input starts with one) or prepended (otherwise), give the
; temporary file the input's mode and rename it over the input.
; The input is never unlinked first: rename replaces it in one step, so a
; failure at any point leaves the original file untouched. Every failure
; after the temporary file exists removes that file before exiting.
; Exit status: 0 on success, 1 on any failure.
;-----------------------------------------------------------------------
start:
    pop r8 ;r8 = argc, rsp now points at argv[0]
    cmp r8d, 2
    jne failure0

;FILENAME EXISTS AND IS A PLAIN FILE
    mov eax, 4 ;stat syscall
    mov rdi, qword [rsp+8] ;argv[1]
    lea rsi, [struct_stat]
    syscall
    test eax, eax
    js failure1

    mov r10d, dword [struct_stat+24] ;st_mode
    and r10d, 170000o ;S_IFMT
    cmp r10d, 100000o ;S_IFREG
    jne failure2

;SAVE FILENAME
    lea rdi, [old_filename]
    mov rsi, qword [rsp+8]
    mov edx, filename_length
    call asm_strlcpy
    cmp rax, filename_length ;a result >= the buffer size means truncation
    jae failure13

;SET UP TEMPORARY FILE
    lea rdi, [new_filename]
    mov rsi, qword [rsp+8]
    mov edx, filename_length
    call asm_strlcpy

    lea rdi, [new_filename]
    lea rsi, [mksstr]
    mov edx, filename_length
    call asm_strlcat
    cmp rax, filename_length ;name plus suffix must fit as well
    jae failure13

    lea rdi, [new_filename]
    call asm_mkstemp
    test eax, eax
    js failure3
    mov [filedes_output], eax

;READ 3 BYTES FROM INPUT FILE
    mov eax, 2 ;open syscall
    lea rdi, [old_filename]
    xor esi, esi ;O_RDONLY
    xor edx, edx
    syscall
    test eax, eax
    js failure4
    mov [filedes_input], eax ;save file descriptor for later

    xor eax, eax ;read syscall
    mov edi, [filedes_input]
    lea rsi, [buffer]
    mov edx, 3
    syscall
    test rax, rax
    js failure12
    cmp eax, 3
    jb .else ;file smaller than 3 bytes cannot start with a BOM

;COMPARE FIRST 3 BYTES WITH BYTE ORDER MARK
    movzx eax, word [buffer]
    movzx ecx, word [bom]
    cmp eax, ecx
    jne .else
    movzx eax, byte [buffer+2]
    cmp al, byte [bom+2]
    jne .else

    writeznl hasbom, 1

    mov eax, 8 ;lseek syscall
    mov edi, [filedes_input]
    mov esi, 3 ;offset just past the BOM
    xor edx, edx ;SEEK_SET
    syscall
    test rax, rax
    js failure5

    jmp .cpyloop

.else:
    writeznl hasnobom, 1

    mov eax, 8 ;lseek syscall
    mov edi, [filedes_input]
    xor esi, esi ;back to the start, the bytes read above are content
    xor edx, edx ;SEEK_SET
    syscall
    test rax, rax
    js failure5

;WRITE OUT BOM FIRST
    mov eax, 1 ;write syscall
    mov edi, [filedes_output]
    lea rsi, [bom]
    mov edx, 3
    syscall
    test rax, rax
    js failure6
    cmp rax, 3
    jb failure7

;WRITE OUT REST OF THE FILE
.cpyloop:
    xor eax, eax ;read syscall
    mov edi, [filedes_input]
    lea rsi, [buffer]
    mov edx, buffer_length
    syscall
    test rax, rax
    js failure12
    jz .close ;0 bytes read = end of file
    mov [nread], rax

    mov eax, 1 ;write syscall
    mov edi, [filedes_output]
    lea rsi, [buffer]
    mov rdx, [nread]
    syscall
    test rax, rax
    js failure6
    cmp rax, [nread]
    jb failure7 ;fewer bytes written than were read
    jmp .cpyloop

.close:
    mov eax, 91 ;fchmod syscall
    mov edi, [filedes_output]
    mov esi, dword [struct_stat+24] ;st_mode of the input file
    syscall
    test eax, eax
    js failure9

    mov eax, 3 ;close syscall
    mov edi, [filedes_input]
    syscall
    test eax, eax
    js failure8

    mov eax, 3 ;close syscall
    mov edi, [filedes_output]
    syscall
    test eax, eax
    js failure8

;RENAME OUTPUT FILE OVER INPUT FILE
    mov eax, 82 ;rename syscall, replaces the existing input file
    lea rdi, [new_filename]
    lea rsi, [old_filename]
    syscall
    test eax, eax
    js failure11

    exit 0

;FAILURES BEFORE THE TEMPORARY FILE EXISTS
failure0:
    writeznl whatis, 2
    writeznl usage, 2
    exit 1

failure1:
    writeznl errmsg1, 2
    exit 1

failure2:
    writeznl errmsg2, 2
    exit 1

failure3:
    writeznl errmsg3, 2
    exit 1

failure13:
    writeznl errmsg13, 2
    exit 1

;FAILURES AFTER THE TEMPORARY FILE WAS CREATED
failure4:
    writeznl errmsg4, 2
    jmp remove_tempfile

failure5:
    writeznl errmsg5, 2
    jmp remove_tempfile

failure6:
    writeznl errmsg6, 2
    jmp remove_tempfile

failure7:
    writeznl errmsg7, 2
    jmp remove_tempfile

failure8:
    writeznl errmsg8, 2
    jmp remove_tempfile

failure9:
    writeznl errmsg9, 2
    jmp remove_tempfile

failure11:
    writeznl errmsg11, 2
    jmp remove_tempfile

failure12:
    writeznl errmsg12, 2
    jmp remove_tempfile

remove_tempfile:
    mov eax, 87 ;unlink syscall, best effort: the result is not checked
    lea rdi, [new_filename]
    syscall
    exit 1

;-----------------------------------------------------------------------
; size_t asm_strlcpy(char *dst, const char *src, size_t size)
; Copies at most size-1 bytes of the NUL-terminated string src to dst and
; NUL-terminates dst. When size is 0 nothing is written to dst at all.
; In:    rdi = dst, rsi = src, rdx = size of the dst buffer in bytes
; Out:   rax = strlen(src); a result >= size means the copy was truncated
; Clobb: rdi, rsi, r8, r9, flags (rdx is left unchanged)
;-----------------------------------------------------------------------
asm_strlcpy:
    mov r8, rsi ;r8 = start of src, needed for the length
    mov r9, rdx ;r9 = bytes of dst still free
    test r9, r9
    jz .measure ;size 0: dst must not be touched

.copy:
    dec r9
    jz .truncate ;only one byte left: it is reserved for the NUL
    movzx eax, byte [rsi]
    inc rsi
    mov byte [rdi], al
    inc rdi
    test al, al
    jnz .copy

    dec rsi ;whole string copied: step back onto the NUL of src
    jmp .done

.truncate:
    mov byte [rdi], 0

.measure: ;walk to the end of src to obtain its full length
    cmp byte [rsi], 0
    je .done
    inc rsi
    jmp .measure

.done:
    mov rax, rsi
    sub rax, r8
    ret ;asm_strlcpy()

;-----------------------------------------------------------------------
; size_t asm_strlcat(char *dst, const char *src, size_t size)
; Appends the NUL-terminated string src to the string in dst without
; writing past dst[size-1], and NUL-terminates the result.
; If dst holds no NUL within its first size bytes, dst is left unchanged.
; In:    rdi = dst, rsi = src, rdx = size of the dst buffer in bytes
; Out:   rax = (length of dst, capped at size) + strlen(src);
;        a result >= size means the output was truncated
; Clobb: rcx, r8, r9, r10, flags (rdi, rsi, rdx are left unchanged)
;-----------------------------------------------------------------------
asm_strlcat:
    mov rcx, rdi ;rcx = scan/write position in dst
    mov r9, rdx ;r9 = bytes of dst from rcx to the end of the buffer

.find_end:
    test r9, r9
    jz .no_room ;no NUL inside the buffer
    cmp byte [rcx], 0
    je .append
    inc rcx
    dec r9
    jmp .find_end

.append:
    mov r10, rcx
    sub r10, rdi ;r10 = current length of dst
    mov r8, rsi ;r8 = read position in src

.copy:
    movzx eax, byte [r8]
    test al, al
    jz .terminate
    cmp r9, 1
    je .skip ;last free byte is reserved for the NUL: only count
    mov byte [rcx], al
    inc rcx
    dec r9
.skip:
    inc r8
    jmp .copy

.terminate:
    mov byte [rcx], 0
    lea rax, [r8+r10]
    sub rax, rsi ;dst length + strlen(src)
    ret

.no_room: ;result is size + strlen(src), nothing is written
    mov rax, rdx
    mov r8, rsi
.measure:
    cmp byte [r8], 0
    je .done
    inc r8
    inc rax
    jmp .measure
.done:
    ret ;asm_strlcat()

;-----------------------------------------------------------------------
; int asm_mkstemp(char *template)
; Replaces the six trailing 'X' characters of template with characters
; from 0-9 and a-v derived from 32 random bits, then creates the file
; exclusively with mode 600. Up to three names are tried.
; In:    rdi = template, NUL-terminated, must end in "XXXXXX"
; Out:   eax = file descriptor open for reading and writing, or -1 when
;        the template is invalid, getrandom fails or all three opens fail
; Uses:  the 4-byte variable rand as the getrandom buffer
; Clobb: rcx, rdx, rsi, rdi, r11, flags
;-----------------------------------------------------------------------
asm_mkstemp:
    push rbp
    mov rbp, rsp
    push rbx
    push r12
    push r13

    mov r12, rdi ;r12 = template

;LOCATE THE LAST SIX CHARACTERS
    mov rbx, rdi
.seek_nul:
    cmp byte [rbx], 0
    je .have_end
    inc rbx
    jmp .seek_nul

.have_end:
    sub rbx, 6 ;rbx = address of the six placeholder characters
    cmp rbx, r12
    jb .fail ;template shorter than six characters

;THEY MUST ALL BE 'X'
    xor ecx, ecx
.check_x:
    cmp byte [rbx+rcx], 'X'
    jne .fail
    inc ecx
    cmp ecx, 6
    jb .check_x

    mov r13d, 3 ;attempts left

.attempt:
    mov eax, 318 ;getrandom syscall
    lea rdi, [rand]
    mov esi, 4
    xor edx, edx
    syscall
    test eax, eax
    js .fail

;TURN 5 RANDOM BITS INTO EACH CHARACTER
    mov edx, [rand]
    xor ecx, ecx
.fill:
    mov eax, edx
    and eax, 0x1f ;value 0..31
    lea esi, [rax+'0'] ;candidate for 0..9
    lea edi, [rax+'a'-10] ;candidate for 10..31
    cmp eax, 9
    cmova esi, edi
    mov byte [rbx+rcx], sil
    shr edx, 5 ;next group of 5 bits
    inc ecx
    cmp ecx, 6
    jb .fill

    mov eax, 2 ;open syscall
    mov rdi, r12
    mov esi, 400302o ;O_NOFOLLOW or O_EXCL or O_CREAT or O_RDWR
    mov edx, 600o
    syscall
    test eax, eax
    jns .out ;eax = new file descriptor

    dec r13d
    jnz .attempt ;any open error leads to another name

.fail:
    mov eax, -1

.out:
    pop r13
    pop r12
    pop rbx
    leave
    ret ;asm_mkstemp()

; vim: set ts=4 sw=4 sts=4 syn=fasm:
