I've been trying to load a secondary program over my kernel, but it seems to crash every time. I can't seem to find the bug. I inserted some checks to see how far it gets, but I still can't find the bug. Can somebody please help?
    
progornot:
    ;mov ax, 0003
    ;int 10h
    mov         si, buff
    mov         di, filen
    call        parsefat
    mov         byte[filen+11d], 24h
    mov         ah, 09h
    mov         dx, filen
    int         21h
    jc          progornot_notprog
    xor     cx, cx
        xor     dx, dx
        mov     ax, 20h
        mul     word[RootEntries]
        div     word[BytesPerSector]
        xchg    cx, ax
        xor     ax, ax
        mov     al, byte[NumberofFATs]
        mul     word[SectorsPerFAT]
        add     ax, word[ReservedSectors]
        mov     word[datasector], ax
        add     word[datasector], cx
        ;push    es
        push    100h
        pop     es
        mov     bx, 0x0200
        call    ReadSectors
    ;pop es
    mov cx, word[RootEntries]
    mov di, 0x0200
file_loop:
    push  cx
    mov   cx, 11d
    mov si, filen
    push    di
    rep cmpsb
    pop di
    jz load_fat
    add di, 32d
    loop    file_loop
    jmp     progornot_notprog
load_fat:
        mov     dx, word[di+1ah]
        mov     word[cluster], dx
        xor     ax, ax
        mov     al, [NumberofFATs]
        mul     word[SectorsPerFAT]
        mov     cx, ax
        mov     ax, word[ReservedSectors]
        mov     bx, 0x200
        call    ReadSectors
        mov     ax, 1000h
        mov     es, ax
        xor     bx, bx
        push    bx
loadimage:
        mov     ax, word[cluster]
        pop     bx
        call    ClusterLBA
        xor     cx, cx
        mov     cl, byte[SectorsPerCluster]
        call    ReadSectors
        push    bx
        mov     ax, word[cluster]
        mov     cx, ax
        mov     dx, ax
        shr     dx, 1
        add     cx, dx
        mov     bx, 0x200
        add     bx, cx
        mov     dx, word[bx]
        test    ax, 1
        jnz     odd_cluster
even_cluster:
        and     dx, 0000111111111111b
        jmp     exdone
odd_cluster:
        shr     dx, 4
exdone:
        mov     word[cluster], dx
        cmp     dx, 0ff0h
        jb      loadimage
execute:
        mov     dl, byte[DriveNumber]
        push    0x1000
        push    0
        retf
progornot_notprog:
        clc
        ret
ReadSectors:
         mov     di, 5
@@:
        push    ax
        push    bx
        push    cx
        call    LBACHS
        mov     ah, 02h
        mov     al, 1
        mov     cl, byte[sector]
        mov     ch, byte[track]
        mov     dl, byte[DriveNumber]
        mov     dh, byte[head]
        int     13h
        jnc     success
        xor     ax, ax
        int     13h
        dec     di
        pop     cx
        pop     bx
        pop     ax
        jnz     @b
        mov     ax, 0e46h
        int     10h
        int     18h
success:
        mov     al, 'S'
        mov     ah, 0eh
        int     10h
        pop     cx bx ax
        add     bx, word[BytesPerSector]
        inc     ax
        loop    ReadSectors
        ret
ClusterLBA:
        sub     ax, 2
        xor     cx, cx
        mov     cl, byte[SectorsPerCluster]
        mul     cx
        add     ax, word[datasector]
        ret
LBACHS:
        xor     dx, dx
        div     word[SectorsPerTrack]
        inc     dl
        mov     byte[sector], dl
        xor     dx, dx
        div     word[HeadsPerCylinder]
        mov     byte[head], dl
        mov     byte[track], al
        ret
sector  db      0
head    db      0
track   db      0
datasector      dw      0
cluster         dw      0
 BytesPerSector  dw      512
        SectorsPerCluster db    1
        ReservedSectors   dw    1
        NumberofFATs      db    2
        RootEntries       dw    224
        SectorsPerFAT     dw    9
        SectorsPerTrack   dw    18
        HeadsPerCylinder  dw    2
        DriveNumber       db    0