uefi debugger for EFI_IMAGE_MACHINE_x64

documentation of debugger design
version 0000
2012-mar-11
author: Feryno

[0] assumption(s):
- only one CPU is executing even in SMP systems (bootstrap CPU bit 8. of MSR 0000001Bh MSR_APIC_BASE.BSP=1, application CPUs are halted)
  this ensures hooked interrupts on the CPU will work (else the only way is to send IPI to all CPUs so every CPU installs hook)
- none of the exception vectors is used by UEFI for anything else (unlike BIOS, where int 08h was used for the IRQ0 system timer and int 09h for the IRQ1 keyboard interrupt)
- the debugger as well as the debuggee share the same virtual memory (they have the same virtual memory translation tables, the same base from CR3)
- executables are running at CPL0

[1] debugger does these tasks in this order:
hook_exceptions (int 00h - int 1Fh)
sets rflags.TF
immediately calls the start procedure of the debuggee (so a debug exception is generated just before the first instruction of the debuggee)
unhook_exceptions (int 00h - int 1Fh)

[2] accessing memory (e.g. doing memory inspection, writing int3 breakpoint):
hook these exceptions: #UD Invalid opcode, #NP Segment not present, #SS Stack, #GP General protection, #PF Page fault, #AC Alignment check
try to access the memory
if no hook is hit then the access was successful, else the memory is not present; in case of a partial transfer, calculate the count of bytes transferred and return it
unhook exceptions

[3] exception handler:
save all registers
enable interrupts
wait for command done by user
disable interrupts
restore all registers
IRETQ to the interrupted debuggee

-------

hooking exceptions is done in a way like:
push rcx ax
sidt [rsp]
pop ax rcx
cmp ax,20h*10h-1 ; the IDT limit must cover 20h gates of 10h bytes each (limit = size - 1)
jc idt_too_small
; now we can hook int00 - int 1Fh
cli
save memory from rcx to rcx+20h*10h
write new handlers there, construct them from current CS and the new hook address
sti

-------

exception handler:
; !!! N.B. not implemented this thoroughly yet !!!
; current implementation saves only up to 16 GPRs (rax...r15)
; todo - maybe also use IST feature in TSS so exceptions always switch to good stack



    ;---------------------------------------------------------------------
    ; CommonInterruptEntry
    ;
    ; Syntax: MASM (ml64), x64 long mode, runs at CPL0.
    ;
    ; Common tail of all interrupt/exception entry points.
    ;
    ; In:    [rsp]   = return address of the 'call' that got us here; per
    ;                  the comment at the xchg below it points at a 16-bit
    ;                  vector number (the calling stubs are not shown here).
    ;        above it: the CPU interrupt frame, with or without a CPU
    ;                  supplied error code depending on the vector.
    ; Uses:  mErrorCodeFlag         - bit N set => vector N has an error
    ;                                 code already on the stack (defined
    ;                                 elsewhere).
    ;        ExternalVectorTablePtr - pointer to a table of 8-byte handler
    ;                                 pointers indexed by vector number
    ;                                 (defined elsewhere).
    ; Does:  builds a full register context on the stack, calls the
    ;        registered handler (if not NULL) with
    ;            rcx = vector number
    ;            rdx = pointer to the saved context
    ;        using the X64 calling convention, then reloads the (possibly
    ;        modified) context and returns to the interrupted code with
    ;        IRETQ.
    ; Not restored from the context: RBP slot, DR6, CR1, GS, FS, LDTR, TR,
    ;        GDTR, IDTR.
    ;---------------------------------------------------------------------
CommonInterruptEntry PROC               ; matches the ENDP at the end of the routine
    cli
    ;
    ; All interrupt handlers are invoked through interrupt gates, so
    ; IF flag automatically cleared at the entry point
    ;
    ;
    ; Calculate vector number
    ;
    xchg    rcx, [rsp] ; get the return address of call, actually, it is the address of vector number.
                       ; the interrupted RCX is now parked in that stack slot
    movzx   ecx, word ptr [rcx]
    cmp     ecx, 32         ; Intel reserved vector for exceptions?
    jae     NoErrorCode     ; vectors >= 32 never push an error code
    bt      mErrorCodeFlag, ecx
    jc      @F              ; error code already on the stack, no dummy needed

NoErrorCode:

    ;
    ; Push a dummy error code on the stack
    ; to maintain coherent stack map
    ;
    push    [rsp]                       ; duplicate the saved RCX one slot lower
    mov     qword ptr [rsp + 8], 0      ; the old slot becomes the dummy error code
@@:
    push    rbp
    mov     rbp, rsp

    ;
    ; Stack:
    ; +---------------------+ <-- 16-byte aligned ensured by processor
    ; +    Old SS           +   [rbp + 56]
    ; +---------------------+
    ; +    Old RSP          +   [rbp + 48]
    ; +---------------------+
    ; +    RFlags           +   [rbp + 40]
    ; +---------------------+
    ; +    CS               +   [rbp + 32]
    ; +---------------------+
    ; +    RIP              +   [rbp + 24]
    ; +---------------------+
    ; +    Error Code       +   [rbp + 16]
    ; +---------------------+
    ; + RCX / Vector Number +   [rbp + 8]
    ; +---------------------+
    ; +    RBP              +   [rbp]
    ; +---------------------+ <-- RBP, 16-byte aligned
    ;


    ;
    ; Since here the stack pointer is 16-byte aligned, so
    ; EFI_FX_SAVE_STATE_X64 of EFI_SYSTEM_CONTEXT_x64
    ; is 16-byte aligned
    ; (everything pushed between here and the fxsave area totals
    ;  336 bytes, a multiple of 16)
    ;

;; UINT64  Rdi, Rsi, Rbp, Rsp, Rbx, Rdx, Rcx, Rax;
;; UINT64  R8, R9, R10, R11, R12, R13, R14, R15;
    push r15
    push r14
    push r13
    push r12
    push r11
    push r10
    push r9
    push r8
    push rax
    push qword ptr [rbp + 8]   ; RCX
    push rdx
    push rbx
    push qword ptr [rbp + 48]  ; RSP
    push qword ptr [rbp]       ; RBP
    push rsi
    push rdi

;; UINT64  Gs, Fs, Es, Ds, Cs, Ss;  insure high 16 bits of each is zero
    movzx   rax, word ptr [rbp + 56]
    push    rax                      ; for ss
    movzx   rax, word ptr [rbp + 32]
    push    rax                      ; for cs
    mov     rax, ds
    push    rax
    mov     rax, es
    push    rax
    mov     rax, fs
    push    rax
    mov     rax, gs
    push    rax

    ; the interrupted RCX has been copied into the context above, so its
    ; frame slot can now be reused
    mov     [rbp + 8], rcx               ; save vector number

;; UINT64  Rip;
    push    qword ptr [rbp + 24]

;; UINT64  Gdtr[2], Idtr[2];
    ; sidt/sgdt store 10 bytes: 2-byte limit followed by 8-byte base.
    ; The three xchg below rearrange that into [rsp] = base,
    ; [rsp + 8] = limit (zero extended) and leave rax = 0.
    xor     rax, rax
    push    rax
    push    rax
    sidt    [rsp]
    xchg    rax, [rsp + 2]
    xchg    rax, [rsp]
    xchg    rax, [rsp + 8]

    xor     rax, rax
    push    rax
    push    rax
    sgdt    [rsp]
    xchg    rax, [rsp + 2]
    xchg    rax, [rsp]
    xchg    rax, [rsp + 8]

;; UINT64  Ldtr, Tr;
    xor     rax, rax
    str     ax
    push    rax
    sldt    ax
    push    rax

;; UINT64  RFlags;
    push    qword ptr [rbp + 40]

;; UINT64  Cr0, Cr1, Cr2, Cr3, Cr4, Cr8;
    mov     rax, cr8
    push    rax
    mov     rax, cr4
    or      rax, 208h                ; set CR4.DE (bit 3) and CR4.OSFXSR (bit 9)
    mov     cr4, rax
    push    rax                      ; note: the modified CR4 value is what gets saved
    mov     rax, cr3
    push    rax
    mov     rax, cr2
    push    rax
    xor     rax, rax                 ; Cr1 slot is a zero placeholder
    push    rax
    mov     rax, cr0
    push    rax

;; UINT64  Dr0, Dr1, Dr2, Dr3, Dr6, Dr7;
    mov     rax, dr7
    push    rax
;; clear Dr7 while executing debugger itself
    xor     rax, rax
    mov     dr7, rax

    mov     rax, dr6
    push    rax
;; insure all status bits in dr6 are clear...
    xor     rax, rax
    mov     dr6, rax

    mov     rax, dr3
    push    rax
    mov     rax, dr2
    push    rax
    mov     rax, dr1
    push    rax
    mov     rax, dr0
    push    rax

;; FX_SAVE_STATE_X64 FxSaveState;
    sub rsp, 512
    mov rdi, rsp
    db 0fh, 0aeh, 07h ;fxsave [rdi]

;; UEFI calling convention for x64 requires that Direction flag in EFLAGs is clear.
;; The interrupted code may have been running with DF=1; its own DF comes back
;; with the RFlags image that iretq reloads, so clearing it here is safe.
    cld

;; UINT32  ExceptionData;
    push    qword ptr [rbp + 16]

;; call into exception handler
    mov     rcx, [rbp + 8]               ; rcx = vector number (1st argument)
    mov     rax, ExternalVectorTablePtr  ; get the interrupt vectors base
    mov     rax, [rax + rcx * 8]
    test    rax, rax                     ; NULL?

    ; N.B. despite its name, nonNullValue is the path taken when the
    ; handler pointer IS NULL (the call is skipped)
    jz      nonNullValue

;; Prepare parameter and call
;  mov     rcx, [rbp + 8]
    mov     rdx, rsp                     ; rdx = pointer to saved context (2nd argument)
    ;
    ; Per X64 calling convention, allocate maximum parameter stack space
    ; and make sure RSP is 16-byte aligned
    ; (rsp is 8 mod 16 after the ExceptionData push; 4 * 8 + 8 = 40
    ;  brings it back to a multiple of 16)
    ;
    sub     rsp, 4 * 8 + 8
    call    rax
    add     rsp, 4 * 8 + 8

nonNullValue:
    cli                                  ; the handler may have enabled interrupts
;; UINT64  ExceptionData;
    add     rsp, 8

;; FX_SAVE_STATE_X64 FxSaveState;

    mov rsi, rsp
    db 0fh, 0aeh, 0Eh ; fxrstor [rsi]
    add rsp, 512

;; UINT64  Dr0, Dr1, Dr2, Dr3, Dr6, Dr7;
    pop     rax
    mov     dr0, rax
    pop     rax
    mov     dr1, rax
    pop     rax
    mov     dr2, rax
    pop     rax
    mov     dr3, rax
;; skip restore of dr6.  We cleared dr6 during the context save.
    add     rsp, 8
    pop     rax
    mov     dr7, rax

;; UINT64  Cr0, Cr1, Cr2, Cr3, Cr4, Cr8;
    pop     rax
    mov     cr0, rax
    add     rsp, 8   ; not for Cr1
    pop     rax
    mov     cr2, rax
    pop     rax
    mov     cr3, rax
    pop     rax
    mov     cr4, rax
    pop     rax
    mov     cr8, rax

;; UINT64  RFlags;
    pop     qword ptr [rbp + 40]         ; written back into the iretq frame

;; UINT64  Ldtr, Tr;
;; UINT64  Gdtr[2], Idtr[2];
;; Best not let anyone mess with these particular registers...
    add     rsp, 48                      ; 2 * 8 (Ldtr, Tr) + 16 (Gdtr) + 16 (Idtr)

;; UINT64  Rip;
    pop     qword ptr [rbp + 24]         ; written back into the iretq frame

;; UINT64  Gs, Fs, Es, Ds, Cs, Ss;
    pop     rax
    ; mov     gs, rax ; not for gs
    pop     rax
    ; mov     fs, rax ; not for fs
    ; (X64 will not use fs and gs, so we do not restore it)
    pop     rax
    mov     es, rax
    pop     rax
    mov     ds, rax
    pop     qword ptr [rbp + 32]  ; for cs
    pop     qword ptr [rbp + 56]  ; for ss

;; UINT64  Rdi, Rsi, Rbp, Rsp, Rbx, Rdx, Rcx, Rax;
;; UINT64  R8, R9, R10, R11, R12, R13, R14, R15;
    pop     rdi
    pop     rsi
    add     rsp, 8               ; not for rbp
    pop     qword ptr [rbp + 48] ; for rsp
    pop     rbx
    pop     rdx
    pop     rcx
    pop     rax
    pop     r8
    pop     r9
    pop     r10
    pop     r11
    pop     r12
    pop     r13
    pop     r14
    pop     r15

    mov     rsp, rbp
    pop     rbp                  ; rbp comes from the frame slot, not from the context
    add     rsp, 16              ; drop the vector number and the error code slots
    iretq

CommonInterruptEntry ENDP

-------

A page may be mapped as read-only. To write into it, either remap it as writable or, more easily, clear CR0.WP (bit 16 of CR0; when clear, it allows supervisor-level procedures to write into read-only pages regardless of the U/S bit setting). After such a write, should the dirty bit in the last level of the paging tables also be cleared, to hide the write to the read-only page?

