;
; DOS PE/PX example using LOADPEX (also DOS/32A possible)
;
; Compile with FASM, requires LOADPEX.BIN from HX 2.14 or later
; What's worse, the executable must be cracked using PATCHPE, otherwise it will not run
;
; Code has no absolute addresses thus no silly relox/fixups
;
; Speeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeed test
; Loads file "TEST" (1 ... 200 MiB) and reports the performance
;
; http://board.flatassembler.net/topic.php?t=8670
;
define pope pop
format PE console on "LOADPEX.BIN" ; No "PX" support
use32
org $00400000
; ************
; *   MAIN   *
; ************
; Program flow:
;   1. Print the banner.
;   2. Allocate a 200 MiB block (INT $31, AX=$0501).
;   3. Open the file "TEST" (INT $21, AH=$3D).
;   4. Read up to 200 MiB into the block (INT $21, AH=$3F), bracketed by
;      two reads of the tick counter at linear address $046C.
;   5. Close the file, then report byte count, elapsed time and KiB/sec.
; Every failure path prints "F**K: <reason>" and jumps to qqq4, which
; prints an EOL and leaves through lleof. The allocated block is never
; freed and ESP is never restored before exiting.
;
; Stack
        mov   ebp,esp   ; Make space for 5 silly 32bit vars below EBP
        sub   esp,20
; EBP-4  Load address
; EBP-8  Old timer
; EBP-12 Diff timer
; EBP-16 Bloat
; EBP-20 Time in "10ms" units
; Welcome
        call  ssprint   ; Text follows inline, execution resumes after the 0
        db    $0D,$0A,'Hello from protected mode  ',$0D,$0A
        db    'LOADPEX & FASM | Speeeeeeeeeeeeeeeeeeeeed test !!!',$0D,$0A
        db    '(CL) 2008-05-02 Public Domain , ABUSE at YOUR own RISK !!!'
        db    $0D,$0A,'Hint: need file "TEST" 1'
        db    '...200 MiB in size and taking at least 1 sec to read'
        db    $0D,$0A,$0D,$0A,0
; Alloc
        mov   ax,$0501    ; Alloc 200 MiB
        mov   bx,200*16   ; HI word of size: 200*16*65536 = 200 MiB
        mov   cx,0        ; LO word of size
        int   $31         ; Alloc trashes BX,CX,SI,DI
        jnc   @f          ; OK
        call  sf
        call  ssprint
        db    'ALLOC failed',$0D,$0A,0
        jmp   qqq4         ; The END
        ;---------
@@:     shl   ebx,16       ; BX (high word of the address) -> upper half of EBX
        mov   bx,cx        ; EBX: lin address of our mem
        mov   [ebp-4],ebx
        call  ssprint
        db    'WOW: ALLOC success',$0D,$0A,0
; Open
        xor   eax,eax      ; AL = 0 is the access mode passed to OPEN
        push  eax          ; PUSHD 0 - terminator behind the filename
        push  'TEST'       ; Filename, now a 0-terminated string at [ESP]
        mov   ah,$3D       ; OPEN
        mov   edx,esp      ; Filename
        int   $21          ; OPEN it | Result: AX : handle
        pope  ecx          ; Drop the filename ...
        pope  ecx          ; ... and its terminator | POP leaves CF alone
        jnc   @f
        call  sf
        call  ssprint
        db    'OPEN failed',$0D,$0A,0
        jmp   qqq4
        ;---------
@@:     push  ax          ; Handle - must be saved, ssprint trashes AX
        call  ssprint
        db    'WOW: OPEN success',$0D,$0A,0
; Timer backup
        mov   eax,[$046C]  ; We are ZERO-based
        mov   [ebp-8],eax  ; Save timer
; Read
        pope  bx              ; Handle
        mov   ah,$3F          ; Read
        mov   ecx,200*1048576 ; How much - 200 MiB max
        mov   edx,[ebp-4]     ; Where
        int   $21             ; EAX will reveal how much data we found
        jnc   @f              ; OK
        xor   eax,eax         ; Error -> 0 | 0 is illegal size
@@:     mov   [ebp-16],eax    ; Save bloat
; Sub timer
        mov   eax,[$046C]
        sub   eax,[ebp-8]     ; Sub old timer
        mov   [ebp-12],eax    ; Save diff timer
; Close & report reading result
        mov   ah,$3E          ; CLOSE | BX has not been touched since the read
        int   $21             ; Result of CLOSE is not checked
        cmp   dword [ebp-16],0
        jne   qqq1            ; Reading OK
        call  sf
        call  ssprint
        db    'Reading failed',$0D,$0A,0
        jmp   qqq4
        ;---------
qqq1:   call  ssprint
        db    'WOW: Read success: ',0 ; No EOL !!!
; Report bloat and ...
        mov   eax,[ebp-16]
        push  eax               ; ssdec32 trashes EAX, keep a copy for the hex output
        call  ssdec32
        call  ssprint
        db    ' = $',0
        pope  eax
        call  sshex32
        call  ssprint
        db    ' bytes',$0D,$0A,'WOW: Took: ',0
; Convert time
        mov   eax,[ebp-12]      ; Diff timer
        cmp   eax,3             ; At least 0.2 s ???
        ja    @f                ; YES
        xor   eax,eax           ; NO - 3 ticks or less are reported as 0
@@:     mov   ebx,11
        mul   ebx               ; MUL EAX by 11
        shr   eax,1             ; Convert in "10ms" - units (ticks * 5.5)
        mov   [ebp-20],eax      ; Save time in new units
; Report time , see ^^^ above also for ssprint of "Took"
        xor   edx,edx           ; High 32 bits
        mov   ebx,100           ; Divisor
        div   ebx               ; Remainder in EDX, quotient in EAX
        push  edx               ; For later, 4 lines below
        call  ssdec32           ; Seconds
        mov   al,46             ; "." decimal dot
        call  ssonechar
        pope  eax               ; 1/100 seconds
        cmp   eax,9
        ja    @f                ; >= 10/100 sec
        push  eax
        mov   al,48             ; "0" - SSDEC32 will cut leading ZERO's !!!
        call  ssonechar
        pope  eax
@@:     call  ssdec32
        call  ssprint
        db    ' seconds',$0D,$0A,0
; Final division now -> need KiB/s -> perf = bloat / time
; Bloat must be 1 ... 200 MiB | Time must be >= 1 s
; Otherwise the performance line is silently skipped
        xor   edx,edx           ; High 32 bits, still 0 when DIV is reached
        mov   eax,[ebp-16]      ; Bloat
        cmp   eax,1048576       ; Bloat >= 1 MiB ?
        jb    qqq2              ; NO
        mov   ebx,[ebp-20]      ; Timer
        cmp   ebx,100           ; Time >= 1 sec ?
        jb    qqq2              ; NO
        div   ebx               ; Result in EAX -> Bytes / "10ms"
        mov   ebx,100
        mul   ebx               ; MUL EAX by 100 -> Bytes / sec
        shr   eax,10            ; KiB / sec
        push  eax               ; ssprint trashes AX
        call  ssprint
        db    'WOW: Performance: ',0
        pope  eax
        call  ssdec32
        call  ssprint
        db    ' KiB/sec',$0D,$0A,0
qqq2:
; Done
        call  ssprint
        db    $0D,$0A,'DONE !!!',0
; Failure jump target
qqq4:   call  sseol
        jmp   lleof    ; OK ... leaving leaked mem and messed stack
        ;----------
; ************
; *   SUBS   *
; ************
; Note: "pope" is the alias for POP defined at the top of this file.
;
; EXDEC.ASM written by MAD for use with the Assembly Tutorial Chapter 4
; Upgraded to 32 bits
; In : EAX
; Converts a number in EAX to decimal format and outputs it to the screen
; Trashes EAX, EBX, ECX, EDX, preserves ESI, EDI, EBP
; Method: divide by 10 repeatedly and PUSH every remainder, then POP the
; remainders back (most significant digit first) and print each one.
; The loop body always runs at least once, so an input of 0 prints "0".
ssdec32:
     xor   ecx,ecx        ; ECX = 0 | POPE counter - preset to 0
     xor   ebx,ebx        ; EBX = 0
     mov   bl,10          ; Divisor: divide by 10
deciloop:
     xor   edx,edx        ; Set high 32 bits coming into divison to zero
     div   ebx            ; Remainder in EDX, quotient in EAX
     inc   cl             ; Increase POPE counter
     push  edx            ; And PUSH , no BYTE PUSH exists
     test  eax,eax        ; Is quotient zero?
     jnz   deciloop       ; If not, get one more number
popeloop:
     pope  eax            ; Get number
     add   al,48          ; Add ASCII base (48="0")
     call  ssonechar      ; Preserves ECX
     loop  popeloop       ; DEC ECX + JNZ
     ret
     ;----
; Prints the failure prefix that precedes every error message
; Trashes whatever ssprint trashes
sf:       call  ssprint
          db    'F**K: ',0
          ret
          ;----
; Prints the 0-terminated string that is stored directly behind the CALL
; instruction, then continues execution right after the terminating 0.
; Usage:    call  ssprint
;           db    'text',0
; The return address on the stack is therefore the address of the text;
; it is POPed into ESI and never used with RET.
; NOTE(review): LODSB walks upwards only if the direction flag is clear -
; nothing in this file sets it, confirm the loader guarantees DF=0.
ssprint: ; High-end style | Trashes AX, DX and ESI
          pope  esi         ; ESI = address of the inline text
@@:       lodsb             ; Pick AL from [ESI] , INC ESI
          cmp   al,0        ; End ?
          je    ss1         ; YES
          call  ssonechar
          jmp   short @b
          ;-------------
ss1:      jmp   near esi    ; ESI points behind the 0 - "return" there
          ;-------------
; The routines below form two fall-through chains ("; pass" marks every
; place where execution deliberately runs into the next label):
;   sseol -> ssonechar
;   sshex32 -> sshex16 -> sshex8 -> sshex4 -> (JMP) ssonechar
;
sseol: ; Just EOL
          mov   al,$0D      ; CR via CALL ...
          call  ssonechar
          mov   al,$0A      ; ... LF via fall-through, its RET returns to our caller
          ; pass
ssonechar: ; One char | IN: AL
           mov   dl,al
           mov   ah,2       ; INT $21 function 2: output the character in DL
           int   $21
           ret
           ;----
; Output looks like HHHH'LLLL : upper 16 bits, an apostrophe, lower 16 bits
sshex32: ; Input in EAX, will get trashed, also DX
           push  eax
           shr   eax,16     ; Upper 16 bits first
           call  sshex16
           mov   al,39      ; "'" apo
           call  ssonechar
           pope  eax        ; Lower 16 bits are done by falling through
           ; pass
sshex16: ; Input in AX, full EAX will get trashed, also DX
           push  eax        ; EAX is more efficient than AX in use32
           shr   eax,8      ; High byte first
           call  sshex8
           pope  eax        ; Low byte is done by falling through
           ; pass
sshex8: ; Input in AL, semi-full AX will get trashed, also DX
             push   eax       ; No byte-push anymore
             shr    al,4      ; High nibble first
             call   sshex4
             pope   eax       ; Low nibble is done by falling through
             ; pass
; Prints the low nibble of AL as one hex digit ("0".."9", "A".."F")
; CMP sets CF for 0..9 only, so SBB subtracts $6A or $69, and DAS then
; adjusts the result to $30..$39 or $41..$46
sshex4:
             and  al,$0F
             cmp  al,10       ; Decimal 10 !!!
             sbb  al,$69
             das              ; Decimal Adjust after Subtraction , ASCII result in AL
             jmp  short ssonechar ; Tail call, its RET returns to our caller
             ;-------------------
; Program exit, reached by JMP from the main code
lleof:       mov   ax,$4C00    ; INT $21 function $4C: terminate, exit code 0
             int   $21         ; END OF FUN, finally
             ;--------
; END.
    
If anyone has DOS, please test and report results (and what DMA/cache drivers you have).