; ---------------------------------------------------------------------------
; Major.Minor.Build: 0.0.0
; -----------------------------------------------------------------------------
; Copyright (c) 2008, Alex Patterson, Greenville, WI. All rights reserved.
;
; License Terms
;
; The free distribution and use of this software is allowed (with or without
;  changes) provided that:
;
;  1. Source code distributions include the above copyright notice, this
;      list of conditions and the following disclaimer.
;
;  2. Binary distributions include the above copyright notice, this list
;      of conditions and the following disclaimer in their documentation.
;
;  3. The name of the copyright holder is not used to endorse products
;      built using this software without specific written permission.
;
; Disclaimer
;
; This software is provided 'as is' by the author, who assumes no
;  liability for any and all negative results of using this software.
; -----------------------------------------------------------------------------
; Issue Date < release date > day/month/year
;
; An AES implementation for x86 processors using the FASM assembler.
;
; This code utilizes the standard AES block size of 128 bits (16 bytes),
;  including key sizes of 128,192,& 256 bits (16,24,& 32 bytes), respectively.
;
; This library also offers five modes of encryption specified by the
;  National Institute of Standards and Technology (NIST) Special Publication
;  800-38A, which can be found at [1].
;
; These modes of operation for encrypting an arbitrary length of data include:
;
;  ECB {Electronic Codebook Mode}
;  CBC {Cipher Block Chaining Mode}
;  CFB {Cipher Feedback Mode}
;  OFB {Output Feedback Mode}
;  CTR {Counter Mode}
;
; Exported Functions:
;
;  < Key Schedules & IV Generation >
;
;  AES_EncryptKey_128 ( byte key[16] )
;  AES_EncryptKey_192 ( byte key[24] )
;  AES_EncryptKey_256 ( byte key[32] )
;  AES_DecryptKeys ( byte key[Nk*4], int bit_size )
;  AES_Generate_IV ( /* optional */ *seed )
;
;  < Encryption Modes >
;
;  AES_Encrypt_ECB ( byte in[16], byte out[16], int bit_size )
;  AES_Encrypt_CBC ( //         , //          , //           )
;  AES_Encrypt_CFB ( //         , //          , //           )
;  AES_Encrypt_OFB ( //         , //          , //           )
;  AES_Encrypt_CTR ( //         , //          , //           )
;
;  < Decryption Modes >
;
;  AES_Decrypt_OFB ( byte in[16], byte out[16],*initVector, int bit_size )
;  AES_Decrypt_CFB ( //         , //          , //        , //           )
;  AES_Decrypt_CBC ( //         , //          , //        , //           )
;  AES_Decrypt_ECB ( //         , //          , //        , //           )
;  AES_Decrypt_CTR ( //         , //          , //        , //           )
;
;
; Calling Conventions:
;
; In this implementation the stdcall convention is used, where the parameters
;  are pushed in reverse order onto the stack, and the callee removes its own
;  parameters from the stack on return.  The standard callee-saved registers
;  are preserved across calls to these functions: ebx, esi, edi & ebp.
;
; Initialization Vector:
;
;  For all encryption functions, a pointer to the IV is returned
;   in the eax register so the user can later decrypt the data.  The IV is
;   stored internally in this library, so it must be taken PRIOR to unloading
;   the DLL. THE IV MUST NOT BE LOST, otherwise the corresponding data will
;   be unrecoverable (at least to the strength of 2^128).
;
;  To decrypt data using this library a pointer to the IV is passed to all
;   decryption functions (specified above).
;
; -----------------------------------------------------------------------------
;
; Modes of Operation
; -----------------------------------------------------------------------------
; Symbols & Terms
;
;   IV       =  Initialization vector used in CBC, CFB, OFB,& CTR modes.
;   I(i)     =  i'th block of input to the function
;   O(i)     =  i'th block of output from the function.
;   P(i)     =  i'th block of plaintext.
;   C(i)     =  i'th block of ciphertext.
;   Ctr(i)   =  i'th counter block used in CTR mode.
;   T(i)     =  i'th result of combination between Ctr(i) and IV.
;   Ciph(i)  =  Forward "encryption" function of the i'th block.
;   -Ciph(i) =  Reverse "decryption" function of the i'th block.
;
; Functions & Summaries
;
;  ECB {Electronic Codebook Mode}
;
;      C(j) =  Ciph(P(j))
;      P(j) = -Ciph(C(j))
;
;   Electronic codebook mode is the least secure, because each block of
;    ciphertext is a direct result of the plaintext.  This can reveal
;    clues about the data, see [2] for a more detailed explanation.
;
;  CBC {Cipher Block Chaining Mode}
;
;      C(1) =  Ciph(P(1) xor IV)
;      C(j) =  Ciph(P(j) xor C(j-1))
;      P(1) = -Ciph(C(1)) xor IV
;      P(j) = -Ciph(C(j)) xor C(j-1)
;
;   This mode uses an initialization vector to seed the chain.  Each block
;    of plaintext is xor'd with the previous block of ciphertext (the IV for
;    the first block) and the result is encrypted to produce the ciphertext.
;    Decryption reverses this: each block of ciphertext is decrypted and the
;    output is xor'd with the previous block of ciphertext (or the IV).
;
;  CFB {Cipher Feedback Mode}
;
;      I(1) = IV
;      I(j) = C(j-1)
;      O(j) = Ciph(I(j))
;      C(j) = P(j) xor O(j)
;      P(j) = C(j) xor O(j)
;
;    This mode is nearly identical to OFB, except it uses the resulting
;     ciphertext block for the next process instead of the output.
;
;  OFB {Output Feedback Mode}
;
;      I(1) = IV
;      I(j) = O(j-1)
;      O(j) = Ciph(I(j))
;      C(j) = P(j) xor O(j)
;      P(j) = C(j) xor O(j)
;
;    This mode uses a constant block of data termed I(), which is filled with
;     the IV to start.  The ciphertext is the result of a xor between the
;     current block of plaintext and the encrypted block I(). The block I()
;     is the cipher output of the previous set of blocks, excluding plaintext.
;
;  CTR {Counter Mode}
;
;      T(j) = Ctr(j) xor'd,added,or appended with IV
;      O(j) = Ciph(T(j))
;      C(j) = P(j) xor O(j)
;      O(j) = Ciph(T(j))
;      P(j) = C(j) xor O(j)
;
;    Counter mode, also known as Segmented Integer Counter (SIC) mode, generates
;     the ciphertext by encrypting a successive value called T(). The ciphertext
;     is a result of a xor between the plaintext and the encrypted T() block,
;     the individual ciphertext blocks are only linked by the counter.
;
;  As stated, more detailed explanations and visuals can be found at [1] & [2].
;
; -----------------------------------------------------------------------------
;
; Initialization Vector
; -----------------------------------------------------------------------------
; The initialization vector used in these operations is very important, and in
;  this code it is generated using the exported function AES_Generate_IV.  The
;  user can optionally include a pointer to a seed, a buffer the size of 16
;  bytes, but if a seed is not provided the parameter must be NULL.
;
; *IMPORTANT: This function MUST be called prior to using any encryption which
;   requires an initialization vector, including:
;
;   AES_Encrypt_CBC
;   AES_Encrypt_CFB
;   AES_Encrypt_OFB
;   AES_Encrypt_CTR
;
; -----------------------------------------------------------------------------
;
; Links
; -----------------------------------------------------------------------------
;
; [1] www.csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf
; [2] www.en.wikipedia.org/wiki/Modes_of_operation
; [3] www.csrc.nist.gov/publications/fips/fips197/fips-197.pdf

; -----------------------------------------------------------------------------
;
; fasm PE file headers
; -----------------------------------------------------------------------------

 format PE GUI 4.0 DLL
; ^ output format: 32-bit PE image, GUI subsystem version 4.0, built as a DLL
 include '%fasm%\win32ax.inc'
; ^ fasm's extended Win32 (ANSI) macro package, located through the %fasm%
;   environment variable; this file uses its struct, invoke, export, library
;   and import macros
 entry aes_init
; ^ the loader calls aes_init as the DLL entry point

; -----------------------------------------------------------------------------
;
; algorithm constants
; -----------------------------------------------------------------------------

 b equ byte
 d equ dword
; ^ size-operator shorthands used throughout: b[...] = byte, d[...] = dword

; aes constants
;
; gf_modulus is xor'd into a byte whenever doubling it in GF(2^8) overflows.
; gf_affine and gf_magic are the rotating row mask and the additive constant
; used by the S-box affine transformation in aes_init.
; alog_table and log_table are 256-byte scratch tables that live inside tab_t2
; while aes_init runs; tab_t2 itself is filled in afterwards.

 gf_modulus          = 0x1b
 gf_affine           = 0x1f
 gf_magic            = 0x63
 alog_table          = aes_tables.tab_t2
 log_table           = aes_tables.tab_t2+256

; Maximum (AES-256) dimensions, used to size the key schedule buffers.
; Nk follows the FIPS-197 definition - key length counted in 32-bit words -
; so that "byte key[Nk*4]" in the file header gives the 32-byte key size.

 Nr                  = 14  ; aes-256 rounds
 Nk                  = 8   ; aes-256 key length in 32-bit words (256 bits)
 Nb                  = 4   ; aes block size in 32-bit words (128 bits)

; API constants

 DLL_PROCESS_ATTACH  = 1
 PROV_RSA_FULL       = 1

; stack frame
;
; Parameter offsets are relative to the return address.  After
; preserve_common_registers a parameter is read as
; [esp + aes_stk_size + <offset>].

 aes_seed_blk        = 4   ; param - *seed
 aes_key_blk         = 4   ; param - *input key
 aes_inp_blk         = 4   ; param - *data block
 aes_out_blk         = 8   ; param - *output block
 aes_int_size        = 12  ; param - cipher bit size
 aes_stk_size        = 16  ; space for the four saved callee-saved registers

; -----------------------------------------------------------------------------
;
; structures
; -----------------------------------------------------------------------------

 struct aes_keys_
; Expanded key storage.  Each schedule is sized for the largest case:
; Nb*(Nr+1) = 4*15 = 60 dwords.
     keys_e	rd Nb*(Nr+1)
; ^ encryption round keys, written by the aes_encryptkey* routines
     keys_d	rd Nb*(Nr+1)
; ^ reserved for decryption round keys (nothing in this file writes it yet)
     key_size	rd 1
; ^ reserved for the active key size (nothing in this file writes it yet)
 ends

 struct aes_pools_
; State used by aes_generate_iv.
     pool_iv	rb 16
; ^ the 16-byte initialization vector; its address is what aes_generate_iv
;   returns in eax
     csp_handle rd 1
; ^ receives the cryptographic service provider handle from
;   CryptAcquireContext
 ends

 struct aes_tables_
; Lookup tables, all generated at load time by aes_init.
     tab_s	rb 256
; ^ forward substitution box
     tab_si	rb 256
; ^ inverse substitution box
; t1..t4: forward round tables built from tab_s (each is the previous one
; rotated left by 8 bits).  tab_t2 is also borrowed as scratch for the
; log/alog tables while aes_init is running.
     tab_t1	rd 256
     tab_t2	rd 256
     tab_t3	rd 256
     tab_t4	rd 256
; t5..t8: inverse round tables, indexed by the input byte and built from
; tab_si combined with the 0x0e/0x09/0x0d/0x0b multiples.
     tab_t5	rd 256
     tab_t6	rd 256
     tab_t7	rd 256
     tab_t8	rd 256
; u1..u4: the same dword values as t5..t8 but stored at index tab_si[x]
; instead of x.
     tab_u1	rd 256
     tab_u2	rd 256
     tab_u3	rd 256
     tab_u4	rd 256
     tab_rc	rd 30
; ^ round constants consumed by the key schedules, one dword each
 ends

; -----------------------------------------------------------------------------
;
; '.udata' section - memory of schedules | pools | tables
; -----------------------------------------------------------------------------
 section '.udata' data readable writeable
; Uninitialized working storage: one instance of each structure, each placed
; on a 16-byte boundary.
 align	    16
 aes_keys aes_keys_
 align	    16
 aes_pools aes_pools_
 align	    16
 aes_tables aes_tables_

; -----------------------------------------------------------------------------
;
; core schedule routine macros
; -----------------------------------------------------------------------------
 macro substitute    {
; Replace every byte of eax with its table entry.
;  in:  eax = word to translate, ebx = base of a 256-byte table (xlatb)
;  out: eax = translated word, rotated right by 24 bits in total
; Each xlatb translates the low byte; the rotate then brings the next byte
; into al.  Callers add their own rotate to get the final byte order.
     rept 3 \{
     xlatb
     ror    eax, 8 \}
     xlatb	     }

; -----------------------------------------------------------------------------
;
; core aes routine macros
; -----------------------------------------------------------------------------

; forwards operations

; reverse operations

; -----------------------------------------------------------------------------
;
; common stack routine macros
; -----------------------------------------------------------------------------

 macro preserve_common_registers {
; Function prologue: save the four callee-saved registers.
; Resulting frame (aes_stk_size = 16 bytes):
;   [esp+0] = ebx, [esp+4] = ebp, [esp+8] = edi, [esp+12] = esi
; The first stack parameter is then found at [esp+aes_stk_size+4].

    push    esi
    push    edi
    push    ebp
    push    ebx              }

 macro restore_common_registers ret_val {
; Function epilogue: reload the registers saved by preserve_common_registers
; ([esp+0] = ebx, [esp+4] = ebp, [esp+8] = edi, [esp+12] = esi), release the
; 16-byte frame and return, removing ret_val bytes of parameters (stdcall).

    pop     ebx
    pop     ebp
    pop     edi
    pop     esi
    ret     ret_val  }

; -----------------------------------------------------------------------------
;
; '.code' section - table generation code
; -----------------------------------------------------------------------------
 section '.code' code readable executable

; The section is flagged executable (and no longer writeable): it holds only
; instructions, and all tables live in the '.udata' section.
;
; aes_init - DLL entry point, stdcall, three dword parameters:
;   [esp+4] = module handle, [esp+8] = reason code, [esp+12] = reserved
;
; On DLL_PROCESS_ATTACH every lookup table in aes_tables is generated.
; Any other reason code does nothing.  In both cases eax = 1 is returned
; and ebx, esi, edi and ebp are preserved.

 aes_init:
    cmp     d[esp+8],DLL_PROCESS_ATTACH
    jne     .a                  ; nothing pushed yet, so skip the pops too
    push    ebx esi edi ebp     ; ebp is used as an accumulator below

; alog and log byte[256] tables
;
; al walks through the powers of 3 in GF(2^8): each step computes
; al = al xor (2*al), where the doubling is reduced with gf_modulus.
; alog[i] = 3^i and log[3^i] = i.

    mov     eax, 1
    mov     b[alog_table], al
    mov     b[ log_table],  0
    mov     ecx, eax
@@: mov     bh, al
    add     bh, bh              ; bh = 2*al, carry = bit shifted out
    sbb     bl, bl              ; bl = 0xff if carry, else 0
    and     bl, gf_modulus
    xor     bh, bl              ; reduce
    xor     al, bh              ; al = 3*al
    mov     b[ecx+alog_table], al
    mov     b[eax+ log_table], cl
    add     cl, 1
    jnc     @b                  ; until cl wraps from 255 to 0
    mov     b[eax+ log_table], cl ; al is 1 again here: log[1] = 0

; substitution and inverse byte[] tables
;
; For x = 255..1: al = alog[255 - log[x]] (the multiplicative inverse of x),
; then the affine transformation is applied bit by bit: for every set bit
; of al the current rotation of gf_affine is xor'd into cl, which starts
; at gf_magic.  x = 0 is handled separately (S[0] = gf_magic).

    mov     edx, 255
    mov     ecx, gf_magic
    mov     b[000+ aes_tables.tab_s], cl
    mov     b[ecx+aes_tables.tab_si], 0
.s: movzx   eax, b[edx+log_table]
    not     al                  ; 255 - log[x]
    mov     al, b[eax+alog_table]
    mov     bl, gf_affine
    mov     cl, gf_magic
@@: shr     al, 1
    sbb     bh, bh
    and     bh, bl
    xor     cl, bh
    rol     bl, 1
    test    al, al
    jnz     @b
    mov     [edx+ aes_tables.tab_s], cl
    mov     [ecx+aes_tables.tab_si], dl
    sub     edx, 1
    jnz     .s

; t5, t6, t7, t8, u1, u2, u3 & u4 dword[256] tables
;
; cl/ch/bl/bh hold the logs of 0x0e/0x09/0x0d/0x0b.  For each input byte
; esi, y = tab_si[esi] is multiplied by those four constants through the
; log/alog tables (add with end-around carry = addition mod 255) and the
; products are shifted into ebp one byte at a time.  y = 0 yields 0.
; The t tables are indexed by esi, the u tables by y (edi = y*4).

    xor     esi, esi
    xor     edx, edx
    mov     cl,  b[log_table+0x0e]
    mov     ch,  b[log_table+0x09]
    mov     bl,  b[log_table+0x0d]
    mov     bh,  b[log_table+0x0b]
.t: movzx   eax, b[esi+aes_tables.tab_si]
    lea     edi, [eax*4]
    test    eax, eax
    movzx   ebp, al             ; movzx leaves the flags from test intact
    jz	    .2
    mov     al, b[eax+ log_table]
    mov     dl, al
    add     dl, cl
    adc     dl, 0
    mov     dl, b[edx+alog_table]
    shrd    ebp,edx, 8
    mov     dl, al
    add     dl, ch
    adc     dl, 0
    mov     dl, b[edx+alog_table]
    shrd    ebp,edx, 8
    mov     dl, al
    add     dl, bl
    adc     dl, 0
    mov     dl, b[edx+alog_table]
    shrd    ebp,edx, 8
    add     al, bh
    adc     al, 0
    mov     al, b[eax+alog_table]
    shrd    ebp,eax, 8
.2: mov     [esi*4+aes_tables.tab_t5], ebp
    mov     [edi  +aes_tables.tab_u1], ebp
    rol     ebp, 8
    mov     [esi*4+aes_tables.tab_t6], ebp
    mov     [edi  +aes_tables.tab_u2], ebp
    rol     ebp, 8
    mov     [esi*4+aes_tables.tab_t7], ebp
    mov     [edi  +aes_tables.tab_u3], ebp
    rol     ebp, 8
    mov     [esi*4+aes_tables.tab_t8], ebp
    mov     [edi  +aes_tables.tab_u4], ebp
    add     esi, 1
    test    esi, 0xff
    jnz     .t

; t1, t2, t3 & t4 dword[256] tables
;
; With s = tab_s[x]: t1[x] = (3s, s, s, 2s) from high byte to low byte;
; t2..t4 are successive 8-bit left rotations of it.  Writing tab_t2 here
; overwrites the log/alog scratch area, which is no longer needed.

    xor     edx, edx
@@: movzx   eax, b[edx+aes_tables.tab_s]
    mov     ah, al
    add     ah, ah
    sbb     bl, bl
    and     bl, gf_modulus
    xor     ah, bl              ; ah = 2s
    mov     ch, al
    xor     ch, ah              ; ch = 3s
    mov     cl, al
    shl     ecx, 16
    mov     ch, al
    mov     cl, ah
    mov     [edx*4+aes_tables.tab_t1], ecx
    rol     ecx, 8
    mov     [edx*4+aes_tables.tab_t2], ecx
    rol     ecx, 8
    mov     [edx*4+aes_tables.tab_t3], ecx
    rol     ecx, 8
    mov     [edx*4+aes_tables.tab_t4], ecx
    add     dl, 1
    jnc     @b

; rcon dword[30] table - successive doublings of 1 in GF(2^8)

    xor     edx, edx
    mov     eax, 1
@@: mov     [edx*4+aes_tables.tab_rc], eax
    add     al, al
    sbb     bl, bl
    and     bl, gf_modulus
    xor     al, bl
    add     dl, 1
    cmp     dl, 30
    jb	    @b

    pop     ebp edi esi ebx
.a: mov     eax, 1              ; report success to the loader
    ret     12

; -----------------------------------------------------------------------------
;
; 128-bit encryption key schedule - user interface
; -----------------------------------------------------------------------------
 aes_encryptkey1:

; AES_EncryptKey_128 ( byte key[16] ) - stdcall, one parameter
;
; Expands a 128-bit key into the 44 round-key words w[0..43] stored in
; aes_keys.keys_e.  ebx, ebp, esi and edi are preserved.

    preserve_common_registers

; ebx -> S-box for xlatb, edi -> w[0], esi -> caller's key

    mov     ebx, aes_tables.tab_s
    mov     edi, aes_keys.keys_e
    mov     esi, [esp+aes_stk_size+aes_key_blk]

; w[0..3] = the key itself

    mov     ecx, 4
    cld
    rep     movsd

; eax = temp = w[3], ebp -> first round constant, ecx = groups generated

    mov     ebp, aes_tables.tab_rc
    mov     eax, [edi-4]
    xor     ecx, ecx

; ten groups of four words each

    align   16
.round:
    ror     eax, 16       ; 16 here + 24 inside substitute = ror 8 overall,
    substitute		  ;  i.e. temp = SubWord(RotWord(temp)) on a LE word
    xor     eax, [ebp]	  ; temp ^= round constant
    add     ebp, 4	  ; advance to the next round constant
  rept 4  {		  ; four words per group:
    xor     eax, [edi-16] ;  temp ^= w[i-4]
    stosd }		  ;  w[i] = temp
    inc     ecx
    cmp     ecx, 10
    jnz     .round

; reload the saved registers, drop the one parameter

    restore_common_registers 4

; -----------------------------------------------------------------------------
;
; 192-bit encryption key schedule - user interface
; -----------------------------------------------------------------------------
 aes_encryptkey2:

; AES_EncryptKey_192 ( byte key[24] ) - stdcall, one parameter
;
; Expands a 192-bit key into the 52 round-key words w[0..51] stored in
; aes_keys.keys_e.  ebx, ebp, esi and edi are preserved.

    preserve_common_registers

; ebx -> S-box for xlatb, edi -> w[0], esi -> caller's key

    mov     ebx, aes_tables.tab_s
    mov     edi, aes_keys.keys_e
    mov     esi, [esp+aes_stk_size+aes_key_blk]

; w[0..5] = the key itself

    mov     ecx, 6
    cld
    rep     movsd

; eax = temp = w[5], ebp -> first round constant, ecx = groups generated

    mov     ebp, aes_tables.tab_rc
    mov     eax, [edi-4]
    xor     ecx, ecx

; seven full groups of six words each (w[6..47])

    align   16
.round:
    ror     eax, 16       ; 16 here + 24 inside substitute = ror 8 overall,
    substitute		  ;  i.e. temp = SubWord(RotWord(temp)) on a LE word
    xor     eax, [ebp]	  ; temp ^= round constant
    add     ebp, 4	  ; advance to the next round constant
  rept 6  {		  ; six words per group:
    xor     eax, [edi-24] ;  temp ^= w[i-6]
    stosd }		  ;  w[i] = temp
    inc     ecx
    cmp     ecx, 7
    jnz     .round

; partial eighth group: only four more words are needed (w[48..51])

    ror     eax, 16
    substitute
    xor     eax, [ebp]
  rept 4  {
    xor     eax, [edi-24]
    stosd }

; reload the saved registers, drop the one parameter

    restore_common_registers 4

; -----------------------------------------------------------------------------
;
; 256-bit encryption key schedule - user interface
; -----------------------------------------------------------------------------
 aes_encryptkey3:

; AES_EncryptKey_256 ( byte key[32] ) - stdcall, one parameter
;
; Expands a 256-bit key into the 60 round-key words w[0..59] stored in
; aes_keys.keys_e.  ebx, ebp, esi and edi are preserved.

    preserve_common_registers

; ebx -> S-box for xlatb, edi -> w[0], esi -> caller's key

    mov     ebx, aes_tables.tab_s
    mov     edi, aes_keys.keys_e
    mov     esi, [esp+aes_stk_size+aes_key_blk]

; w[0..7] = the key itself

    mov     ecx, 8
    cld
    rep     movsd

; eax = temp = w[7], ebp -> first round constant, ecx = groups generated

    mov     ebp, aes_tables.tab_rc
    mov     eax, [edi-4]
    xor     ecx, ecx

; six full groups of eight words each (w[8..55])

    align   16
.round:
    ror     eax, 16	  ; first half: SubWord(RotWord(temp)) ^ constant
    substitute
    xor     eax, [ebp]
    add     ebp, 4
  rept 4  {
    xor     eax, [edi-32] ; temp ^= w[i-8]
    stosd }		  ; w[i] = temp
    substitute		  ; second half: SubWord only - the extra ror 8
    ror     eax, 8	  ;  completes a full 32-bit turn (no net rotation)
  rept 4  {
    xor     eax, [edi-32]
    stosd }
    inc     ecx
    cmp     ecx, 6
    jnz     .round

; partial seventh group: only four more words are needed (w[56..59])

    ror     eax, 16
    substitute
    xor     eax, [ebp]
  rept 4  {
    xor     eax, [edi-32]
    stosd }

; reload the saved registers, drop the one parameter

    restore_common_registers 4

; -----------------------------------------------------------------------------
;
; decryption key schedule - user interface
; -----------------------------------------------------------------------------
 aes_decryptkeys:

; AES_DecryptKeys ( byte key[Nk*4], int bit_size ) - stdcall
;
; NOT YET IMPLEMENTED: no decryption key schedule is generated; the routine
; only saves and restores ebx, ebp, edi and esi.  eax is left unchanged.
;
; The file-header prototype lists two dword parameters, so the callee must
; remove 8 bytes on return to keep the caller's stack balanced.

    preserve_common_registers

; restore registers and return, removing both parameters

    restore_common_registers 8

; -----------------------------------------------------------------------------
;
; electronic codebook mode encryption - user interface
; -----------------------------------------------------------------------------
 aes_encrypt_ecb:

; AES_Encrypt_ECB ( byte in[16], byte out[16], int bit_size ) - stdcall
;
; NOT YET IMPLEMENTED: neither buffer is touched and eax is left unchanged.
; TODO: the file header promises a pointer to the IV in eax for every
;  encryption function.

; preserve registers

    preserve_common_registers

; restore registers and return, removing the three parameters (12 bytes)

    restore_common_registers 12

; -----------------------------------------------------------------------------
;
; electronic codebook mode decryption - user interface
; -----------------------------------------------------------------------------
 aes_decrypt_ecb:

; AES_Decrypt_ECB ( byte in[16], byte out[16], *initVector, int bit_size )
;  - stdcall
;
; NOT YET IMPLEMENTED: neither buffer is touched and eax is left unchanged.
;
; The file-header prototype lists four dword parameters for every decryption
; function, so the callee must remove 16 bytes on return to keep the caller's
; stack balanced.

    preserve_common_registers

; restore registers and return, removing the four parameters

    restore_common_registers 16

; -----------------------------------------------------------------------------
;
; cipher block chaining mode encryption - user interface
; -----------------------------------------------------------------------------
 aes_encrypt_cbc:

; AES_Encrypt_CBC ( byte in[16], byte out[16], int bit_size ) - stdcall
;
; NOT YET IMPLEMENTED: neither buffer is touched and eax is left unchanged.
; TODO: the file header promises a pointer to the IV in eax for every
;  encryption function.

; preserve registers

    preserve_common_registers

; restore registers and return, removing the three parameters (12 bytes)

    restore_common_registers 12

; -----------------------------------------------------------------------------
;
; cipher block chaining mode decryption - user interface
; -----------------------------------------------------------------------------
 aes_decrypt_cbc:

; AES_Decrypt_CBC ( byte in[16], byte out[16], *initVector, int bit_size )
;  - stdcall
;
; NOT YET IMPLEMENTED: neither buffer is touched and eax is left unchanged.
;
; The file-header prototype lists four dword parameters for every decryption
; function, so the callee must remove 16 bytes on return to keep the caller's
; stack balanced.

    preserve_common_registers

; restore registers and return, removing the four parameters

    restore_common_registers 16

; -----------------------------------------------------------------------------
;
; cipher feedback mode encryption - user interface
; -----------------------------------------------------------------------------
 aes_encrypt_cfb:

; AES_Encrypt_CFB ( byte in[16], byte out[16], int bit_size ) - stdcall
;
; NOT YET IMPLEMENTED: neither buffer is touched and eax is left unchanged.
; TODO: the file header promises a pointer to the IV in eax for every
;  encryption function.

; preserve registers

    preserve_common_registers

; restore registers and return, removing the three parameters (12 bytes)

    restore_common_registers 12

; -----------------------------------------------------------------------------
;
; cipher feedback mode decryption - user interface
; -----------------------------------------------------------------------------
 aes_decrypt_cfb:

; AES_Decrypt_CFB ( byte in[16], byte out[16], *initVector, int bit_size )
;  - stdcall
;
; NOT YET IMPLEMENTED: neither buffer is touched and eax is left unchanged.
;
; The file-header prototype lists four dword parameters for every decryption
; function, so the callee must remove 16 bytes on return to keep the caller's
; stack balanced.

    preserve_common_registers

; restore registers and return, removing the four parameters

    restore_common_registers 16

; -----------------------------------------------------------------------------
;
; output feedback mode encryption - user interface
; -----------------------------------------------------------------------------
 aes_encrypt_ofb:

; AES_Encrypt_OFB ( byte in[16], byte out[16], int bit_size ) - stdcall
;
; NOT YET IMPLEMENTED: neither buffer is touched and eax is left unchanged.
; TODO: the file header promises a pointer to the IV in eax for every
;  encryption function.

; preserve registers

    preserve_common_registers

; restore registers and return, removing the three parameters (12 bytes)

    restore_common_registers 12

; -----------------------------------------------------------------------------
;
; output feedback mode decryption - user interface
; -----------------------------------------------------------------------------
 aes_decrypt_ofb:

; AES_Decrypt_OFB ( byte in[16], byte out[16], *initVector, int bit_size )
;  - stdcall
;
; NOT YET IMPLEMENTED: neither buffer is touched and eax is left unchanged.
;
; The file-header prototype lists four dword parameters for every decryption
; function, so the callee must remove 16 bytes on return to keep the caller's
; stack balanced.

    preserve_common_registers

; restore registers and return, removing the four parameters

    restore_common_registers 16

; -----------------------------------------------------------------------------
;
; counter mode encryption - user interface
; -----------------------------------------------------------------------------
 aes_encrypt_ctr:

; AES_Encrypt_CTR ( byte in[16], byte out[16], int bit_size ) - stdcall
;
; NOT YET IMPLEMENTED: neither buffer is touched and eax is left unchanged.
; TODO: the file header promises a pointer to the IV in eax for every
;  encryption function.

; preserve registers

    preserve_common_registers

; restore registers and return, removing the three parameters (12 bytes)

    restore_common_registers 12

; -----------------------------------------------------------------------------
;
; counter mode decryption - user interface
; -----------------------------------------------------------------------------
 aes_decrypt_ctr:

; AES_Decrypt_CTR ( byte in[16], byte out[16], *initVector, int bit_size )
;  - stdcall
;
; NOT YET IMPLEMENTED: neither buffer is touched and eax is left unchanged.
;
; The file-header prototype lists four dword parameters for every decryption
; function, so the callee must remove 16 bytes on return to keep the caller's
; stack balanced.

    preserve_common_registers

; restore registers and return, removing the four parameters

    restore_common_registers 16

; -----------------------------------------------------------------------------
;
; initialization vector generation - user interface
; -----------------------------------------------------------------------------
 CRYPT_VERIFYCONTEXT = 0xF0000000
; ^ CryptAcquireContext flag: open the provider without a key container

; AES_Generate_IV ( *seed ) - stdcall, one parameter
;
;  in:  [esp+4] = pointer to a 16-byte seed, or NULL for no seed
;  out: eax     = address of aes_pools.pool_iv (always)
;
; The seed, when given, is copied into the IV buffer first; the buffer is
; then overwritten with 16 bytes from CryptGenRandom.
;
; NOTE(review): if the provider cannot be acquired, or CryptGenRandom fails,
;  the buffer keeps its previous contents (the seed, if one was given) and
;  the caller is not told.  The return value is left as it was so existing
;  callers keep working; consider reporting the failure.

 aes_generate_iv:

    preserve_common_registers

; if optional seed has been provided, then copy it to the IV

    mov     esi, [esp+aes_stk_size+aes_seed_blk]
    test    esi, esi
    jz	    .acquire
    mov     edi, aes_pools.pool_iv
    mov     ecx, 4
    cld                         ; make the copy direction explicit
    rep     movsd

; set up 'cryptographic service provider'
;
; CRYPT_VERIFYCONTEXT is used because only random bytes are needed, not
; stored keys; with dwFlags = 0 the call fails when the user has no default
; key container.

.acquire:
    invoke  CryptAcquireContext,aes_pools.csp_handle,0,0,PROV_RSA_FULL,CRYPT_VERIFYCONTEXT
    test    eax, eax
    jz	    .e                  ; no handle was opened, nothing to release

; use win32 api call to generate 16 pseudo-random bytes

    invoke  CryptGenRandom,[aes_pools.csp_handle],16,aes_pools.pool_iv

; release the CSP - done whether or not CryptGenRandom succeeded, so the
; handle is never leaked

    invoke  CryptReleaseContext,[aes_pools.csp_handle],0

; restore registers and return

.e: mov     eax, aes_pools.pool_iv
    restore_common_registers 4

; -----------------------------------------------------------------------------
;
; internal aes encryption function
; -----------------------------------------------------------------------------
 aes_enc:

; NOT YET IMPLEMENTED: placeholder for the internal block encryption routine.
; Returns immediately without touching registers, memory or stack parameters.

 ret

; -----------------------------------------------------------------------------
;
; internal aes decryption function
; -----------------------------------------------------------------------------
 aes_dec:

; NOT YET IMPLEMENTED: placeholder for the internal block decryption routine.
; Returns immediately without touching registers, memory or stack parameters.

 ret

; -----------------------------------------------------------------------------
;
; '.edata' section - library exports
; -----------------------------------------------------------------------------
 section '.edata' export data readable

; Export table for 'AES.dll'.  Each pair maps an internal label to the
; public name callers import.

 export 'AES.dll',\
    aes_encryptkey1,'AES_EncryptKey_128',\
    aes_encryptkey2,'AES_EncryptKey_192',\
    aes_encryptkey3,'AES_EncryptKey_256',\
    aes_decryptkeys,'AES_DecryptKeys',\
    aes_encrypt_ecb,'AES_Encrypt_ECB',\
    aes_decrypt_ecb,'AES_Decrypt_ECB',\
    aes_encrypt_cbc,'AES_Encrypt_CBC',\
    aes_decrypt_cbc,'AES_Decrypt_CBC',\
    aes_encrypt_cfb,'AES_Encrypt_CFB',\
    aes_decrypt_cfb,'AES_Decrypt_CFB',\
    aes_encrypt_ofb,'AES_Encrypt_OFB',\
    aes_decrypt_ofb,'AES_Decrypt_OFB',\
    aes_encrypt_ctr,'AES_Encrypt_CTR',\
    aes_decrypt_ctr,'AES_Decrypt_CTR',\
    aes_generate_iv,'AES_Generate_IV'

; -----------------------------------------------------------------------------
;
; '.idata' section - library imports
; -----------------------------------------------------------------------------
 section '.idata' import data readable

; Import table: the three Advapi32.dll CryptoAPI functions that
; aes_generate_iv calls through invoke.

 library advapi,'Advapi32.dll'

 import advapi,\
    CryptAcquireContext,'CryptAcquireContextA',\
    CryptReleaseContext,'CryptReleaseContext' ,\
    CryptGenRandom,	'CryptGenRandom'

; -----------------------------------------------------------------------------
;
; '.reloc' section - library relocations
; -----------------------------------------------------------------------------
 section '.reloc' fixups data discardable