flat assembler
Message board for the users of flat assembler.

Index > Tutorials and Examples > Using the hardware AES instructions (Linux exe)

Author
Thread Post new topic Reply to topic
revolution
When all else fails, read the source


Joined: 24 Aug 2004
Posts: 20519
Location: In your JS exploiting you and your system
revolution 13 Feb 2025, 12:57
I tried to make the simplest possible code to demonstrate how to use the AES instructions included in x86 chips.
Code:
; see https://www.intel.com/content/dam/develop/external/us/en/documents/aes-wp-2012-09-22-v01-165683.pdf

; Assembly-time configuration. All of these are numeric constants evaluated
; by the assembler; none of them occupies space in the output file.
USE_AVX         = 0     ; use VPSLLDQ, and eliminate an extra MOVDQA
AES128_rounds   = 10    ; as defined by the Rijndael spec
AES256_rounds   = 14    ; as defined by the Rijndael spec
AES_block_len   = 16    ; bytes per block
GF256_modulus   = 0x11b ; for the round constants

; System call number loaded into eax before SYSCALL to terminate the process.
SYS_EXIT        = 60

format ELF64 executable
entry start

segment executable

start:
        ; Program entry point: runs the AES-128 and AES-256 known-answer tests.
        ; Every test expands the key into expanded_keys, runs one block through
        ; the cipher and compares the result dword-by-dword with the expected
        ; vector (PCMPEQD + MOVMSKPS gives 0xf only when all four dwords match).
        ; Exits with status 0 on success; traps with INT3 on the first mismatch.
; test AES128
        movdqa  xmm0,[NIST128_key]
        lea     rdi,[expanded_keys]
        call    AES128_expand_key
; test AES128 encryption
        movdqa  xmm0,[NIST128_PT]
        lea     rdi,[expanded_keys]
        call    AES128_encrypt
        pcmpeqd xmm0,[NIST128_CT]
        movmskps eax,xmm0
        cmp     al,0xf
        jnz     .error
; test AES128 decryption
        movdqa  xmm0,[NIST128_CT]
        lea     rdi,[expanded_keys]
        call    AES128_decrypt
        pcmpeqd xmm0,[NIST128_PT]
        movmskps eax,xmm0
        cmp     al,0xf
        jnz     .error
; test AES256
        movdqa  xmm0,[NIST256_key+AES_block_len*0]
        movdqa  xmm1,[NIST256_key+AES_block_len*1]
        lea     rdi,[expanded_keys]
        call    AES256_expand_key
; test AES256 encryption
        movdqa  xmm0,[NIST256_PT]
        lea     rdi,[expanded_keys]
        call    AES256_encrypt
        pcmpeqd xmm0,[NIST256_CT]
        movmskps eax,xmm0
        cmp     al,0xf
        jnz     .error
; test AES256 decryption
        movdqa  xmm0,[NIST256_CT]
        lea     rdi,[expanded_keys]
        call    AES256_decrypt
        pcmpeqd xmm0,[NIST256_PT]
        movmskps eax,xmm0
        cmp     al,0xf
        jnz     .error
; done
        xor     edi,edi
        mov     eax,SYS_EXIT
        syscall
    .error:
        int3                            ; stop here so a debugger lands on the failure
        ; If execution is resumed past the trap, leave with a failure status
        ; instead of running into the code that follows this routine.
        mov     edi,1
        mov     eax,SYS_EXIT
        syscall

; key_sched: emit one key-expansion step.
;   source  = register fed to AESKEYGENASSIST
;   rcon    = assembly-time round constant used as the AESKEYGENASSIST immediate;
;             when it is non-zero it is advanced to the next constant afterwards,
;             a literal 0 is used as-is and left alone
;   dest    = register holding four key words; overwritten with the next four
;   shuffle = PSHUFD selector that broadcasts the wanted dword of the
;             AESKEYGENASSIST result to all four lanes
;   t0,t1   = scratch registers, both clobbered
macro key_sched source,rcon,dest,shuffle,t0,t1 {
        aeskeygenassist t0,source,rcon
        pshufd  t0,t0,shuffle
        ; t1 = dest shifted left by one dword
        if USE_AVX
                vpslldq t1,dest,4
        else
                movdqa  t1,dest
                pslldq  t1,4
        end if
        ; dest = dest xor (dest shl 4 bytes) xor (dest shl 8 bytes) xor (dest shl 12 bytes),
        ; i.e. each dword becomes the XOR of itself and all lower dwords
        pxor    dest,t1
        pslldq  t1,4
        pxor    dest,t1
        pslldq  t1,4
        pxor    dest,t1
        ; mix in the broadcast AESKEYGENASSIST word
        pxor    dest,t0
    ; double rcon, XORing in GF256_modulus when bit 7 was set before the shift
    if rcon <> 0
        rcon = (rcon shl 1) xor (GF256_modulus and -((rcon shr 7) and 1))
    end if
}

AES128_expand_key:
        ; Expand a 128-bit key into the interleaved encrypt/decrypt schedule.
        ; IN:
        ;  xmm0 = key
        ;  rdi = dest (written with MOVDQA, so it must be 16-byte aligned)
        ; OUT:
        ;  [dest] = expanded key
        ;    slot 0                 : round key 0
        ;    slot 2*r   (r = 1..9)  : round key r
        ;    slot 2*r-1 (r = 1..9)  : AESIMC of round key r, for decryption
        ;    slot 2*AES128_rounds-1 : final round key
        ; Clobbers xmm0, xmm1, xmm2.
        .rcon = 1
        movdqa  [rdi],xmm0
    repeat AES128_rounds-1
        .enc_slot = 2*%*AES_block_len
        .dec_slot = .enc_slot-AES_block_len
        key_sched xmm0,.rcon,xmm0,0xff,xmm1,xmm2
        movdqa  [rdi+.enc_slot],xmm0
        aesimc  xmm1,xmm0
        movdqa  [rdi+.dec_slot],xmm1
    end repeat
        .last_slot = (2*AES128_rounds-1)*AES_block_len
        key_sched xmm0,.rcon,xmm0,0xff,xmm1,xmm2
        movdqa  [rdi+.last_slot],xmm0
        ret

AES128_encrypt:
        ; Encrypt one 16-byte block with an AES-128 expanded key.
        ; IN:
        ;  xmm0 = PT block
        ;  rdi = expanded key
        ; OUT:
        ;  xmm0 = CT block
        pxor    xmm0,[rdi]                      ; initial round-key addition
    repeat AES128_rounds-1
        aesenc  xmm0,[rdi+(2*%)*AES_block_len]  ; encryption keys sit in the even slots
    end repeat
        aesenclast xmm0,[rdi+(2*AES128_rounds-1)*AES_block_len]
        ret

AES128_decrypt:
        ; Decrypt one 16-byte block with an AES-128 expanded key.
        ; IN:
        ;  xmm0 = CT block
        ;  rdi = expanded key
        ; OUT:
        ;  xmm0 = PT block
        .slot = (2*AES128_rounds-1)*AES_block_len
        pxor    xmm0,[rdi+.slot]                ; start from the final round key
    repeat AES128_rounds-1
        .slot = .slot-2*AES_block_len           ; walk down through the odd slots
        aesdec  xmm0,[rdi+.slot]
    end repeat
        aesdeclast xmm0,[rdi]
        ret

AES256_expand_key:
        ; Expand a 256-bit key into the interleaved encrypt/decrypt schedule.
        ; IN:
        ;  xmm0,xmm1 = key (low and high 128 bits)
        ;  rdi = dest (written with MOVDQA, so it must be 16-byte aligned)
        ; OUT:
        ;  [dest] = expanded key, same slot layout as the other AES sizes:
        ;    even slot 2*r = round key r, odd slot 2*r-1 = AESIMC of round key r,
        ;    slot 2*AES256_rounds-1 = final round key
        ; Clobbers xmm0, xmm1, xmm2, xmm3.
        .rcon = 1
        movdqa  [rdi],xmm0                      ; round key 0
        movdqa  [rdi+2*AES_block_len],xmm1      ; round key 1
        aesimc  xmm2,xmm1
        movdqa  [rdi+1*AES_block_len],xmm2      ; decryption form of round key 1
    repeat AES256_rounds shr 1 - 1
        .base = 4*%*AES_block_len               ; slot of the first key made in this pass
        key_sched xmm1,.rcon,xmm0,0xff,xmm2,xmm3
        key_sched xmm0,    0,xmm1,0xaa,xmm2,xmm3
        movdqa  [rdi+.base],xmm0
        movdqa  [rdi+.base+2*AES_block_len],xmm1
        aesimc  xmm2,xmm0
        aesimc  xmm3,xmm1
        movdqa  [rdi+.base-1*AES_block_len],xmm2
        movdqa  [rdi+.base+1*AES_block_len],xmm3
    end repeat
        key_sched xmm1,.rcon,xmm0,0xff,xmm2,xmm3
        movdqa  [rdi+(2*AES256_rounds-1)*AES_block_len],xmm0
        ret

AES256_encrypt:
        ; Encrypt one 16-byte block with an AES-256 expanded key.
        ; IN:
        ;  xmm0 = PT block
        ;  rdi = expanded key
        ; OUT:
        ;  xmm0 = CT block
        pxor    xmm0,[rdi]                      ; initial round-key addition
    repeat AES256_rounds-1
        aesenc  xmm0,[rdi+(2*%)*AES_block_len]  ; encryption keys sit in the even slots
    end repeat
        aesenclast xmm0,[rdi+(2*AES256_rounds-1)*AES_block_len]
        ret

AES256_decrypt:
        ; Decrypt one 16-byte block with an AES-256 expanded key.
        ; IN:
        ;  xmm0 = CT block
        ;  rdi = expanded key
        ; OUT:
        ;  xmm0 = PT block
        .slot = (2*AES256_rounds-1)*AES_block_len
        pxor    xmm0,[rdi+.slot]                ; start from the final round key
    repeat AES256_rounds-1
        .slot = .slot-2*AES_block_len           ; walk down through the odd slots
        aesdec  xmm0,[rdi+.slot]
    end repeat
        aesdeclast xmm0,[rdi]
        ret

; the macro is only needed by the key expansion routines above
purge key_sched

segment readable

; test vectors courtesy of FIPS-197
; The 128-bit key is the first 16 bytes of the 256-bit key, and both tests
; encrypt the same plaintext, so those labels alias the same bytes.
; align 16 because every vector is accessed with MOVDQA / PCMPEQD memory operands.
align 16
NIST128_key:
NIST256_key:    db      0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
                db      0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f
NIST128_PT:
NIST256_PT:     db      0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff
NIST128_CT:     db      0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30,0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a
NIST256_CT:     db      0x8e,0xa2,0xb7,0xca,0x51,0x67,0x45,0xbf,0xea,0xfc,0x49,0x90,0x4b,0x49,0x60,0x89

segment writeable readable

align 16
; needs two blocks per round
; (the highest slot written is 2*AES256_rounds-1, so this size covers AES-128 as well)
expanded_keys:  rb      2*AES_block_len*AES256_rounds    
All loops are unrolled by using repeat.

When run, if there is an error in the expected result then it just goes to a trap (int3), otherwise it exits with a zero error code.

There is one optional selection to use the AVX instruction VPSLLDQ. If your CPU has AVX then you can enable it and save a few bytes of code space (and perhaps gain a small amount of efficiency).

The expanded keys for encryption and decryption are interleaved together and generated at the same time, so the call to AES???_expand_key only needs to be done once, and then both encrypt and decrypt can be used as often as needed during the lifetime of the key.
Post 13 Feb 2025, 12:57
View user's profile Send private message Visit poster's website Reply with quote
Ali.Z



Joined: 08 Jan 2018
Posts: 772
Ali.Z 13 Feb 2025, 23:47
iirc, there was one aes-ni example on the board that demonstrates aes-ni using the 256-bit ymm registers and sse3/4 or avx.

_________________
Asm For Wise Humans


Last edited by Ali.Z on 20 Feb 2025, 16:35; edited 1 time in total
Post 13 Feb 2025, 23:47
View user's profile Send private message Reply with quote
revolution
When all else fails, read the source


Joined: 24 Aug 2004
Posts: 20519
Location: In your JS exploiting you and your system
revolution 14 Feb 2025, 09:40
Link?
Post 14 Feb 2025, 09:40
View user's profile Send private message Visit poster's website Reply with quote
revolution
When all else fails, read the source


Joined: 24 Aug 2004
Posts: 20519
Location: In your JS exploiting you and your system
revolution 20 Feb 2025, 00:28
I extended the example to add 192-bit key, and 256-bit block sizes. The 256-bit block is not part of the AES spec, but is defined by the original Rijndael paper, and can successfully leverage the hardware AES instructions with a small bit of code to swizzle the bytes around to get the correct output.
Code:
; see https://www.intel.com/content/dam/develop/external/us/en/documents/aes-wp-2012-09-22-v01-165683.pdf

; Assembly-time configuration. All of these are numeric constants evaluated
; by the assembler; none of them occupies space in the output file.
USE_AVX         = 0     ; use VPxxx instructions, and eliminate extra MOVDQAs
AES128_rounds   = 10    ; as defined by the Rijndael spec
AES192_rounds   = 12    ; as defined by the Rijndael spec
AES256_rounds   = 14    ; as defined by the Rijndael spec
AES_block_len   = 16    ; bytes per block
RNDL_block_len  = 32    ; bytes per block
GF256_modulus   = 0x11b ; for the round constants

; System call number loaded into eax before SYSCALL to terminate the process.
SYS_EXIT        = 60

format ELF64 executable
entry start

segment executable

start:
        ; Program entry point: runs every self-test in turn. Each test_* routine
        ; returns ZF = 1 on success, so the first one that returns ZF = 0 sends
        ; control to .error. Exits with status 0 when all tests pass.
        call    test_AES128
        jnz     .error
        call    test_AES192
        jnz     .error
        call    test_AES256
        jnz     .error
        call    test_RNDL128
        jnz     .error
        call    test_RNDL192
        jnz     .error
        call    test_RNDL256
        jnz     .error
        xor     edi,edi
        mov     eax,SYS_EXIT
        syscall
    .error:
        int3                            ; stop here so a debugger lands on the failure
        ; If execution is resumed past the trap, leave with a failure status
        ; instead of running into test_AES128 and returning through a bogus address.
        mov     edi,1
        mov     eax,SYS_EXIT
        syscall

test_AES128:
        ; Known-answer test for AES-128.
        ; OUT:
        ;  ZF = 1 if encryption and decryption both reproduce the test vector
        ; Clobbers rdi, eax and the xmm registers used by the callees.
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[NIST128_key]
        call    AES128_expand_key
; encrypt the known plaintext, expect the known ciphertext
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[NIST128_PT]
        call    AES128_encrypt
        pcmpeqd xmm0,[NIST128_CT]
        movmskps eax,xmm0               ; one bit per matching dword
        cmp     al,1111b
        jne     .finish
; decrypt the known ciphertext, expect the known plaintext
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[NIST128_CT]
        call    AES128_decrypt
        pcmpeqd xmm0,[NIST128_PT]
        movmskps eax,xmm0
        cmp     al,1111b
    .finish:
        ret

test_AES192:
        ; Known-answer test for AES-192.
        ; OUT:
        ;  ZF = 1 if encryption and decryption both reproduce the test vector
        ; Clobbers rdi, eax and the xmm registers used by the callees.
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[NIST192_key]
        movdqa  xmm1,[NIST192_key+AES_block_len]
        call    AES192_expand_key
; encrypt the known plaintext, expect the known ciphertext
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[NIST192_PT]
        call    AES192_encrypt
        pcmpeqd xmm0,[NIST192_CT]
        movmskps eax,xmm0               ; one bit per matching dword
        cmp     al,1111b
        jne     .finish
; decrypt the known ciphertext, expect the known plaintext
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[NIST192_CT]
        call    AES192_decrypt
        pcmpeqd xmm0,[NIST192_PT]
        movmskps eax,xmm0
        cmp     al,1111b
    .finish:
        ret

test_AES256:
        ; Known-answer test for AES-256.
        ; OUT:
        ;  ZF = 1 if encryption and decryption both reproduce the test vector
        ; Clobbers rdi, eax and the xmm registers used by the callees.
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[NIST256_key]
        movdqa  xmm1,[NIST256_key+AES_block_len]
        call    AES256_expand_key
; encrypt the known plaintext, expect the known ciphertext
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[NIST256_PT]
        call    AES256_encrypt
        pcmpeqd xmm0,[NIST256_CT]
        movmskps eax,xmm0               ; one bit per matching dword
        cmp     al,1111b
        jne     .finish
; decrypt the known ciphertext, expect the known plaintext
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[NIST256_CT]
        call    AES256_decrypt
        pcmpeqd xmm0,[NIST256_PT]
        movmskps eax,xmm0
        cmp     al,1111b
    .finish:
        ret

test_RNDL128:
        ; Known-answer test for the 256-bit block cipher with a 128-bit key.
        ; OUT:
        ;  ZF = 1 if encryption and decryption both reproduce the test vector
        ; Clobbers rdi, eax and the xmm registers used by the callees.
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[RNDL128_key]
        call    RNDL128_expand_key
; encrypt the known plaintext, expect the known ciphertext
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[RNDL128_PT]
        movdqa  xmm1,[RNDL128_PT+AES_block_len]
        call    RNDL128_encrypt
        pcmpeqd xmm1,[RNDL128_CT+AES_block_len]
        pcmpeqd xmm0,[RNDL128_CT]
        pand    xmm0,xmm1               ; a dword passes only if it matched in both halves
        movmskps eax,xmm0
        cmp     al,1111b
        jne     .finish
; decrypt the known ciphertext, expect the known plaintext
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[RNDL128_CT]
        movdqa  xmm1,[RNDL128_CT+AES_block_len]
        call    RNDL128_decrypt
        pcmpeqd xmm1,[RNDL128_PT+AES_block_len]
        pcmpeqd xmm0,[RNDL128_PT]
        pand    xmm0,xmm1
        movmskps eax,xmm0
        cmp     al,1111b
    .finish:
        ret

test_RNDL192:
        ; Known-answer test for the 256-bit block cipher with a 192-bit key.
        ; OUT:
        ;  ZF = 1 if encryption and decryption both reproduce the test vector
        ; Clobbers rdi, eax and the xmm registers used by the callees.
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[RNDL192_key]
        movdqa  xmm1,[RNDL192_key+AES_block_len]
        call    RNDL192_expand_key
; encrypt the known plaintext, expect the known ciphertext
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[RNDL192_PT]
        movdqa  xmm1,[RNDL192_PT+AES_block_len]
        call    RNDL192_encrypt
        pcmpeqd xmm1,[RNDL192_CT+AES_block_len]
        pcmpeqd xmm0,[RNDL192_CT]
        pand    xmm0,xmm1               ; a dword passes only if it matched in both halves
        movmskps eax,xmm0
        cmp     al,1111b
        jne     .finish
; decrypt the known ciphertext, expect the known plaintext
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[RNDL192_CT]
        movdqa  xmm1,[RNDL192_CT+AES_block_len]
        call    RNDL192_decrypt
        pcmpeqd xmm1,[RNDL192_PT+AES_block_len]
        pcmpeqd xmm0,[RNDL192_PT]
        pand    xmm0,xmm1
        movmskps eax,xmm0
        cmp     al,1111b
    .finish:
        ret

test_RNDL256:
        ; Known-answer test for the 256-bit block cipher with a 256-bit key.
        ; OUT:
        ;  ZF = 1 if encryption and decryption both reproduce the test vector
        ; Clobbers rdi, eax and the xmm registers used by the callees.
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[RNDL256_key]
        movdqa  xmm1,[RNDL256_key+AES_block_len]
        call    RNDL256_expand_key
; encrypt the known plaintext, expect the known ciphertext
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[RNDL256_PT]
        movdqa  xmm1,[RNDL256_PT+AES_block_len]
        call    RNDL256_encrypt
        pcmpeqd xmm1,[RNDL256_CT+AES_block_len]
        pcmpeqd xmm0,[RNDL256_CT]
        pand    xmm0,xmm1               ; a dword passes only if it matched in both halves
        movmskps eax,xmm0
        cmp     al,1111b
        jne     .finish
; decrypt the known ciphertext, expect the known plaintext
        lea     rdi,[expanded_keys]
        movdqa  xmm0,[RNDL256_CT]
        movdqa  xmm1,[RNDL256_CT+AES_block_len]
        call    RNDL256_decrypt
        pcmpeqd xmm1,[RNDL256_PT+AES_block_len]
        pcmpeqd xmm0,[RNDL256_PT]
        pand    xmm0,xmm1
        movmskps eax,xmm0
        cmp     al,1111b
    .finish:
        ret

; key_sched: emit one key-expansion step.
;   source  = register fed to AESKEYGENASSIST
;   rcon    = assembly-time round constant used as the AESKEYGENASSIST immediate;
;             when it is non-zero it is advanced to the next constant afterwards,
;             a literal 0 is used as-is and left alone
;   dest    = register holding four key words; overwritten with the next four
;   shuffle = PSHUFD selector that broadcasts the wanted dword of the
;             AESKEYGENASSIST result to all four lanes
;   t0,t1   = scratch registers, both clobbered
macro key_sched source,rcon,dest,shuffle,t0,t1 {
        aeskeygenassist t0,source,rcon
        pshufd  t0,t0,shuffle
        ; t1 = dest shifted left by one dword
        if USE_AVX
                vpslldq t1,dest,4
        else
                movdqa  t1,dest
                pslldq  t1,4
        end if
        ; dest = dest xor (dest shl 4 bytes) xor (dest shl 8 bytes) xor (dest shl 12 bytes),
        ; i.e. each dword becomes the XOR of itself and all lower dwords
        pxor    dest,t1
        pslldq  t1,4
        pxor    dest,t1
        pslldq  t1,4
        pxor    dest,t1
        ; mix in the broadcast AESKEYGENASSIST word
        pxor    dest,t0
    ; double rcon, XORing in GF256_modulus when bit 7 was set before the shift
    if rcon <> 0
        rcon = (rcon shl 1) xor (GF256_modulus and -((rcon shr 7) and 1))
    end if
}

; half_key_sched: one expansion step for a six-word (192-bit) key.
; First runs key_sched to replace the four words in dest, then derives the
; next two words in the low half of source from the old low half of source
; and the top word of the new dest.
;   source  = register whose low two dwords are key words; the low two dwords
;             receive the next two words, the upper two dwords are left
;             holding values that are not key material
;   other arguments as for key_sched; t0 and t1 are clobbered
macro half_key_sched source,rcon,dest,shuffle,t0,t1 {
        key_sched source,rcon,dest,shuffle,t0,t1
        pshufd  t0,dest,0xff            ; broadcast the top dword of the new dest
        ; t1 = source shifted left by one dword
        if USE_AVX
                vpslldq t1,source,4
        else
                movdqa  t1,source
                pslldq  t1,4
        end if
        ; dword 0 = s0 xor top, dword 1 = s1 xor s0 xor top
        pxor    source,t1
        pxor    source,t0
}

AES128_expand_key:
        ; Expand a 128-bit key into the interleaved encrypt/decrypt schedule.
        ; IN:
        ;  xmm0 = key
        ;  rdi = dest (written with MOVDQA, so it must be 16-byte aligned)
        ; OUT:
        ;  [dest] = expanded key
        ;    slot 0                 : round key 0
        ;    slot 2*r   (r = 1..9)  : round key r
        ;    slot 2*r-1 (r = 1..9)  : AESIMC of round key r, for decryption
        ;    slot 2*AES128_rounds-1 : final round key
        ; Clobbers xmm0, xmm1, xmm2.
        .rcon = 1
        movdqa  [rdi],xmm0
    repeat AES128_rounds-1
        .enc_slot = 2*%*AES_block_len
        .dec_slot = .enc_slot-AES_block_len
        key_sched xmm0,.rcon,xmm0,0xff,xmm1,xmm2
        movdqa  [rdi+.enc_slot],xmm0
        aesimc  xmm1,xmm0
        movdqa  [rdi+.dec_slot],xmm1
    end repeat
        .last_slot = (2*AES128_rounds-1)*AES_block_len
        key_sched xmm0,.rcon,xmm0,0xff,xmm1,xmm2
        movdqa  [rdi+.last_slot],xmm0
        ret

AES128_encrypt:
        ; Encrypt one 16-byte block with an AES-128 expanded key.
        ; IN:
        ;  xmm0 = PT block
        ;  rdi = expanded key
        ; OUT:
        ;  xmm0 = CT block
        pxor    xmm0,[rdi]                      ; initial round-key addition
    repeat AES128_rounds-1
        aesenc  xmm0,[rdi+(2*%)*AES_block_len]  ; encryption keys sit in the even slots
    end repeat
        aesenclast xmm0,[rdi+(2*AES128_rounds-1)*AES_block_len]
        ret

AES128_decrypt:
        ; Decrypt one 16-byte block with an AES-128 expanded key.
        ; IN:
        ;  xmm0 = CT block
        ;  rdi = expanded key
        ; OUT:
        ;  xmm0 = PT block
        .slot = (2*AES128_rounds-1)*AES_block_len
        pxor    xmm0,[rdi+.slot]                ; start from the final round key
    repeat AES128_rounds-1
        .slot = .slot-2*AES_block_len           ; walk down through the odd slots
        aesdec  xmm0,[rdi+.slot]
    end repeat
        aesdeclast xmm0,[rdi]
        ret

AES192_expand_key:
        ; Expand a 192-bit key into the interleaved encrypt/decrypt schedule.
        ; IN:
        ;  xmm0,xmm1 = key (xmm0 = first four words, low half of xmm1 = last two words;
        ;              the high half of xmm1 is not used)
        ;  rdi = dest
        ; OUT:
        ;  [dest] = expanded key
        ;    even slot 2*r = round key r, odd slot 2*r-1 = AESIMC of round key r,
        ;    slot 2*AES192_rounds-1 = final round key
        ; Clobbers xmm0..xmm4.
        movdqa  [rdi],xmm0
        .rcon = 1
        .keys_per_rep = 3
        ; Each pass produces three round keys from two six-word expansion steps.
    repeat AES192_rounds / .keys_per_rep
        movdqa  xmm4,xmm1               ; keep the two words left over from the previous step
        half_key_sched xmm1,.rcon,xmm0,0x55,xmm2,xmm3
        ; first key of the pass: low half of old xmm1 followed by low half of new xmm0
        shufpd  xmm4,xmm0,0
        aesimc  xmm2,xmm4
        movdqa  [rdi+(.keys_per_rep*2*%-4)*AES_block_len],xmm4
        movdqa  [rdi+(.keys_per_rep*2*%-5)*AES_block_len],xmm2
        ; second key of the pass: high half of xmm0 followed by low half of xmm1
        movdqa  xmm4,xmm0
        shufpd  xmm4,xmm1,1
        aesimc  xmm2,xmm4
        movdqa  [rdi+(.keys_per_rep*2*%-2)*AES_block_len],xmm4
        movdqa  [rdi+(.keys_per_rep*2*%-3)*AES_block_len],xmm2
        ; third key of the pass: xmm0 after the next expansion step
        half_key_sched xmm1,.rcon,xmm0,0x55,xmm2,xmm3
      ; on the last pass this key is the final round key, stored after the loop
      ; without an AESIMC copy
      if % < AES192_rounds / .keys_per_rep
        aesimc  xmm2,xmm0
        movdqa  [rdi+(.keys_per_rep*2*%-0)*AES_block_len],xmm0
        movdqa  [rdi+(.keys_per_rep*2*%-1)*AES_block_len],xmm2
      end if
    end repeat
        movdqa  [rdi+AES_block_len*(2*AES192_rounds-1)],xmm0
        ret

AES192_encrypt:
        ; Encrypt one 16-byte block with an AES-192 expanded key.
        ; IN:
        ;  xmm0 = PT block
        ;  rdi = expanded key
        ; OUT:
        ;  xmm0 = CT block
        pxor    xmm0,[rdi]                      ; initial round-key addition
    repeat AES192_rounds-1
        aesenc  xmm0,[rdi+(2*%)*AES_block_len]  ; encryption keys sit in the even slots
    end repeat
        aesenclast xmm0,[rdi+(2*AES192_rounds-1)*AES_block_len]
        ret

AES192_decrypt:
        ; Decrypt one 16-byte block with an AES-192 expanded key.
        ; IN:
        ;  xmm0 = CT block
        ;  rdi = expanded key
        ; OUT:
        ;  xmm0 = PT block
        .slot = (2*AES192_rounds-1)*AES_block_len
        pxor    xmm0,[rdi+.slot]                ; start from the final round key
    repeat AES192_rounds-1
        .slot = .slot-2*AES_block_len           ; walk down through the odd slots
        aesdec  xmm0,[rdi+.slot]
    end repeat
        aesdeclast xmm0,[rdi]
        ret

AES256_expand_key:
        ; Expand a 256-bit key into the interleaved encrypt/decrypt schedule.
        ; IN:
        ;  xmm0,xmm1 = key (low and high 128 bits)
        ;  rdi = dest (written with MOVDQA, so it must be 16-byte aligned)
        ; OUT:
        ;  [dest] = expanded key, same slot layout as the other AES sizes:
        ;    even slot 2*r = round key r, odd slot 2*r-1 = AESIMC of round key r,
        ;    slot 2*AES256_rounds-1 = final round key
        ; Clobbers xmm0, xmm1, xmm2, xmm3.
        .rcon = 1
        movdqa  [rdi],xmm0                      ; round key 0
        movdqa  [rdi+2*AES_block_len],xmm1      ; round key 1
        aesimc  xmm2,xmm1
        movdqa  [rdi+1*AES_block_len],xmm2      ; decryption form of round key 1
    repeat AES256_rounds shr 1 - 1
        .base = 4*%*AES_block_len               ; slot of the first key made in this pass
        key_sched xmm1,.rcon,xmm0,0xff,xmm2,xmm3
        key_sched xmm0,    0,xmm1,0xaa,xmm2,xmm3
        movdqa  [rdi+.base],xmm0
        movdqa  [rdi+.base+2*AES_block_len],xmm1
        aesimc  xmm2,xmm0
        aesimc  xmm3,xmm1
        movdqa  [rdi+.base-1*AES_block_len],xmm2
        movdqa  [rdi+.base+1*AES_block_len],xmm3
    end repeat
        key_sched xmm1,.rcon,xmm0,0xff,xmm2,xmm3
        movdqa  [rdi+(2*AES256_rounds-1)*AES_block_len],xmm0
        ret

AES256_encrypt:
        ; Encrypt one 16-byte block with an AES-256 expanded key.
        ; IN:
        ;  xmm0 = PT block
        ;  rdi = expanded key
        ; OUT:
        ;  xmm0 = CT block
        pxor    xmm0,[rdi]                      ; initial round-key addition
    repeat AES256_rounds-1
        aesenc  xmm0,[rdi+(2*%)*AES_block_len]  ; encryption keys sit in the even slots
    end repeat
        aesenclast xmm0,[rdi+(2*AES256_rounds-1)*AES_block_len]
        ret

AES256_decrypt:
        ; Decrypt one 16-byte block with an AES-256 expanded key.
        ; IN:
        ;  xmm0 = CT block
        ;  rdi = expanded key
        ; OUT:
        ;  xmm0 = PT block
        .slot = (2*AES256_rounds-1)*AES_block_len
        pxor    xmm0,[rdi+.slot]                ; start from the final round key
    repeat AES256_rounds-1
        .slot = .slot-2*AES_block_len           ; walk down through the odd slots
        aesdec  xmm0,[rdi+.slot]
    end repeat
        aesdeclast xmm0,[rdi]
        ret

RNDL128_expand_key:
        ; Expand a 128-bit key for the 256-bit block cipher. Every round key is
        ; 32 bytes, so each one takes two 128-bit expansion steps.
        ; IN:
        ;  xmm0 = key
        ;  rdi = dest (written with MOVDQA, so it must be 16-byte aligned)
        ; OUT:
        ;  [dest] = expanded key, in RNDL_block_len sized slots:
        ;    slot 0                 : round key 0
        ;    slot 2*r               : round key r
        ;    slot 2*r-1             : AESIMC of round key r, for decryption
        ;    slot 2*AES256_rounds-1 : final round key
        ; Clobbers xmm0, xmm1, xmm2.
        .rcon = 1
        ; round key 0 = the key itself followed by the first expansion step
        movdqa  [rdi+RNDL_block_len*0+AES_block_len*0],xmm0
        key_sched xmm0,.rcon,xmm0,0xff,xmm1,xmm2
        movdqa  [rdi+RNDL_block_len*0+AES_block_len*1],xmm0
    repeat AES256_rounds - 1
        ; low half of round key %
        key_sched xmm0,.rcon,xmm0,0xff,xmm1,xmm2
        aesimc  xmm1,xmm0
        movdqa  [rdi+RNDL_block_len*(2*%+0)+AES_block_len*0],xmm0
        movdqa  [rdi+RNDL_block_len*(2*%-1)+AES_block_len*0],xmm1
        ; high half of round key %
        key_sched xmm0,.rcon,xmm0,0xff,xmm1,xmm2
        aesimc  xmm1,xmm0
        movdqa  [rdi+RNDL_block_len*(2*%+0)+AES_block_len*1],xmm0
        movdqa  [rdi+RNDL_block_len*(2*%-1)+AES_block_len*1],xmm1
    end repeat
        ; final round key, stored without an AESIMC copy
        key_sched xmm0,.rcon,xmm0,0xff,xmm1,xmm2
        movdqa  [rdi+RNDL_block_len*(2*AES256_rounds-1)+AES_block_len*0],xmm0
        ; same scratch registers as every other step, so xmm3 is left untouched
        key_sched xmm0,.rcon,xmm0,0xff,xmm1,xmm2
        movdqa  [rdi+RNDL_block_len*(2*AES256_rounds-1)+AES_block_len*1],xmm0
        ret

RNDL192_expand_key:
        ; Expand a 192-bit key for the 256-bit block cipher.
        ; IN:
        ;  xmm0,xmm1 = key (xmm0 = first four words, low half of xmm1 = last two words)
        ;  rdi = dest
        ; OUT:
        ;  [dest] = expanded key, in RNDL_block_len sized slots:
        ;    slot 2*r = round key r, slot 2*r-1 = AESIMC of round key r,
        ;    slot 2*AES256_rounds-1 = final round key
        ; Clobbers xmm0..xmm4.
        movdqa  [rdi],xmm0
        .rcon = 1
        .keys_per_rep = 3
        ; Each pass produces three 16-byte pieces of the schedule. Because three
        ; pieces do not line up with two-piece round keys, the (% and 1) terms
        ; shift the store offsets by one piece on odd passes.
    repeat AES256_rounds * 2 / .keys_per_rep
        movdqa  xmm4,xmm1               ; keep the two words left over from the previous step
        half_key_sched xmm1,.rcon,xmm0,0x55,xmm2,xmm3
        ; first piece: low half of old xmm1 followed by low half of new xmm0
        shufpd  xmm4,xmm0,0
      ; on the first pass this piece is the second half of round key 0,
      ; which has no AESIMC copy
      if % > 1
        aesimc  xmm2,xmm4
      end if
        movdqa  [rdi+.keys_per_rep*%*RNDL_block_len-(4+(% and 1))*AES_block_len],xmm4
      if % > 1
        movdqa  [rdi+.keys_per_rep*%*RNDL_block_len-(6+(% and 1))*AES_block_len],xmm2
      end if
        ; second piece: high half of xmm0 followed by low half of xmm1
        movdqa  xmm4,xmm0
        shufpd  xmm4,xmm1,1
        aesimc  xmm2,xmm4
        movdqa  [rdi+.keys_per_rep*%*RNDL_block_len-(3-(% and 1))*AES_block_len],xmm4
        movdqa  [rdi+.keys_per_rep*%*RNDL_block_len-(5-(% and 1))*AES_block_len],xmm2
        ; third piece: xmm0 after the next expansion step
        half_key_sched xmm1,.rcon,xmm0,0x55,xmm2,xmm3
        aesimc  xmm2,xmm0
        movdqa  [rdi+.keys_per_rep*%*RNDL_block_len-(0+(% and 1))*AES_block_len],xmm0
        movdqa  [rdi+.keys_per_rep*%*RNDL_block_len-(2+(% and 1))*AES_block_len],xmm2
    end repeat
        ; one more step yields the two pieces of the final round key,
        ; which is stored without an AESIMC copy
        movdqa  xmm4,xmm1
        half_key_sched xmm1,.rcon,xmm0,0x55,xmm2,xmm3
        shufpd  xmm4,xmm0,0
        shufpd  xmm0,xmm1,1
        movdqa  [rdi+RNDL_block_len*(2*AES256_rounds-1)+AES_block_len*0],xmm4
        movdqa  [rdi+RNDL_block_len*(2*AES256_rounds-1)+AES_block_len*1],xmm0
        ret

RNDL256_expand_key:
        ; Expand a 256-bit key for the 256-bit block cipher. One pair of
        ; key_sched steps yields a whole 32-byte round key.
        ; IN:
        ;  xmm0,xmm1 = key (low and high 128 bits)
        ;  rdi = dest (written with MOVDQA, so it must be 16-byte aligned)
        ; OUT:
        ;  [dest] = expanded key, in RNDL_block_len sized slots:
        ;    slot 2*r = round key r, slot 2*r-1 = AESIMC of round key r,
        ;    slot 2*AES256_rounds-1 = final round key
        ; Clobbers xmm0, xmm1, xmm2, xmm3.
        .rcon = 1
        movdqa  [rdi],xmm0
        movdqa  [rdi+AES_block_len],xmm1
    repeat AES256_rounds - 1
        .enc = 2*%*RNDL_block_len               ; encryption round key slot
        .dec = .enc-RNDL_block_len              ; matching decryption round key slot
        key_sched xmm1,.rcon,xmm0,0xff,xmm2,xmm3
        key_sched xmm0,    0,xmm1,0xaa,xmm2,xmm3
        movdqa  [rdi+.enc],xmm0
        movdqa  [rdi+.enc+AES_block_len],xmm1
        aesimc  xmm2,xmm0
        aesimc  xmm3,xmm1
        movdqa  [rdi+.dec],xmm2
        movdqa  [rdi+.dec+AES_block_len],xmm3
    end repeat
        .last = (2*AES256_rounds-1)*RNDL_block_len
        key_sched xmm1,.rcon,xmm0,0xff,xmm2,xmm3
        key_sched xmm0,    0,xmm1,0xaa,xmm2,xmm3
        movdqa  [rdi+.last],xmm0
        movdqa  [rdi+.last+AES_block_len],xmm1
        ret

; Encrypt one 256-bit block. The three labels share one body because the
; round count (AES256_rounds) and the expanded-key layout are the same for
; every key size handled here.
RNDL128_encrypt:
RNDL192_encrypt:
RNDL256_encrypt:
        ; IN:
        ;  xmm0,xmm1 = PT block
        ;  rdi = expanded key
        ; OUT:
        ;  xmm0,xmm1 = CT block
        ; Clobbers xmm2, xmm3, xmm4.
        ; The working state is kept in xmm2 (low half) and xmm1 (high half),
        ; because xmm0 is needed as the implicit mask of PBLENDVB.
        if USE_AVX
                vpxor   xmm2,xmm0,[rdi+RNDL_block_len*0+AES_block_len*0]
        else
                movdqa  xmm2,xmm0
                pxor    xmm2,[rdi+RNDL_block_len*0+AES_block_len*0]
        end if
        pxor    xmm1,[rdi+RNDL_block_len*0+AES_block_len*1]
        movdqa  xmm4,[RNDL256_e_SHUF]   ; byte permutation applied to each half
        movdqa  xmm0,[RNDL256_SWAP]     ; bytes to exchange between the halves
    repeat AES256_rounds
        ; Swizzle before every AES round instruction: permute both halves with
        ; xmm4, then swap the bytes selected by the mask in xmm0 between them.
        ; NOTE(review): presumably this compensates for the row shifts of the
        ; 256-bit block differing from those built into AESENC - confirm.
        pshufb  xmm1,xmm4
        if USE_AVX
                vpshufb xmm3,xmm2,xmm4
                vpblendvb xmm2,xmm3,xmm1,xmm0
        else
                pshufb  xmm2,xmm4
                movdqa  xmm3,xmm2
                pblendvb xmm2,xmm1;,xmm0
        end if
        ; xmm3 still holds the permuted low half from before the blend
        pblendvb xmm1,xmm3;,xmm0
      if % < AES256_rounds
        aesenc  xmm2,[rdi+RNDL_block_len*2*%+AES_block_len*0]
        aesenc  xmm1,[rdi+RNDL_block_len*2*%+AES_block_len*1]
      else
        aesenclast xmm2,[rdi+RNDL_block_len*(2*%-1)+AES_block_len*0]
        aesenclast xmm1,[rdi+RNDL_block_len*(2*%-1)+AES_block_len*1]
      end if
    end repeat
        movdqa  xmm0,xmm2
        ret

; Decrypt one 256-bit block. The three labels share one body because the
; round count (AES256_rounds) and the expanded-key layout are the same for
; every key size handled here.
RNDL128_decrypt:
RNDL192_decrypt:
RNDL256_decrypt:
        ; IN:
        ;  xmm0,xmm1 = CT block
        ;  rdi = expanded key
        ; OUT:
        ;  xmm0,xmm1 = PT block
        ; Clobbers xmm2, xmm3, xmm4.
        ; The working state is kept in xmm2 (low half) and xmm1 (high half),
        ; because xmm0 is needed as the implicit mask of PBLENDVB.
        if USE_AVX
                vpxor   xmm2,xmm0,[rdi+RNDL_block_len*(2*AES256_rounds-1)+AES_block_len*0]
        else
                movdqa  xmm2,xmm0
                pxor    xmm2,[rdi+RNDL_block_len*(2*AES256_rounds-1)+AES_block_len*0]
        end if
        pxor    xmm1,[rdi+RNDL_block_len*(2*AES256_rounds-1)+AES_block_len*1]
        movdqa  xmm4,[RNDL256_d_SHUF]   ; byte permutation applied to each half
        ; same swap mask as encryption, with its four dwords in reverse order
        pshufd  xmm0,[RNDL256_SWAP],00_01_10_11b
    repeat AES256_rounds
        ; Swizzle before every AES round instruction: permute both halves with
        ; xmm4, then swap the bytes selected by the mask in xmm0 between them.
        pshufb  xmm1,xmm4
        if USE_AVX
                vpshufb xmm3,xmm2,xmm4
                vpblendvb xmm2,xmm3,xmm1,xmm0
        else
                pshufb  xmm2,xmm4
                movdqa  xmm3,xmm2
                pblendvb xmm2,xmm1;,xmm0
        end if
        ; xmm3 still holds the permuted low half from before the blend
        pblendvb xmm1,xmm3;,xmm0
      ; round keys are consumed from the odd (AESIMC) slots, highest first
      if % < AES256_rounds
        aesdec  xmm2,[rdi+RNDL_block_len*(2*(AES256_rounds-%)-1)+AES_block_len*0]
        aesdec  xmm1,[rdi+RNDL_block_len*(2*(AES256_rounds-%)-1)+AES_block_len*1]
      else
        aesdeclast xmm2,[rdi+RNDL_block_len*0+AES_block_len*0]
        aesdeclast xmm1,[rdi+RNDL_block_len*0+AES_block_len*1]
      end if
    end repeat
        movdqa  xmm0,xmm2
        ret

; the macros are only needed by the key expansion routines above
purge key_sched, half_key_sched

segment readable

; every table and vector below is a multiple of 16 bytes and is accessed with
; aligned SSE memory operands, so one alignment at the start covers them all
align 16

; swizzling for 256-bit blocks
; PBLENDVB mask: a -1 byte takes that byte from the other half, 00 keeps it
RNDL256_SWAP:   db      00,-1,-1,-1,00,00,-1,-1,00,00,00,-1,00,00,-1,-1
; swizzling for 256-bit blocks: encryption
; PSHUFB control: destination byte i is loaded from source byte table[i]
RNDL256_e_SHUF: db      00,01,06,07,04,05,10,11,08,09,14,15,12,13,02,03
; swizzling for 256-bit blocks: decryption
RNDL256_d_SHUF: db      00,01,14,15,04,05,02,03,08,09,06,07,12,13,10,11

; test vectors courtesy of FIPS-197
; The shorter keys are prefixes of the 256-bit key and all three tests encrypt
; the same plaintext, so those labels alias the same bytes.
NIST128_key:
NIST192_key:
NIST256_key:    db      0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
                db      0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f
NIST128_PT:
NIST192_PT:
NIST256_PT:     db      0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff
NIST128_CT:     db      0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30,0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a
NIST192_CT:     db      0xdd,0xa9,0x7c,0xa4,0x86,0x4c,0xdf,0xe0,0x6e,0xaf,0x70,0xa0,0xec,0x0d,0x71,0x91
NIST256_CT:     db      0x8e,0xa2,0xb7,0xca,0x51,0x67,0x45,0xbf,0xea,0xfc,0x49,0x90,0x4b,0x49,0x60,0x89

; test vectors courtesy of Brian Gladman
; Same aliasing as above: one key buffer and one 32-byte plaintext serve all
; three key sizes; only the expected ciphertexts differ.
RNDL128_key:
RNDL192_key:
RNDL256_key:    db      0x2b,0x7e,0x15,0x16,0x28,0xae,0xd2,0xa6,0xab,0xf7,0x15,0x88,0x09,0xcf,0x4f,0x3c
                db      0x76,0x2e,0x71,0x60,0xf3,0x8b,0x4d,0xa5,0x6a,0x78,0x4d,0x90,0x45,0x19,0x0c,0xfe
RNDL128_PT:
RNDL192_PT:
RNDL256_PT:     db      0x32,0x43,0xf6,0xa8,0x88,0x5a,0x30,0x8d,0x31,0x31,0x98,0xa2,0xe0,0x37,0x07,0x34
                db      0x4a,0x40,0x93,0x82,0x22,0x99,0xf3,0x1d,0x00,0x82,0xef,0xa9,0x8e,0xc4,0xe6,0xc8
RNDL128_CT:     db      0x7d,0x15,0x47,0x90,0x76,0xb6,0x9a,0x46,0xff,0xb3,0xb3,0xbe,0xae,0x97,0xad,0x83
                db      0x13,0xf6,0x22,0xf6,0x7f,0xed,0xb4,0x87,0xde,0x9f,0x06,0xb9,0xed,0x9c,0x8f,0x19
RNDL192_CT:     db      0x5d,0x71,0x01,0x72,0x7b,0xb2,0x57,0x81,0xbf,0x67,0x15,0xb0,0xe6,0x95,0x52,0x82
                db      0xb9,0x61,0x0e,0x23,0xa4,0x3c,0x2e,0xb0,0x62,0x69,0x9f,0x0e,0xbf,0x58,0x87,0xb2         
RNDL256_CT:     db      0xa4,0x94,0x06,0x11,0x5d,0xfb,0x30,0xa4,0x04,0x18,0xaa,0xfa,0x48,0x69,0xb7,0xc6
                db      0xa8,0x86,0xff,0x31,0x60,0x2a,0x7d,0xd1,0x9c,0x88,0x9d,0xc6,0x4f,0x7e,0x4e,0x7a

segment writeable readable

align 16
; needs two blocks per round
; (sized for the largest user: 32-byte round keys, AES256_rounds rounds;
; the 16-byte-block AES schedules fit in the same buffer)
expanded_keys:  rb      2*RNDL_block_len*AES256_rounds    
Post 20 Feb 2025, 00:28
View user's profile Send private message Visit poster's website Reply with quote
Ali.Z



Joined: 08 Jan 2018
Posts: 772
Ali.Z 20 Feb 2025, 16:30
revolution wrote:
Link?

Sorry, I can't find it, not even on my drive, but I'm pretty sure it is here somewhere.

_________________
Asm For Wise Humans
Post 20 Feb 2025, 16:30
View user's profile Send private message Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  


< Last Thread | Next Thread >
Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum


Copyright © 1999-2025, Tomasz Grysztar. Also on GitHub, YouTube.

Website powered by rwasa.