This commit is contained in:
Jay D Dee
2025-07-20 19:43:10 -04:00
parent aa47e880d5
commit 12480a3ea5
17 changed files with 507 additions and 504 deletions

View File

@@ -137,10 +137,24 @@
// Direct alias for the _mm_unpackhi_epi8 intrinsic.
#define v128_unpackhi8 _mm_unpackhi_epi8
// AES
// The _nokey variants gain nothing on x86_64, but on ARM they save an
// instruction and a register.
// One AES encryption round with the key xored into the result after the
// round, which is the x86_64 format.
#define v128_aesencxor _mm_aesenc_si128
// Default round macro: the x86_64 format. Calls to v128_aesenc( v, k )
// resolve to v128_aesencxor( v, k ).
#define v128_aesenc v128_aesencxor
// arm64-format round: the key is xored into the input first, then the round
// is run with a zero key so nothing further is xored into the result.
#define v128_xoraesenc( x, key ) \
   _mm_aesenc_si128( v128_xor( x, key ), v128_zero )
// Round with two keys: pre_key is xored into the input before the round and
// post_key is xored into the result afterward. On this target the trailing
// xor is folded into the key operand of the AES round intrinsic.
#define v128_xoraesencxor( x, pre_key, post_key ) \
   _mm_aesenc_si128( v128_xor( x, pre_key ), post_key )
// No-key forms exist for the benefit of arm64; here the intrinsic still takes
// a key operand, so a zero vector is passed.
#define v128_aesenc_nokey( x )      _mm_aesenc_si128( x, v128_zero )
// Last encryption round, keyed and no-key forms.
#define v128_aesenclast             _mm_aesenclast_si128
#define v128_aesenclast_nokey( x )  _mm_aesenclast_si128( x, v128_zero )
// Decryption round.
#define v128_aesdec                 _mm_aesdec_si128

View File

@@ -187,9 +187,21 @@
// vzipq_u32 can do hi & lo and return uint32x4x2, no 64 bit version.
// AES
// One AES encryption round consistent with the Intel AES intrinsics: the key
// is xored with the result after the round (x86_64 format). The round is
// broken up into separate steps for optimizing.
#define v128_aesencxor( v, k ) \
v128_xor( vaesmcq_u8( vaeseq_u8( v, v128_zero ) ), k )
// Default round macro: the x86_64 format. Calls to v128_aesenc( v, k )
// resolve to v128_aesencxor( v, k ).
#define v128_aesenc v128_aesencxor
// arm64-format round: the key is handed straight to vaeseq_u8, which xors it
// with the input before the round; nothing is xored into the result.
#define v128_xoraesenc( x, key ) \
   vaesmcq_u8( vaeseq_u8( x, key ) )
// Round with two keys: pre_key is xored into the input before the round and
// post_key is xored into the result afterward. On this target the leading
// xor comes from v128_xoraesenc and the trailing one is an explicit v128_xor.
#define v128_xoraesencxor( x, pre_key, post_key ) \
   v128_xor( v128_xoraesenc( x, pre_key ), post_key )
// Round with no key: vaeseq_u8 is given a zero key and no xor is applied to
// the result.
#define v128_aesenc_nokey( x ) \
   vaesmcq_u8( vaeseq_u8( x, v128_zero ) )