This commit is contained in:
Jay D Dee
2020-01-08 14:44:47 -05:00
parent 3572cb53c4
commit 70089d1224
121 changed files with 1842 additions and 14893 deletions

View File

@@ -132,10 +132,19 @@ do { \
// Two-lane AES round: x is expected to live in a 256 bit register and is
// treated as two independent 128 bit states. The same 128 bit round key k
// is used for both lanes.
//
// When VAES and the AVX512 F/VL/DQ/BW feature macros are all defined, the
// round is issued as one 256 bit _mm256_aesenc_epi128. The key is passed
// through m256_const1_128 first (presumably a broadcast of k to both
// lanes - confirm against its definition).
//
// Otherwise each 128 bit half of x is extracted, run through
// _mm_aesenc_si128 on its own, and the two results are rejoined with
// mm256_concat_128 (high half first, low half second).
#if defined(__VAES__) && defined(__AVX512F__) && defined(__AVX512VL__) \
 && defined(__AVX512DQ__) && defined(__AVX512BW__)

#define mm256_aesenc_2x128( x, k ) \
   _mm256_aesenc_epi128( x, m256_const1_128( k ) )

#else

#define mm256_aesenc_2x128( x, k ) \
   mm256_concat_128( _mm_aesenc_si128( mm128_extr_hi128_256( x ), k ), \
                     _mm_aesenc_si128( mm128_extr_lo128_256( x ), k ) )

#endif
#define mm256_paesenc_2x128( y, x, k ) do \
{ \
__m128i *X = (__m128i*)x; \
@@ -546,14 +555,14 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n )
// Statement macro operating in place on the pair (v1, v2) of __m256i
// values; judging by the name it is meant to rotate the 512 bit quantity
// formed by v1 and v2 right by 128 bits (TODO confirm lane order).
//
// v1, v2: modifiable __m256i lvalues. Both are read and then overwritten,
//         and each is expanded more than once, so pass plain variables
//         without side effects. Do not pass a variable named "t": it
//         would be shadowed by the macro's temporary.
//
// The original v1/v2 are combined into the temporary t before v1 is
// overwritten, so the second permute still sees the original v2 and v1.
//
// NOTE(review): _mm256_permute2x128 is not defined in the visible source;
// the AVX2 intrinsic is spelled _mm256_permute2x128_si256. If this name
// has that intrinsic's semantics, selectors (v1,v2,0x03) and (v2,v1,0x21)
// pick the same two lanes, leaving v1 == v2 - verify the constants.
#define mm256_ror512_128( v1, v2 ) \
do { \
   __m256i t = _mm256_permute2x128( v1, v2, 0x03 ); \
   v1 = _mm256_permute2x128( v2, v1, 0x21 ); \
   v2 = t; \
} while(0)
// Statement macro operating in place on the pair (v1, v2) of __m256i
// values; judging by the name it is meant to rotate the 512 bit quantity
// formed by v1 and v2 left by 128 bits (TODO confirm lane order).
//
// v1, v2: modifiable __m256i lvalues. Both are read and then overwritten,
//         and each is expanded more than once, so pass plain variables
//         without side effects. Do not pass a variable named "t": it
//         would be shadowed by the macro's temporary.
//
// The original v1/v2 are combined into the temporary t before v2 is
// overwritten, and v1 is only replaced after both permutes have run.
//
// NOTE(review): _mm256_permute2x128 is not defined in the visible source;
// the AVX2 intrinsic is spelled _mm256_permute2x128_si256. If this name
// has that intrinsic's semantics, selectors (v1,v2,0x03) and (v2,v1,0x21)
// pick the same two lanes, leaving v1 == v2 - verify the constants.
#define mm256_rol512_128( v1, v2 ) \
do { \
   __m256i t = _mm256_permute2x128( v1, v2, 0x03 ); \
   v2 = _mm256_permute2x128( v2, v1, 0x21 ); \
   v1 = t; \
} while(0)

View File

@@ -44,14 +44,14 @@
//
// Constants need to be composed at run time by assembling individual
// elements, which is very expensive. The cost is proportional to the number
// of different elements, therefore use the largest element size possible,
// merge smaller integer elements to 64 bits, and group repeated elements.
//
// Constants with repeating patterns can be optimized: the smaller the
// pattern and the more often it repeats, the more efficient the constant.
//
// Some specific constants can be very efficient. Zero is very efficient,
// 1 and -1 slightly less so.
//
// If an expensive constant is to be reused in the same function it should
// be declared as a local variable defined once and reused.