v25.5

2026-02-22 16:33:08 +00:00 · 2025-07-09 01:32:38 -04:00
parent 66191db93c
commit aa47e880d5
12 changed files with 221 additions and 319 deletions
--- a/simd-utils/simd-256.h
+++ b/simd-utils/simd-256.h
@@ -217,7 +217,9 @@ static inline __m256i mm256_not( const __m256i v )
 // Equivalent of AVX512 _mm256_movepi64_mask & _mm256_movepi32_mask.
 // Returns 4 or 8 bit integer mask from MSBit of 64 or 32 bit elements.
 // Effectively a sign test.
-
+// These functions return int, which can cause smaller integer operands to be
+// promoted to int when the result is used in an expression. Users should mask
+// the slack bits as needed to maintain data integrity.
 #define mm256_movmask_64( v ) \
   _mm256_movemask_pd( _mm256_castsi256_pd( v ) )

--- a/simd-utils/simd-512.h
+++ b/simd-utils/simd-512.h
@@ -14,12 +14,6 @@
 //   vectors. It is therefore not technically required for any 512 bit vector
 //   utilities defined below.

-// if avx10   // avx512 is always set
-//      if evex512: yes   
-// else if avx512 : yes   // avx512 is set but not avx10
-// else           : no    // avx512 not set or avx10.1 is set without evex512
-
-
 #if defined(SIMD512)

 //  AVX512 intrinsics have a few changes from previous conventions.
@@ -57,7 +51,7 @@
 //      - if an argument is to referenced multiple times a C inline function
 //        should be used instead of a macro to prevent an expression argument
 //        from being evaluated multiple times (wasteful) or produces side
-//         effects (very bad).
+//        effects (very bad).
 //
 //    There are 2 areas where overhead is a major concern: constants and
 //    permutations.
--- a/simd-utils/simd-neon.h
+++ b/simd-utils/simd-neon.h
@@ -4,9 +4,10 @@
 #if defined(__aarch64__) && defined(__ARM_NEON)

 // Targeted functions supporting NEON SIMD 128 & 64 bit vectors.
-// Element size matters!
 //
-// Intel naming is generally used.
+// Intel style naming is generally used, however, this is not an attempt to emulate Intel
+// instructions. It's focussed on the functions used in this program and the best way
+// to implement them with NEON.
 //
 // Some advanced logical operations that require SHA3. Prior to GCC-13
 // they also require armv8.2