This commit is contained in:
Jay D Dee
2025-06-20 20:31:41 -04:00
parent dd99580a4c
commit 66191db93c
86 changed files with 2701 additions and 4322 deletions

View File

@@ -13,7 +13,7 @@
#if defined(LBRY_16WAY)
static __thread sha256_16way_context sha256_16w_mid;
static __thread sha256_16x32_context sha256_16w_mid;
void lbry_16way_hash( void* output, const void* input )
{
@@ -36,17 +36,17 @@ void lbry_16way_hash( void* output, const void* input )
uint32_t _ALIGN(64) h13[32];
uint32_t _ALIGN(64) h14[32];
uint32_t _ALIGN(64) h15[32];
sha256_16way_context ctx_sha256 __attribute__ ((aligned (64)));
sha512_8way_context ctx_sha512;
ripemd160_16way_context ctx_ripemd;
sha256_16x32_context ctx_sha256 __attribute__ ((aligned (64)));
sha512_8x64_context ctx_sha512;
ripemd160_16x32_context ctx_ripemd;
memcpy( &ctx_sha256, &sha256_16w_mid, sizeof(ctx_sha256) );
sha256_16way_update( &ctx_sha256, input + (LBRY_MIDSTATE<<4), LBRY_TAIL );
sha256_16way_close( &ctx_sha256, vhashA );
sha256_16x32_update( &ctx_sha256, input + (LBRY_MIDSTATE<<4), LBRY_TAIL );
sha256_16x32_close( &ctx_sha256, vhashA );
sha256_16way_init( &ctx_sha256 );
sha256_16way_update( &ctx_sha256, vhashA, 32 );
sha256_16way_close( &ctx_sha256, vhashA );
sha256_16x32_init( &ctx_sha256 );
sha256_16x32_update( &ctx_sha256, vhashA, 32 );
sha256_16x32_close( &ctx_sha256, vhashA );
// reinterleave to do sha512 8-way 64 bit twice.
dintrlv_16x32( h0, h1, h2, h3, h4, h5, h6, h7,
@@ -54,13 +54,13 @@ void lbry_16way_hash( void* output, const void* input )
intrlv_8x64( vhashA, h0, h1, h2, h3, h4, h5, h6, h7, 256 );
intrlv_8x64( vhashB, h8, h9, h10, h11, h12, h13, h14, h15, 256 );
sha512_8way_init( &ctx_sha512 );
sha512_8way_update( &ctx_sha512, vhashA, 32 );
sha512_8way_close( &ctx_sha512, vhashA );
sha512_8x64_init( &ctx_sha512 );
sha512_8x64_update( &ctx_sha512, vhashA, 32 );
sha512_8x64_close( &ctx_sha512, vhashA );
sha512_8way_init( &ctx_sha512 );
sha512_8way_update( &ctx_sha512, vhashB, 32 );
sha512_8way_close( &ctx_sha512, vhashB );
sha512_8x64_init( &ctx_sha512 );
sha512_8x64_update( &ctx_sha512, vhashB, 32 );
sha512_8x64_close( &ctx_sha512, vhashB );
// back to 16-way 32 bit
dintrlv_8x64( h0, h1, h2, h3, h4, h5, h6, h7, vhashA, 512 );
@@ -68,22 +68,22 @@ void lbry_16way_hash( void* output, const void* input )
intrlv_16x32( vhashA, h0, h1, h2, h3, h4, h5, h6, h7,
h8, h9, h10, h11, h12, h13, h14, h15, 512 );
ripemd160_16way_init( &ctx_ripemd );
ripemd160_16way_update( &ctx_ripemd, vhashA, 32 );
ripemd160_16way_close( &ctx_ripemd, vhashB );
ripemd160_16x32_init( &ctx_ripemd );
ripemd160_16x32_update( &ctx_ripemd, vhashA, 32 );
ripemd160_16x32_close( &ctx_ripemd, vhashB );
ripemd160_16way_init( &ctx_ripemd );
ripemd160_16way_update( &ctx_ripemd, vhashA+(8<<4), 32 );
ripemd160_16way_close( &ctx_ripemd, vhashC );
ripemd160_16x32_init( &ctx_ripemd );
ripemd160_16x32_update( &ctx_ripemd, vhashA+(8<<4), 32 );
ripemd160_16x32_close( &ctx_ripemd, vhashC );
sha256_16way_init( &ctx_sha256 );
sha256_16way_update( &ctx_sha256, vhashB, 20 );
sha256_16way_update( &ctx_sha256, vhashC, 20 );
sha256_16way_close( &ctx_sha256, vhashA );
sha256_16x32_init( &ctx_sha256 );
sha256_16x32_update( &ctx_sha256, vhashB, 20 );
sha256_16x32_update( &ctx_sha256, vhashC, 20 );
sha256_16x32_close( &ctx_sha256, vhashA );
sha256_16way_init( &ctx_sha256 );
sha256_16way_update( &ctx_sha256, vhashA, 32 );
sha256_16way_close( &ctx_sha256, output );
sha256_16x32_init( &ctx_sha256 );
sha256_16x32_update( &ctx_sha256, vhashA, 32 );
sha256_16x32_close( &ctx_sha256, output );
}
int scanhash_lbry_16way( struct work *work, uint32_t max_nonce,
@@ -115,8 +115,8 @@ int scanhash_lbry_16way( struct work *work, uint32_t max_nonce,
intrlv_16x32( vdata, edata, edata, edata, edata, edata, edata, edata,
edata, edata, edata, edata, edata, edata, edata, edata, edata, 1024 );
sha256_16way_init( &sha256_16w_mid );
sha256_16way_update( &sha256_16w_mid, vdata, LBRY_MIDSTATE );
sha256_16x32_init( &sha256_16w_mid );
sha256_16x32_update( &sha256_16w_mid, vdata, LBRY_MIDSTATE );
do
{
@@ -144,7 +144,7 @@ int scanhash_lbry_16way( struct work *work, uint32_t max_nonce,
#elif defined(LBRY_8WAY)
static __thread sha256_8way_context sha256_8w_mid;
static __thread sha256_8x32_context sha256_8w_mid;
void lbry_8way_hash( void* output, const void* input )
{
@@ -159,52 +159,52 @@ void lbry_8way_hash( void* output, const void* input )
uint32_t _ALIGN(32) h5[32];
uint32_t _ALIGN(32) h6[32];
uint32_t _ALIGN(32) h7[32];
sha256_8way_context ctx_sha256 __attribute__ ((aligned (64)));
sha512_4way_context ctx_sha512;
ripemd160_8way_context ctx_ripemd;
sha256_8x32_context ctx_sha256 __attribute__ ((aligned (64)));
sha512_4x64_context ctx_sha512;
ripemd160_8x32_context ctx_ripemd;
memcpy( &ctx_sha256, &sha256_8w_mid, sizeof(ctx_sha256) );
sha256_8way_update( &ctx_sha256, input + (LBRY_MIDSTATE<<3), LBRY_TAIL );
sha256_8way_close( &ctx_sha256, vhashA );
sha256_8x32_update( &ctx_sha256, input + (LBRY_MIDSTATE<<3), LBRY_TAIL );
sha256_8x32_close( &ctx_sha256, vhashA );
sha256_8way_init( &ctx_sha256 );
sha256_8way_update( &ctx_sha256, vhashA, 32 );
sha256_8way_close( &ctx_sha256, vhashA );
sha256_8x32_init( &ctx_sha256 );
sha256_8x32_update( &ctx_sha256, vhashA, 32 );
sha256_8x32_close( &ctx_sha256, vhashA );
// reinterleave to do sha512 4-way 64 bit twice.
dintrlv_8x32( h0, h1, h2, h3, h4, h5, h6, h7, vhashA, 256 );
intrlv_4x64( vhashA, h0, h1, h2, h3, 256 );
intrlv_4x64( vhashB, h4, h5, h6, h7, 256 );
sha512_4way_init( &ctx_sha512 );
sha512_4way_update( &ctx_sha512, vhashA, 32 );
sha512_4way_close( &ctx_sha512, vhashA );
sha512_4x64_init( &ctx_sha512 );
sha512_4x64_update( &ctx_sha512, vhashA, 32 );
sha512_4x64_close( &ctx_sha512, vhashA );
sha512_4way_init( &ctx_sha512 );
sha512_4way_update( &ctx_sha512, vhashB, 32 );
sha512_4way_close( &ctx_sha512, vhashB );
sha512_4x64_init( &ctx_sha512 );
sha512_4x64_update( &ctx_sha512, vhashB, 32 );
sha512_4x64_close( &ctx_sha512, vhashB );
// back to 8-way 32 bit
dintrlv_4x64( h0, h1, h2, h3, vhashA, 512 );
dintrlv_4x64( h4, h5, h6, h7, vhashB, 512 );
intrlv_8x32( vhashA, h0, h1, h2, h3, h4, h5, h6, h7, 512 );
ripemd160_8way_init( &ctx_ripemd );
ripemd160_8way_update( &ctx_ripemd, vhashA, 32 );
ripemd160_8way_close( &ctx_ripemd, vhashB );
ripemd160_8x32_init( &ctx_ripemd );
ripemd160_8x32_update( &ctx_ripemd, vhashA, 32 );
ripemd160_8x32_close( &ctx_ripemd, vhashB );
ripemd160_8way_init( &ctx_ripemd );
ripemd160_8way_update( &ctx_ripemd, vhashA+(8<<3), 32 );
ripemd160_8way_close( &ctx_ripemd, vhashC );
ripemd160_8x32_init( &ctx_ripemd );
ripemd160_8x32_update( &ctx_ripemd, vhashA+(8<<3), 32 );
ripemd160_8x32_close( &ctx_ripemd, vhashC );
sha256_8way_init( &ctx_sha256 );
sha256_8way_update( &ctx_sha256, vhashB, 20 );
sha256_8way_update( &ctx_sha256, vhashC, 20 );
sha256_8way_close( &ctx_sha256, vhashA );
sha256_8x32_init( &ctx_sha256 );
sha256_8x32_update( &ctx_sha256, vhashB, 20 );
sha256_8x32_update( &ctx_sha256, vhashC, 20 );
sha256_8x32_close( &ctx_sha256, vhashA );
sha256_8way_init( &ctx_sha256 );
sha256_8way_update( &ctx_sha256, vhashA, 32 );
sha256_8way_close( &ctx_sha256, output );
sha256_8x32_init( &ctx_sha256 );
sha256_8x32_update( &ctx_sha256, vhashA, 32 );
sha256_8x32_close( &ctx_sha256, output );
}
int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
@@ -235,8 +235,8 @@ int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
intrlv_8x32( vdata, edata, edata, edata, edata,
edata, edata, edata, edata, 1024 );
sha256_8way_init( &sha256_8w_mid );
sha256_8way_update( &sha256_8w_mid, vdata, LBRY_MIDSTATE );
sha256_8x32_init( &sha256_8w_mid );
sha256_8x32_update( &sha256_8w_mid, vdata, LBRY_MIDSTATE );
do
{

View File

@@ -57,7 +57,7 @@ do{ \
#define ROUND2(a, b, c, d, e, f, s, r, k) \
RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)
static void ripemd160_4way_round( ripemd160_4way_context *sc )
static void ripemd160_4x32_round( ripemd160_4x32_context *sc )
{
const __m128i *in = (__m128i*)sc->buf;
__m128i *h = (__m128i*)sc->val;
@@ -249,7 +249,7 @@ static void ripemd160_4way_round( ripemd160_4way_context *sc )
h[0] = tmp;
}
void ripemd160_4way_init( ripemd160_4way_context *sc )
void ripemd160_4x32_init( ripemd160_4x32_context *sc )
{
sc->val[0] = _mm_set1_epi64x( 0x6745230167452301 );
sc->val[1] = _mm_set1_epi64x( 0xEFCDAB89EFCDAB89 );
@@ -259,7 +259,7 @@ void ripemd160_4way_init( ripemd160_4way_context *sc )
sc->count_high = sc->count_low = 0;
}
void ripemd160_4way_update( ripemd160_4way_context *sc, const void *data,
void ripemd160_4x32_update( ripemd160_4x32_context *sc, const void *data,
size_t len )
{
__m128i *vdata = (__m128i*)data;
@@ -281,7 +281,7 @@ void ripemd160_4way_update( ripemd160_4way_context *sc, const void *data,
len -= clen;
if ( ptr == block_size )
{
ripemd160_4way_round( sc );
ripemd160_4x32_round( sc );
ptr = 0;
}
clow = sc->count_low;
@@ -292,7 +292,7 @@ void ripemd160_4way_update( ripemd160_4way_context *sc, const void *data,
}
}
void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst )
void ripemd160_4x32_close( ripemd160_4x32_context *sc, void *dst )
{
unsigned ptr, u;
uint32_t low, high;
@@ -306,7 +306,7 @@ void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst )
if ( ptr > pad )
{
memset_zero_128( sc->buf + (ptr>>2), (block_size - ptr) >> 2 );
ripemd160_4way_round( sc );
ripemd160_4x32_round( sc );
memset_zero_128( sc->buf, pad>>2 );
}
else
@@ -317,7 +317,7 @@ void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst )
low = low << 3;
sc->buf[ pad>>2 ] = _mm_set1_epi32( low );
sc->buf[ (pad>>2) + 1 ] = _mm_set1_epi32( high );
ripemd160_4way_round( sc );
ripemd160_4x32_round( sc );
for (u = 0; u < 5; u ++)
casti_v128u32( dst, u ) = sc->val[u];
}
@@ -357,7 +357,7 @@ do{ \
#define ROUND2_8W(a, b, c, d, e, f, s, r, k) \
RR_8W(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)
static void ripemd160_8way_round( ripemd160_8way_context *sc )
static void ripemd160_8x32_round( ripemd160_8x32_context *sc )
{
const __m256i *in = (__m256i*)sc->buf;
__m256i *h = (__m256i*)sc->val;
@@ -550,7 +550,7 @@ static void ripemd160_8way_round( ripemd160_8way_context *sc )
}
void ripemd160_8way_init( ripemd160_8way_context *sc )
void ripemd160_8x32_init( ripemd160_8x32_context *sc )
{
sc->val[0] = _mm256_set1_epi64x( 0x6745230167452301 );
sc->val[1] = _mm256_set1_epi64x( 0xEFCDAB89EFCDAB89 );
@@ -560,7 +560,7 @@ void ripemd160_8way_init( ripemd160_8way_context *sc )
sc->count_high = sc->count_low = 0;
}
void ripemd160_8way_update( ripemd160_8way_context *sc, const void *data,
void ripemd160_8x32_update( ripemd160_8x32_context *sc, const void *data,
size_t len )
{
__m256i *vdata = (__m256i*)data;
@@ -582,7 +582,7 @@ void ripemd160_8way_update( ripemd160_8way_context *sc, const void *data,
len -= clen;
if ( ptr == block_size )
{
ripemd160_8way_round( sc );
ripemd160_8x32_round( sc );
ptr = 0;
}
clow = sc->count_low;
@@ -593,7 +593,7 @@ void ripemd160_8way_update( ripemd160_8way_context *sc, const void *data,
}
}
void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst )
void ripemd160_8x32_close( ripemd160_8x32_context *sc, void *dst )
{
unsigned ptr, u;
uint32_t low, high;
@@ -607,7 +607,7 @@ void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst )
if ( ptr > pad )
{
memset_zero_256( sc->buf + (ptr>>2), (block_size - ptr) >> 2 );
ripemd160_8way_round( sc );
ripemd160_8x32_round( sc );
memset_zero_256( sc->buf, pad>>2 );
}
else
@@ -618,7 +618,7 @@ void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst )
low = low << 3;
sc->buf[ pad>>2 ] = _mm256_set1_epi32( low );
sc->buf[ (pad>>2) + 1 ] = _mm256_set1_epi32( high );
ripemd160_8way_round( sc );
ripemd160_8x32_round( sc );
for (u = 0; u < 5; u ++)
casti_m256i( dst, u ) = sc->val[u];
}
@@ -629,7 +629,6 @@ void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst )
// RIPEMD-160 16 way
#define F16W_1(x, y, z) \
_mm512_xor_si512( _mm512_xor_si512( x, y ), z )
@@ -659,7 +658,7 @@ do{ \
#define ROUND2_16W(a, b, c, d, e, f, s, r, k) \
RR_16W(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)
static void ripemd160_16way_round( ripemd160_16way_context *sc )
static void ripemd160_16x32_round( ripemd160_16x32_context *sc )
{
const __m512i *in = (__m512i*)sc->buf;
__m512i *h = (__m512i*)sc->val;
@@ -851,7 +850,7 @@ static void ripemd160_16way_round( ripemd160_16way_context *sc )
h[0] = tmp;
}
void ripemd160_16way_init( ripemd160_16way_context *sc )
void ripemd160_16x32_init( ripemd160_16x32_context *sc )
{
sc->val[0] = _mm512_set1_epi64( 0x6745230167452301 );
sc->val[1] = _mm512_set1_epi64( 0xEFCDAB89EFCDAB89 );
@@ -861,7 +860,7 @@ void ripemd160_16way_init( ripemd160_16way_context *sc )
sc->count_high = sc->count_low = 0;
}
void ripemd160_16way_update( ripemd160_16way_context *sc, const void *data,
void ripemd160_16x32_update( ripemd160_16x32_context *sc, const void *data,
size_t len )
{
__m512i *vdata = (__m512i*)data;
@@ -883,7 +882,7 @@ void ripemd160_16way_update( ripemd160_16way_context *sc, const void *data,
len -= clen;
if ( ptr == block_size )
{
ripemd160_16way_round( sc );
ripemd160_16x32_round( sc );
ptr = 0;
}
clow = sc->count_low;
@@ -894,7 +893,7 @@ void ripemd160_16way_update( ripemd160_16way_context *sc, const void *data,
}
}
void ripemd160_16way_close( ripemd160_16way_context *sc, void *dst )
void ripemd160_16x32_close( ripemd160_16x32_context *sc, void *dst )
{
unsigned ptr, u;
uint32_t low, high;
@@ -908,7 +907,7 @@ void ripemd160_16way_close( ripemd160_16way_context *sc, void *dst )
if ( ptr > pad )
{
memset_zero_512( sc->buf + (ptr>>2), (block_size - ptr) >> 2 );
ripemd160_16way_round( sc );
ripemd160_16x32_round( sc );
memset_zero_512( sc->buf, pad>>2 );
}
else
@@ -919,7 +918,7 @@ void ripemd160_16way_close( ripemd160_16way_context *sc, void *dst )
low = low << 3;
sc->buf[ pad>>2 ] = _mm512_set1_epi32( low );
sc->buf[ (pad>>2) + 1 ] = _mm512_set1_epi32( high );
ripemd160_16way_round( sc );
ripemd160_16x32_round( sc );
for (u = 0; u < 5; u ++)
casti_m512i( dst, u ) = sc->val[u];
}

View File

@@ -12,12 +12,12 @@ typedef struct
__m128i buf[64>>2];
__m128i val[5];
uint32_t count_high, count_low;
} __attribute__ ((aligned (64))) ripemd160_4way_context;
} __attribute__ ((aligned (64))) ripemd160_4x32_context;
void ripemd160_4way_init( ripemd160_4way_context *sc );
void ripemd160_4way_update( ripemd160_4way_context *sc, const void *data,
void ripemd160_4x32_init( ripemd160_4x32_context *sc );
void ripemd160_4x32_update( ripemd160_4x32_context *sc, const void *data,
size_t len );
void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst );
void ripemd160_4x32_close( ripemd160_4x32_context *sc, void *dst );
#if defined (__AVX2__)
@@ -26,12 +26,12 @@ typedef struct
__m256i buf[64>>2];
__m256i val[5];
uint32_t count_high, count_low;
} __attribute__ ((aligned (128))) ripemd160_8way_context;
} __attribute__ ((aligned (128))) ripemd160_8x32_context;
void ripemd160_8way_init( ripemd160_8way_context *sc );
void ripemd160_8way_update( ripemd160_8way_context *sc, const void *data,
void ripemd160_8x32_init( ripemd160_8x32_context *sc );
void ripemd160_8x32_update( ripemd160_8x32_context *sc, const void *data,
size_t len );
void ripemd160_8way_close( ripemd160_8way_context *sc, void *dst );
void ripemd160_8x32_close( ripemd160_8x32_context *sc, void *dst );
#if defined(SIMD512)
@@ -40,12 +40,12 @@ typedef struct
__m512i buf[64>>2];
__m512i val[5];
uint32_t count_high, count_low;
} __attribute__ ((aligned (128))) ripemd160_16way_context;
} __attribute__ ((aligned (128))) ripemd160_16x32_context;
void ripemd160_16way_init( ripemd160_16way_context *sc );
void ripemd160_16way_update( ripemd160_16way_context *sc, const void *data,
void ripemd160_16x32_init( ripemd160_16x32_context *sc );
void ripemd160_16x32_update( ripemd160_16x32_context *sc, const void *data,
size_t len );
void ripemd160_16way_close( ripemd160_16way_context *sc, void *dst );
void ripemd160_16x32_close( ripemd160_16x32_context *sc, void *dst );
#endif // AVX512
#endif // __AVX2__