This commit is contained in:
Jay D Dee
2018-02-15 14:48:50 -05:00
parent e4265a6f11
commit d60a268972
57 changed files with 3469 additions and 2135 deletions

138
algo/ripemd/lbry-4way.c Normal file
View File

@@ -0,0 +1,138 @@
#include "lbry-gate.h"
#if defined(LBRY_4WAY)
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/sha/sha2-hash-4way.h"
#include "ripemd-hash-4way.h"
static __thread sha256_4way_context sha256_mid;
void lbry_4way_hash( void* output, const void* input )
{
sha256_4way_context ctx_sha256 __attribute__ ((aligned (64)));
sha512_4way_context ctx_sha512;
ripemd160_4way_context ctx_ripemd;
uint32_t _ALIGN(64) vhashA[16<<2];
uint32_t _ALIGN(64) vhashB[16<<2];
uint32_t _ALIGN(64) vhashC[16<<2];
memcpy( &ctx_sha256, &sha256_mid, sizeof(ctx_sha256) );
sha256_4way( &ctx_sha256, input+(64<<2), 48 );
sha256_4way_close( &ctx_sha256, vhashA );
sha256_4way_init( &ctx_sha256 );
sha256_4way( &ctx_sha256, vhashA, 32 );
sha256_4way_close( &ctx_sha256, vhashA );
// sha512 64 bit data, 64 byte output
mm256_reinterleave_4x64( vhashB, vhashA, 256 );
sha512_4way_init( &ctx_sha512 );
sha512_4way( &ctx_sha512, vhashB, 32 );
sha512_4way_close( &ctx_sha512, vhashB );
mm256_reinterleave_4x32( vhashA, vhashB, 512 );
ripemd160_4way_init( &ctx_ripemd );
ripemd160_4way( &ctx_ripemd, vhashA, 32 );
ripemd160_4way_close( &ctx_ripemd, vhashB );
ripemd160_4way_init( &ctx_ripemd );
ripemd160_4way( &ctx_ripemd, vhashA+(8<<2), 32 );
ripemd160_4way_close( &ctx_ripemd, vhashC );
sha256_4way_init( &ctx_sha256 );
sha256_4way( &ctx_sha256, vhashB, 20 );
sha256_4way( &ctx_sha256, vhashC, 20 );
sha256_4way_close( &ctx_sha256, vhashA );
sha256_4way_init( &ctx_sha256 );
sha256_4way( &ctx_sha256, vhashA, 32 );
sha256_4way_close( &ctx_sha256, vhashA );
mm_deinterleave_4x32( output, output+32, output+64, output+96, vhashA, 256 );
}
/*
 * Scan nonces four at a time for LBRY shares.
 *
 * thr_id      - index into work_restart[] for this thread.
 * work        - work item; data[27] is the starting nonce, target[] the
 *               share target. nonces[] / nfound[] receive per-lane results.
 * max_nonce   - scanning stops once the running nonce reaches this value.
 * hashes_done - receives the number of nonces consumed.
 *
 * Returns the number of lanes (0..4) that produced a valid share in the
 * last group hashed.
 */
int scanhash_lbry_4way( int thr_id, struct work *work, uint32_t max_nonce,
                        uint64_t *hashes_done)
{
    uint32_t hash[4*8] __attribute__ ((aligned (64)));
    uint32_t vdata[32*4] __attribute__ ((aligned (64)));
    uint32_t edata[32] __attribute__ ((aligned (64)));
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    uint32_t n = pdata[27];
    const uint32_t first_nonce = pdata[27];
    const uint32_t Htarg = ptarget[7];
    uint32_t *nonces = work->nonces;
    bool *found = work->nfound;
    int num_found = 0;
    uint32_t *noncep = vdata + 108;   // 27*4: lane 0 nonce word, lanes follow

    // Cheap pre-filter: for a target whose top word is <= htmax[m], a
    // candidate must have all bits of masks[m] clear in hash word 7.
    static const uint64_t htmax[] = {          0,        0xF,       0xFF,
                                           0xFFF,     0xFFFF, 0x10000000 };
    static const uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
                                      0xFFFFF000, 0xFFFF0000,          0 };
    // Element count, NOT sizeof(masks): the byte size (24) made the lookup
    // read past the end of both tables whenever Htarg > 0x10000000.
    const size_t num_masks = sizeof(masks) / sizeof(masks[0]);

    // Targets easier than every table entry get no pre-filter at all;
    // fulltest() alone decides.
    uint32_t mask = 0;
    for ( size_t m = 0; m < num_masks; m++ )
        if ( Htarg <= htmax[m] )
        {
            mask = masks[m];
            break;
        }

    // we need bigendian data...
    swab32_array( edata, pdata, 32 );
    mm_interleave_4x32( vdata, edata, edata, edata, edata, 1024 );

    // The first 64 bytes of the header do not depend on the nonce:
    // hash them once and let lbry_4way_hash() resume from the midstate.
    sha256_4way_init( &sha256_mid );
    sha256_4way( &sha256_mid, vdata, 64 );

    do
    {
        for ( int lane = 0; lane < 4; lane++ )
        {
            found[lane] = false;
            be32enc( noncep + lane, n + lane );
        }

        lbry_4way_hash( hash, vdata );

        for ( int lane = 0; lane < 4; lane++ )
        {
            uint32_t *lane_hash = hash + 8*lane;

            if ( ( lane_hash[7] & mask ) || !fulltest( lane_hash, ptarget ) )
                continue;

            found[lane] = true;
            num_found++;
            nonces[lane] = n + lane;
            if ( lane == 0 )
                pdata[27] = n;   // only lane 0 updates the work nonce
            work_set_target_ratio( work, lane_hash );
        }
        n += 4;
    } while ( ( num_found == 0 ) && ( n < max_nonce )
              && !work_restart[thr_id].restart );

    *hashes_done = n - first_nonce;
    return num_found;
}
#endif

94
algo/ripemd/lbry-gate.c Normal file
View File

@@ -0,0 +1,94 @@
#include "lbry-gate.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
/*
 * Derive the network difficulty from the compact "nbits" field of the
 * block header stored in work->data.
 */
double lbry_calc_network_diff( struct work *work )
{
    // sample for diff 43.281 : 1c05ea29
    // todo: endian reversed on longpoll could be zr5 specific...
    const uint32_t nbits = swab32( work->data[ LBRY_NBITS_INDEX ] );
    const uint32_t bits  = nbits & 0xffffff;
    const int16_t  shift = swab32( nbits ) & 0xff;   // 0x1c = 28

    double d = (double)0x0000ffff / (double)bits;

    // Scale by 256 per step of distance between the exponent and 29:
    // up when the exponent is below 29, down when it is above.
    for ( int steps = 29 - shift; steps > 0; steps-- )
        d *= 256.0;
    for ( int steps = shift - 29; steps > 0; steps-- )
        d /= 256.0;

    if ( opt_debug_diff )
        applog( LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d, shift, bits );

    return d;
}
// std_le should work but it doesn't
/*
 * Format a stratum "mining.submit" request for the share in `work` into
 * `req` (at most JSON_BUF_LEN bytes). ntime and nonce are sent as the hex
 * of their little-endian encoding. `sctx` is not used.
 */
void lbry_le_build_stratum_request( char *req, struct work *work,
                                    struct stratum_ctx *sctx )
{
    uint32_t le_ntime, le_nonce;
    char ntime_hex[9], nonce_hex[9];
    unsigned char *xnonce2_hex;

    le32enc( &le_ntime, work->data[ LBRY_NTIME_INDEX ] );
    bin2hex( ntime_hex, (char*)(&le_ntime), sizeof(uint32_t) );

    le32enc( &le_nonce, work->data[ LBRY_NONCE_INDEX ] );
    bin2hex( nonce_hex, (char*)(&le_nonce), sizeof(uint32_t) );

    // abin2hex hands back a buffer that is released with free() below.
    xnonce2_hex = abin2hex( work->xnonce2, work->xnonce2_len);

    snprintf( req, JSON_BUF_LEN,
        "{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}",
        rpc_user, work->job_id, xnonce2_hex, ntime_hex, nonce_hex );

    free( xnonce2_hex );
}
/*
 * Build the LBRY block header in g_work->data from the current stratum job
 * and advance the job's extranonce2 for the next call.
 */
void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{
    unsigned char merkle_root[64] = { 0 };
    size_t pos = 0;

    // The merkle root is generated before extranonce2 is advanced.
    algo_gate.gen_merkle_root( merkle_root, sctx );

    // Increment extranonce2: bump the first byte and carry into the next
    // one only while the byte just incremented wrapped around to zero.
    while ( pos < sctx->xnonce2_size )
    {
        if ( ++sctx->job.xnonce2[pos] )
            break;
        pos++;
    }

    // Assemble block header
    memset( g_work->data, 0, sizeof(g_work->data) );
    g_work->data[0] = le32dec( sctx->job.version );
    for ( int w = 0; w < 8; w++ )
    {
        g_work->data[ 1 + w] = le32dec( (uint32_t *) sctx->job.prevhash + w );
        g_work->data[ 9 + w] = be32dec( (uint32_t *) merkle_root + w );
        g_work->data[17 + w] = ((uint32_t*)sctx->job.claim)[w];
    }
    g_work->data[ LBRY_NTIME_INDEX ] = le32dec(sctx->job.ntime);
    g_work->data[ LBRY_NBITS_INDEX ] = le32dec(sctx->job.nbits);
    g_work->data[28] = 0x80000000;
}
/*
 * Set the share target of `work` from the job difficulty, divided by
 * 256 * opt_diff_factor.
 */
void lbry_set_target( struct work* work, double job_diff )
{
    const double scaled_diff = job_diff / (256.0 * opt_diff_factor);

    work_set_target( work, scaled_diff );
}
/* Return the fixed max64 value this algorithm reports through the gate. */
int64_t lbry_get_max64()
{
    const int64_t max64 = 0x1ffffLL;

    return max64;
}
/*
 * Install the LBRY implementation into the algorithm gate: the hash and
 * scanhash entry points (4-way when LBRY_4WAY is defined, scalar otherwise),
 * the LBRY-specific stratum / difficulty hooks and the header layout.
 * Always returns true.
 */
bool register_lbry_algo( algo_gate_t* gate )
{
    // Header layout.
    gate->work_data_size = LBRY_WORK_DATA_SIZE;
    gate->ntime_index    = LBRY_NTIME_INDEX;
    gate->nbits_index    = LBRY_NBITS_INDEX;
    gate->nonce_index    = LBRY_NONCE_INDEX;

    // Hashing entry points.
#if !defined (LBRY_4WAY)
    gate->hash     = (void*)&lbry_hash;
    gate->scanhash = (void*)&scanhash_lbry;
#else
    gate->hash     = (void*)&lbry_4way_hash;
    gate->scanhash = (void*)&scanhash_lbry_4way;
#endif
    gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT | SHA_OPT;

    // LBRY-specific overrides of the gate hooks.
    gate->set_target            = (void*)&lbry_set_target;
    gate->build_extraheader     = (void*)&lbry_build_extraheader;
    gate->build_stratum_request = (void*)&lbry_le_build_stratum_request;
    gate->get_max64             = (void*)&lbry_get_max64;
    gate->calc_network_diff     = (void*)&lbry_calc_network_diff;

    return true;
}

30
algo/ripemd/lbry-gate.h Normal file
View File

@@ -0,0 +1,30 @@
#ifndef LBRY_GATE_H__
#define LBRY_GATE_H__ 1
#include "algo-gate-api.h"
#include <stdint.h>
// Select the 4-way implementation whenever the compiler targets AVX2.
#if defined(__AVX2__)
#define LBRY_4WAY
#endif
// Positions of the ntime, nbits and nonce fields, used as indices into
// work->data[].
#define LBRY_NTIME_INDEX 25
#define LBRY_NBITS_INDEX 26
#define LBRY_NONCE_INDEX 27
// Header size reported through the gate (presumably in bytes - TODO confirm).
#define LBRY_WORK_DATA_SIZE 192
#define LBRY_WORK_CMP_SIZE 76 // same as default
// Installs the LBRY entry points and header layout into `gate`.
bool register_lbry_algo( algo_gate_t* gate );
#if defined(LBRY_4WAY)
// 4-way variants: hash / scan four nonces per call.
void lbry_4way_hash( void *state, const void *input );
int scanhash_lbry_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif
// Scalar variants, declared regardless of LBRY_4WAY.
void lbry_hash( void *state, const void *input );
int scanhash_lbry( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
#endif

146
algo/ripemd/lbry.c Normal file
View File

@@ -0,0 +1,146 @@
#include "lbry-gate.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "sph_ripemd.h"
#include "algo/sha/sph_sha2.h"
#include <openssl/sha.h>
void lbry_hash(void* output, const void* input)
{
#ifndef USE_SPH_SHA
SHA256_CTX ctx_sha256 __attribute__ ((aligned (64)));
SHA512_CTX ctx_sha512 __attribute__ ((aligned (64)));
#else
sph_sha256_context ctx_sha256 __attribute__ ((aligned (64)));
sph_sha512_context ctx_sha512 __attribute__ ((aligned (64)));
#endif
sph_ripemd160_context ctx_ripemd __attribute__ ((aligned (64)));
uint32_t _ALIGN(64) hashA[16];
uint32_t _ALIGN(64) hashB[16];
uint32_t _ALIGN(64) hashC[16];
#ifndef USE_SPH_SHA
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, input, 112 );
SHA256_Final( (unsigned char*) hashA, &ctx_sha256 );
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, hashA, 32 );
SHA256_Final( (unsigned char*) hashA, &ctx_sha256 );
SHA512_Init( &ctx_sha512 );
SHA512_Update( &ctx_sha512, hashA, 32 );
SHA512_Final( (unsigned char*) hashA, &ctx_sha512 );
#else
sph_sha256_init( &ctx_sha256 );
sph_sha256 ( &ctx_sha256, input, 112 );
sph_sha256_close( &ctx_sha256, hashA );
sph_sha256_init( &ctx_sha256 );
sph_sha256 ( &ctx_sha256, hashA, 32 );
sph_sha256_close( &ctx_sha256, hashA );
sph_sha512_init( &ctx_sha512 );
sph_sha512 ( &ctx_sha512, hashA, 32 );
sph_sha512_close( &ctx_sha512, hashA );
#endif
sph_ripemd160_init( &ctx_ripemd );
sph_ripemd160 ( &ctx_ripemd, hashA, 32 );
sph_ripemd160_close( &ctx_ripemd, hashB );
sph_ripemd160_init( &ctx_ripemd );
sph_ripemd160 ( &ctx_ripemd, hashA+8, 32 );
sph_ripemd160_close( &ctx_ripemd, hashC );
#ifndef USE_SPH_SHA
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, hashB, 20 );
SHA256_Update( &ctx_sha256, hashC, 20 );
SHA256_Final( (unsigned char*) hashA, &ctx_sha256 );
SHA256_Init( &ctx_sha256 );
SHA256_Update( &ctx_sha256, hashA, 32 );
SHA256_Final( (unsigned char*) hashA, &ctx_sha256 );
#else
sph_sha256_init( &ctx_sha256 );
sph_sha256 ( &ctx_sha256, hashB, 20 );
sph_sha256 ( &ctx_sha256, hashC, 20 );
sph_sha256_close( &ctx_sha256, hashA );
sph_sha256_init( &ctx_sha256 );
sph_sha256 ( &ctx_sha256, hashA, 32 );
sph_sha256_close( &ctx_sha256, hashA );
#endif
memcpy( output, hashA, 32 );
}
/*
 * Scan nonces one at a time for an LBRY share.
 *
 * thr_id      - index into work_restart[] for this thread.
 * work        - work item; data[27] is the starting nonce (and is updated
 *               with the last nonce tried), target[] the share target.
 * max_nonce   - scanning stops once the nonce reaches this value.
 * hashes_done - receives the number of nonces tried.
 *
 * Returns non-zero when a nonce meeting the target was found (it is left
 * in work->data[27]), 0 otherwise.
 */
int scanhash_lbry( int thr_id, struct work *work, uint32_t max_nonce,
                   uint64_t *hashes_done)
{
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    uint32_t n = pdata[27] - 1;
    const uint32_t first_nonce = pdata[27];
    const uint32_t Htarg = ptarget[7];

    uint32_t hash64[8] __attribute__((aligned(64)));
    uint32_t endiandata[32] __attribute__ ((aligned (64)));

    // Cheap pre-filter: for a target whose top word is <= htmax[m], a
    // candidate must have all bits of masks[m] clear in hash word 7.
    static const uint64_t htmax[] = {
        0,
        0xF,
        0xFF,
        0xFFF,
        0xFFFF,
        0x10000000
    };
    static const uint32_t masks[] = {
        0xFFFFFFFF,
        0xFFFFFFF0,
        0xFFFFFF00,
        0xFFFFF000,
        0xFFFF0000,
        0
    };
    // Element count, NOT sizeof(masks): the byte size (24) made the lookup
    // read past the end of both tables whenever Htarg > 0x10000000.
    const size_t num_masks = sizeof(masks) / sizeof(masks[0]);

    // Targets easier than every table entry get no pre-filter at all;
    // fulltest() alone decides.
    uint32_t mask = 0;
    for (size_t m = 0; m < num_masks; m++) {
        if (Htarg <= htmax[m]) {
            mask = masks[m];
            break;
        }
    }

    // we need bigendian data...
    swab32_array( endiandata, pdata, 32 );

#ifdef DEBUG_ALGO
    printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif

    do {
        pdata[27] = ++n;
        be32enc(&endiandata[27], n);
        lbry_hash(hash64, &endiandata);
#ifndef DEBUG_ALGO
        if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) {
            *hashes_done = n - first_nonce + 1;
            return true;
        }
#else
        if (!(n % 0x1000) && !thr_id) printf(".");
        if (!(hash64[7] & mask)) {
            printf("[%d]",thr_id);
            if (fulltest(hash64, ptarget)) {
                *hashes_done = n - first_nonce + 1;
                return true;
            }
        }
#endif
    } while (n < max_nonce && !work_restart[thr_id].restart);

    *hashes_done = n - first_nonce + 1;
    pdata[27] = n;
    return 0;
}

View File

@@ -0,0 +1,323 @@
#include "ripemd-hash-4way.h"
#if defined(__AVX__)
#include <stddef.h>
#include <string.h>
/*
 * Round functions for RIPEMD-128 and RIPEMD-160, applied to __m128i
 * operands. mm_not is a project helper, presumably bitwise NOT.
 *   F1 = x ^ y ^ z
 *   F2 = ((y ^ z) & x) ^ z
 *   F3 = (x | ~y) ^ z
 *   F4 = ((x ^ y) & z) ^ y
 *   F5 = x ^ (y | ~z)
 */
#define F1(x, y, z) \
_mm_xor_si128( _mm_xor_si128( x, y ), z )
#define F2(x, y, z) \
_mm_xor_si128( _mm_and_si128( _mm_xor_si128( y, z ), x ), z )
#define F3(x, y, z) \
_mm_xor_si128( _mm_or_si128( x, mm_not( y ) ), z )
#define F4(x, y, z) \
_mm_xor_si128( _mm_and_si128( _mm_xor_si128( x, y ), z ), y )
#define F5(x, y, z) \
_mm_xor_si128( x, _mm_or_si128( y, mm_not( z ) ) )
/* Initial chaining value, broadcast to every lane by ripemd160_4way_init(). */
static const uint32_t IV[5] =
{ 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 };
/*
 * Round constants for RIPEMD-160.
 * K1x are selected by ROUND1 and K2x by ROUND2, x being the group
 * number (1..5) passed as the last macro argument.
 */
#define K11 0x00000000
#define K12 0x5A827999
#define K13 0x6ED9EBA1
#define K14 0x8F1BBCDC
#define K15 0xA953FD4E
#define K21 0x50A28BE6
#define K22 0x5C4DD124
#define K23 0x6D703EF3
#define K24 0x7A6D76E9
#define K25 0x00000000
/*
 * One step: a = rotl( a + f(b,c,d) + r + k, s ) + e, then c = rotl( c, 10 ),
 * where r is the message word, k the round constant and s the rotation
 * count. mm_rotl_32 is a project helper, presumably a per-lane 32-bit
 * left rotate.
 */
#define RR(a, b, c, d, e, f, s, r, k) \
do{ \
a = _mm_add_epi32( mm_rotl_32( _mm_add_epi32( _mm_add_epi32( \
_mm_add_epi32( a, f( b ,c, d ) ), r ), \
_mm_set1_epi32( k ) ), s ), e ); \
c = mm_rotl_32( c, 10 );\
} while (0)
/*
 * ROUND1 / ROUND2 apply RR to the "1" / "2" set of working variables
 * (A1..E1 / A2..E2) by pasting the suffix onto the variable names, and
 * pick the matching K1k / K2k constant.
 */
#define ROUND1(a, b, c, d, e, f, s, r, k) \
RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k)
#define ROUND2(a, b, c, d, e, f, s, r, k) \
RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k)
/*
 * Compression function: mix the 16 message vectors held in sc->buf into the
 * five-vector chaining state sc->val. Each __m128i is processed with 32-bit
 * lane arithmetic, so four independent hashes advance together.
 *
 * Two sets of working variables (A1..E1 and A2..E2) start from the same
 * chaining value, each goes through 80 steps (five groups of 16) with its
 * own message order, rotation counts and constants, and both are folded
 * back into sc->val at the end.
 */
static void ripemd160_4way_round( ripemd160_4way_context *sc )
{
const __m128i *in = (__m128i*)sc->buf;
__m128i *h = (__m128i*)sc->val;
register __m128i A1, B1, C1, D1, E1;
register __m128i A2, B2, C2, D2, E2;
__m128i tmp;
// Both sets of working variables start from the current chaining value.
A1 = A2 = h[0];
B1 = B2 = h[1];
C1 = C2 = h[2];
D1 = D2 = h[3];
E1 = E2 = h[4];
// Set 1, group 1: F1, constant K11.
ROUND1( A, B, C, D, E, F1, 11, in[ 0], 1 );
ROUND1( E, A, B, C, D, F1, 14, in[ 1], 1 );
ROUND1( D, E, A, B, C, F1, 15, in[ 2], 1 );
ROUND1( C, D, E, A, B, F1, 12, in[ 3], 1 );
ROUND1( B, C, D, E, A, F1, 5, in[ 4], 1 );
ROUND1( A, B, C, D, E, F1, 8, in[ 5], 1 );
ROUND1( E, A, B, C, D, F1, 7, in[ 6], 1 );
ROUND1( D, E, A, B, C, F1, 9, in[ 7], 1 );
ROUND1( C, D, E, A, B, F1, 11, in[ 8], 1 );
ROUND1( B, C, D, E, A, F1, 13, in[ 9], 1 );
ROUND1( A, B, C, D, E, F1, 14, in[10], 1 );
ROUND1( E, A, B, C, D, F1, 15, in[11], 1 );
ROUND1( D, E, A, B, C, F1, 6, in[12], 1 );
ROUND1( C, D, E, A, B, F1, 7, in[13], 1 );
ROUND1( B, C, D, E, A, F1, 9, in[14], 1 );
ROUND1( A, B, C, D, E, F1, 8, in[15], 1 );
// Set 1, group 2: F2, constant K12.
ROUND1( E, A, B, C, D, F2, 7, in[ 7], 2 );
ROUND1( D, E, A, B, C, F2, 6, in[ 4], 2 );
ROUND1( C, D, E, A, B, F2, 8, in[13], 2 );
ROUND1( B, C, D, E, A, F2, 13, in[ 1], 2 );
ROUND1( A, B, C, D, E, F2, 11, in[10], 2 );
ROUND1( E, A, B, C, D, F2, 9, in[ 6], 2 );
ROUND1( D, E, A, B, C, F2, 7, in[15], 2 );
ROUND1( C, D, E, A, B, F2, 15, in[ 3], 2 );
ROUND1( B, C, D, E, A, F2, 7, in[12], 2 );
ROUND1( A, B, C, D, E, F2, 12, in[ 0], 2 );
ROUND1( E, A, B, C, D, F2, 15, in[ 9], 2 );
ROUND1( D, E, A, B, C, F2, 9, in[ 5], 2 );
ROUND1( C, D, E, A, B, F2, 11, in[ 2], 2 );
ROUND1( B, C, D, E, A, F2, 7, in[14], 2 );
ROUND1( A, B, C, D, E, F2, 13, in[11], 2 );
ROUND1( E, A, B, C, D, F2, 12, in[ 8], 2 );
// Set 1, group 3: F3, constant K13.
ROUND1( D, E, A, B, C, F3, 11, in[ 3], 3 );
ROUND1( C, D, E, A, B, F3, 13, in[10], 3 );
ROUND1( B, C, D, E, A, F3, 6, in[14], 3 );
ROUND1( A, B, C, D, E, F3, 7, in[ 4], 3 );
ROUND1( E, A, B, C, D, F3, 14, in[ 9], 3 );
ROUND1( D, E, A, B, C, F3, 9, in[15], 3 );
ROUND1( C, D, E, A, B, F3, 13, in[ 8], 3 );
ROUND1( B, C, D, E, A, F3, 15, in[ 1], 3 );
ROUND1( A, B, C, D, E, F3, 14, in[ 2], 3 );
ROUND1( E, A, B, C, D, F3, 8, in[ 7], 3 );
ROUND1( D, E, A, B, C, F3, 13, in[ 0], 3 );
ROUND1( C, D, E, A, B, F3, 6, in[ 6], 3 );
ROUND1( B, C, D, E, A, F3, 5, in[13], 3 );
ROUND1( A, B, C, D, E, F3, 12, in[11], 3 );
ROUND1( E, A, B, C, D, F3, 7, in[ 5], 3 );
ROUND1( D, E, A, B, C, F3, 5, in[12], 3 );
// Set 1, group 4: F4, constant K14.
ROUND1( C, D, E, A, B, F4, 11, in[ 1], 4 );
ROUND1( B, C, D, E, A, F4, 12, in[ 9], 4 );
ROUND1( A, B, C, D, E, F4, 14, in[11], 4 );
ROUND1( E, A, B, C, D, F4, 15, in[10], 4 );
ROUND1( D, E, A, B, C, F4, 14, in[ 0], 4 );
ROUND1( C, D, E, A, B, F4, 15, in[ 8], 4 );
ROUND1( B, C, D, E, A, F4, 9, in[12], 4 );
ROUND1( A, B, C, D, E, F4, 8, in[ 4], 4 );
ROUND1( E, A, B, C, D, F4, 9, in[13], 4 );
ROUND1( D, E, A, B, C, F4, 14, in[ 3], 4 );
ROUND1( C, D, E, A, B, F4, 5, in[ 7], 4 );
ROUND1( B, C, D, E, A, F4, 6, in[15], 4 );
ROUND1( A, B, C, D, E, F4, 8, in[14], 4 );
ROUND1( E, A, B, C, D, F4, 6, in[ 5], 4 );
ROUND1( D, E, A, B, C, F4, 5, in[ 6], 4 );
ROUND1( C, D, E, A, B, F4, 12, in[ 2], 4 );
// Set 1, group 5: F5, constant K15.
ROUND1( B, C, D, E, A, F5, 9, in[ 4], 5 );
ROUND1( A, B, C, D, E, F5, 15, in[ 0], 5 );
ROUND1( E, A, B, C, D, F5, 5, in[ 5], 5 );
ROUND1( D, E, A, B, C, F5, 11, in[ 9], 5 );
ROUND1( C, D, E, A, B, F5, 6, in[ 7], 5 );
ROUND1( B, C, D, E, A, F5, 8, in[12], 5 );
ROUND1( A, B, C, D, E, F5, 13, in[ 2], 5 );
ROUND1( E, A, B, C, D, F5, 12, in[10], 5 );
ROUND1( D, E, A, B, C, F5, 5, in[14], 5 );
ROUND1( C, D, E, A, B, F5, 12, in[ 1], 5 );
ROUND1( B, C, D, E, A, F5, 13, in[ 3], 5 );
ROUND1( A, B, C, D, E, F5, 14, in[ 8], 5 );
ROUND1( E, A, B, C, D, F5, 11, in[11], 5 );
ROUND1( D, E, A, B, C, F5, 8, in[ 6], 5 );
ROUND1( C, D, E, A, B, F5, 5, in[15], 5 );
ROUND1( B, C, D, E, A, F5, 6, in[13], 5 );
// Set 2, group 1: F5, constant K21 (the functions run in reverse order here).
ROUND2( A, B, C, D, E, F5, 8, in[ 5], 1 );
ROUND2( E, A, B, C, D, F5, 9, in[14], 1 );
ROUND2( D, E, A, B, C, F5, 9, in[ 7], 1 );
ROUND2( C, D, E, A, B, F5, 11, in[ 0], 1 );
ROUND2( B, C, D, E, A, F5, 13, in[ 9], 1 );
ROUND2( A, B, C, D, E, F5, 15, in[ 2], 1 );
ROUND2( E, A, B, C, D, F5, 15, in[11], 1 );
ROUND2( D, E, A, B, C, F5, 5, in[ 4], 1 );
ROUND2( C, D, E, A, B, F5, 7, in[13], 1 );
ROUND2( B, C, D, E, A, F5, 7, in[ 6], 1 );
ROUND2( A, B, C, D, E, F5, 8, in[15], 1 );
ROUND2( E, A, B, C, D, F5, 11, in[ 8], 1 );
ROUND2( D, E, A, B, C, F5, 14, in[ 1], 1 );
ROUND2( C, D, E, A, B, F5, 14, in[10], 1 );
ROUND2( B, C, D, E, A, F5, 12, in[ 3], 1 );
ROUND2( A, B, C, D, E, F5, 6, in[12], 1 );
// Set 2, group 2: F4, constant K22.
ROUND2( E, A, B, C, D, F4, 9, in[ 6], 2 );
ROUND2( D, E, A, B, C, F4, 13, in[11], 2 );
ROUND2( C, D, E, A, B, F4, 15, in[ 3], 2 );
ROUND2( B, C, D, E, A, F4, 7, in[ 7], 2 );
ROUND2( A, B, C, D, E, F4, 12, in[ 0], 2 );
ROUND2( E, A, B, C, D, F4, 8, in[13], 2 );
ROUND2( D, E, A, B, C, F4, 9, in[ 5], 2 );
ROUND2( C, D, E, A, B, F4, 11, in[10], 2 );
ROUND2( B, C, D, E, A, F4, 7, in[14], 2 );
ROUND2( A, B, C, D, E, F4, 7, in[15], 2 );
ROUND2( E, A, B, C, D, F4, 12, in[ 8], 2 );
ROUND2( D, E, A, B, C, F4, 7, in[12], 2 );
ROUND2( C, D, E, A, B, F4, 6, in[ 4], 2 );
ROUND2( B, C, D, E, A, F4, 15, in[ 9], 2 );
ROUND2( A, B, C, D, E, F4, 13, in[ 1], 2 );
ROUND2( E, A, B, C, D, F4, 11, in[ 2], 2 );
// Set 2, group 3: F3, constant K23.
ROUND2( D, E, A, B, C, F3, 9, in[15], 3 );
ROUND2( C, D, E, A, B, F3, 7, in[ 5], 3 );
ROUND2( B, C, D, E, A, F3, 15, in[ 1], 3 );
ROUND2( A, B, C, D, E, F3, 11, in[ 3], 3 );
ROUND2( E, A, B, C, D, F3, 8, in[ 7], 3 );
ROUND2( D, E, A, B, C, F3, 6, in[14], 3 );
ROUND2( C, D, E, A, B, F3, 6, in[ 6], 3 );
ROUND2( B, C, D, E, A, F3, 14, in[ 9], 3 );
ROUND2( A, B, C, D, E, F3, 12, in[11], 3 );
ROUND2( E, A, B, C, D, F3, 13, in[ 8], 3 );
ROUND2( D, E, A, B, C, F3, 5, in[12], 3 );
ROUND2( C, D, E, A, B, F3, 14, in[ 2], 3 );
ROUND2( B, C, D, E, A, F3, 13, in[10], 3 );
ROUND2( A, B, C, D, E, F3, 13, in[ 0], 3 );
ROUND2( E, A, B, C, D, F3, 7, in[ 4], 3 );
ROUND2( D, E, A, B, C, F3, 5, in[13], 3 );
// Set 2, group 4: F2, constant K24.
ROUND2( C, D, E, A, B, F2, 15, in[ 8], 4 );
ROUND2( B, C, D, E, A, F2, 5, in[ 6], 4 );
ROUND2( A, B, C, D, E, F2, 8, in[ 4], 4 );
ROUND2( E, A, B, C, D, F2, 11, in[ 1], 4 );
ROUND2( D, E, A, B, C, F2, 14, in[ 3], 4 );
ROUND2( C, D, E, A, B, F2, 14, in[11], 4 );
ROUND2( B, C, D, E, A, F2, 6, in[15], 4 );
ROUND2( A, B, C, D, E, F2, 14, in[ 0], 4 );
ROUND2( E, A, B, C, D, F2, 6, in[ 5], 4 );
ROUND2( D, E, A, B, C, F2, 9, in[12], 4 );
ROUND2( C, D, E, A, B, F2, 12, in[ 2], 4 );
ROUND2( B, C, D, E, A, F2, 9, in[13], 4 );
ROUND2( A, B, C, D, E, F2, 12, in[ 9], 4 );
ROUND2( E, A, B, C, D, F2, 5, in[ 7], 4 );
ROUND2( D, E, A, B, C, F2, 15, in[10], 4 );
ROUND2( C, D, E, A, B, F2, 8, in[14], 4 );
// Set 2, group 5: F1, constant K25.
ROUND2( B, C, D, E, A, F1, 8, in[12], 5 );
ROUND2( A, B, C, D, E, F1, 5, in[15], 5 );
ROUND2( E, A, B, C, D, F1, 12, in[10], 5 );
ROUND2( D, E, A, B, C, F1, 9, in[ 4], 5 );
ROUND2( C, D, E, A, B, F1, 12, in[ 1], 5 );
ROUND2( B, C, D, E, A, F1, 5, in[ 5], 5 );
ROUND2( A, B, C, D, E, F1, 14, in[ 8], 5 );
ROUND2( E, A, B, C, D, F1, 6, in[ 7], 5 );
ROUND2( D, E, A, B, C, F1, 8, in[ 6], 5 );
ROUND2( C, D, E, A, B, F1, 13, in[ 2], 5 );
ROUND2( B, C, D, E, A, F1, 6, in[13], 5 );
ROUND2( A, B, C, D, E, F1, 5, in[14], 5 );
ROUND2( E, A, B, C, D, F1, 15, in[ 0], 5 );
ROUND2( D, E, A, B, C, F1, 13, in[ 3], 5 );
ROUND2( C, D, E, A, B, F1, 11, in[ 9], 5 );
ROUND2( B, C, D, E, A, F1, 11, in[11], 5 );
// Fold both sets into the chaining value. Each new h[i] is built from
// h[i+1] (mod 5); tmp holds the new h[0] until the old h[0] has been
// consumed by the h[4] update.
tmp = _mm_add_epi32( _mm_add_epi32( h[1], C1 ), D2 );
h[1] = _mm_add_epi32( _mm_add_epi32( h[2], D1 ), E2 );
h[2] = _mm_add_epi32( _mm_add_epi32( h[3], E1 ), A2 );
h[3] = _mm_add_epi32( _mm_add_epi32( h[4], A1 ), B2 );
h[4] = _mm_add_epi32( _mm_add_epi32( h[0], B1 ), C2 );
h[0] = tmp;
}
/*
 * Reset a context: load every lane of the chaining state with the
 * RIPEMD-160 initial value and clear the byte counter.
 */
void ripemd160_4way_init( ripemd160_4way_context *sc )
{
    for ( int w = 0; w < 5; w++ )
        sc->val[w] = _mm_set1_epi32( IV[w] );

    sc->count_low  = 0;
    sc->count_high = 0;
}
/*
 * Absorb `len` bytes per lane of 4-way interleaved data into the context.
 *
 * `data` is read as a sequence of __m128i vectors, one vector per four
 * bytes of each lane. Only whole vectors are copied (len >> 2), so `len`
 * is expected to be a multiple of 4.
 */
void ripemd160_4way( ripemd160_4way_context *sc, const void *data, size_t len )
{
    const int block_size = 64;
    __m128i *src = (__m128i*)data;
    // Bytes already buffered in the current, partially filled block.
    size_t fill = (unsigned)sc->count_low & (block_size - 1U);

    while ( len > 0 )
    {
        // Take as much as fits in the block, limited by what is left.
        size_t chunk = block_size - fill;
        if ( chunk > len )
            chunk = len;

        memcpy_128( sc->buf + (fill>>2), src, chunk>>2 );
        src  += chunk>>2;
        fill += chunk;
        len  -= chunk;

        // A full block is compressed right away.
        if ( fill == block_size )
        {
            ripemd160_4way_round( sc );
            fill = 0;
        }

        // 64-bit byte counter kept as two 32-bit halves; carry on wrap.
        const uint32_t before = sc->count_low;
        const uint32_t after  = before + chunk;
        sc->count_low = after;
        if ( after < before )
            sc->count_high++;
    }
}
/*
 * Finish the hash: append the 0x80 marker, zero padding and the 64-bit
 * message length in bits, run the final compression(s) and store the five
 * state vectors (20 bytes per lane, still interleaved) to `dst`.
 */
void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst )
{
    const int block_size = 64;
    const int pad = block_size - 8;   // offset of the length field
    unsigned ptr = (unsigned)sc->count_low & ( block_size - 1U);

    // Padding marker in the first free word of every lane.
    sc->buf[ ptr>>2 ] = _mm_set1_epi32( 0x80 );
    ptr += 4;

    if ( ptr <= pad )
    {
        // Room for the length in this block: zero up to the length field.
        memset_zero_128( sc->buf + (ptr>>2), (pad - ptr) >> 2 );
    }
    else
    {
        // No room: zero-fill and compress this block, then start a fresh
        // one that is all zero up to the length field.
        memset_zero_128( sc->buf + (ptr>>2), (block_size - ptr) >> 2 );
        ripemd160_4way_round( sc );
        memset_zero_128( sc->buf, pad>>2 );
    }

    // Convert the byte count to a bit count split over two 32-bit words.
    const uint32_t bytes_lo = sc->count_low;
    const uint32_t bits_hi  = (sc->count_high << 3) | (bytes_lo >> 29);
    const uint32_t bits_lo  = bytes_lo << 3;

    sc->buf[ pad>>2 ]       = _mm_set1_epi32( bits_lo );
    sc->buf[ (pad>>2) + 1 ] = _mm_set1_epi32( bits_hi );
    ripemd160_4way_round( sc );

    for ( unsigned w = 0; w < 5; w++ )
        casti_m128i( dst, w ) = sc->val[w];
}
#endif

View File

@@ -0,0 +1,23 @@
#ifndef RIPEMD_HASH_4WAY_H__
#define RIPEMD_HASH_4WAY_H__
#include <stddef.h>
#include "algo/sha/sph_types.h"
#if defined(__AVX__)
#include "avxdefs.h"
// State of a 4-way RIPEMD-160 computation; each __m128i holds one 32-bit
// word from each of the four lanes.
typedef struct
{
// One 64-byte message block per lane: 16 words.
__m128i buf[64>>2];
// Five-word chaining value.
__m128i val[5];
// Bytes absorbed so far per lane, as the two halves of a 64-bit count.
uint32_t count_high, count_low;
} __attribute__ ((aligned (64))) ripemd160_4way_context;
// Reset the context to the initial chaining value with an empty byte count.
void ripemd160_4way_init( ripemd160_4way_context *sc );
// Absorb `len` bytes per lane of interleaved input from `data`.
void ripemd160_4way( ripemd160_4way_context *sc, const void *data, size_t len );
// Pad, finish, and write the five interleaved state vectors to `dst`.
void ripemd160_4way_close( ripemd160_4way_context *sc, void *dst );
#endif
#endif