mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.11.7
This commit is contained in:
@@ -28,26 +28,28 @@ int scanhash_keccak_8way( struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
do {
|
||||
*noncev = mm512_intrlv_blend_32( mm512_bswap_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ) ), *noncev );
|
||||
|
||||
keccakhash_8way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if ( hash7[ lane<<1 ] <= Htarg )
|
||||
if unlikely( hash7[ lane<<1 ] <= Htarg && !bench )
|
||||
{
|
||||
extr_lane_8x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
|
||||
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);
|
||||
@@ -79,27 +81,28 @@ int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id;
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do {
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
keccakhash_4way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash7[ lane<<1 ] <= Htarg )
|
||||
if unlikely( hash7[ lane<<1 ] <= Htarg && !bench )
|
||||
{
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
if ( valid_hash( lane_hash, ptarget ))
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
#include "keccak-gate.h"
|
||||
#include "sph_keccak.h"
|
||||
|
||||
int hard_coded_eb = 1;
|
||||
|
||||
// KECCAK
|
||||
|
||||
bool register_keccak_algo( algo_gate_t* gate )
|
||||
{
|
||||
@@ -19,6 +23,8 @@ bool register_keccak_algo( algo_gate_t* gate )
|
||||
return true;
|
||||
};
|
||||
|
||||
// KECCAKC
|
||||
|
||||
bool register_keccakc_algo( algo_gate_t* gate )
|
||||
{
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT;
|
||||
@@ -37,3 +43,50 @@ bool register_keccakc_algo( algo_gate_t* gate )
|
||||
return true;
|
||||
};
|
||||
|
||||
// SHA3D
|
||||
|
||||
void sha3d( void *state, const void *input, int len )
|
||||
{
|
||||
uint32_t _ALIGN(64) buffer[16], hash[16];
|
||||
sph_keccak_context ctx_keccak;
|
||||
|
||||
sph_keccak256_init( &ctx_keccak );
|
||||
sph_keccak256 ( &ctx_keccak, input, len );
|
||||
sph_keccak256_close( &ctx_keccak, (void*) buffer );
|
||||
|
||||
sph_keccak256_init( &ctx_keccak );
|
||||
sph_keccak256 ( &ctx_keccak, buffer, 32 );
|
||||
sph_keccak256_close( &ctx_keccak, (void*) hash );
|
||||
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
void sha3d_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
|
||||
{
|
||||
sha3d( merkle_root, sctx->job.coinbase, (int) sctx->job.coinbase_size );
|
||||
for ( int i = 0; i < sctx->job.merkle_count; i++ )
|
||||
{
|
||||
memcpy( merkle_root + 32, sctx->job.merkle[i], 32 );
|
||||
sha256d( merkle_root, merkle_root, 64 );
|
||||
}
|
||||
}
|
||||
|
||||
bool register_sha3d_algo( algo_gate_t* gate )
|
||||
{
|
||||
hard_coded_eb = 6;
|
||||
opt_extranonce = false;
|
||||
gate->optimizations = AVX2_OPT | AVX512_OPT;
|
||||
gate->gen_merkle_root = (void*)&sha3d_gen_merkle_root;
|
||||
#if defined (KECCAK_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha3d_8way;
|
||||
gate->hash = (void*)&sha3d_hash_8way;
|
||||
#elif defined (KECCAK_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_sha3d_4way;
|
||||
gate->hash = (void*)&sha3d_hash_4way;
|
||||
#else
|
||||
gate->scanhash = (void*)&scanhash_sha3d;
|
||||
gate->hash = (void*)&sha3d_hash;
|
||||
#endif
|
||||
return true;
|
||||
};
|
||||
|
||||
|
||||
@@ -10,24 +10,37 @@
|
||||
#define KECCAK_4WAY 1
|
||||
#endif
|
||||
|
||||
extern int hard_coded_eb;
|
||||
|
||||
#if defined(KECCAK_8WAY)
|
||||
|
||||
void keccakhash_8way( void *state, const void *input );
|
||||
int scanhash_keccak_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void sha3d_hash_8way( void *state, const void *input );
|
||||
int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(KECCAK_4WAY)
|
||||
|
||||
void keccakhash_4way( void *state, const void *input );
|
||||
int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void sha3d_hash_4way( void *state, const void *input );
|
||||
int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#else
|
||||
|
||||
void keccakhash( void *state, const void *input );
|
||||
int scanhash_keccak( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
void sha3d_hash( void *state, const void *input );
|
||||
int scanhash_sha3d( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include "keccak-hash-4way.h"
|
||||
#include "keccak-gate.h"
|
||||
|
||||
static const uint64_t RC[] = {
|
||||
0x0000000000000001, 0x0000000000008082,
|
||||
@@ -168,7 +169,7 @@ static void keccak64_8way_close( keccak64_ctx_m512i *kc, void *dst,
|
||||
size_t j;
|
||||
size_t m512_len = byte_len >> 3;
|
||||
|
||||
eb = 0x100 >> 8;
|
||||
eb = hard_coded_eb;
|
||||
if ( kc->ptr == (lim - 8) )
|
||||
{
|
||||
const uint64_t t = eb | 0x8000000000000000;
|
||||
@@ -349,7 +350,7 @@ static void keccak64_close( keccak64_ctx_m256i *kc, void *dst, size_t byte_len,
|
||||
size_t j;
|
||||
size_t m256_len = byte_len >> 3;
|
||||
|
||||
eb = 0x100 >> 8;
|
||||
eb = hard_coded_eb;
|
||||
if ( kc->ptr == (lim - 8) )
|
||||
{
|
||||
const uint64_t t = eb | 0x8000000000000000;
|
||||
|
||||
@@ -18,36 +18,34 @@ void keccakhash(void *state, const void *input)
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_keccak( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||
int scanhash_keccak( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
//const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t _ALIGN(64) hash64[8];
|
||||
uint32_t _ALIGN(64) endiandata[32];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
uint32_t _ALIGN(32) hash64[8];
|
||||
uint32_t endiandata[32];
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
for (int i=0; i < 19; i++)
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
do {
|
||||
be32enc( &endiandata[19], n );
|
||||
keccakhash( hash64, endiandata );
|
||||
if ( valid_hash( hash64, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n;
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( n < last_nonce && !work_restart[thr_id].restart );
|
||||
|
||||
do {
|
||||
|
||||
pdata[19] = ++n;
|
||||
be32enc(&endiandata[19], n);
|
||||
keccakhash(hash64, endiandata);
|
||||
if (((hash64[7]&0xFFFFFF00)==0) &&
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
126
algo/keccak/sha3d-4way.c
Normal file
126
algo/keccak/sha3d-4way.c
Normal file
@@ -0,0 +1,126 @@
|
||||
#include "keccak-gate.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "sph_keccak.h"
|
||||
#include "keccak-hash-4way.h"
|
||||
|
||||
#if defined(KECCAK_8WAY)
|
||||
|
||||
void sha3d_hash_8way(void *state, const void *input)
|
||||
{
|
||||
uint32_t buffer[16*8] __attribute__ ((aligned (128)));
|
||||
keccak256_8way_context ctx;
|
||||
|
||||
keccak256_8way_init( &ctx );
|
||||
keccak256_8way_update( &ctx, input, 80 );
|
||||
keccak256_8way_close( &ctx, buffer );
|
||||
|
||||
keccak256_8way_init( &ctx );
|
||||
keccak256_8way_update( &ctx, buffer, 32 );
|
||||
keccak256_8way_close( &ctx, state );
|
||||
}
|
||||
|
||||
int scanhash_sha3d_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[24*8] __attribute__ ((aligned (128)));
|
||||
uint32_t hash[16*8] __attribute__ ((aligned (64)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[49]); // 3*16+1
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 8;
|
||||
__m512i *noncev = (__m512i*)vdata + 9; // aligned
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm512_bswap32_intrlv80_8x64( vdata, pdata );
|
||||
*noncev = mm512_intrlv_blend_32(
|
||||
_mm512_set_epi32( n+7, 0, n+6, 0, n+5, 0, n+4, 0,
|
||||
n+3, 0, n+2, 0, n+1, 0, n , 0 ), *noncev );
|
||||
do {
|
||||
sha3d_hash_8way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ )
|
||||
if unlikely( hash7[ lane<<1 ] <= Htarg && !bench )
|
||||
{
|
||||
extr_lane_8x64( lane_hash, hash, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm512_add_epi32( *noncev,
|
||||
m512_const1_64( 0x0000000800000000 ) );
|
||||
n += 8;
|
||||
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined(KECCAK_4WAY)
|
||||
|
||||
void sha3d_hash_4way(void *state, const void *input)
|
||||
{
|
||||
uint32_t buffer[16*4] __attribute__ ((aligned (64)));
|
||||
keccak256_4way_context ctx;
|
||||
|
||||
keccak256_4way_init( &ctx );
|
||||
keccak256_4way_update( &ctx, input, 80 );
|
||||
keccak256_4way_close( &ctx, buffer );
|
||||
|
||||
keccak256_4way_init( &ctx );
|
||||
keccak256_4way_update( &ctx, buffer, 32 );
|
||||
keccak256_4way_close( &ctx, state );
|
||||
}
|
||||
|
||||
int scanhash_sha3d_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (32)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[25]); // 3*8+1
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce - 4;
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const int thr_id = mythr->id;
|
||||
const bool bench = opt_benchmark;
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
*noncev = mm256_intrlv_blend_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ), *noncev );
|
||||
do {
|
||||
sha3d_hash_4way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if unlikely( hash7[ lane<<1 ] <= Htarg && !bench )
|
||||
{
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( valid_hash( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = bswap_32( n + lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
*noncev = _mm256_add_epi32( *noncev,
|
||||
m256_const1_64( 0x0000000400000000 ) );
|
||||
n += 4;
|
||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
50
algo/keccak/sha3d.c
Normal file
50
algo/keccak/sha3d.c
Normal file
@@ -0,0 +1,50 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "sph_keccak.h"
|
||||
|
||||
void sha3d_hash(void *state, const void *input)
|
||||
{
|
||||
uint32_t buffer[16];
|
||||
sph_keccak256_context ctx_keccak;
|
||||
|
||||
sph_keccak256_init( &ctx_keccak );
|
||||
sph_keccak256 ( &ctx_keccak, input, 80 );
|
||||
sph_keccak256_close( &ctx_keccak, buffer );
|
||||
sph_keccak256_init( &ctx_keccak );
|
||||
sph_keccak256 ( &ctx_keccak, buffer, 32 );
|
||||
sph_keccak256_close( &ctx_keccak, state );
|
||||
}
|
||||
|
||||
int scanhash_sha3d( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash64[8];
|
||||
uint32_t _ALIGN(64) endiandata[32];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t last_nonce = max_nonce;
|
||||
const int thr_id = mythr->id;
|
||||
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
do {
|
||||
be32enc( &endiandata[19], n );
|
||||
sha3d_hash( hash64, endiandata );
|
||||
if ( valid_hash( hash64, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n;
|
||||
submit_solution( work, hash64, mythr );
|
||||
}
|
||||
n++;
|
||||
} while ( n < last_nonce && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -32,8 +32,8 @@
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sph_keccak.h"
|
||||
#include "keccak-gate.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
@@ -1616,7 +1616,7 @@ keccak_core(sph_keccak_context *kc, const void *data, size_t len, size_t lim)
|
||||
} u; \
|
||||
size_t j; \
|
||||
\
|
||||
eb = (0x100 | (ub & 0xFF)) >> (8 - n); \
|
||||
eb = hard_coded_eb; \
|
||||
if (kc->ptr == (lim - 1)) { \
|
||||
if (n == 7) { \
|
||||
u.tmp[0] = eb; \
|
||||
|
||||
Reference in New Issue
Block a user