#include "lyra2-gate.h"

// Huge pages
//
// Use MAP_PRIVATE instead.
// In the register algo:
// replace the thread-safe whole matrix with a char**,
// alloc huge pages of matrix_size * threads,
// make a pointer for each thread, creating an array[thread][matrix].
// Each thread can create its own matrix pointer:
// my_matrix = the matrix + ( thread_id * matrix_size )
//
// Compiler version check?
// Fallback?
//
// Create a generic utility to map & unmap huge pages:
// ptr = malloc_huge( size );   ( sketched below )
// The yespower wrapper checks for 64 byte alignment, which seems unnecessary
// as the mapping should already be aligned to the page boundary. It may be
// desirable to round the matrix size up, if necessary, to something bigger
// than 64 bytes, say 4 KiB, a small page size.

// Define constants for the individual parameters and matrix size of each
// algo, and use the parameter constants where appropriate.
// Convert algos that don't yet do so to use dynamic allocation.
// Alloc huge pages globally. If that succeeds each thread will create a
// pointer to its chunk; if it fails each thread will use _mm_malloc for
// itself.
// BLOCK_LEN_BYTES is 768.
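
// A minimal sketch of the malloc_huge()/free_huge() utility described above,
// assuming a Linux target where mmap() provides MAP_HUGETLB. The names, the
// hard-coded 2 MiB huge page size and the fallback idea are illustrative
// assumptions only; nothing here is wired into the algos yet.
#if defined(__linux__)
#include <stddef.h>
#include <sys/mman.h>
#if defined(MAP_HUGETLB) && defined(MAP_ANONYMOUS)

#define HUGE_PAGE_SIZE ( 2UL * 1024 * 1024 )   // assumed huge page size

// Round len up to a whole number of huge pages and map a private, anonymous
// huge page region. Returns NULL on failure so the caller can fall back to
// _mm_malloc.
static inline void *malloc_huge( size_t len )
{
   const size_t rounded = ( len + HUGE_PAGE_SIZE - 1 ) & ~( HUGE_PAGE_SIZE - 1 );
   void *p = mmap( NULL, rounded, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0 );
   return ( p == MAP_FAILED ) ? NULL : p;
}

// Unmap a region previously returned by malloc_huge, using the same rounding.
static inline void free_huge( void *p, size_t len )
{
   if ( p )
      munmap( p, ( len + HUGE_PAGE_SIZE - 1 ) & ~( HUGE_PAGE_SIZE - 1 ) );
}

// Possible use in a register algo ( illustrative only ): allocate
// matrix_size * threads once, then each thread takes a pointer into its own
// chunk, falling back to _mm_malloc if the huge page mapping failed.
//
//    char *whole = malloc_huge( (size_t) opt_n_threads * LYRA2REV3_MATRIX_SIZE );
//    uint64_t *my_matrix = whole
//       ? (uint64_t*) ( whole + thr_id * LYRA2REV3_MATRIX_SIZE )
//       : (uint64_t*) _mm_malloc( LYRA2REV3_MATRIX_SIZE, 64 );

#endif   // MAP_HUGETLB && MAP_ANONYMOUS
#endif   // __linux__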

#define LYRA2REV3_NROWS 4
#define LYRA2REV3_NCOLS 4

/*
#define LYRA2REV3_MATRIX_SIZE ((BLOCK_LEN_BYTES)*(LYRA2REV3_NCOLS)* \
                               (LYRA2REV3_NROWS)*8)
*/

// 768 << 4 = 12288 bytes per thread matrix: LYRA2REV3_NROWS * LYRA2REV3_NCOLS
// blocks of BLOCK_LEN_BYTES ( 768 ) each.
#define LYRA2REV3_MATRIX_SIZE ((BLOCK_LEN_BYTES)<<4)

__thread uint64_t* l2v3_wholeMatrix;

bool lyra2rev3_thread_init()
{
   const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 4;   // nCols
   const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
   int size = ROW_LEN_BYTES * 4;   // nRows

#if defined(LYRA2REV3_16WAY)
//   l2v3_wholeMatrix = _mm_malloc( 2*size, 128 );
   l2v3_wholeMatrix = _mm_malloc( 2*size, 64 );
   init_lyra2rev3_16way_ctx();
#else
   l2v3_wholeMatrix = _mm_malloc( size, 64 );
#if defined(LYRA2REV3_8WAY)
   init_lyra2rev3_8way_ctx();
#elif defined(LYRA2REV3_4WAY)
   init_lyra2rev3_4way_ctx();
#else
   init_lyra2rev3_ctx();
#endif
#endif
   return l2v3_wholeMatrix != NULL;
}

bool register_lyra2rev3_algo( algo_gate_t* gate )
{
#if defined(LYRA2REV3_16WAY)
   gate->scanhash = (void*)&scanhash_lyra2rev3_16way;
   gate->hash = (void*)&lyra2rev3_16way_hash;
#elif defined(LYRA2REV3_8WAY)
   gate->scanhash = (void*)&scanhash_lyra2rev3_8way;
   gate->hash = (void*)&lyra2rev3_8way_hash;
#elif defined(LYRA2REV3_4WAY)
   gate->scanhash = (void*)&scanhash_lyra2rev3_4way;
   gate->hash = (void*)&lyra2rev3_4way_hash;
#else
   gate->scanhash = (void*)&scanhash_lyra2rev3;
   gate->hash = (void*)&lyra2rev3_hash;
#endif
   gate->optimizations = SSE2_OPT | SSE42_OPT | AVX2_OPT | AVX512_OPT;
   gate->miner_thread_init = (void*)&lyra2rev3_thread_init;
   opt_target_factor = 256.0;
   return true;
}

//////////////////////////////////

__thread uint64_t* l2v2_wholeMatrix;

bool lyra2rev2_thread_init()
{
   const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * 4;   // nCols
   const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;

   int size = ROW_LEN_BYTES * 4;   // nRows
#if defined(LYRA2REV2_8WAY)
   l2v2_wholeMatrix = _mm_malloc( 2 * size, 64 );   // 2 way
   init_lyra2rev2_8way_ctx();
#elif defined(LYRA2REV2_4WAY)
   l2v2_wholeMatrix = _mm_malloc( size, 64 );
   init_lyra2rev2_4way_ctx();
#else
   l2v2_wholeMatrix = _mm_malloc( size, 64 );
   init_lyra2rev2_ctx();
#endif
   return l2v2_wholeMatrix != NULL;
}

bool register_lyra2rev2_algo( algo_gate_t* gate )
{
#if defined(LYRA2REV2_8WAY)
   gate->scanhash = (void*)&scanhash_lyra2rev2_8way;
   gate->hash = (void*)&lyra2rev2_8way_hash;
#elif defined(LYRA2REV2_4WAY)
   gate->scanhash = (void*)&scanhash_lyra2rev2_4way;
   gate->hash = (void*)&lyra2rev2_4way_hash;
#else
   gate->scanhash = (void*)&scanhash_lyra2rev2;
   gate->hash = (void*)&lyra2rev2_hash;
#endif
   gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT | AVX512_OPT;
   gate->miner_thread_init = (void*)&lyra2rev2_thread_init;
   opt_target_factor = 256.0;
   return true;
}

/////////////////////////////

bool register_lyra2z_algo( algo_gate_t* gate )
{
#if defined(LYRA2Z_8WAY)
   gate->miner_thread_init = (void*)&lyra2z_8way_thread_init;
   gate->scanhash = (void*)&scanhash_lyra2z_8way;
   gate->hash = (void*)&lyra2z_8way_hash;
#elif defined(LYRA2Z_4WAY)
   gate->miner_thread_init = (void*)&lyra2z_4way_thread_init;
   gate->scanhash = (void*)&scanhash_lyra2z_4way;
   gate->hash = (void*)&lyra2z_4way_hash;
#else
   gate->miner_thread_init = (void*)&lyra2z_thread_init;
   gate->scanhash = (void*)&scanhash_lyra2z;
   gate->hash = (void*)&lyra2z_hash;
#endif
   gate->optimizations = SSE42_OPT | AVX2_OPT;
   opt_target_factor = 256.0;
   return true;
}

////////////////////////

bool register_lyra2h_algo( algo_gate_t* gate )
{
#ifdef LYRA2H_4WAY
   gate->miner_thread_init = (void*)&lyra2h_4way_thread_init;
   gate->scanhash = (void*)&scanhash_lyra2h_4way;
   gate->hash = (void*)&lyra2h_4way_hash;
#else
   gate->miner_thread_init = (void*)&lyra2h_thread_init;
   gate->scanhash = (void*)&scanhash_lyra2h;
   gate->hash = (void*)&lyra2h_hash;
#endif
   gate->optimizations = SSE42_OPT | AVX2_OPT;
   opt_target_factor = 256.0;
   return true;
}

/////////////////////////////////

bool register_allium_algo( algo_gate_t* gate )
{
#if defined(ALLIUM_4WAY)
   gate->miner_thread_init = (void*)&init_allium_4way_ctx;
   gate->scanhash = (void*)&scanhash_allium_4way;
   gate->hash = (void*)&allium_4way_hash;
#else
   gate->miner_thread_init = (void*)&init_allium_ctx;
   gate->scanhash = (void*)&scanhash_allium;
   gate->hash = (void*)&allium_hash;
#endif
   gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
   opt_target_factor = 256.0;
   return true;
}

/////////////////////////////////////////

bool phi2_has_roots;
bool phi2_use_roots = false;

// Work data size: 144 bytes when the extra root words are in use, 128 otherwise.
int phi2_get_work_data_size() { return phi2_use_roots ? 144 : 128; }

// Roots are in use if version bit 30 is set or any of data[20..35] is non-zero.
void phi2_decode_extra_data( struct work *work )
{
   phi2_use_roots = false;
   if ( work->data[0] & ( 1 << 30 ) )
      phi2_use_roots = true;
   else
      for ( int i = 20; i < 36; i++ )
         if ( work->data[i] ) { phi2_use_roots = true; break; }
}

void phi2_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{
   uchar merkle_tree[64] = { 0 };
   size_t t;

   algo_gate.gen_merkle_root( merkle_tree, sctx );
   // Increment extranonce2
   for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
   // Assemble block header
   algo_gate.build_block_header( g_work, le32dec( sctx->job.version ),
                     (uint32_t*) sctx->job.prevhash, (uint32_t*) merkle_tree,
                     le32dec( sctx->job.ntime ), le32dec( sctx->job.nbits ) );
   // Copy the 16 extra words ( the roots ) into data[20..35]
   for ( t = 0; t < 16; t++ )
      g_work->data[ 20 + t ] = ((uint32_t*)sctx->job.extra)[t];
}

bool register_phi2_algo( algo_gate_t* gate )
{
//   init_phi2_ctx();
   gate->optimizations = SSE2_OPT | AES_OPT | SSE42_OPT | AVX2_OPT;
   gate->get_work_data_size = (void*)&phi2_get_work_data_size;
   gate->decode_extra_data = (void*)&phi2_decode_extra_data;
   gate->build_extraheader = (void*)&phi2_build_extraheader;
   opt_target_factor = 256.0;
#if defined(PHI2_4WAY)
   gate->scanhash = (void*)&scanhash_phi2_4way;
#else
   init_phi2_ctx();
   gate->scanhash = (void*)&scanhash_phi2;
#endif
   return true;
}