Jay D Dee
2017-02-22 22:47:44 -05:00
parent 4521b324e3
commit 33b1bb5cd4
31 changed files with 1597 additions and 559 deletions

View File

@@ -29,6 +29,7 @@ cpuminer_SOURCES = \
algo/shavite/shavite.c \
algo/echo/sph_echo.c \
algo/blake/sph_blake.c \
+ algo/blake/sph_blake2b.c \
algo/heavy/sph_hefty1.c \
algo/blake/mod_blakecoin.c \
algo/luffa/sph_luffa.c \
@@ -62,7 +63,8 @@ cpuminer_SOURCES = \
algo/argon2/ar2/blake2b.c \
algo/axiom.c \
algo/blake/blake.c \
- algo/blake/blake2.c \
+ algo/blake/blake2b.c \
+ algo/blake/blake2s.c \
algo/blake/blakecoin.c \
algo/blake/decred.c \
algo/blake/pentablake.c \
@@ -112,6 +114,7 @@ cpuminer_SOURCES = \
algo/pluck.c \
algo/quark/quark.c \
algo/qubit/qubit.c \
+ algo/qubit/deep.c \
algo/ripemd/sph_ripemd.c \
algo/scrypt.c \
algo/scryptjane/scrypt-jane.c \

View File

@@ -2,6 +2,14 @@ Compile instruction for Linux and Windows are at the bottom of this file.
Change Log
----------
+ v3.5.9
+ Reduced stack usage for hmq1725, with a small speedup.
+ Added Deep algo optimized for AES and AVX2.
+ Rewrite of SSE2 Luffa; midstate now supported in deep, qubit & timetravel.
+ Small changes to algo-gate.
v3.5.8
Lyra2RE fixed on Windows, broken in v3.5.6.

View File

@@ -123,7 +123,8 @@ void init_algo_gate( algo_gate_t* gate )
gate->build_extraheader = (void*)&std_build_extraheader; gate->build_extraheader = (void*)&std_build_extraheader;
gate->set_work_data_endian = (void*)&do_nothing; gate->set_work_data_endian = (void*)&do_nothing;
gate->calc_network_diff = (void*)&std_calc_network_diff; gate->calc_network_diff = (void*)&std_calc_network_diff;
gate->prevent_dupes = (void*)&return_false; // gate->prevent_dupes = (void*)&return_false;
gate->ready_to_mine = (void*)&std_ready_to_mine;
gate->resync_threads = (void*)&do_nothing; gate->resync_threads = (void*)&do_nothing;
gate->do_this_thread = (void*)&return_true; gate->do_this_thread = (void*)&return_true;
gate->longpoll_rpc_call = (void*)&std_longpoll_rpc_call; gate->longpoll_rpc_call = (void*)&std_longpoll_rpc_call;
@@ -159,11 +160,13 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
case ALGO_BASTION: register_bastion_algo ( gate ); break;
case ALGO_BLAKE: register_blake_algo ( gate ); break;
case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break;
+ // case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break;
case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break;
case ALGO_C11: register_c11_algo ( gate ); break;
case ALGO_CRYPTOLIGHT: register_cryptolight_algo( gate ); break;
case ALGO_CRYPTONIGHT: register_cryptonight_algo( gate ); break;
case ALGO_DECRED: register_decred_algo ( gate ); break;
+ case ALGO_DEEP: register_deep_algo ( gate ); break;
case ALGO_DROP: register_drop_algo ( gate ); break;
case ALGO_FRESH: register_fresh_algo ( gate ); break;
case ALGO_GROESTL: register_groestl_algo ( gate ); break;
@@ -267,6 +270,7 @@ const char* const algo_alias_map[][2] =
// alias proper
{ "blake256r8", "blakecoin" },
{ "blake256r8vnl", "vanilla" },
+ { "sia", "blake2b" },
{ "blake256r14", "blake" },
{ "cryptonote", "cryptonight" },
{ "cryptonight-light", "cryptolight" },

View File

@@ -105,7 +105,6 @@ inline bool set_excl ( set_t a, set_t b ) { return (a & b) == 0; }
typedef struct
{
-//migrate to use work instead of pdata & ptarget, see decred for example.
// mandatory functions, must be overwritten
int ( *scanhash ) ( int, struct work*, uint32_t, uint64_t* );
@@ -127,11 +126,12 @@ bool ( *work_decode ) ( const json_t*, struct work* );
void ( *set_target) ( struct work*, double );
bool ( *submit_getwork_result ) ( CURL*, struct work* );
void ( *gen_merkle_root ) ( char*, struct stratum_ctx* );
+ void ( *build_extraheader ) ( struct work*, struct stratum_ctx* );
void ( *build_stratum_request ) ( char*, struct work*, struct stratum_ctx* );
void ( *set_work_data_endian ) ( struct work* );
double ( *calc_network_diff ) ( struct work* );
- void ( *build_extraheader ) ( struct work*, struct stratum_ctx* );
- bool ( *prevent_dupes ) ( struct work*, struct stratum_ctx*, int );
+ //bool ( *prevent_dupes ) ( struct work*, struct stratum_ctx*, int );
+ bool ( *ready_to_mine ) ( struct work*, struct stratum_ctx*, int );
void ( *resync_threads ) ( struct work* );
bool ( *do_this_thread ) ( int );
json_t* (*longpoll_rpc_call) ( CURL*, int*, char* );
@@ -165,8 +165,8 @@ void algo_not_implemented();
// conventions results in different behaviour for pointers with different
// target sizes requiring customized casting to make it work consistently.
// Rant mode: yet another thing I hate about c/c++. Array indexes should
- // be scaled, pointer offsets should always be bytes. Noconfusion and no hidden
- // math.
+ // be scaled, pointer offsets should always be bytes. No confusion and no
+ // hidden math.
#define STD_NTIME_INDEX 17
#define STD_NBITS_INDEX 18
@@ -227,7 +227,7 @@ void std_le_build_stratum_request( char *req, struct work *work );
void std_be_build_stratum_request( char *req, struct work *work );
void jr2_build_stratum_request ( char *req, struct work *work );
- // default is do_nothing;
+ // set_work_data_endian target, default is do_nothing;
void swab_work_data( struct work *work );
double std_calc_network_diff( struct work *work );
@@ -240,6 +240,9 @@ json_t* jr2_longpoll_rpc_call( CURL *curl, int *err );
bool std_stratum_handle_response( json_t *val );
bool jr2_stratum_handle_response( json_t *val );
+ bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
+                         int thr_id );
// Gate admin functions
// Called from main to initialize all gate functions and algo-specific data

203
algo/blake/b/sia-rpc.cpp Normal file
View File

@@ -0,0 +1,203 @@
#include <ccminer-config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <inttypes.h>
#include <unistd.h>
#include <math.h>
#include <sys/time.h>
#include <time.h>
#include <signal.h>
#include <curl/curl.h>
#include <miner.h>
#include "sia-rpc.h"
static bool sia_debug_diff = false;
extern int share_result(int result, int pooln, double sharediff, const char *reason);
/* compute nbits to get the network diff */
static void calc_network_diff(struct work *work)
{
uint32_t nbits = work->data[11]; // unsure if correct
uint32_t bits = (nbits & 0xffffff);
int16_t shift = (swab32(nbits) & 0xff); // 0x1c = 28
uint64_t diffone = 0x0000FFFF00000000ull;
double d = (double)0x0000ffff / (double)bits;
for (int m=shift; m < 29; m++) d *= 256.0;
for (int m=29; m < shift; m++) d /= 256.0;
if (sia_debug_diff)
applog(LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d, shift, bits);
net_diff = d;
}
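A worked example of the decoding above (a sketch, not part of the commit): the "sample for diff 43.281 : 1c05ea29" comment in algo/blake/blake2b.c uses the same encoding, where the top byte of nbits is the exponent and the low three bytes are the mantissa:

    uint32_t nbits = 0x1c05ea29;
    uint32_t bits  = nbits & 0xffffff;      // mantissa: 0x05ea29 = 387625
    int16_t  shift = swab32(nbits) & 0xff;  // exponent byte: 0x1c = 28
    double d = (double)0x0000ffff / (double)bits;   // ~0.16907
    for ( int m = shift; m < 29; m++ ) d *= 256.0;  // one pass: d ~= 43.28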
// ---- SIA LONGPOLL --------------------------------------------------------------------------------
struct data_buffer {
void *buf;
size_t len;
};
static size_t sia_data_cb(const void *ptr, size_t size, size_t nmemb,
void *user_data)
{
struct data_buffer *db = (struct data_buffer *)user_data;
size_t len = size * nmemb;
size_t oldlen, newlen;
void *newmem;
static const uchar zero = 0;
oldlen = db->len;
newlen = oldlen + len;
newmem = realloc(db->buf, newlen + 1);
if (!newmem)
return 0;
db->buf = newmem;
db->len = newlen;
memcpy((char*)db->buf + oldlen, ptr, len);
memcpy((char*)db->buf + newlen, &zero, 1); /* null terminate */
return len;
}
char* sia_getheader(CURL *curl, struct pool_infos *pool)
{
char curl_err_str[CURL_ERROR_SIZE] = { 0 };
struct data_buffer all_data = { 0 };
struct curl_slist *headers = NULL;
char data[256] = { 0 };
char url[512];
// nanopool
snprintf(url, 512, "%s/miner/header?address=%s&worker=%s", //&longpoll
pool->url, pool->user, pool->pass);
if (opt_protocol)
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_POST, 0);
curl_easy_setopt(curl, CURLOPT_ENCODING, "");
curl_easy_setopt(curl, CURLOPT_FAILONERROR, 0);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1);
curl_easy_setopt(curl, CURLOPT_TIMEOUT, opt_timeout);
curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_err_str);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, sia_data_cb);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &all_data);
headers = curl_slist_append(headers, "Accept: application/octet-stream");
headers = curl_slist_append(headers, "Expect:"); // disable Expect hdr
headers = curl_slist_append(headers, "User-Agent: Sia-Agent"); // required for now
// headers = curl_slist_append(headers, "User-Agent: " USER_AGENT);
// headers = curl_slist_append(headers, "X-Mining-Extensions: longpoll");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
int rc = curl_easy_perform(curl);
if (rc && strlen(curl_err_str)) {
applog(LOG_WARNING, "%s", curl_err_str);
}
if (all_data.len >= 112)
cbin2hex(data, (const char*) all_data.buf, 112);
if (opt_protocol || all_data.len != 112)
applog(LOG_DEBUG, "received %d bytes: %s", (int) all_data.len, data);
curl_slist_free_all(headers);
return rc == 0 && all_data.len ? strdup(data) : NULL;
}
bool sia_work_decode(const char *hexdata, struct work *work)
{
uint8_t target[32];
if (!work) return false;
hex2bin((uchar*)target, &hexdata[0], 32);
swab256(work->target, target);
work->targetdiff = target_to_diff(work->target);
hex2bin((uchar*)work->data, &hexdata[64], 80);
// high 16 bits of the 64 bits nonce
work->data[9] = rand() << 16;
// use work ntime as job id
cbin2hex(work->job_id, (const char*)&work->data[10], 4);
calc_network_diff(work);
if (stratum_diff != work->targetdiff) {
stratum_diff = work->targetdiff;
applog(LOG_WARNING, "Pool diff set to %g", stratum_diff);
}
return true;
}
bool sia_submit(CURL *curl, struct pool_infos *pool, struct work *work)
{
char curl_err_str[CURL_ERROR_SIZE] = { 0 };
struct data_buffer all_data = { 0 };
struct curl_slist *headers = NULL;
char buf[256] = { 0 };
char url[512];
if (opt_protocol)
applog_hex(work->data, 80);
//applog_hex(&work->data[8], 16);
//applog_hex(&work->data[10], 4);
// nanopool
snprintf(url, 512, "%s/miner/header?address=%s&worker=%s",
pool->url, pool->user, pool->pass);
if (opt_protocol)
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_ENCODING, "");
curl_easy_setopt(curl, CURLOPT_FAILONERROR, 0);
curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1);
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_err_str);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(curl, CURLOPT_TIMEOUT, 10);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &all_data);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, sia_data_cb);
memcpy(buf, work->data, 80);
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, 80);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, (void*) buf);
// headers = curl_slist_append(headers, "Content-Type: application/octet-stream");
// headers = curl_slist_append(headers, "Content-Length: 80");
headers = curl_slist_append(headers, "Accept:"); // disable Accept hdr
headers = curl_slist_append(headers, "Expect:"); // disable Expect hdr
headers = curl_slist_append(headers, "User-Agent: Sia-Agent");
// headers = curl_slist_append(headers, "User-Agent: " USER_AGENT);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
int res = curl_easy_perform(curl) == 0;
long errcode;
CURLcode c = curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &errcode);
if (errcode != 204) {
if (strlen(curl_err_str))
applog(LOG_ERR, "submit err %ld %s", errcode, curl_err_str);
res = 0;
}
share_result(res, work->pooln, work->sharediff[0], res ? NULL : (char*) all_data.buf);
curl_slist_free_all(headers);
return true;
}
// ---- END SIA LONGPOLL ----------------------------------------------------------------------------

6
algo/blake/b/sia-rpc.h Normal file
View File

@@ -0,0 +1,6 @@
#include <miner.h>
char* sia_getheader(CURL *curl, struct pool_infos *pool);
bool sia_work_decode(const char *hexdata, struct work *work);
bool sia_submit(CURL *curl, struct pool_infos *pool, struct work *work);

228
algo/blake/blake2b.c Normal file
View File

@@ -0,0 +1,228 @@
/**
* Blake2-B Implementation
* tpruvot@github 2015-2016
*/
#include "miner.h"
#include "algo-gate-api.h"
#include <string.h>
#include <stdint.h>
#include "algo/blake/sph_blake2b.h"
static __thread sph_blake2b_ctx s_midstate;
static __thread sph_blake2b_ctx s_ctx;
#define MIDLEN 76
#define A 64
void blake2b_hash(void *output, const void *input)
{
uint8_t _ALIGN(A) hash[32];
sph_blake2b_ctx ctx;
sph_blake2b_init(&ctx, 32, NULL, 0);
sph_blake2b_update(&ctx, input, 80);
sph_blake2b_final(&ctx, hash);
memcpy(output, hash, 32);
}
static void blake2b_hash_end(uint32_t *output, const uint32_t *input)
{
s_ctx.outlen = MIDLEN;
memcpy(&s_ctx, &s_midstate, 32 + 16 + MIDLEN);
sph_blake2b_update(&s_ctx, (uint8_t*) &input[MIDLEN/4], 80 - MIDLEN);
sph_blake2b_final(&s_ctx, (uint8_t*) output);
}
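The midstate path above is disabled in scanhash_blake2b and marked untested, for good reason: MIDLEN = 76 matches the classic pdata[19] nonce position that is commented out below, but in the Sia header the nonce occupies bytes 32..35 (work->data[8]), so only the first 32 bytes are nonce-invariant. Note also that memcpy( &s_ctx, &s_midstate, 32 + 16 + MIDLEN ) copies only 124 bytes, which does not even cover the 128-byte b[] buffer of sph_blake2b_ctx, let alone h, t, c and outlen. A minimal corrected sketch (blake2b_prehash and blake2b_hash_mid are hypothetical names, not part of this commit); copying the whole context is safe because 32 bytes is less than one 128-byte BLAKE2b block, so nothing has been compressed yet:

    #define SIA_MIDLEN 32   // bytes before the nonce in a Sia header (assumption)

    static __thread sph_blake2b_ctx s_mid;

    void blake2b_prehash( const void *input )   // once per job
    {
       sph_blake2b_init( &s_mid, 32, NULL, 0 );
       sph_blake2b_update( &s_mid, input, SIA_MIDLEN );
    }

    void blake2b_hash_mid( void *output, const void *input )   // per nonce
    {
       sph_blake2b_ctx ctx = s_mid;             // restore the saved midstate
       sph_blake2b_update( &ctx, (const uint8_t*)input + SIA_MIDLEN,
                           80 - SIA_MIDLEN );
       sph_blake2b_final( &ctx, output );
    }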
int scanhash_blake2b( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
{
uint32_t _ALIGN(A) vhashcpu[8];
uint32_t _ALIGN(A) endiandata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
// const uint32_t first_nonce = pdata[19];
const uint32_t first_nonce = pdata[8];
uint32_t n = first_nonce;
for (int i=0; i < 19; i++) {
be32enc(&endiandata[i], pdata[i]);
}
// midstate (untested yet)
//blake2b_init(&s_midstate, 32, NULL, 0);
//blake2b_update(&s_midstate, (uint8_t*) endiandata, MIDLEN);
//memcpy(&s_ctx, &s_midstate, sizeof(blake2b_ctx));
do {
// be32enc(&endiandata[19], n);
be32enc(&endiandata[8], n);
//blake2b_hash_end(vhashcpu, endiandata);
blake2b_hash(vhashcpu, endiandata);
if (vhashcpu[7] < Htarg && fulltest(vhashcpu, ptarget)) {
work_set_target_ratio(work, vhashcpu);
*hashes_done = n - first_nonce + 1;
// pdata[19] = n;
pdata[8] = n;
return 1;
}
n++;
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
// pdata[19] = n;
pdata[8] = n;
return 0;
}
static inline void swab256(void *dest_p, const void *src_p)
{
uint32_t *dest = (uint32_t *)dest_p;
const uint32_t *src = (uint32_t *)src_p;
dest[0] = swab32(src[7]);
dest[1] = swab32(src[6]);
dest[2] = swab32(src[5]);
dest[3] = swab32(src[4]);
dest[4] = swab32(src[3]);
dest[5] = swab32(src[2]);
dest[6] = swab32(src[1]);
dest[7] = swab32(src[0]);
}
/* compute nbits to get the network diff */
void blake2b_calc_network_diff(struct work *work)
{
// sample for diff 43.281 : 1c05ea29
uint32_t nbits = work->data[11]; // unsure if correct
uint32_t bits = (nbits & 0xffffff);
int16_t shift = (swab32(nbits) & 0xff); // 0x1c = 28
double d = (double)0x0000ffff / (double)bits;
for (int m=shift; m < 29; m++) d *= 256.0;
for (int m=29; m < shift; m++) d /= 256.0;
if (opt_debug_diff)
applog(LOG_DEBUG, "net diff: %f -> shift %u, bits %08x", d, shift, bits);
net_diff = d;
}
void blake2b_be_build_stratum_request( char *req, struct work *work )
{
unsigned char *xnonce2str;
uint32_t ntime, nonce;
char ntimestr[9], noncestr[9];
be32enc( &ntime, work->data[ algo_gate.ntime_index ] );
be32enc( &nonce, work->data[ algo_gate.nonce_index ] );
bin2hex( ntimestr, (char*)(&ntime), sizeof(uint32_t) );
bin2hex( noncestr, (char*)(&nonce), sizeof(uint32_t) );
uint16_t high_nonce = swab32(work->data[9]) >> 16;
xnonce2str = abin2hex((unsigned char*)(&high_nonce), 2);
snprintf( req, JSON_BUF_LEN,
"{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}",
rpc_user, work->job_id, xnonce2str, ntimestr, noncestr );
free( xnonce2str );
}
// merkle root handled here, no need for gen_merkle_root gate target
void blake2b_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{
uchar merkle_root[64] = { 0 };
uint32_t extraheader[32] = { 0 };
int headersize = 0;
size_t t;
int i;
// merkle root
memcpy( merkle_root, sctx->job.coinbase, 32 );
headersize = min( (int)sctx->job.coinbase_size - 32, sizeof(extraheader) );
memcpy( extraheader, &sctx->job.coinbase[32], headersize );
// Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
// Assemble block header
memset( g_work->data, 0, sizeof(g_work->data) );
// g_work->data[0] = le32dec( sctx->job.version );
// for ( i = 0; i < 8; i++ )
// g_work->data[1 + i] = le32dec( (uint32_t *) sctx->job.prevhash + i );
for ( i = 0; i < 8; i++ )
g_work->data[i] = ((uint32_t*)sctx->job.prevhash)[7-i];
// for ( i = 0; i < 8; i++ )
// g_work->data[9 + i] = be32dec( (uint32_t *) merkle_root + i );
g_work->data[8] = 0; // nonce
g_work->data[9] = swab32( extraheader[0] ) | ( rand() & 0xf0 );
g_work->data[10] = be32dec( sctx->job.ntime );
g_work->data[11] = be32dec( sctx->job.nbits );
for ( i = 0; i < 8; i++ )
g_work->data[12+i] = ( (uint32_t*)merkle_root )[i];
}
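The resulting 80-byte Sia header layout, as assembled above and matched by the index assignments in register_blake2b_algo (a reference sketch derived from this function, not authoritative):

    // g_work->data[ 0.. 7]  parent block id (prevhash, word order reversed)
    // g_work->data[ 8]      low half of the 64-bit nonce, scanned by the miner
    // g_work->data[ 9]      high half of the nonce, randomized as extranonce
    // g_work->data[10]      timestamp  (gate->ntime_index = 10)
    // g_work->data[11]      nbits      (gate->nbits_index = 11)
    // g_work->data[12..19]  merkle root, copied from the coinbase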
void blake2b_get_new_work( struct work* work, struct work* g_work, int thr_id,
uint32_t* end_nonce_ptr, bool clean_job )
{
const int wkcmp_sz = 32; // bytes
const int wkcmp_off = 32 + 16;
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
if ( memcmp( &work->data[ wkcmp_off ], &g_work->data[ wkcmp_off ], wkcmp_sz )
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr ) )
|| strcmp( work->job_id, g_work->job_id ) )
{
work_free( work );
work_copy( work, g_work );
*nonceptr = ( 0xffffffffU / opt_n_threads ) * thr_id;
if ( opt_randomize )
*nonceptr += ( (rand() *4 ) & UINT32_MAX ) / opt_n_threads;
*end_nonce_ptr = ( 0xffffffffU / opt_n_threads ) * (thr_id+1) - 0x20;
}
else
++(*nonceptr);
// suprnova job_id check without data/target/height change...
// we just may have copied new g_work to work so why this test here?
// if ( have_stratum && strcmp( work->job_id, g_work->job_id ) )
// exit thread loop
// continue;
// else
// {
// nonceptr[1] += 0x10;
// nonceptr[1] |= thr_id;
// }
}
bool blake2b_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
int thr_id )
{
if ( have_stratum && strcmp( stratum->job.job_id, work->job_id ) )
// need to regen g_work..
return false;
// extradata: prevent duplicates
work->data[ 8 ] += 0x10;
work->data[ 8 + 1 ] |= thr_id;
return true;
}
double blake2b_get_max64() { return 0x1fffffLL; }
bool register_blake2b_algo( algo_gate_t* gate )
{
algo_not_tested();
gate->ntime_index = 10;
gate->nbits_index = 11;
gate->nonce_index = 8;
gate->work_cmp_size = 32;
gate->scanhash = (void*)&scanhash_blake2b;
gate->hash = (void*)&blake2b_hash;
gate->calc_network_diff = (void*)&blake2b_calc_network_diff;
gate->build_stratum_request = (void*)&blake2b_be_build_stratum_request;
gate->build_extraheader = (void*)&blake2b_build_extraheader;
gate->get_new_work = (void*)&blake2b_get_new_work;
gate->get_max64 = (void*)&blake2b_get_max64;
gate->ready_to_mine = (void*)&blake2b_ready_to_mine;
have_gbt = false;
return true;
}
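For context, a minimal sketch of how the gate is driven once registered; the surrounding variables (work, stratum, thr_id, max_nonce) are assumed, the real loop lives in cpu-miner.c, and since the ALGO_BLAKE2B case in register_algo_gate() is still commented out in this commit the register function is called directly:

    algo_gate_t gate;
    uint64_t hashes_done = 0;
    init_algo_gate( &gate );         // install std_* / do_nothing defaults
    register_blake2b_algo( &gate );  // override scanhash, hash, indexes, etc.
    if ( gate.ready_to_mine( &work, &stratum, thr_id ) )
       gate.scanhash( thr_id, &work, max_nonce, &hashes_done );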

View File

@@ -5,13 +5,14 @@
#include <string.h> #include <string.h>
#include <stdint.h> #include <stdint.h>
#include <memory.h> #include <memory.h>
/*
#ifndef min #ifndef min
#define min(a,b) (a>b ? b : a) #define min(a,b) (a>b ? b : a)
#endif #endif
#ifndef max #ifndef max
#define max(a,b) (a<b ? b : a) #define max(a,b) (a<b ? b : a)
#endif #endif
*/
#define DECRED_NBITS_INDEX 29 #define DECRED_NBITS_INDEX 29
#define DECRED_NTIME_INDEX 34 #define DECRED_NTIME_INDEX 34
@@ -179,9 +180,10 @@ void decred_be_build_stratum_request( char *req, struct work *work,
free(xnonce2str);
}
+ /*
// data shared between gen_merkle_root and build_extraheader.
- uint32_t decred_extraheader[32] = { 0 };
- int decred_headersize = 0;
+ __thread uint32_t decred_extraheader[32] = { 0 };
+ __thread int decred_headersize = 0;
void decred_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
{
@@ -191,31 +193,57 @@ void decred_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
sizeof(decred_extraheader) );
memcpy( decred_extraheader, &sctx->job.coinbase[32], decred_headersize);
}
+ */
- void decred_build_extraheader( struct work* work, struct stratum_ctx* sctx )
+ void decred_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{
+ uchar merkle_root[64] = { 0 };
+ uint32_t extraheader[32] = { 0 };
+ int headersize = 0;
uint32_t* extradata = (uint32_t*) sctx->xnonce1;
+ size_t t;
int i;
- for ( i = 0; i < 8; i++ ) // prevhash
-    work->data[1 + i] = swab32( work->data[1 + i] );
- for ( i = 0; i < 8; i++ ) // merkle
-    work->data[9 + i] = swab32( work->data[9 + i] );
- for ( i = 0; i < decred_headersize/4; i++ ) // header
-    work->data[17 + i] = decred_extraheader[i];
+ // getwork over stratum, getwork merkle + header passed in coinb1
+ memcpy(merkle_root, sctx->job.coinbase, 32);
+ headersize = min((int)sctx->job.coinbase_size - 32,
+                  sizeof(extraheader) );
+ memcpy( extraheader, &sctx->job.coinbase[32], headersize );
+ // Increment extranonce2
+ for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
+ // Assemble block header
+ memset( g_work->data, 0, sizeof(g_work->data) );
+ g_work->data[0] = le32dec( sctx->job.version );
+ for ( i = 0; i < 8; i++ )
+    g_work->data[1 + i] = swab32(
+                   le32dec( (uint32_t *) sctx->job.prevhash + i ) );
+ for ( i = 0; i < 8; i++ )
+    g_work->data[9 + i] = swab32( be32dec( (uint32_t *) merkle_root + i ) );
+ // for ( i = 0; i < 8; i++ ) // prevhash
+ //    g_work->data[1 + i] = swab32( g_work->data[1 + i] );
+ // for ( i = 0; i < 8; i++ ) // merkle
+ //    g_work->data[9 + i] = swab32( g_work->data[9 + i] );
+ for ( i = 0; i < headersize/4; i++ ) // header
+    g_work->data[17 + i] = extraheader[i];
// extradata
- for ( i = 0; i < sctx->xnonce1_size/4; i++ )
-    work->data[ DECRED_XNONCE_INDEX + i ] = extradata[i];
- for ( i = DECRED_XNONCE_INDEX + sctx->xnonce1_size/4; i < 45; i++ )
-    work->data[i] = 0;
- work->data[37] = (rand()*4) << 8;
- sctx->bloc_height = work->data[32];
+ for ( i = 0; i < sctx->xnonce1_size/4; i++ )
+    g_work->data[ DECRED_XNONCE_INDEX + i ] = extradata[i];
+ for ( i = DECRED_XNONCE_INDEX + sctx->xnonce1_size/4; i < 45; i++ )
+    g_work->data[i] = 0;
+ g_work->data[37] = (rand()*4) << 8;
+ sctx->bloc_height = g_work->data[32];
//applog_hex(work->data, 180);
//applog_hex(&work->data[36], 36);
}
+ /*
bool decred_prevent_dupes( struct work* work, struct stratum_ctx* stratum,
                           int thr_id )
{
+ return false;
if ( have_stratum && strcmp(stratum->job.job_id, work->job_id) )
   // need to regen g_work..
   return true;
@@ -224,6 +252,25 @@ bool decred_prevent_dupes( struct work* work, struct stratum_ctx* stratum,
work->data[ DECRED_XNONCE_INDEX + 1 ] |= thr_id; work->data[ DECRED_XNONCE_INDEX + 1 ] |= thr_id;
return false; return false;
} }
*/
+ bool decred_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
+                            int thr_id )
+ {
+    if ( have_stratum && strcmp(stratum->job.job_id, work->job_id) )
+       // need to regen g_work..
+       return false;
+    if ( have_stratum && !work->data[0] && !opt_benchmark )
+    {
+       sleep(1);
+       return false;
+    }
+    // extradata: prevent duplicates
+    work->data[ DECRED_XNONCE_INDEX ] += 1;
+    work->data[ DECRED_XNONCE_INDEX + 1 ] |= thr_id;
+    return true;
+ }
bool register_decred_algo( algo_gate_t* gate )
{
@@ -235,9 +282,10 @@ bool register_decred_algo( algo_gate_t* gate )
gate->get_max64 = (void*)&get_max64_0x3fffffLL; gate->get_max64 = (void*)&get_max64_0x3fffffLL;
gate->display_extra_data = (void*)&decred_decode_extradata; gate->display_extra_data = (void*)&decred_decode_extradata;
gate->build_stratum_request = (void*)&decred_be_build_stratum_request; gate->build_stratum_request = (void*)&decred_be_build_stratum_request;
gate->gen_merkle_root = (void*)&decred_gen_merkle_root; // gate->gen_merkle_root = (void*)&decred_gen_merkle_root;
gate->build_extraheader = (void*)&decred_build_extraheader; gate->build_extraheader = (void*)&decred_build_extraheader;
gate->prevent_dupes = (void*)&decred_prevent_dupes; // gate->prevent_dupes = (void*)&decred_prevent_dupes;
gate->ready_to_mine = (void*)&decred_ready_to_mine;
gate->nbits_index = DECRED_NBITS_INDEX; gate->nbits_index = DECRED_NBITS_INDEX;
gate->ntime_index = DECRED_NTIME_INDEX; gate->ntime_index = DECRED_NTIME_INDEX;
gate->nonce_index = DECRED_NONCE_INDEX; gate->nonce_index = DECRED_NONCE_INDEX;

195
algo/blake/sph_blake2b.c Normal file
View File

@@ -0,0 +1,195 @@
/*
* Copyright 2009 Colin Percival, 2014 savale
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This file was originally written by Colin Percival as part of the Tarsnap
* online backup system.
*/
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "algo/sha3/sph_types.h"
#include "sph_blake2b.h"
// Cyclic right rotation.
#ifndef ROTR64
#define ROTR64(x, y) (((x) >> (y)) ^ ((x) << (64 - (y))))
#endif
// Little-endian byte access.
#define B2B_GET64(p) \
(((uint64_t) ((uint8_t *) (p))[0]) ^ \
(((uint64_t) ((uint8_t *) (p))[1]) << 8) ^ \
(((uint64_t) ((uint8_t *) (p))[2]) << 16) ^ \
(((uint64_t) ((uint8_t *) (p))[3]) << 24) ^ \
(((uint64_t) ((uint8_t *) (p))[4]) << 32) ^ \
(((uint64_t) ((uint8_t *) (p))[5]) << 40) ^ \
(((uint64_t) ((uint8_t *) (p))[6]) << 48) ^ \
(((uint64_t) ((uint8_t *) (p))[7]) << 56))
// G Mixing function.
#define B2B_G(a, b, c, d, x, y) { \
v[a] = v[a] + v[b] + x; \
v[d] = ROTR64(v[d] ^ v[a], 32); \
v[c] = v[c] + v[d]; \
v[b] = ROTR64(v[b] ^ v[c], 24); \
v[a] = v[a] + v[b] + y; \
v[d] = ROTR64(v[d] ^ v[a], 16); \
v[c] = v[c] + v[d]; \
v[b] = ROTR64(v[b] ^ v[c], 63); }
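The rotation counts (32, 24, 16, 63) are the BLAKE2b constants from RFC 7693, and ROTR64 composes the rotate from a shift pair. A quick sanity check of the macro (illustrative only):

    uint64_t x = 0x0123456789abcdefULL;
    uint64_t r = ROTR64( x, 32 );  // (x >> 32) ^ (x << 32) = 0x89abcdef01234567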
// Initialization Vector.
static const uint64_t blake2b_iv[8] = {
0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
};
// Compression function. "last" flag indicates last block.
static void blake2b_compress( sph_blake2b_ctx *ctx, int last )
{
const uint8_t sigma[12][16] = {
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
};
int i;
uint64_t v[16], m[16];
for (i = 0; i < 8; i++) { // init work variables
v[i] = ctx->h[i];
v[i + 8] = blake2b_iv[i];
}
v[12] ^= ctx->t[0]; // low 64 bits of offset
v[13] ^= ctx->t[1]; // high 64 bits
if (last) // last block flag set ?
v[14] = ~v[14];
for (i = 0; i < 16; i++) // get little-endian words
m[i] = B2B_GET64(&ctx->b[8 * i]);
for (i = 0; i < 12; i++) { // twelve rounds
B2B_G( 0, 4, 8, 12, m[sigma[i][ 0]], m[sigma[i][ 1]]);
B2B_G( 1, 5, 9, 13, m[sigma[i][ 2]], m[sigma[i][ 3]]);
B2B_G( 2, 6, 10, 14, m[sigma[i][ 4]], m[sigma[i][ 5]]);
B2B_G( 3, 7, 11, 15, m[sigma[i][ 6]], m[sigma[i][ 7]]);
B2B_G( 0, 5, 10, 15, m[sigma[i][ 8]], m[sigma[i][ 9]]);
B2B_G( 1, 6, 11, 12, m[sigma[i][10]], m[sigma[i][11]]);
B2B_G( 2, 7, 8, 13, m[sigma[i][12]], m[sigma[i][13]]);
B2B_G( 3, 4, 9, 14, m[sigma[i][14]], m[sigma[i][15]]);
}
for( i = 0; i < 8; ++i )
ctx->h[i] ^= v[i] ^ v[i + 8];
}
// Initialize the hashing context "ctx" with optional key "key".
// 1 <= outlen <= 64 gives the digest size in bytes.
// Secret key (also <= 64 bytes) is optional (keylen = 0).
int sph_blake2b_init( sph_blake2b_ctx *ctx, size_t outlen, const void *key,
size_t keylen ) // (keylen=0: no key)
{
size_t i;
if (outlen == 0 || outlen > 64 || keylen > 64)
return -1; // illegal parameters
for (i = 0; i < 8; i++) // state, "param block"
ctx->h[i] = blake2b_iv[i];
ctx->h[0] ^= 0x01010000 ^ (keylen << 8) ^ outlen;
ctx->t[0] = 0; // input count low word
ctx->t[1] = 0; // input count high word
ctx->c = 0; // pointer within buffer
ctx->outlen = outlen;
for (i = keylen; i < 128; i++) // zero input block
ctx->b[i] = 0;
if (keylen > 0) {
sph_blake2b_update(ctx, key, keylen);
ctx->c = 128; // at the end
}
return 0;
}
// Add "inlen" bytes from "in" into the hash.
void sph_blake2b_update( sph_blake2b_ctx *ctx, const void *in, size_t inlen )
{
size_t i;
for (i = 0; i < inlen; i++) {
if (ctx->c == 128) { // buffer full ?
ctx->t[0] += ctx->c; // add counters
if (ctx->t[0] < ctx->c) // carry overflow ?
ctx->t[1]++; // high word
blake2b_compress(ctx, 0); // compress (not last)
ctx->c = 0; // counter to zero
}
ctx->b[ctx->c++] = ((const uint8_t *) in)[i];
}
}
// Generate the message digest (size given in init).
// Result placed in "out".
void sph_blake2b_final( sph_blake2b_ctx *ctx, void *out )
{
size_t i;
ctx->t[0] += ctx->c; // mark last block offset
if (ctx->t[0] < ctx->c) // carry overflow
ctx->t[1]++; // high word
while (ctx->c < 128) // fill up with zeros
ctx->b[ctx->c++] = 0;
blake2b_compress(ctx, 1); // final block flag = 1
// little endian convert and store
for (i = 0; i < ctx->outlen; i++) {
((uint8_t *) out)[i] =
(ctx->h[i >> 3] >> (8 * (i & 7))) & 0xFF;
}
}
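A quick self-test of this implementation against the BLAKE2b-512 test vector from RFC 7693 Appendix A (the harness below is illustrative, not part of the file; the expected bytes are from the RFC):

    #include <stdio.h>
    #include <string.h>

    int main()
    {
       // BLAKE2b-512("abc") begins ba 80 a5 3f 98 1c 4d 0d ...
       static const uint8_t expect[8] =
          { 0xba, 0x80, 0xa5, 0x3f, 0x98, 0x1c, 0x4d, 0x0d };
       uint8_t out[64];
       sph_blake2b_ctx ctx;
       sph_blake2b_init( &ctx, 64, NULL, 0 );
       sph_blake2b_update( &ctx, "abc", 3 );
       sph_blake2b_final( &ctx, out );
       printf( memcmp( out, expect, 8 ) ? "FAIL\n" : "OK\n" );
       return 0;
    }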

41
algo/blake/sph_blake2b.h Normal file
View File

@@ -0,0 +1,41 @@
#pragma once
#ifndef __BLAKE2B_H__
#define __BLAKE2B_H__
#include <stddef.h>
#include <stdint.h>
#if defined(_MSC_VER)
#include <inttypes.h>
#define inline __inline
#define ALIGN(x) __declspec(align(x))
#else
#define ALIGN(x) __attribute__((aligned(x)))
#endif
#if defined(_MSC_VER) || defined(__x86_64__) || defined(__x86__)
#define NATIVE_LITTLE_ENDIAN
#endif
// state context
ALIGN(64) typedef struct {
uint8_t b[128]; // input buffer
uint64_t h[8]; // chained state
uint64_t t[2]; // total number of bytes
size_t c; // pointer for b[]
size_t outlen; // digest size
} sph_blake2b_ctx;
#if defined(__cplusplus)
extern "C" {
#endif
int sph_blake2b_init( sph_blake2b_ctx *ctx, size_t outlen, const void *key, size_t keylen);
void sph_blake2b_update( sph_blake2b_ctx *ctx, const void *in, size_t inlen);
void sph_blake2b_final( sph_blake2b_ctx *ctx, void *out);
#if defined(__cplusplus)
}
#endif
#endif

View File

@@ -485,6 +485,7 @@ do { \
sph_enc64le(out + 8 * u, h1[v]); \
} while (0)
+ /*
static void
compress_big(const unsigned char *data, const sph_u64 h[16], sph_u64 dh[16])
{
@@ -499,6 +500,7 @@ compress_big(const unsigned char *data, const sph_u64 h[16], sph_u64 dh[16])
#undef H
#undef dH
}
+ */
static const sph_u64 final_b[16] = {
SPH_C64(0xaaaaaaaaaaaaaaa0), SPH_C64(0xaaaaaaaaaaaaaaa1),

View File

@@ -202,7 +202,7 @@ void cryptonight_hash_aes( void *restrict output, const void *input, int len )
_mm_store_si128((__m128i *)&ctx.long_state[a[0] & 0x1FFFF0], b_x); _mm_store_si128((__m128i *)&ctx.long_state[a[0] & 0x1FFFF0], b_x);
uint64_t *nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0]; uint64_t *nextblock = (uint64_t *)&ctx.long_state[c[0] & 0x1FFFF0];
uint64_t b[2]; // uint64_t b[2];
b[0] = nextblock[0]; b[0] = nextblock[0];
b[1] = nextblock[1]; b[1] = nextblock[1];

View File

@@ -24,10 +24,10 @@ static void transform( cubehashParam *sp )
__m256i x0, x1, x2, x3, y0, y1;
- x0 = _mm256_load_si256( 0 + sp->x );
- x1 = _mm256_load_si256( 2 + sp->x );
- x2 = _mm256_load_si256( 4 + sp->x );
- x3 = _mm256_load_si256( 6 + sp->x );
+ x0 = _mm256_load_si256( (__m256i*)sp->x );
+ x1 = _mm256_load_si256( (__m256i*)sp->x + 1 );
+ x2 = _mm256_load_si256( (__m256i*)sp->x + 2 );
+ x3 = _mm256_load_si256( (__m256i*)sp->x + 3 );
for ( r = 0; r < rounds; ++r )
{
@@ -57,22 +57,22 @@ static void transform( cubehashParam *sp )
x3 = _mm256_shuffle_epi32( x3, 0xb1 );
}
- _mm256_store_si256( 0 + sp->x, x0 );
- _mm256_store_si256( 2 + sp->x, x1 );
- _mm256_store_si256( 4 + sp->x, x2 );
- _mm256_store_si256( 6 + sp->x, x3 );
+ _mm256_store_si256( (__m256i*)sp->x, x0 );
+ _mm256_store_si256( (__m256i*)sp->x + 1, x1 );
+ _mm256_store_si256( (__m256i*)sp->x + 2, x2 );
+ _mm256_store_si256( (__m256i*)sp->x + 3, x3 );
#else
__m128i x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3;
- x0 = _mm_load_si128(0 + sp->x);
- x1 = _mm_load_si128(1 + sp->x);
- x2 = _mm_load_si128(2 + sp->x);
- x3 = _mm_load_si128(3 + sp->x);
- x4 = _mm_load_si128(4 + sp->x);
- x5 = _mm_load_si128(5 + sp->x);
- x6 = _mm_load_si128(6 + sp->x);
- x7 = _mm_load_si128(7 + sp->x);
+ x0 = _mm_load_si128( (__m128i*)sp->x );
+ x1 = _mm_load_si128( (__m128i*)sp->x + 1 );
+ x2 = _mm_load_si128( (__m128i*)sp->x + 2 );
+ x3 = _mm_load_si128( (__m128i*)sp->x + 3 );
+ x4 = _mm_load_si128( (__m128i*)sp->x + 4 );
+ x5 = _mm_load_si128( (__m128i*)sp->x + 5 );
+ x6 = _mm_load_si128( (__m128i*)sp->x + 6 );
+ x7 = _mm_load_si128( (__m128i*)sp->x + 7 );
for (r = 0; r < rounds; ++r) {
x4 = _mm_add_epi32(x0, x4);
@@ -117,14 +117,14 @@ static void transform( cubehashParam *sp )
x7 = _mm_shuffle_epi32(x7, 0xb1);
}
- _mm_store_si128(0 + sp->x, x0);
- _mm_store_si128(1 + sp->x, x1);
- _mm_store_si128(2 + sp->x, x2);
- _mm_store_si128(3 + sp->x, x3);
- _mm_store_si128(4 + sp->x, x4);
- _mm_store_si128(5 + sp->x, x5);
- _mm_store_si128(6 + sp->x, x6);
- _mm_store_si128(7 + sp->x, x7);
+ _mm_store_si128( (__m128i*)sp->x, x0 );
+ _mm_store_si128( (__m128i*)sp->x + 1, x1 );
+ _mm_store_si128( (__m128i*)sp->x + 2, x2 );
+ _mm_store_si128( (__m128i*)sp->x + 3, x3 );
+ _mm_store_si128( (__m128i*)sp->x + 4, x4 );
+ _mm_store_si128( (__m128i*)sp->x + 5, x5 );
+ _mm_store_si128( (__m128i*)sp->x + 6, x6 );
+ _mm_store_si128( (__m128i*)sp->x + 7, x7 );
#endif
} // transform
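This is the pointer-arithmetic issue the algo-gate-api.h comment rants about: judging from the SSE2 path, sp->x is an array of __m128i, so in the AVX2 path an offset like 6 + sp->x is scaled by 16 bytes while _mm256_load_si256 consumes 32, which only works through doubled offsets and an implicit cast. The rewrite casts first so the offset is scaled by the intended vector width. A small illustration (hypothetical buffer, not from the file):

    __m128i buf[8];
    // (__m256i*)buf + 1 advances 32 bytes, the same address as buf + 2,
    // but the cast form states the element size explicitly:
    __m256i v1 = _mm256_load_si256( (__m256i*)buf + 1 );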

View File

@@ -49,7 +49,6 @@ typedef struct {
sph_whirlpool_context whirlpool1, whirlpool2, whirlpool3, whirlpool4;
sph_sha512_context sha1, sha2;
sph_haval256_5_context haval1, haval2;
#ifdef NO_AES_NI
sph_groestl512_context groestl1, groestl2;
sph_echo512_context echo1, echo2;
@@ -57,10 +56,10 @@ typedef struct {
hashState_echo echo1, echo2;
hashState_groestl groestl1, groestl2;
#endif
} hmq1725_ctx_holder;
static hmq1725_ctx_holder hmq1725_ctx;
+ static __thread sph_bmw512_context hmq_bmw_mid;
void init_hmq1725_ctx()
{
@@ -122,174 +121,185 @@ void init_hmq1725_ctx()
#endif
}
+ void hmq_bmw512_midstate( const void* input )
+ {
+    memcpy( &hmq_bmw_mid, &hmq1725_ctx.bmw1, sizeof hmq_bmw_mid );
+    sph_bmw512( &hmq_bmw_mid, input, 64 );
+ }
+ __thread hmq1725_ctx_holder h_ctx;
extern void hmq1725hash(void *state, const void *input)
{
const uint32_t mask = 24;
- uint32_t hashA[25], hashB[25];
- hmq1725_ctx_holder ctx;
+ uint32_t hashA[16] __attribute__((aligned(64)));
+ uint32_t hashB[16] __attribute__((aligned(64)));
+ const int midlen = 64; // bytes
+ const int tail = 80 - midlen; // 16
- memcpy(&ctx, &hmq1725_ctx, sizeof(hmq1725_ctx));
+ memcpy(&h_ctx, &hmq1725_ctx, sizeof(hmq1725_ctx));
- sph_bmw512 (&ctx.bmw1, input, 80); //0
- sph_bmw512_close(&ctx.bmw1, hashA); //1
+ memcpy( &h_ctx.bmw1, &hmq_bmw_mid, sizeof hmq_bmw_mid );
+ sph_bmw512( &h_ctx.bmw1, input + midlen, tail );
+ sph_bmw512_close(&h_ctx.bmw1, hashA); //1
- sph_whirlpool (&ctx.whirlpool1, hashA, 64); //0
- sph_whirlpool_close(&ctx.whirlpool1, hashB); //1
+ sph_whirlpool (&h_ctx.whirlpool1, hashA, 64); //0
+ sph_whirlpool_close(&h_ctx.whirlpool1, hashB); //1
if ( hashB[0] & mask ) //1
{
#ifdef NO_AES_NI
- sph_groestl512 (&ctx.groestl1, hashB, 64); //1
- sph_groestl512_close(&ctx.groestl1, hashA); //2
+ sph_groestl512 (&h_ctx.groestl1, hashB, 64); //1
+ sph_groestl512_close(&h_ctx.groestl1, hashA); //2
#else
- update_groestl( &ctx.groestl1, (char*)hashB, 512 );
- final_groestl( &ctx.groestl1, (char*)hashA );
+ update_and_final_groestl( &h_ctx.groestl1, (char*)hashA,
+                           (const char*)hashB, 512 );
#endif
}
else
{
- sph_skein512 (&ctx.skein1, hashB, 64); //1
- sph_skein512_close(&ctx.skein1, hashA); //2
+ sph_skein512 (&h_ctx.skein1, hashB, 64); //1
+ sph_skein512_close(&h_ctx.skein1, hashA); //2
}
- sph_jh512 (&ctx.jh1, hashA, 64); //3
- sph_jh512_close(&ctx.jh1, hashB); //4
- sph_keccak512 (&ctx.keccak1, hashB, 64); //2
- sph_keccak512_close(&ctx.keccak1, hashA); //3
+ sph_jh512 (&h_ctx.jh1, hashA, 64); //3
+ sph_jh512_close(&h_ctx.jh1, hashB); //4
+ sph_keccak512 (&h_ctx.keccak1, hashB, 64); //2
+ sph_keccak512_close(&h_ctx.keccak1, hashA); //3
if ( hashA[0] & mask ) //4
{
- sph_blake512 (&ctx.blake1, hashA, 64); //
- sph_blake512_close(&ctx.blake1, hashB); //5
+ sph_blake512 (&h_ctx.blake1, hashA, 64); //
+ sph_blake512_close(&h_ctx.blake1, hashB); //5
}
else
{
- sph_bmw512 (&ctx.bmw2, hashA, 64); //4
- sph_bmw512_close(&ctx.bmw2, hashB); //5
+ sph_bmw512 (&h_ctx.bmw2, hashA, 64); //4
+ sph_bmw512_close(&h_ctx.bmw2, hashB); //5
}
- update_luffa( &ctx.luffa1, (BitSequence*)hashB, 64 );
- final_luffa( &ctx.luffa1, (BitSequence*)hashA );
+ update_and_final_luffa( &h_ctx.luffa1, (BitSequence*)hashA,
+                         (const BitSequence*)hashB, 64 );
- cubehashUpdate( &ctx.cube, (BitSequence *)hashA, 64 );
- cubehashDigest( &ctx.cube, (BitSequence *)hashB );
+ cubehashUpdateDigest( &h_ctx.cube, (BitSequence *)hashB,
+                       (const BitSequence *)hashA, 64 );
if ( hashB[0] & mask ) //7
{
- sph_keccak512 (&ctx.keccak2, hashB, 64); //
- sph_keccak512_close(&ctx.keccak2, hashA); //8
+ sph_keccak512 (&h_ctx.keccak2, hashB, 64); //
+ sph_keccak512_close(&h_ctx.keccak2, hashA); //8
}
else
{
- sph_jh512 (&ctx.jh2, hashB, 64); //7
- sph_jh512_close(&ctx.jh2, hashA); //8
+ sph_jh512 (&h_ctx.jh2, hashB, 64); //7
+ sph_jh512_close(&h_ctx.jh2, hashA); //8
}
- sph_shavite512 (&ctx.shavite1, hashA, 64); //3
- sph_shavite512_close(&ctx.shavite1, hashB); //4
+ sph_shavite512 (&h_ctx.shavite1, hashA, 64); //3
+ sph_shavite512_close(&h_ctx.shavite1, hashB); //4
- update_sd( &ctx.simd1, (BitSequence *)hashB, 512 );
- final_sd( &ctx.simd1, (BitSequence *)hashA );
+ update_final_sd( &h_ctx.simd1, (BitSequence *)hashA,
+                  (const BitSequence *)hashB, 512 );
if ( hashA[0] & mask ) //4
{
- sph_whirlpool (&ctx.whirlpool2, hashA, 64); //
- sph_whirlpool_close(&ctx.whirlpool2, hashB); //5
+ sph_whirlpool (&h_ctx.whirlpool2, hashA, 64); //
+ sph_whirlpool_close(&h_ctx.whirlpool2, hashB); //5
}
else
{
- sph_haval256_5 (&ctx.haval1, hashA, 64); //4
- sph_haval256_5_close(&ctx.haval1, hashB); //5
+ sph_haval256_5 (&h_ctx.haval1, hashA, 64); //4
+ sph_haval256_5_close(&h_ctx.haval1, hashB); //5
memset(&hashB[8], 0, 32);
}
#ifdef NO_AES_NI
- sph_echo512 (&ctx.echo1, hashB, 64); //5
- sph_echo512_close(&ctx.echo1, hashA); //6
+ sph_echo512 (&h_ctx.echo1, hashB, 64); //5
+ sph_echo512_close(&h_ctx.echo1, hashA); //6
#else
- update_echo ( &ctx.echo1, (BitSequence *)hashB, 512 );
- final_echo( &ctx.echo1, (BitSequence *)hashA );
+ update_final_echo ( &h_ctx.echo1, (BitSequence *)hashA,
+                     (const BitSequence *)hashB, 512 );
#endif
- sph_blake512 (&ctx.blake2, hashA, 64); //6
- sph_blake512_close(&ctx.blake2, hashB); //7
+ sph_blake512 (&h_ctx.blake2, hashA, 64); //6
+ sph_blake512_close(&h_ctx.blake2, hashB); //7
if ( hashB[0] & mask ) //7
{
- sph_shavite512 (&ctx.shavite2, hashB, 64); //
- sph_shavite512_close(&ctx.shavite2, hashA); //8
+ sph_shavite512 (&h_ctx.shavite2, hashB, 64); //
+ sph_shavite512_close(&h_ctx.shavite2, hashA); //8
}
else
{
- update_luffa( &ctx.luffa2, (BitSequence *)hashB, 64 );
- final_luffa( &ctx.luffa2, (BitSequence *)hashA );
+ update_and_final_luffa( &h_ctx.luffa2, (BitSequence *)hashA,
+                         (const BitSequence *)hashB, 64 );
}
- sph_hamsi512 (&ctx.hamsi1, hashA, 64); //3
- sph_hamsi512_close(&ctx.hamsi1, hashB); //4
- sph_fugue512 (&ctx.fugue1, hashB, 64); //2 ////
- sph_fugue512_close(&ctx.fugue1, hashA); //3
+ sph_hamsi512 (&h_ctx.hamsi1, hashA, 64); //3
+ sph_hamsi512_close(&h_ctx.hamsi1, hashB); //4
+ sph_fugue512 (&h_ctx.fugue1, hashB, 64); //2 ////
+ sph_fugue512_close(&h_ctx.fugue1, hashA); //3
if ( hashA[0] & mask ) //4
{
#ifdef NO_AES_NI
- sph_echo512 (&ctx.echo2, hashA, 64); //
- sph_echo512_close(&ctx.echo2, hashB); //5
+ sph_echo512 (&h_ctx.echo2, hashA, 64); //
+ sph_echo512_close(&h_ctx.echo2, hashB); //5
#else
- update_echo ( &ctx.echo2, (BitSequence *)hashA, 512 );
- final_echo( &ctx.echo2, (BitSequence *)hashB );
+ update_final_echo ( &h_ctx.echo2, (BitSequence *)hashB,
+                     (const BitSequence *)hashA, 512 );
#endif
}
else
{
- update_sd( &ctx.simd2, (BitSequence *)hashA, 512 );
- final_sd( &ctx.simd2, (BitSequence *)hashB );
+ update_final_sd( &h_ctx.simd2, (BitSequence *)hashB,
+                  (const BitSequence *)hashA, 512 );
}
- sph_shabal512 (&ctx.shabal1, hashB, 64); //5
- sph_shabal512_close(&ctx.shabal1, hashA); //6
- sph_whirlpool (&ctx.whirlpool3, hashA, 64); //6
- sph_whirlpool_close(&ctx.whirlpool3, hashB); //7
+ sph_shabal512 (&h_ctx.shabal1, hashB, 64); //5
+ sph_shabal512_close(&h_ctx.shabal1, hashA); //6
+ sph_whirlpool (&h_ctx.whirlpool3, hashA, 64); //6
+ sph_whirlpool_close(&h_ctx.whirlpool3, hashB); //7
if ( hashB[0] & mask ) //7
{
- sph_fugue512 (&ctx.fugue2, hashB, 64); //
- sph_fugue512_close(&ctx.fugue2, hashA); //8
+ sph_fugue512 (&h_ctx.fugue2, hashB, 64); //
+ sph_fugue512_close(&h_ctx.fugue2, hashA); //8
}
else
{
- sph_sha512 (&ctx.sha1, hashB, 64); //7
- sph_sha512_close(&ctx.sha1, hashA); //8
+ sph_sha512 (&h_ctx.sha1, hashB, 64); //7
+ sph_sha512_close(&h_ctx.sha1, hashA); //8
}
#ifdef NO_AES_NI
- sph_groestl512 (&ctx.groestl2, hashA, 64); //3
- sph_groestl512_close(&ctx.groestl2, hashB); //4
+ sph_groestl512 (&h_ctx.groestl2, hashA, 64); //3
+ sph_groestl512_close(&h_ctx.groestl2, hashB); //4
#else
- update_groestl( &ctx.groestl2, (char*)hashA, 512 );
- final_groestl( &ctx.groestl2, (char*)hashB );
+ update_and_final_groestl( &h_ctx.groestl2, (char*)hashB,
+                           (const char*)hashA, 512 );
#endif
- sph_sha512 (&ctx.sha2, hashB, 64); //2
- sph_sha512_close(&ctx.sha2, hashA); //3
+ sph_sha512 (&h_ctx.sha2, hashB, 64); //2
+ sph_sha512_close(&h_ctx.sha2, hashA); //3
if ( hashA[0] & mask ) //4
{
- sph_haval256_5 (&ctx.haval2, hashA, 64); //
- sph_haval256_5_close(&ctx.haval2, hashB); //5
+ sph_haval256_5 (&h_ctx.haval2, hashA, 64); //
+ sph_haval256_5_close(&h_ctx.haval2, hashB); //5
memset(&hashB[8], 0, 32);
}
else
{
- sph_whirlpool (&ctx.whirlpool4, hashA, 64); //4
- sph_whirlpool_close(&ctx.whirlpool4, hashB); //5
+ sph_whirlpool (&h_ctx.whirlpool4, hashA, 64); //4
+ sph_whirlpool_close(&h_ctx.whirlpool4, hashB); //5
}
- sph_bmw512 (&ctx.bmw3, hashB, 64); //5
- sph_bmw512_close(&ctx.bmw3, hashA); //6
+ sph_bmw512 (&h_ctx.bmw3, hashB, 64); //5
+ sph_bmw512_close(&h_ctx.bmw3, hashA); //6
memcpy(state, hashA, 32);
}
@@ -309,6 +319,8 @@ int scanhash_hmq1725( int thr_id, struct work *work, int32_t max_nonce,
for (int k = 0; k < 32; k++) for (int k = 0; k < 32; k++)
be32enc(&endiandata[k], pdata[k]); be32enc(&endiandata[k], pdata[k]);
hmq_bmw512_midstate( endiandata );
// if (opt_debug) // if (opt_debug)
// { // {
// applog(LOG_DEBUG, "Thr: %02d, firstN: %08x, maxN: %08x, ToDo: %d", thr_id, first_nonce, max_nonce, max_nonce-first_nonce); // applog(LOG_DEBUG, "Thr: %02d, firstN: %08x, maxN: %08x, ToDo: %d", thr_id, first_nonce, max_nonce, max_nonce-first_nonce);
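Why the midstate works here: the first 64 of the 80 input bytes are constant for a given job, and the nonce the scanhash loop changes lives in the final 16-byte tail, so BMW-512's first full block can be compressed once per job and replayed for every nonce. Condensed from the code above (a sketch of the flow, not new code):

    // once per job, after endiandata is encoded:
    hmq_bmw512_midstate( endiandata );  // absorbs the constant first 64 bytes

    // per nonce, inside hmq1725hash():
    memcpy( &h_ctx.bmw1, &hmq_bmw_mid, sizeof hmq_bmw_mid );
    sph_bmw512( &h_ctx.bmw1, input + midlen, tail );  // only the 16-byte tail
    sph_bmw512_close( &h_ctx.bmw1, hashA );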

View File

@@ -47,6 +47,21 @@ void hodl_le_build_stratum_request( char* req, struct work* work,
void hodl_build_extraheader( struct work* g_work, struct stratum_ctx *sctx )
{
+ uchar merkle_root[64] = { 0 };
+ size_t t;
+ int i;
+ algo_gate.gen_merkle_root( merkle_root, sctx );
+ // Increment extranonce2
+ for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
+ // Assemble block header
+ memset( g_work->data, 0, sizeof(g_work->data) );
+ g_work->data[0] = le32dec( sctx->job.version );
+ for ( i = 0; i < 8; i++ )
+    g_work->data[1 + i] = le32dec( (uint32_t *) sctx->job.prevhash + i );
+ for ( i = 0; i < 8; i++ )
+    g_work->data[9 + i] = be32dec( (uint32_t *) merkle_root + i );
g_work->data[ algo_gate.ntime_index ] = le32dec( sctx->job.ntime );
g_work->data[ algo_gate.nbits_index ] = le32dec( sctx->job.nbits );
g_work->data[22] = 0x80000000;

View File

@@ -102,7 +102,7 @@ static DATA_ALIGN16(const unsigned char jhE8_bitslice_roundconstant[42][32])={
{0x35,0xb4,0x98,0x31,0xdb,0x41,0x15,0x70,0xea,0x1e,0xf,0xbb,0xed,0xcd,0x54,0x9b,0x9a,0xd0,0x63,0xa1,0x51,0x97,0x40,0x72,0xf6,0x75,0x9d,0xbf,0x91,0x47,0x6f,0xe2}};
- static void jhF8(jhState *state); /* the compression function F8 */
+ //static void jhF8(jhState *state); /* the compression function F8 */
/*The API functions*/
@@ -236,7 +236,8 @@ static void jhF8(jhState *state); /* the compression function F8 */
jhSS(jhSx0,jhSx2,jhSx4,jhSx6,jhSx1,jhSx3,jhSx5,jhSx7, jhLOAD(jhE8_bitslice_roundconstant[r]), jhLOAD(jhE8_bitslice_roundconstant[r]+16) ); \
jhlineartransform_R##nn(jhSx0,jhSx2,jhSx4,jhSx6,jhSx1,jhSx3,jhSx5,jhSx7);
- /*the compression function F8 */
+ /*
+ //the compression function F8
static void jhF8(jhState *state)
{
return;
@@ -257,9 +258,9 @@ static void jhF8(jhState *state)
y6 = state->x6; y6 = state->x6;
y7 = state->x7; y7 = state->x7;
/*xor the 512-bit message with the fist half of the 1024-bit hash state*/ //xor the 512-bit message with the fist half of the 1024-bit hash state
/*perform 42 rounds*/ //perform 42 rounds
for (i = 0; i < 42; i = i+7) { for (i = 0; i < 42; i = i+7) {
jhround_function(00,i); jhround_function(00,i);
jhround_function(01,i+1); jhround_function(01,i+1);
@@ -270,7 +271,7 @@ static void jhF8(jhState *state)
jhround_function(06,i+6);
}
- /*xor the 512-bit message with the second half of the 1024-bit hash state*/
+ //xor the 512-bit message with the second half of the 1024-bit hash state
state->x0 = y0;
state->x1 = y1;
@@ -285,6 +286,7 @@ static void jhF8(jhState *state)
y7 = jhXOR(y7, jhLOAD(state->buffer+48)),
state->x7 = y7;
}
+ */
#define jhF8I \
do { \

View File

@@ -146,12 +146,10 @@ double lbry_calc_network_diff( struct work *work )
{
// sample for diff 43.281 : 1c05ea29
// todo: endian reversed on longpoll could be zr5 specific...
- // uint32_t nbits = have_longpoll ? work->data[18] : swab32(work->data[18]);
uint32_t nbits = swab32( work->data[ LBRY_NBITS_INDEX ] );
uint32_t bits = (nbits & 0xffffff);
int16_t shift = (swab32(nbits) & 0xff); // 0x1c = 28
- // uint64_t diffone = 0x0000FFFF00000000ull;
double d = (double)0x0000ffff / (double)bits;
for (int m=shift; m < 29; m++) d *= 256.0;
@@ -181,13 +179,27 @@ void lbry_le_build_stratum_request( char *req, struct work *work,
free(xnonce2str);
}
- void lbry_build_extraheader( struct work* work, struct stratum_ctx* sctx )
+ void lbry_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{
+ unsigned char merkle_root[64] = { 0 };
+ size_t t;
+ int i;
+ algo_gate.gen_merkle_root( merkle_root, sctx );
+ // Increment extranonce2
+ for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
+ // Assemble block header
+ memset( g_work->data, 0, sizeof(g_work->data) );
+ g_work->data[0] = le32dec( sctx->job.version );
+ for ( i = 0; i < 8; i++ )
+    g_work->data[1 + i] = le32dec( (uint32_t *) sctx->job.prevhash + i );
+ for ( i = 0; i < 8; i++ )
+    g_work->data[9 + i] = be32dec( (uint32_t *) merkle_root + i );
for ( int i = 0; i < 8; i++ )
-    work->data[17 + i] = ((uint32_t*)sctx->job.claim)[i];
- work->data[ LBRY_NTIME_INDEX ] = le32dec(sctx->job.ntime);
- work->data[ LBRY_NBITS_INDEX ] = le32dec(sctx->job.nbits);
- work->data[28] = 0x80000000;
+    g_work->data[17 + i] = ((uint32_t*)sctx->job.claim)[i];
+ g_work->data[ LBRY_NTIME_INDEX ] = le32dec(sctx->job.ntime);
+ g_work->data[ LBRY_NBITS_INDEX ] = le32dec(sctx->job.nbits);
+ g_work->data[28] = 0x80000000;
}
void lbry_set_target( struct work* work, double job_diff )

View File

@@ -20,20 +20,9 @@
#include <string.h> #include <string.h>
#include <emmintrin.h> #include <emmintrin.h>
#include "avxdefs.h"
#include "luffa_for_sse2.h" #include "luffa_for_sse2.h"
#ifdef HASH_BIG_ENDIAN
# define BYTES_SWAP32(x) x
#else
# define BYTES_SWAP32(x) \
((x << 24) | ((x & 0x0000ff00) << 8) | ((x & 0x00ff0000) >> 8) | (x >> 24))
#endif /* HASH_BIG_ENDIAN */
/* BYTES_SWAP256(x) stores each 32-bit word of 256 bits data in little-endian convention */
#define BYTES_SWAP256(x) { \
int _i = 8; while(_i--){x[_i] = BYTES_SWAP32(x[_i]);} \
}
#define MULT2(a0,a1) do \ #define MULT2(a0,a1) do \
{ \ { \
__m128i b; \ __m128i b; \
@@ -200,11 +189,10 @@
NMLTOM1024(r0,r1,r2,r3,s0,s1,s2,s3,p0,p1,p2,p3,q0,q1,q2,q3);
- static void Update512(hashState_luffa *state, const BitSequence *data, DataLength databitlen);
- static void rnd512(hashState_luffa *state);
- static void finalization512(hashState_luffa *state, uint32 *b);
+ static void rnd512( hashState_luffa *state, __m128i msg1, __m128i msg0 );
+ //static void rnd512( hashState_luffa *state );
+ static void finalization512( hashState_luffa *state, uint32 *b );
/* initial values of chaining variables */
@@ -282,45 +270,44 @@ HashReturn init_luffa(hashState_luffa *state, int hashbitlen)
HashReturn update_luffa( hashState_luffa *state, const BitSequence *data,
                         size_t len )
{
- HashReturn ret=SUCCESS;
- int i, j;
- int rem = len % 32;
- int blocks = (int)( len / 32 );
- uint8 *p = (uint8*)state->buffer;
+ int i;
+ int blocks = (int)len / 32;
+ state-> rembytes = (int)len % 32;
// full blocks
- for ( j = 0; j < blocks; j++ )
+ for ( i = 0; i < blocks; i++ )
{
-    state->buffer[0] = BYTES_SWAP32( ((uint32*)data)[0] );
-    state->buffer[1] = BYTES_SWAP32( ((uint32*)data)[1] );
-    state->buffer[2] = BYTES_SWAP32( ((uint32*)data)[2] );
-    state->buffer[3] = BYTES_SWAP32( ((uint32*)data)[3] );
-    state->buffer[4] = BYTES_SWAP32( ((uint32*)data)[4] );
-    state->buffer[5] = BYTES_SWAP32( ((uint32*)data)[5] );
-    state->buffer[6] = BYTES_SWAP32( ((uint32*)data)[6] );
-    state->buffer[7] = BYTES_SWAP32( ((uint32*)data)[7] );
-    rnd512( state );
+    rnd512( state, mm_byteswap_epi32( casti_m128i( data, 1 ) ),
+                   mm_byteswap_epi32( casti_m128i( data, 0 ) ) );
     data += MSG_BLOCK_BYTE_LEN;
}
- // remaining partial block, if any
- for ( i = 0; i < rem/4; i++ )
-    state->buffer[i] = BYTES_SWAP32( ((uint32*)data)[i] );
- // padding of partial block
- memset( p+rem+1, 0, (31-rem)*sizeof(uint8) );
- p[rem] = 0x80;
- for ( i = rem/4; i < 8; i++ )
-    state->buffer[i] = BYTES_SWAP32(state->buffer[i]);
- rnd512(state);
- return ret;
+ // 16 byte partial block exists for 80 byte len
+ // store in buffer for transform in final for midstate to work
+ if ( state->rembytes )
+ {
+    // remaining data bytes
+    casti_m128i( state->buffer, 0 ) = mm_byteswap_epi32( cast_m128i( data ) );
+    // padding of partial block
+    casti_m128i( state->buffer, 1 ) =
+          _mm_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
+ }
+ return SUCCESS;
}
HashReturn final_luffa(hashState_luffa *state, BitSequence *hashval)
{
+ // transform pad block
+ if ( state->rembytes )
+    // not empty, data is in buffer
+    rnd512( state, casti_m128i( state->buffer, 1 ),
+                   casti_m128i( state->buffer, 0 ) );
+ else
+    // empty pad block, constant data
+    rnd512( state, _mm_setzero_si128(),
+            _mm_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 ) );
finalization512(state, (uint32*) hashval);
if ( state->hashbitlen > 512 )
   finalization512( state, (uint32*)( hashval+128 ) );
@@ -330,79 +317,66 @@ HashReturn final_luffa(hashState_luffa *state, BitSequence *hashval)
HashReturn update_and_final_luffa( hashState_luffa *state, BitSequence* output, HashReturn update_and_final_luffa( hashState_luffa *state, BitSequence* output,
const BitSequence* data, size_t inlen ) const BitSequence* data, size_t inlen )
{ {
HashReturn ret=SUCCESS; // Optimized for multiples of 16 bytes, good for 64 and 80 byte len
int i, j; int i;
int rem = inlen % 32;
int blocks = (int)( inlen / 32 ); int blocks = (int)( inlen / 32 );
uint8 *p = (uint8*)state->buffer; state->rembytes = inlen % 32;
// full blocks // full blocks
for ( j = 0; j < blocks; j++ ) for ( i = 0; i < blocks; i++ )
{ {
state->buffer[0] = BYTES_SWAP32( ((uint32*)data)[0] ); rnd512( state, mm_byteswap_epi32( casti_m128i( data, 1 ) ),
state->buffer[1] = BYTES_SWAP32( ((uint32*)data)[1] ); mm_byteswap_epi32( casti_m128i( data, 0 ) ) );
state->buffer[2] = BYTES_SWAP32( ((uint32*)data)[2] );
state->buffer[3] = BYTES_SWAP32( ((uint32*)data)[3] );
state->buffer[4] = BYTES_SWAP32( ((uint32*)data)[4] );
state->buffer[5] = BYTES_SWAP32( ((uint32*)data)[5] );
state->buffer[6] = BYTES_SWAP32( ((uint32*)data)[6] );
state->buffer[7] = BYTES_SWAP32( ((uint32*)data)[7] );
rnd512( state );
data += MSG_BLOCK_BYTE_LEN; data += MSG_BLOCK_BYTE_LEN;
} }
// remaining partial block, if any // 16 byte partial block exists for 80 byte len
for ( i = 0; i < rem/4; i++ ) if ( state->rembytes )
state->buffer[i] = BYTES_SWAP32( ((uint32*)data)[i] ); // remaining 16 data bytes + 16 bytes padding
rnd512( state, _mm_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 ),
// padding of partial block mm_byteswap_epi32( cast_m128i( data ) ) );
memset( p+rem+1, 0, (31-rem)*sizeof(uint8) ); else
p[rem] = 0x80; // empty pad block
for ( i = rem/4; i < 8; i++ ) rnd512( state, _mm_setzero_si128(),
state->buffer[i] = BYTES_SWAP32(state->buffer[i]); _mm_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 ) );
rnd512( state );
finalization512( state, (uint32*) output ); finalization512( state, (uint32*) output );
if ( state->hashbitlen > 512 ) if ( state->hashbitlen > 512 )
finalization512( state, (uint32*)( output+128 ) ); finalization512( state, (uint32*)( output+128 ) );
return SUCCESS;
return SUCCESS;
} }
/***************************************************/ /***************************************************/
/* Round function */ /* Round function */
/* state: hash context */ /* state: hash context */
static void rnd512( hashState_luffa *state, __m128i msg1, __m128i msg0 )
static void rnd512(hashState_luffa *state)
{ {
__m128i t[2]; __m128i t[2];
__m128i *chainv = state->chainv; __m128i *chainv = state->chainv;
__m128i msg[2];
__m128i tmp[2]; __m128i tmp[2];
__m128i x[8]; __m128i x[8];
int i;
// _mm_prefetch( chainv, _MM_HINT_T0 );
// _mm_prefetch( chainv + 4, _MM_HINT_T0 );
t[0] = chainv[0]; t[0] = chainv[0];
t[1] = chainv[1]; t[1] = chainv[1];
t[0] = _mm_xor_si128(t[0], chainv[2]); t[0] = _mm_xor_si128( t[0], chainv[2] );
t[1] = _mm_xor_si128(t[1], chainv[3]); t[1] = _mm_xor_si128( t[1], chainv[3] );
t[0] = _mm_xor_si128(t[0], chainv[4]); t[0] = _mm_xor_si128( t[0], chainv[4] );
t[1] = _mm_xor_si128(t[1], chainv[5]); t[1] = _mm_xor_si128( t[1], chainv[5] );
t[0] = _mm_xor_si128(t[0], chainv[6]); t[0] = _mm_xor_si128( t[0], chainv[6] );
t[1] = _mm_xor_si128(t[1], chainv[7]); t[1] = _mm_xor_si128( t[1], chainv[7] );
t[0] = _mm_xor_si128(t[0], chainv[8]); t[0] = _mm_xor_si128( t[0], chainv[8] );
t[1] = _mm_xor_si128(t[1], chainv[9]); t[1] = _mm_xor_si128( t[1], chainv[9] );
MULT2( t[0], t[1]); MULT2( t[0], t[1] );
msg[0] = _mm_load_si128 ( (__m128i*)&state->buffer[0] ); msg0 = _mm_shuffle_epi32( msg0, 27 );
msg[1] = _mm_load_si128 ( (__m128i*)&state->buffer[4] ); msg1 = _mm_shuffle_epi32( msg1, 27 );
msg[0] = _mm_shuffle_epi32( msg[0], 27 );
msg[1] = _mm_shuffle_epi32( msg[1], 27 );
chainv[0] = _mm_xor_si128( chainv[0], t[0] ); chainv[0] = _mm_xor_si128( chainv[0], t[0] );
chainv[1] = _mm_xor_si128( chainv[1], t[1] ); chainv[1] = _mm_xor_si128( chainv[1], t[1] );
@@ -468,30 +442,30 @@ static void rnd512(hashState_luffa *state)
MULT2( chainv[0], chainv[1] ); MULT2( chainv[0], chainv[1] );
chainv[0] = _mm_xor_si128( _mm_xor_si128( chainv[0], t[0] ), msg[0] ); chainv[0] = _mm_xor_si128( _mm_xor_si128( chainv[0], t[0] ), msg0 );
chainv[1] = _mm_xor_si128( _mm_xor_si128( chainv[1], t[1] ), msg[1] ); chainv[1] = _mm_xor_si128( _mm_xor_si128( chainv[1], t[1] ), msg1 );
MULT2( msg[0], msg[1]); MULT2( msg0, msg1);
chainv[2] = _mm_xor_si128( chainv[2], msg[0] ); chainv[2] = _mm_xor_si128( chainv[2], msg0 );
chainv[3] = _mm_xor_si128( chainv[3], msg[1] ); chainv[3] = _mm_xor_si128( chainv[3], msg1 );
MULT2( msg[0], msg[1]); MULT2( msg0, msg1);
chainv[4] = _mm_xor_si128( chainv[4], msg[0] ); chainv[4] = _mm_xor_si128( chainv[4], msg0 );
chainv[5] = _mm_xor_si128( chainv[5], msg[1] ); chainv[5] = _mm_xor_si128( chainv[5], msg1 );
MULT2( msg[0], msg[1]); MULT2( msg0, msg1);
chainv[6] = _mm_xor_si128( chainv[6], msg[0] ); chainv[6] = _mm_xor_si128( chainv[6], msg0 );
chainv[7] = _mm_xor_si128( chainv[7], msg[1] ); chainv[7] = _mm_xor_si128( chainv[7], msg1 );
MULT2( msg[0], msg[1]); MULT2( msg0, msg1);
chainv[8] = _mm_xor_si128( chainv[8], msg[0] ); chainv[8] = _mm_xor_si128( chainv[8], msg0 );
chainv[9] = _mm_xor_si128( chainv[9], msg[1] ); chainv[9] = _mm_xor_si128( chainv[9], msg1 );
MULT2( msg[0], msg[1]); MULT2( msg0, msg1);
chainv[3] = _mm_or_si128( _mm_slli_epi32(chainv[3], 1), chainv[3] = _mm_or_si128( _mm_slli_epi32(chainv[3], 1),
_mm_srli_epi32(chainv[3], 31) ); _mm_srli_epi32(chainv[3], 31) );
@@ -548,16 +522,16 @@ static void rnd512(hashState_luffa *state)
/* state: hash context */ /* state: hash context */
/* b[8]: hash values */ /* b[8]: hash values */
static void finalization512(hashState_luffa *state, uint32 *b) static void finalization512( hashState_luffa *state, uint32 *b )
{ {
uint32 hash[8] __attribute((aligned(64)));
__m128i* chainv = state->chainv; __m128i* chainv = state->chainv;
__m128i t[2]; __m128i t[2];
uint32 hash[8] __attribute((aligned(16)));
int i;
/*---- blank round with m=0 ----*/ /*---- blank round with m=0 ----*/
memset(state->buffer, 0, sizeof state->buffer ); rnd512( state, _mm_setzero_si128(), _mm_setzero_si128() );
rnd512(state);
// _mm_prefetch( b, _MM_HINT_T0 );
t[0] = chainv[0]; t[0] = chainv[0];
t[1] = chainv[1]; t[1] = chainv[1];
@@ -576,17 +550,10 @@ static void finalization512(hashState_luffa *state, uint32 *b)
_mm_store_si128((__m128i*)&hash[0], t[0]); _mm_store_si128((__m128i*)&hash[0], t[0]);
_mm_store_si128((__m128i*)&hash[4], t[1]); _mm_store_si128((__m128i*)&hash[4], t[1]);
b[0] = BYTES_SWAP32(hash[0]); casti_m128i( b, 0 ) = mm_byteswap_epi32( casti_m128i( hash, 0 ) );
b[1] = BYTES_SWAP32(hash[1]); casti_m128i( b, 1 ) = mm_byteswap_epi32( casti_m128i( hash, 1 ) );
b[2] = BYTES_SWAP32(hash[2]);
b[3] = BYTES_SWAP32(hash[3]);
b[4] = BYTES_SWAP32(hash[4]);
b[5] = BYTES_SWAP32(hash[5]);
b[6] = BYTES_SWAP32(hash[6]);
b[7] = BYTES_SWAP32(hash[7]);
memset(state->buffer, 0, sizeof state->buffer ); rnd512( state, _mm_setzero_si128(), _mm_setzero_si128() );
rnd512(state);
t[0] = chainv[0]; t[0] = chainv[0];
t[1] = chainv[1]; t[1] = chainv[1];
@@ -605,14 +572,8 @@ static void finalization512(hashState_luffa *state, uint32 *b)
_mm_store_si128((__m128i*)&hash[0], t[0]); _mm_store_si128((__m128i*)&hash[0], t[0]);
_mm_store_si128((__m128i*)&hash[4], t[1]); _mm_store_si128((__m128i*)&hash[4], t[1]);
b[ 8] = BYTES_SWAP32(hash[0]); casti_m128i( b, 2 ) = mm_byteswap_epi32( casti_m128i( hash, 0 ) );
b[ 9] = BYTES_SWAP32(hash[1]); casti_m128i( b, 3 ) = mm_byteswap_epi32( casti_m128i( hash, 1 ) );
b[10] = BYTES_SWAP32(hash[2]);
b[11] = BYTES_SWAP32(hash[3]);
b[12] = BYTES_SWAP32(hash[4]);
b[13] = BYTES_SWAP32(hash[5]);
b[14] = BYTES_SWAP32(hash[6]);
b[15] = BYTES_SWAP32(hash[7]);
return; return;
} }
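The new two-argument rnd512 and the rembytes logic above enable midstate caching for 80 byte inputs: the constant 64 byte header prefix is hashed once, and only the final 16 bytes are hashed per nonce, as deep.c and qubit.c do below. A minimal sketch of that pattern, assuming only the API above (set_luffa_midstate and luffa_hash_80 are illustrative names):

#include <string.h>
#include "luffa_for_sse2.h"

static hashState_luffa luffa_mid;   // state cached after the first 64 bytes

// Once per job: absorb the 64 byte header prefix (two full Luffa blocks).
void set_luffa_midstate( const void* header )
{
   init_luffa( &luffa_mid, 512 );
   update_luffa( &luffa_mid, (const BitSequence*)header, 64 );
}

// Once per nonce: restore the cached state, hash only the last 16 bytes.
void luffa_hash_80( void* out, const void* header )
{
   hashState_luffa ctx;
   memcpy( &ctx, &luffa_mid, sizeof ctx );
   update_and_final_luffa( &ctx, (BitSequence*)out,
                           (const BitSequence*)header + 64, 16 );
}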

algo/luffa/sse2/luffa_for_sse2.h

@@ -51,6 +51,7 @@ typedef struct {
// uint64 bitlen[2]; /* Message length in bits */ // uint64 bitlen[2]; /* Message length in bits */
// uint32 rembitlen; /* Length of buffer data to be hashed */ // uint32 rembitlen; /* Length of buffer data to be hashed */
int hashbitlen; int hashbitlen;
int rembytes;
} hashState_luffa; } hashState_luffa;
HashReturn init_luffa( hashState_luffa *state, int hashbitlen ); HashReturn init_luffa( hashState_luffa *state, int hashbitlen );


@@ -67,7 +67,7 @@ int LYRA2REV2( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
//Tries to allocate enough space for the whole memory matrix //Tries to allocate enough space for the whole memory matrix
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols; const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8; // const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
// for Lyra2REv2, nCols = 4, v1 was using 8 // for Lyra2REv2, nCols = 4, v1 was using 8
const int64_t BLOCK_LEN = (nCols == 4) ? BLOCK_LEN_BLAKE2_SAFE_INT64 const int64_t BLOCK_LEN = (nCols == 4) ? BLOCK_LEN_BLAKE2_SAFE_INT64
: BLOCK_LEN_BLAKE2_SAFE_BYTES; : BLOCK_LEN_BLAKE2_SAFE_BYTES;
@@ -228,7 +228,7 @@ int LYRA2Z( uint64_t* wholeMatrix, void *K, uint64_t kLen, const void *pwd,
//Tries to allocate enough space for the whole memory matrix //Tries to allocate enough space for the whole memory matrix
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols; const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8; // const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
//==== Getting the password + salt + basil padded with 10*1 ============// //==== Getting the password + salt + basil padded with 10*1 ============//
//OBS.:The memory matrix will temporarily hold the password: not for saving memory, //OBS.:The memory matrix will temporarily hold the password: not for saving memory,


@@ -53,7 +53,7 @@ void lyra2re_hash(void *state, const void *input)
memcpy(&ctx, &lyra2re_ctx, sizeof(lyra2re_ctx)); memcpy(&ctx, &lyra2re_ctx, sizeof(lyra2re_ctx));
// uint32_t _ALIGN(128) hashA[8], hashB[8]; // uint32_t _ALIGN(128) hashA[8], hashB[8];
uint32_t _ALIGN(128) hash[32]; uint8_t _ALIGN(128) hash[32*8];
#define hashA hash #define hashA hash
#define hashB hash+16 #define hashB hash+16
@@ -61,7 +61,7 @@ void lyra2re_hash(void *state, const void *input)
const int tail = 80 - midlen; // 16 const int tail = 80 - midlen; // 16
memcpy( &ctx.blake, &lyra2_blake_mid, sizeof lyra2_blake_mid ); memcpy( &ctx.blake, &lyra2_blake_mid, sizeof lyra2_blake_mid );
sph_blake256( &ctx.blake, input + 64, 16 ); sph_blake256( &ctx.blake, input + midlen, tail );
// sph_blake256(&ctx.blake, input, 80); // sph_blake256(&ctx.blake, input, 80);
sph_blake256_close(&ctx.blake, hashA); sph_blake256_close(&ctx.blake, hashA);

algo/qubit/deep.c (new file, 162 lines)

@@ -0,0 +1,162 @@
#include "miner.h"
#include "algo-gate-api.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/simd/sph_simd.h"
#include "algo/echo/sph_echo.h"
#include "algo/luffa/sse2/luffa_for_sse2.h"
#include "algo/cubehash/sse2/cubehash_sse2.h"
#include "algo/simd/sse2/nist.h"
#include "algo/shavite/sph_shavite.h"
#ifndef NO_AES_NI
#include "algo/echo/aes_ni/hash_api.h"
#endif
typedef struct
{
hashState_luffa luffa;
cubehashParam cubehash;
sph_shavite512_context shavite;
hashState_sd simd;
#ifdef NO_AES_NI
sph_echo512_context echo;
#else
hashState_echo echo;
#endif
} deep_ctx_holder;
deep_ctx_holder deep_ctx;
static __thread hashState_luffa deep_luffa_mid;
void init_deep_ctx()
{
init_luffa( &deep_ctx.luffa, 512 );
cubehashInit( &deep_ctx.cubehash, 512, 16, 32 );
#ifdef NO_AES_NI
sph_echo512_init( &deep_ctx.echo );
#else
init_echo( &deep_ctx.echo, 512 );
#endif
};
void deep_luffa_midstate( const void* input )
{
memcpy( &deep_luffa_mid, &deep_ctx.luffa, sizeof deep_luffa_mid );
update_luffa( &deep_luffa_mid, input, 64 );
}
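// Called from scanhash_deep before the nonce loop; deep_hash then restores
// this cached state so only the final 16 header bytes are hashed per nonce.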
void deep_hash(void *output, const void *input)
{
unsigned char hash[128] __attribute((aligned(64)));
#define hashB hash+64
deep_ctx_holder ctx;
memcpy( &ctx, &deep_ctx, sizeof(deep_ctx) );
const int midlen = 64; // bytes
const int tail = 80 - midlen; // 16
memcpy( &ctx.luffa, &deep_luffa_mid, sizeof deep_luffa_mid );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
(const BitSequence*)input + midlen, tail );
cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
(const byte*) hash,64);
#ifdef NO_AES_NI
sph_echo512 (&ctx.echo, (const void*) hash, 64);
sph_echo512_close(&ctx.echo, (void*) hash);
#else
update_final_echo ( &ctx.echo, (BitSequence *) hash,
(const BitSequence *) hash, 512);
#endif
asm volatile ("emms");
memcpy(output, hash, 32);
}
int scanhash_deep( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done)
{
uint32_t endiandata[20] __attribute__((aligned(64)));
uint32_t hash64[8] __attribute__((aligned(32)));
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
uint32_t n = pdata[19] - 1;
const uint32_t first_nonce = pdata[19];
const uint32_t Htarg = ptarget[7];
uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 };
uint32_t masks[] =
{ 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00, 0xFFFFF000, 0xFFFF0000, 0 };
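// Each mask pairs with an htmax bucket: a nonce is only worth a fulltest
// when ( hash64[7] & mask ) == 0.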
// we need bigendian data...
swab32_array( endiandata, pdata, 20 );
deep_luffa_midstate( endiandata );
#ifdef DEBUG_ALGO
printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif
for ( int m=0; m < 6; m++ )
{
if ( Htarg <= htmax[m] )
{
uint32_t mask = masks[m];
do
{
pdata[19] = ++n;
be32enc( &endiandata[19], n );
deep_hash( hash64, endiandata );
#ifndef DEBUG_ALGO
if (!(hash64[7] & mask))
{
if ( fulltest(hash64, ptarget) )
{
*hashes_done = n - first_nonce + 1;
return true;
}
// else
// {
// applog(LOG_INFO, "Result does not validate on CPU!");
// }
}
#else
if (!(n % 0x1000) && !thr_id) printf(".");
if (!(hash64[7] & mask)) {
printf("[%d]",thr_id);
if (fulltest(hash64, ptarget)) {
*hashes_done = n - first_nonce + 1;
return true;
}
}
#endif
} while ( n < max_nonce && !work_restart[thr_id].restart );
// see the if/else in blake.c to understand the loop on htmax => mask
break;
}
}
*hashes_done = n - first_nonce + 1;
pdata[19] = n;
return 0;
}
bool register_deep_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
init_deep_ctx();
gate->scanhash = (void*)&scanhash_deep;
gate->hash = (void*)&deep_hash;
// gate->hash_alt = (void*)&qubithash_alt;
return true;
};

algo/qubit/qubit.c

@@ -35,6 +35,7 @@ typedef struct
} qubit_ctx_holder; } qubit_ctx_holder;
qubit_ctx_holder qubit_ctx; qubit_ctx_holder qubit_ctx;
static __thread hashState_luffa qubit_luffa_mid;
void init_qubit_ctx() void init_qubit_ctx()
{ {
@@ -49,16 +50,25 @@ void init_qubit_ctx()
#endif #endif
}; };
void qubit_luffa_midstate( const void* input )
{
memcpy( &qubit_luffa_mid, &qubit_ctx.luffa, sizeof qubit_luffa_mid );
update_luffa( &qubit_luffa_mid, input, 64 );
}
void qubithash(void *output, const void *input) void qubithash(void *output, const void *input)
{ {
unsigned char hash[128]; // uint32_t hashA[16], hashB[16]; unsigned char hash[128] __attribute((aligned(64)));
#define hashB hash+64 #define hashB hash+64
qubit_ctx_holder ctx; qubit_ctx_holder ctx;
memcpy( &ctx, &qubit_ctx, sizeof(qubit_ctx) ); memcpy( &ctx, &qubit_ctx, sizeof(qubit_ctx) );
update_luffa( &ctx.luffa, (const BitSequence*)input, 80 ); const int midlen = 64; // bytes
final_luffa( &ctx.luffa, (BitSequence*)hash); const int tail = 80 - midlen; // 16
memcpy( &ctx.luffa, &qubit_luffa_mid, sizeof qubit_luffa_mid );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
(const BitSequence*)input + midlen, tail );
cubehashUpdate( &ctx.cubehash, (const byte*) hash,64); cubehashUpdate( &ctx.cubehash, (const byte*) hash,64);
cubehashDigest( &ctx.cubehash, (byte*)hash); cubehashDigest( &ctx.cubehash, (byte*)hash);
@@ -114,7 +124,6 @@ void qubithash_alt(void *output, const void *input)
memcpy(output, hash, 32); memcpy(output, hash, 32);
} }
int scanhash_qubit(int thr_id, struct work *work, int scanhash_qubit(int thr_id, struct work *work,
uint32_t max_nonce, uint64_t *hashes_done) uint32_t max_nonce, uint64_t *hashes_done)
{ {
@@ -133,6 +142,8 @@ int scanhash_qubit(int thr_id, struct work *work,
// we need bigendian data... // we need bigendian data...
swab32_array( endiandata, pdata, 20 ); swab32_array( endiandata, pdata, 20 );
qubit_luffa_midstate( endiandata );
#ifdef DEBUG_ALGO #ifdef DEBUG_ALGO
printf("[%d] Htarg=%X\n", thr_id, Htarg); printf("[%d] Htarg=%X\n", thr_id, Htarg);
#endif #endif

algo/simd/sse2/nist.c

@@ -294,7 +294,7 @@ HashReturn update_final_sd( hashState_sd *state, BitSequence *hashval,
{ {
memcpy( state->buffer+current/8, data, (databitlen+7)/8 ); memcpy( state->buffer+current/8, data, (databitlen+7)/8 );
IncreaseCounter( state, databitlen ); IncreaseCounter( state, databitlen );
return SUCCESS; break;
} }
else else
{ {
@@ -341,13 +341,13 @@ HashReturn update_final_sd( hashState_sd *state, BitSequence *hashval,
for ( i=0; i < 2*state->n_feistels; i++ ) for ( i=0; i < 2*state->n_feistels; i++ )
{ {
u32 x = state->A[i]; u32 x = state->A[i];
out[4*i ] = x&0xff; out[4*i ] = x & 0xff;
x >>= 8; x >>= 8;
out[4*i+1] = x&0xff; out[4*i+1] = x & 0xff;
x >>= 8; x >>= 8;
out[4*i+2] = x&0xff; out[4*i+2] = x & 0xff;
x >>= 8; x >>= 8;
out[4*i+3] = x&0xff; out[4*i+3] = x & 0xff;
} }
memcpy( hashval, out, state->hashbitlen / 8 ); memcpy( hashval, out, state->hashbitlen / 8 );

algo/timetravel.c

@@ -147,17 +147,17 @@ void timetravel_hash(void *output, const void *input)
switch ( permutation[i] ) switch ( permutation[i] )
{ {
case 0: case 0:
// if ( i == 0 ) if ( i == 0 )
// { {
// memcpy( &ctx.blake, &tt_mid.blake, sizeof tt_mid.blake ); memcpy( &ctx.blake, &tt_mid.blake, sizeof tt_mid.blake );
// sph_blake256( &ctx.blake, input + midlen, tail ); sph_blake512( &ctx.blake, input + midlen, tail );
// sph_blake256_close( &ctx.blake, hashB ); sph_blake512_close( &ctx.blake, hashB );
// } }
// else else
// { {
sph_blake512( &ctx.blake, hashA, dataLen ); sph_blake512( &ctx.blake, hashA, dataLen );
sph_blake512_close( &ctx.blake, hashB ); sph_blake512_close( &ctx.blake, hashB );
// } }
break; break;
case 1: case 1:
if ( i == 0 ) if ( i == 0 )
@@ -239,28 +239,29 @@ void timetravel_hash(void *output, const void *input)
} }
break; break;
case 6: case 6:
// if ( i == 0 ) if ( i == 0 )
// { {
// memcpy( &ctx.luffa, &tt_mid.luffa, sizeof tt_mid.luffa ); memcpy( &ctx.luffa, &tt_mid.luffa, sizeof tt_mid.luffa );
// update_and_final_luffa( &ctx.luffa, hashB, update_and_final_luffa( &ctx.luffa, hashB,
// input + 64, 16 ); input + 64, 16 );
// } }
// else else
// { {
update_and_final_luffa( &ctx.luffa, (BitSequence*)hashB, update_and_final_luffa( &ctx.luffa, (BitSequence*)hashB,
hashA, dataLen ); (const BitSequence *)hashA, dataLen );
// } }
break; break;
case 7: case 7:
if ( i == 0 ) if ( i == 0 )
{ {
memcpy( &ctx.cube, &tt_mid.cube, sizeof tt_mid.cube ); memcpy( &ctx.cube, &tt_mid.cube, sizeof tt_mid.cube );
cubehashUpdateDigest( &ctx.cube, hashB, cubehashUpdateDigest( &ctx.cube, (byte*)hashB,
input + midlen, tail ); (const byte*)input + midlen, tail );
} }
else else
{ {
cubehashUpdateDigest( &ctx.cube, hashB, hashA, dataLen ); cubehashUpdateDigest( &ctx.cube, (byte*)hashB, (const byte*)hashA,
dataLen );
} }
break; break;
default: default:
@@ -306,8 +307,8 @@ int scanhash_timetravel( int thr_id, struct work *work, uint32_t max_nonce,
switch ( permutation[0] ) switch ( permutation[0] )
{ {
case 0: case 0:
// memcpy( &tt_mid.blake, &tt_ctx.blake, sizeof(tt_mid.blake) ); memcpy( &tt_mid.blake, &tt_ctx.blake, sizeof(tt_mid.blake) );
// sph_blake256( &tt_mid.blake, endiandata, 64 ); sph_blake512( &tt_mid.blake, endiandata, 64 );
break; break;
case 1: case 1:
memcpy( &tt_mid.bmw, &tt_ctx.bmw, sizeof(tt_mid.bmw) ); memcpy( &tt_mid.bmw, &tt_ctx.bmw, sizeof(tt_mid.bmw) );
@@ -335,13 +336,12 @@ int scanhash_timetravel( int thr_id, struct work *work, uint32_t max_nonce,
sph_keccak512( &tt_mid.keccak, endiandata, 64 ); sph_keccak512( &tt_mid.keccak, endiandata, 64 );
break; break;
case 6: case 6:
// init_luffa( &tt_mid.luffa, 512 ); memcpy( &tt_mid.luffa, &tt_ctx.luffa, sizeof(tt_mid.luffa ) );
// memcpy( &tt_mid.luffa, &tt_ctx.luffa, sizeof(tt_mid.luffa ) ); update_luffa( &tt_mid.luffa, endiandata, 64 );
// update_luffa( &tt_mid.luffa, endiandata, 64 );
break; break;
case 7: case 7:
memcpy( &tt_mid.cube, &tt_ctx.cube, sizeof(tt_mid.cube ) ); memcpy( &tt_mid.cube, &tt_ctx.cube, sizeof(tt_mid.cube ) );
cubehashUpdate( &tt_mid.cube, endiandata, 64 ); cubehashUpdate( &tt_mid.cube, (const byte*)endiandata, 64 );
break; break;
default: default:
break; break;
@@ -373,23 +373,6 @@ void timetravel_set_target( struct work* work, double job_diff )
work_set_target( work, job_diff / (256.0 * opt_diff_factor) ); work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
} }
// set_data_endian is a reasonable gate to use, it's called upon receipt
// of new work (new ntime) and has the right arg to access it.
void timetravel_calc_perm( struct work *work )
{
// We want to permute algorithms. To get started we
// initialize an array with a sorted sequence of unique
// integers where every integer represents its own algorithm.
int ntime, steps, i;
be32enc( &ntime, work->data[ STD_NTIME_INDEX ] );
steps = ( ntime - HASH_FUNC_BASE_TIMESTAMP )
% HASH_FUNC_COUNT_PERMUTATIONS;
for ( i = 0; i < HASH_FUNC_COUNT; i++ )
permutation[i] = i;
for ( i = 0; i < steps; i++ )
next_permutation( permutation, permutation + HASH_FUNC_COUNT );
}
bool register_timetravel_algo( algo_gate_t* gate ) bool register_timetravel_algo( algo_gate_t* gate )
{ {
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT; gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
@@ -398,7 +381,6 @@ bool register_timetravel_algo( algo_gate_t* gate )
gate->hash = (void*)&timetravel_hash; gate->hash = (void*)&timetravel_hash;
gate->set_target = (void*)&timetravel_set_target; gate->set_target = (void*)&timetravel_set_target;
gate->get_max64 = (void*)&get_max64_0xffffLL; gate->get_max64 = (void*)&get_max64_0xffffLL;
// gate->set_work_data_endian = (void*)&timetravel_calc_perm;
return true; return true;
}; };

avxdefs.h (351 lines changed)

@@ -1,4 +1,5 @@
// Some tools to help using AVX and AVX2 // Some tools to help using AVX and AVX2
// AVX support is required to include this header file, AVX2 optional.
#include <inttypes.h> #include <inttypes.h>
#include <immintrin.h> #include <immintrin.h>
@@ -15,7 +16,7 @@
typedef union typedef union
{ {
#if defined __AVX2__ #if defined (__AVX2__)
__m256i v256; __m256i v256;
#endif #endif
__m128i v128[ 2]; __m128i v128[ 2];
@@ -36,7 +37,8 @@ uint8_t v8 [16];
#if defined (__AVX2__) #if defined (__AVX2__)
// Replacements for vectorized data // AVX2 replacements for vectorized data
// n = number of __m256i (32 bytes) // n = number of __m256i (32 bytes)
inline void memset_zero_m256i( __m256i *dst, int n ) inline void memset_zero_m256i( __m256i *dst, int n )
{ {
@@ -48,68 +50,65 @@ inline void memset_m256i( __m256i *dst, const __m256i a, int n )
for ( int i = 0; i < n; i++ ) dst[i] = a; for ( int i = 0; i < n; i++ ) dst[i] = a;
} }
// Optimized copying using vectors. For misaligned data or more granularity
// optimized copying, first fit is usually best. If none of these works there // use __m128i versions or plain memcpy as appropriate.
// are __m128i versions or plain memcpy.
// Fixed size // Copying fixed size
// multi buffered copy for 64 bytes, the size of a cache line. // Multi buffered copy using __m256i.
// minimum alignment is 32 bytes, optimum for cache is 64. // minimum alignment is 32 bytes (__m256i), optimum 64 (cache line).
// src & dst are __m256i* // src & dst are __m256i*
inline void mcpy64_m256i( __m256i* dst, const __m256i* src )
// Copy 64 bytes (2x__m256i, one cache line), double buffered
inline void mcpy64_m256i( __m256i* dest, const __m256i* srce )
{ {
const __m256i* dest = dst; __m256i a = _mm256_load_si256( srce );
const __m256i* srce = src;
__m256i a = _mm256_load_si256( srce );
__m256i b = _mm256_load_si256( srce + 1 ); __m256i b = _mm256_load_si256( srce + 1 );
_mm256_store_si256( dest, a ); _mm256_store_si256( dest, a );
_mm256_store_si256( dest + 1, b ); _mm256_store_si256( dest + 1, b );
} }
inline void mcpy96_m256i( __m256i* dst, const __m256i* src ) // Copy 96 bytes (3x__m256i), triple buffered
inline void mcpy96_m256i( __m256i* dest, const __m256i* srce )
{ {
const __m256i* dest = dst; __m256i a = _mm256_load_si256( srce );
const __m256i* srce = src;
__m256i a = _mm256_load_si256( srce );
__m256i b = _mm256_load_si256( srce + 1 ); __m256i b = _mm256_load_si256( srce + 1 );
_mm256_store_si256( dest, a );
__m256i c = _mm256_load_si256( srce + 2 ); __m256i c = _mm256_load_si256( srce + 2 );
_mm256_store_si256( dest + 1, b ); _mm256_store_si256( dest, a );
_mm256_store_si256( dest + 2, c ); _mm256_store_si256( dest + 1, b );
_mm256_store_si256( dest + 2, c );
} }
inline void mcpy128_m256i( __m256i* dst, const __m256i* src ) // Copy 128 bytes (4x__m256i), quad buffered
inline void mcpy128_m256i( __m256i* dest, const __m256i* srce )
{ {
const __m256i* dest = dst;
const __m256i* srce = src;
__m256i a = _mm256_load_si256( srce ); __m256i a = _mm256_load_si256( srce );
__m256i b = _mm256_load_si256( srce + 1 ); __m256i b = _mm256_load_si256( srce + 1 );
__m256i c = _mm256_load_si256( srce + 2 ); __m256i c = _mm256_load_si256( srce + 2 );
_mm256_store_si256( dest , a );
__m256i d = _mm256_load_si256( srce + 3 ); __m256i d = _mm256_load_si256( srce + 3 );
_mm256_store_si256( dest + 1, b ); _mm256_store_si256( dest , a );
a = _mm256_load_si256( srce + 4 ); a = _mm256_load_si256( srce + 4 );
_mm256_store_si256( dest + 2, c ); _mm256_store_si256( dest + 1, b );
b = _mm256_load_si256( srce + 5 ); b = _mm256_load_si256( srce + 5 );
_mm256_store_si256( dest + 3, d ); _mm256_store_si256( dest + 2, c );
c = _mm256_load_si256( srce + 6 ); c = _mm256_load_si256( srce + 6 );
_mm256_store_si256( dest + 4, a ); _mm256_store_si256( dest + 3, d );
d = _mm256_load_si256( srce + 7 ); d = _mm256_load_si256( srce + 7 );
_mm256_store_si256( dest + 5, b ); _mm256_store_si256( dest + 4, a );
_mm256_store_si256( dest + 6, c ); _mm256_store_si256( dest + 5, b );
_mm256_store_si256( dest + 7, d ); _mm256_store_si256( dest + 6, c );
_mm256_store_si256( dest + 7, d );
} }
// Variable size // Copy variable size
//
// copy multiples of 64 bytes using quad buffering with interleave // copy multiples of 64 bytes using quad buffering with interleave
// of first read of next line with last write of current line. // of first read of next line with last write of current line.
// n is a multiple of 32 bytes (__m256i size) // n is a multiple of 32 bytes (__m256i size)
// minimum alignment: 32 bytes // minimum alignment: 32 bytes
// optimum alignment: 64 bytes (cache line size) // optimum alignment: 64 bytes (cache line size)
// minimum size.....: 128 bytes (4*n) // minimum size.....: 128 bytes (4*n)
// recommended size.: 256+ bytes // recommended size.: 256+ bytes (8*n)
// minimum increment: 128 bytes // minimum increment: 128 bytes
// Only the first load or store in a cache line triggers a memory access. // Only the first load or store in a cache line triggers a memory access.
// the subsequent actions are trivial because they benefit from data // the subsequent actions are trivial because they benefit from data
@@ -120,17 +119,16 @@ inline void mcpy128_m256i( __m256i* dst, const __m256i* src )
inline void mcpy_m256i_x4( __m256i *dst, const __m256i *src, const int n ) inline void mcpy_m256i_x4( __m256i *dst, const __m256i *src, const int n )
{ {
const __m256i* dest = dst; __m256i* end = dst + n;
const __m256i* srce = src;
// preload 1 cache line to absorb startup latency // preload 1 cache line to absorb startup latency
__m256i a = _mm256_load_si256( srce ); __m256i a = _mm256_load_si256( src );
__m256i b = _mm256_load_si256( srce + 1 ); __m256i b = _mm256_load_si256( src + 1 );
// start loading second line, queue while waiting // start loading second line, queued while waiting for 1st line.
__m256i c = _mm256_load_si256( srce + 2 ); __m256i c = _mm256_load_si256( src + 2 );
// start writing first line, as soon as data available, // start writing first line, as soon as data available,
// second line read will have priority on the bus // second line read will have priority on the bus
_mm256_store_si256( dest, a ); _mm256_store_si256( dst, a );
__m256i d; __m256i d;
int i; int i;
@@ -139,39 +137,37 @@ inline void mcpy_m256i_x4( __m256i *dst, const __m256i *src, const int n )
for ( i = 0; i < loops; i++ ) for ( i = 0; i < loops; i++ )
{ {
const int i4 = i*4; const int i4 = i*4;
const __m256i* si4 = (__m256i*)(srce + i4); const __m256i* si4 = src + i4;
const __m256i* di4 = (__m256i*)(dest + i4); __m256i* di4 = dst + i4;
d = _mm256_load_si256( si4 + 3 ); d = _mm256_load_si256( si4 + 3 );
_mm256_store_si256( di4 + 1, b ); _mm256_store_si256( di4 + 1, b );
// start loading next line // start loading next line
a = _mm256_load_si256( si4 + 4 ); a = _mm256_load_si256( si4 + 4 );
_mm256_store_si256( di4 + 2, c ); _mm256_store_si256( di4 + 2, c );
b = _mm256_load_si256( si4 + 5 ); b = _mm256_load_si256( si4 + 5 );
_mm256_store_si256( di4 + 3, d ); _mm256_store_si256( di4 + 3, d );
c = _mm256_load_si256( si4 + 6 ); c = _mm256_load_si256( si4 + 6 );
// start writing next line // start writing next line
_mm256_store_si256( di4 + 4, a ); _mm256_store_si256( di4 + 4, a );
} }
// finish last line // finish last line
d = _mm256_load_si256( srce + n - 4 ); d = _mm256_load_si256( end - 4 );
_mm256_store_si256( dest + n - 3, b ); _mm256_store_si256( end - 3, b );
_mm256_store_si256( dest + n - 2, c ); _mm256_store_si256( end - 2, c );
_mm256_store_si256( dest + n - 1, d ); _mm256_store_si256( end - 1, d );
} }
// basic __m256i memcpy // basic aligned __m256i memcpy
inline void memcpy_m256i( __m256i *dst, const __m256i *src, int n ) inline void memcpy_m256i( __m256i *dst, const __m256i *src, int n )
{ {
for ( int i = 0; i < n; i ++ ) dst[i] = src[i]; for ( int i = 0; i < n; i ++ ) dst[i] = src[i];
} }
// For cheating with pointer types // For cheating with pointer types
// p = any aligned pointer // p = any aligned pointer
// returns p as pointer to vector type // returns p as pointer to vector type, not very useful
#define castp_m256i(p) ((__m256i*)(p)) #define castp_m256i(p) ((__m256i*)(p))
#define castp_m128i(p) ((__m128i*)(p)) #define castp_m128i(p) ((__m128i*)(p))
@@ -198,7 +194,7 @@ inline void memcpy_m256i( __m256i *dst, const __m256i *src, int n )
//__m256i _mm256_inserti128_si256(__m256i a, __m128i b, const int mask); //__m256i _mm256_inserti128_si256(__m256i a, __m128i b, const int mask);
// Rotate bits in 4 uint64 (3 instructions) // Rotate bits in 4 uint64 (3 instructions)
// __m256i mm256_rotr_64( __m256i, int ) // w = packed 64 bit data, c = number of bits to rotate
#define mm256_rotr_64( w, c ) \ #define mm256_rotr_64( w, c ) \
_mm256_or_si256( _mm256_srli_epi64(w, c), _mm256_slli_epi64(w, 64 - c) ) _mm256_or_si256( _mm256_srli_epi64(w, c), _mm256_slli_epi64(w, 64 - c) )
@@ -219,6 +215,7 @@ inline void memcpy_m256i( __m256i *dst, const __m256i *src, int n )
_mm256_permute4x64_epi64( w, 0x93 ) _mm256_permute4x64_epi64( w, 0x93 )
// shift 256 bits by n*64 bits (4 uint64 by n uint64) // shift 256 bits by n*64 bits (4 uint64 by n uint64)
// mm256_slli256_nx64( w )
#define mm256_slli256_1x64( w ) \ #define mm256_slli256_1x64( w ) \
_mm256_and_si256( mm256_rotl256_1x64( w ), \ _mm256_and_si256( mm256_rotl256_1x64( w ), \
_mm256_set_epi64x( 0, \ _mm256_set_epi64x( 0, \
@@ -231,6 +228,7 @@ inline void memcpy_m256i( __m256i *dst, const __m256i *src, int n )
0 ) ) 0 ) )
*/ */
// these ones probably are backward
#define mm256_slli256_2x64( w ) \ #define mm256_slli256_2x64( w ) \
_mm256_and_si256( mm256_swap128( w ), \ _mm256_and_si256( mm256_swap128( w ), \
_mm256_set_epi64x( 0xffffffffffffffffull, \ _mm256_set_epi64x( 0xffffffffffffffffull, \
@@ -271,9 +269,26 @@ inline void memcpy_m256i( __m256i *dst, const __m256i *src, int n )
0xffffffffffffffffull ) ) 0xffffffffffffffffull ) )
*/ */
// vectored version of BYTES_SWAP32
inline __m256i mm256_byteswap_epi32( __m256i x )
{
__m256i x1 = _mm256_and_si256( x,
_mm256_set_epi32( 0x0000ff00, 0x0000ff00, 0x0000ff00, 0x0000ff00,
0x0000ff00, 0x0000ff00, 0x0000ff00, 0x0000ff00 ) );
__m256i x2 = _mm256_and_si256( x,
_mm256_set_epi32( 0x00ff0000, 0x00ff0000, 0x00ff0000, 0x00ff0000,
0x00ff0000, 0x00ff0000, 0x00ff0000, 0x00ff0000 ) );
__m256i x0 = _mm256_slli_epi32( x, 24 ); // x0 = x << 24
x1 = _mm256_slli_epi32( x1, 8 ); // x1 = mask(x) << 8
x2 = _mm256_srli_epi32( x2, 8 ); // x2 = mask(x) >> 8
__m256i x3 = _mm256_srli_epi32( x, 24 ); // x3 = x >> 24
return _mm256_or_si256( _mm256_or_si256( x0, x1 ),
_mm256_or_si256( x2, x3 ) );
}
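For reference, the same 32 bit byte swap can be done with one in-lane byte
shuffle on AVX2. A hedged alternative sketch, not part of this commit
(mm256_byteswap_epi32_shuffle is an illustrative name):

inline __m256i mm256_byteswap_epi32_shuffle( __m256i x )
{
  // pshufb control: reverse the 4 bytes of each 32 bit word, per 128 bit lane
  const __m256i ctl = _mm256_set_epi8( 12,13,14,15,  8, 9,10,11,
                                        4, 5, 6, 7,  0, 1, 2, 3,
                                       12,13,14,15,  8, 9,10,11,
                                        4, 5, 6, 7,  0, 1, 2, 3 );
  return _mm256_shuffle_epi8( x, ctl );
}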
#endif // AVX2 #endif // AVX2
// Replacements for vectorized data // AVX replacements for vectorized data
inline void memset_zero_m128i( __m128i *dst, int n ) inline void memset_zero_m128i( __m128i *dst, int n )
{ {
@@ -287,52 +302,80 @@ inline void memset_m128i( __m128i *dst, const __m128i a, int n )
// __m128i versions of optimized copying // __m128i versions of optimized copying
inline void mcpy32_m128i( __m128i* dst, const __m128i* src ) // Copy 32 bytes (2x__m128i), double buffered
inline void mcpy32_m128i( __m128i* dest, const __m128i* srce )
{ {
const __m128i* dest = dst;
const __m128i* srce = src;
// 4 loads fills cache line // 4 loads fills cache line
__m128i a = _mm_load_si128( srce ); __m128i a = _mm_load_si128( srce );
__m128i b = _mm_load_si128( srce + 1 ); __m128i b = _mm_load_si128( srce + 1 );
_mm_store_si128( dest, a ); _mm_store_si128( dest, a );
_mm_store_si128( dest + 1, b ); _mm_store_si128( dest + 1, b );
} }
inline void mcpy64_m128i( __m128i* dst, const __m128i* src ) // Copy 64 Bytes (4x__m128i), quad buffered
inline void mcpy64_m128i( __m128i* dest, const __m128i* srce )
{ {
const __m128i* dest = dst;
const __m128i* srce = src;
// 4 loads fills cache line // 4 loads fills cache line
__m128i a = _mm_load_si128( srce ); __m128i a = _mm_load_si128( srce );
__m128i b = _mm_load_si128( srce + 1 ); __m128i b = _mm_load_si128( srce + 1 );
__m128i c = _mm_load_si128( srce + 2 ); __m128i c = _mm_load_si128( srce + 2 );
__m128i d = _mm_load_si128( srce + 3 ); __m128i d = _mm_load_si128( srce + 3 );
// need to store a before overwriting it // need to store a before overwriting it
_mm_store_si128( dest, a ); _mm_store_si128( dest, a );
a = _mm_load_si128( srce + 4 ); a = _mm_load_si128( srce + 4 );
_mm_store_si128( dest + 1, b ); _mm_store_si128( dest + 1, b );
b = _mm_load_si128( srce + 5 ); b = _mm_load_si128( srce + 5 );
_mm_store_si128( dest + 2, c ); _mm_store_si128( dest + 2, c );
c = _mm_load_si128( srce + 6 ); c = _mm_load_si128( srce + 6 );
_mm_store_si128( dest + 3, d ); _mm_store_si128( dest + 3, d );
d = _mm_load_si128( srce + 7 ); d = _mm_load_si128( srce + 7 );
_mm_store_si128( dest + 4, a ); _mm_store_si128( dest + 4, a );
d = _mm_load_si128( srce + 7 ); _mm_store_si128( dest + 5, b );
_mm_store_si128( dest + 5, b ); _mm_store_si128( dest + 6, c );
_mm_store_si128( dest + 6, c ); _mm_store_si128( dest + 7, d );
_mm_store_si128( dest + 7, d ); }
// Copy 96 Bytes (6x__m128i), quad buffered
inline void mcpy96_m128i( __m128i* dest, const __m128i* srce )
{
// 4 loads fills cache line
__m128i a = _mm_load_si128( srce );
__m128i b = _mm_load_si128( srce + 1 );
__m128i c = _mm_load_si128( srce + 2 );
__m128i d = _mm_load_si128( srce + 3 );
// need to store a before overwriting it
_mm_store_si128( dest, a );
a = _mm_load_si128( srce + 4 );
_mm_store_si128( dest + 1, b );
b = _mm_load_si128( srce + 5 );
_mm_store_si128( dest + 2, c );
c = _mm_load_si128( srce + 6 );
_mm_store_si128( dest + 3, d );
d = _mm_load_si128( srce + 7 );
_mm_store_si128( dest + 4, a );
a = _mm_load_si128( srce + 8 );
_mm_store_si128( dest + 5, b );
b = _mm_load_si128( srce + 9 );
_mm_store_si128( dest + 6, c );
c = _mm_load_si128( srce + 10 );
_mm_store_si128( dest + 7, d );
d = _mm_load_si128( srce + 11 );
_mm_store_si128( dest + 8, a );
_mm_store_si128( dest + 9, b );
_mm_store_si128( dest + 10, c );
_mm_store_si128( dest + 11, d );
} }
// Variable length // Variable length
//
// copy multiples of 16 bytes using quad buffering. // Copy multiples of 16 bytes (__m128i) using quad buffering.
// n is a multiple of 16 bytes (__m128i size) // n is a multiple of 16 bytes (__m128i size)
// minimum alignment: 16 bytes // minimum alignment: 16 bytes
// optimum alignment: 64 bytes (cache line size) // optimum alignment: 64 bytes (cache line size)
// minimum size.....: 32 bytes (4*n) // minimum size.....: 64 bytes (4*n)
// recommended size.: 96+ bytes // recommended size.: 128+ bytes (8*n)
// minimum increment: 32 bytes // minimum increment: 64 bytes
inline void memcpy_m128i_x4( __m128i *dst, const __m128i *src, const int n ) inline void mcpy_m128i_x4( __m128i *dst, const __m128i *src, const int n )
{ {
// preload 1 cache line to absorb startup latency // preload 1 cache line to absorb startup latency
__m128i a = _mm_load_si128( src ); __m128i a = _mm_load_si128( src );
@@ -342,36 +385,92 @@ inline void memcpy_m128i_x4( __m128i *dst, const __m128i *src, const int n )
int i; int i;
const int loops = n/4 - 1; const int loops = n/4 - 1;
const __m128i* dst_n = (__m128i*)(dst + n); __m128i* end = dst + n;
for ( i = 0; i < loops; i++ ) for ( i = 0; i < loops; i++ )
{ {
const int i4 = i*4; const int i4 = i*4;
const __m128i* si4 = (__m128i*)(src + i4); const __m128i* si4 = src + i4;
const __m128i* di4 = (__m128i*)(dst + i4); __m128i* di4 = dst + i4;
// need to free a before overwriting it // need to free a before overwriting it
_mm_store_si128( di4, a ); _mm_store_si128( di4, a );
a = _mm_load_si128( di4 + 4 ); a = _mm_load_si128( si4 + 4 );
_mm_store_si128( di4 + 1, b ); _mm_store_si128( di4 + 1, b );
b = _mm_load_si128( di4 + 5 ); b = _mm_load_si128( si4 + 5 );
_mm_store_si128( di4 + 2, c ); _mm_store_si128( di4 + 2, c );
c = _mm_load_si128( di4 + 6 ); c = _mm_load_si128( si4 + 6 );
_mm_store_si128( di4 + 3, d ); _mm_store_si128( di4 + 3, d );
d = _mm_load_si128( di4 + 7 ); d = _mm_load_si128( si4 + 7 );
} }
_mm_store_si128( dst_n - 4, a ); _mm_store_si128( end - 4, a );
_mm_store_si128( dst_n - 3, b ); _mm_store_si128( end - 3, b );
_mm_store_si128( dst_n - 2, c ); _mm_store_si128( end - 2, c );
_mm_store_si128( dst_n - 1, d ); _mm_store_si128( end - 1, d );
} }
// basic __m128i copy // basic aligned __m128i copy
inline void memcpy_m128i( __m128i *dst, const __m128i *src, int n ) inline void memcpy_m128i( __m128i *dst, const __m128i *src, int n )
{ {
for ( int i = 0; i < n; i ++ ) dst[i] = src[i]; for ( int i = 0; i < n; i ++ ) dst[i] = src[i];
} }
inline void memcpy_64( uint64_t* dst, const uint64_t* src, int n )
{
for ( int i = 0; i < n; i++ )
dst[i] = src[i];
}
// Smart generic mem copy optimized for copying large data, n = bytes.
// Most efficient with 256 bit aligned data and size a multiple of 4*256,
// but flexible enough to handle any alignment, any size with performance
// considerations. For common fixed sizes use the appropriate functions above.
inline void mcpy( void* dst, const void* src, int n )
{
// enforce alignment and minimum size for quad buffered vector copy
#if defined (__AVX2__)
// Try 256 bit copy
if ( ( (uintptr_t)dst % 32 == 0 ) && ( (uintptr_t)src % 32 == 0 ) )
{
if ( n % 128 == 0 )
{
mcpy_m256i_x4( (__m256i*)dst, (const __m256i*)src, n/32 );
return;
}
else
{
memcpy_m256i( (__m256i*)dst, (const __m256i*)src, n/32 );
return;
}
}
else
#endif
// Try 128 bit copy
if ( ( (uintptr_t)dst % 16 == 0 ) && ( (uintptr_t)src % 16 == 0 ) )
{
if ( n % 64 == 0 )
{
mcpy_m128i_x4( (__m128i*)dst, (const __m128i*)src, n/16 );
return;
}
else
{
memcpy_m128i( (__m128i*)dst, (const __m128i*)src, n/16 );
return;
}
}
// Try 64 bit copy
else if ( ( (uintptr_t)dst % 8 == 0 ) && ( (uintptr_t)src % 8 == 0 )
&& ( n % 8 == 0 ) )
{
memcpy_64( (uint64_t*)dst, (const uint64_t*)src, n/8 );
return;
}
// slow copy
memcpy( dst, src, n );
}
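A minimal usage sketch for mcpy under the alignment rules above (the buffers
and sizes are illustrative):

void mcpy_demo()
{
   static uint8_t src[256] __attribute__((aligned(32)));
   static uint8_t dst[256] __attribute__((aligned(32)));
   // both pointers 32 byte aligned and n a multiple of 128, so this takes
   // the quad buffered __m256i path when compiled with AVX2
   mcpy( dst, src, 256 );
}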
// For cheating with pointer types // For cheating with pointer types
// p = any aligned pointer // p = any aligned pointer
@@ -408,14 +507,16 @@ inline void memcpy_m128i( __m128i *dst, const __m128i *src, int n )
// mm256_rotl256_1x64 when avx2 is not available or data is already in __m128i // format. uses one local
// format. uses one local // format. uses one local
//void mm128_rotl256_1x64( __m128i, __m128i ) //void mm128_rotl256_1x64( __m128i, __m128i )
#define mm128_rotl256_1x64(s0, s1) do { \ #define mm128_rotl256_1x64(s0,s1) do { \
__m128i t; \ __m128i t; \
s0 = mm128_swap64( s0); \ s0 = mm128_swap64(s0); \
s1 = mm128_swap64( s1); \ s1 = mm128_swap64(s1); \
t = _mm_or_si128( _mm_and_si128( s0, _mm_set_epi64x(0ull,0xffffffffffffffffull) ), \ t = _mm_or_si128( \
_mm_and_si128( s1, _mm_set_epi64x(0xffffffffffffffffull,0ull) ) ); \ _mm_and_si128( s0, _mm_set_epi64x(0ull,0xffffffffffffffffull) ), \
s1 = _mm_or_si128( _mm_and_si128( s0, _mm_set_epi64x(0xffffffffffffffffull,0ull) ), \ _mm_and_si128( s1, _mm_set_epi64x(0xffffffffffffffffull,0ull) ) ); \
_mm_and_si128( s1, _mm_set_epi64x(0ull,0xffffffffffffffffull) ) ); \ s1 = _mm_or_si128( \
_mm_and_si128( s0, _mm_set_epi64x(0xffffffffffffffffull,0ull) ), \
_mm_and_si128( s1, _mm_set_epi64x(0ull,0xffffffffffffffffull) ) ); \
s0 = t; \ s0 = t; \
} while(0) } while(0)
@@ -423,10 +524,26 @@ inline void memcpy_m128i( __m128i *dst, const __m128i *src, int n )
__m128i t; \ __m128i t; \
s0 = mm128_swap64( s0); \ s0 = mm128_swap64( s0); \
s1 = mm128_swap64( s1); \ s1 = mm128_swap64( s1); \
t = _mm_or_si128( _mm_and_si128( s0, _mm_set_epi64x(0xffffffffffffffffull,0ull) ), \ t = _mm_or_si128( \
_mm_and_si128( s1, _mm_set_epi64x(0ull,0xffffffffffffffffull) ) ); \ _mm_and_si128( s0, _mm_set_epi64x(0xffffffffffffffffull,0ull) ), \
s1 = _mm_or_si128( _mm_and_si128( s0, _mm_set_epi64x(0ull,0xffffffffffffffffull) ), \ _mm_and_si128( s1, _mm_set_epi64x(0ull,0xffffffffffffffffull) ) ); \
_mm_and_si128( s1, _mm_set_epi64x(0xffffffffffffffffull,0ull) ) ); \ s1 = _mm_or_si128( \
_mm_and_si128( s0, _mm_set_epi64x(0ull,0xffffffffffffffffull) ), \
_mm_and_si128( s1, _mm_set_epi64x(0xffffffffffffffffull,0ull) ) ); \
s0 = t; \ s0 = t; \
} while(0) } while(0)
// vectored version of BYTES_SWAP32
inline __m128i mm_byteswap_epi32( __m128i x )
{
__m128i x1 = _mm_and_si128( x, _mm_set_epi32( 0x0000ff00, 0x0000ff00,
0x0000ff00, 0x0000ff00 ) );
__m128i x2 = _mm_and_si128( x, _mm_set_epi32( 0x00ff0000, 0x00ff0000,
0x00ff0000, 0x00ff0000 ) );
__m128i x0 = _mm_slli_epi32( x, 24 ); // x0 = x << 24
x1 = _mm_slli_epi32( x1, 8 ); // x1 = mask(x) << 8
x2 = _mm_srli_epi32( x2, 8 ); // x2 = mask(x) >> 8
__m128i x3 = _mm_srli_epi32( x, 24 ); // x3 = x >> 24
return _mm_or_si128( _mm_or_si128( x0, x1 ), _mm_or_si128( x2, x3 ) );
}
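mm_byteswap_epi32 should agree element-for-element with a scalar 32 bit swap;
a small self-check sketch assuming GCC's __builtin_bswap32 (test values are
arbitrary):

#include <assert.h>

void mm_byteswap_selfcheck()
{
   uint32_t in[4] __attribute__((aligned(16))) =
                             { 0x01020304, 0xaabbccdd, 0, 0xffff0000 };
   uint32_t out[4] __attribute__((aligned(16)));
   _mm_store_si128( (__m128i*)out,
                    mm_byteswap_epi32( _mm_load_si128( (__m128i*)in ) ) );
   for ( int i = 0; i < 4; i++ )
      assert( out[i] == __builtin_bswap32( in[i] ) );
}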

configure.ac

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.5.8]) AC_INIT([cpuminer-opt], [3.5.9])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

cpu-miner.c

@@ -227,7 +227,7 @@ static void affine_to_cpu_mask(int id, unsigned long mask) { }
// not very useful, just index the array directly. // not very useful, just index the array directly.
// but declaring this function in miner.h eliminates // but declaring this function in miner.h eliminates
// an annoying compiler warning for not using a static. // an annoying compiler warning for not using a static.
char* algo_name( enum algos a ) {return algo_names[a];} const char* algo_name( enum algos a ) {return algo_names[a];}
void get_currentalgo(char* buf, int sz) void get_currentalgo(char* buf, int sz)
{ {
@@ -1432,7 +1432,7 @@ int64_t get_max64_0x3fffffLL() { return 0x3fffffLL; }
int64_t get_max64_0x1ffff() { return 0x1ffff; } int64_t get_max64_0x1ffff() { return 0x1ffff; }
int64_t get_max64_0xffffLL() { return 0xffffLL; }; int64_t get_max64_0xffffLL() { return 0xffffLL; };
// default
void sha256d_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx ) void sha256d_gen_merkle_root( char* merkle_root, struct stratum_ctx* sctx )
{ {
sha256d(merkle_root, sctx->job.coinbase, (int) sctx->job.coinbase_size); sha256d(merkle_root, sctx->job.coinbase, (int) sctx->job.coinbase_size);
@@ -1470,14 +1470,6 @@ void swab_work_data( struct work *work )
work->data[i] = swab32( work->data[i] ); work->data[i] = swab32( work->data[i] );
} }
void std_build_extraheader( struct work* work, struct stratum_ctx* sctx )
{
work->data[ algo_gate.ntime_index ] = le32dec(sctx->job.ntime);
work->data[ algo_gate.nbits_index ] = le32dec(sctx->job.nbits);
work->data[20] = 0x80000000;
work->data[31] = 0x00000280;
}
double std_calc_network_diff( struct work* work ) double std_calc_network_diff( struct work* work )
{ {
// sample for diff 43.281 : 1c05ea29 // sample for diff 43.281 : 1c05ea29
@@ -1551,6 +1543,17 @@ void jr2_get_new_work( struct work* work, struct work* g_work, int thr_id,
++(*nonceptr); ++(*nonceptr);
} }
bool std_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
int thr_id )
{
if ( have_stratum && !work->data[0] && !opt_benchmark )
{
sleep(1);
return false;
}
return true;
}
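An algo that needs extra gating can override this default in its register
function; a hypothetical sketch (my_ready_to_mine and its height check are
illustrative, not part of this commit):

bool my_ready_to_mine( struct work* work, struct stratum_ctx* stratum,
                       int thr_id )
{
   // apply the standard checks first
   if ( !std_ready_to_mine( work, stratum, thr_id ) )
      return false;
   // hypothetical extra condition: wait until a block height is known
   return stratum->bloc_height != 0;
}

// in the algo's register function:
// gate->ready_to_mine = (void*)&my_ready_to_mine;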
static void *miner_thread( void *userdata ) static void *miner_thread( void *userdata )
{ {
struct thr_info *mythr = (struct thr_info *) userdata; struct thr_info *mythr = (struct thr_info *) userdata;
@@ -1676,8 +1679,9 @@ static void *miner_thread( void *userdata )
} // do_this_thread } // do_this_thread
algo_gate.resync_threads( &work ); algo_gate.resync_threads( &work );
// prevent dupes is called on every loop and has useful args so it if ( !algo_gate.ready_to_mine( &work, &stratum, thr_id ) )
// is being used by zcoin to pass along the work height. continue;
/*
if ( algo_gate.prevent_dupes( &work, &stratum, thr_id ) ) if ( algo_gate.prevent_dupes( &work, &stratum, thr_id ) )
continue; continue;
// prevent scans before a job is received // prevent scans before a job is received
@@ -1686,6 +1690,7 @@ static void *miner_thread( void *userdata )
sleep(1); sleep(1);
continue; continue;
} }
*/
// conditional mining // conditional mining
if (!wanna_mine(thr_id)) if (!wanna_mine(thr_id))
{ {
@@ -2068,23 +2073,16 @@ out:
return ret; return ret;
} }
void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work ) void std_build_extraheader( struct work* g_work, struct stratum_ctx* sctx )
{ {
unsigned char merkle_root[64] = { 0 }; uchar merkle_root[64] = { 0 };
int i;
size_t t; size_t t;
int i;
pthread_mutex_lock( &sctx->work_lock );
free( g_work->job_id );
g_work->job_id = strdup( sctx->job.job_id );
g_work->xnonce2_len = sctx->xnonce2_size;
g_work->xnonce2 = (uchar*) realloc( g_work->xnonce2, sctx->xnonce2_size );
memcpy( g_work->xnonce2, sctx->job.xnonce2, sctx->xnonce2_size );
algo_gate.gen_merkle_root( merkle_root, sctx ); algo_gate.gen_merkle_root( merkle_root, sctx );
/* Increment extranonce2 */ // Increment extranonce2
for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ ); for ( t = 0; t < sctx->xnonce2_size && !( ++sctx->job.xnonce2[t] ); t++ );
/* Assemble block header */ // Assemble block header
memset( g_work->data, 0, sizeof(g_work->data) ); memset( g_work->data, 0, sizeof(g_work->data) );
g_work->data[0] = le32dec( sctx->job.version ); g_work->data[0] = le32dec( sctx->job.version );
for ( i = 0; i < 8; i++ ) for ( i = 0; i < 8; i++ )
@@ -2092,7 +2090,23 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
for ( i = 0; i < 8; i++ ) for ( i = 0; i < 8; i++ )
g_work->data[9 + i] = be32dec( (uint32_t *) merkle_root + i ); g_work->data[9 + i] = be32dec( (uint32_t *) merkle_root + i );
g_work->data[ algo_gate.ntime_index ] = le32dec(sctx->job.ntime);
g_work->data[ algo_gate.nbits_index ] = le32dec(sctx->job.nbits);
g_work->data[20] = 0x80000000;
g_work->data[31] = 0x00000280;
}
void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
{
pthread_mutex_lock( &sctx->work_lock );
free( g_work->job_id );
g_work->job_id = strdup( sctx->job.job_id );
g_work->xnonce2_len = sctx->xnonce2_size;
g_work->xnonce2 = (uchar*) realloc( g_work->xnonce2, sctx->xnonce2_size );
memcpy( g_work->xnonce2, sctx->job.xnonce2, sctx->xnonce2_size );
algo_gate.build_extraheader( g_work, sctx ); algo_gate.build_extraheader( g_work, sctx );
net_diff = algo_gate.calc_network_diff( g_work ); net_diff = algo_gate.calc_network_diff( g_work );
algo_gate.set_work_data_endian( g_work ); algo_gate.set_work_data_endian( g_work );
pthread_mutex_unlock( &sctx->work_lock ); pthread_mutex_unlock( &sctx->work_lock );
@@ -2105,7 +2119,7 @@ void std_stratum_gen_work( struct stratum_ctx *sctx, struct work *g_work )
g_work->job_id, xnonce2str, swab32( g_work->data[17] ) ); g_work->job_id, xnonce2str, swab32( g_work->data[17] ) );
free( xnonce2str ); free( xnonce2str );
} }
/* set target */
algo_gate.set_target( g_work, sctx->job.diff ); algo_gate.set_target( g_work, sctx->job.diff );
if ( stratum_diff != sctx->job.diff ) if ( stratum_diff != sctx->job.diff )
@@ -2144,98 +2158,100 @@ static void *stratum_thread(void *userdata )
{ {
int failures = 0; int failures = 0;
if (stratum_need_reset) { if ( stratum_need_reset )
{
stratum_need_reset = false; stratum_need_reset = false;
stratum_disconnect(&stratum); stratum_disconnect( &stratum );
if (strcmp(stratum.url, rpc_url)) { if ( strcmp( stratum.url, rpc_url ) )
free(stratum.url); {
stratum.url = strdup(rpc_url); free( stratum.url );
stratum.url = strdup( rpc_url );
applog(LOG_BLUE, "Connection changed to %s", short_url); applog(LOG_BLUE, "Connection changed to %s", short_url);
} else if (!opt_quiet) {
applog(LOG_DEBUG, "Stratum connection reset");
} }
else if ( !opt_quiet )
applog(LOG_DEBUG, "Stratum connection reset");
} }
while ( !stratum.curl ) while ( !stratum.curl )
{ {
pthread_mutex_lock(&g_work_lock); pthread_mutex_lock( &g_work_lock );
g_work_time = 0; g_work_time = 0;
pthread_mutex_unlock(&g_work_lock); pthread_mutex_unlock( &g_work_lock );
restart_threads(); restart_threads();
if (!stratum_connect(&stratum, stratum.url) if ( !stratum_connect( &stratum, stratum.url )
|| !stratum_subscribe(&stratum) || !stratum_subscribe( &stratum )
|| !stratum_authorize(&stratum, rpc_user, rpc_pass)) || !stratum_authorize( &stratum, rpc_user, rpc_pass ) )
{ {
stratum_disconnect(&stratum); stratum_disconnect( &stratum );
if (opt_retries >= 0 && ++failures > opt_retries) if (opt_retries >= 0 && ++failures > opt_retries)
{ {
applog(LOG_ERR, "...terminating workio thread"); applog(LOG_ERR, "...terminating workio thread");
tq_push(thr_info[work_thr_id].q, NULL); tq_push(thr_info[work_thr_id].q, NULL);
goto out; goto out;
} }
if (!opt_benchmark) if (!opt_benchmark)
applog(LOG_ERR, "...retry after %d seconds", opt_fail_pause); applog(LOG_ERR, "...retry after %d seconds", opt_fail_pause);
sleep(opt_fail_pause); sleep(opt_fail_pause);
} }
if (jsonrpc_2) if (jsonrpc_2)
{ {
work_free(&g_work); work_free(&g_work);
work_copy(&g_work, &stratum.work); work_copy(&g_work, &stratum.work);
} }
} }
if (stratum.job.job_id && if ( stratum.job.job_id &&
(!g_work_time || strcmp(stratum.job.job_id, g_work.job_id)) ) ( !g_work_time || strcmp( stratum.job.job_id, g_work.job_id ) ) )
{ {
pthread_mutex_lock(&g_work_lock); pthread_mutex_lock(&g_work_lock);
algo_gate.stratum_gen_work( &stratum, &g_work ); algo_gate.stratum_gen_work( &stratum, &g_work );
time(&g_work_time); time(&g_work_time);
pthread_mutex_unlock(&g_work_lock); pthread_mutex_unlock(&g_work_lock);
restart_threads(); // restart_threads();
if (stratum.job.clean || jsonrpc_2) if (stratum.job.clean || jsonrpc_2)
{ {
static uint32_t last_bloc_height; static uint32_t last_bloc_height;
if ( last_bloc_height != stratum.bloc_height ) if ( last_bloc_height != stratum.bloc_height )
{ {
last_bloc_height = stratum.bloc_height; last_bloc_height = stratum.bloc_height;
if ( !opt_quiet ) if ( !opt_quiet )
{ {
if (net_diff > 0.) if (net_diff > 0.)
applog(LOG_BLUE, "%s block %d, diff %.3f", applog(LOG_BLUE, "%s block %d, diff %.3f",
algo_names[opt_algo], stratum.bloc_height, net_diff); algo_names[opt_algo], stratum.bloc_height, net_diff);
else else
applog(LOG_BLUE, "%s %s block %d", short_url, applog(LOG_BLUE, "%s %s block %d", short_url,
algo_names[opt_algo], stratum.bloc_height); algo_names[opt_algo], stratum.bloc_height);
} }
} }
restart_threads(); restart_threads();
} }
else if (opt_debug && !opt_quiet) else if (opt_debug && !opt_quiet)
{ {
applog(LOG_BLUE, "%s asks job %d for block %d", short_url, applog(LOG_BLUE, "%s asks job %d for block %d", short_url,
strtoul(stratum.job.job_id, NULL, 16), stratum.bloc_height); strtoul(stratum.job.job_id, NULL, 16), stratum.bloc_height);
} }
} } // stratum.job.job_id
if ( !stratum_socket_full( &stratum, opt_timeout ) ) if ( !stratum_socket_full( &stratum, opt_timeout ) )
{ {
applog(LOG_ERR, "Stratum connection timeout"); applog(LOG_ERR, "Stratum connection timeout");
s = NULL; s = NULL;
} }
else else
s = stratum_recv_line(&stratum); s = stratum_recv_line(&stratum);
if ( !s ) if ( !s )
{ {
stratum_disconnect(&stratum); stratum_disconnect(&stratum);
applog(LOG_ERR, "Stratum connection interrupted"); applog(LOG_ERR, "Stratum connection interrupted");
continue; continue;
} }
if (!stratum_handle_method(&stratum, s)) if (!stratum_handle_method(&stratum, s))
stratum_handle_response(s); stratum_handle_response(s);
free(s); free(s);
} } // loop
out: out:
return NULL; return NULL;
} }

miner.h (16 lines changed)

@@ -47,14 +47,14 @@
# endif # endif
#endif #endif
/*
#ifndef min #ifndef min
#define min(a,b) (a>b ? b : a) #define min(a,b) (a>b ? (b) :(a))
#endif #endif
#ifndef max #ifndef max
#define max(a,b) (a<b ? b : a) #define max(a,b) (a<b ? (b) : (a))
#endif #endif
*/
//#ifdef HAVE_ALLOCA_H //#ifdef HAVE_ALLOCA_H
//# include <alloca.h> //# include <alloca.h>
@@ -479,12 +479,14 @@ enum algos {
ALGO_BASTION, ALGO_BASTION,
ALGO_BLAKE, ALGO_BLAKE,
ALGO_BLAKECOIN, ALGO_BLAKECOIN,
// ALGO_BLAKE2B,
ALGO_BLAKE2S, ALGO_BLAKE2S,
ALGO_BMW, ALGO_BMW,
ALGO_C11, ALGO_C11,
ALGO_CRYPTOLIGHT, ALGO_CRYPTOLIGHT,
ALGO_CRYPTONIGHT, ALGO_CRYPTONIGHT,
ALGO_DECRED, ALGO_DECRED,
ALGO_DEEP,
ALGO_DROP, ALGO_DROP,
ALGO_FRESH, ALGO_FRESH,
ALGO_GROESTL, ALGO_GROESTL,
@@ -537,12 +539,14 @@ static const char* const algo_names[] = {
"bastion", "bastion",
"blake", "blake",
"blakecoin", "blakecoin",
// "blake2b",
"blake2s", "blake2s",
"bmw", "bmw",
"c11", "c11",
"cryptolight", "cryptolight",
"cryptonight", "cryptonight",
"decred", "decred",
"deep",
"drop", "drop",
"fresh", "fresh",
"groestl", "groestl",
@@ -589,7 +593,7 @@ static const char* const algo_names[] = {
"\0" "\0"
}; };
char* algo_name( enum algos a ); const char* algo_name( enum algos a );
extern enum algos opt_algo; extern enum algos opt_algo;
extern bool opt_debug; extern bool opt_debug;
@@ -650,12 +654,14 @@ Options:\n\
bastion\n\ bastion\n\
blake Blake-256 (SFR)\n\ blake Blake-256 (SFR)\n\
blakecoin blake256r8\n\ blakecoin blake256r8\n\
"/* blake2b Sia\n*/"\
blake2s Blake-2 S\n\ blake2s Blake-2 S\n\
bmw BMW 256\n\ bmw BMW 256\n\
c11 Flax\n\ c11 Flax\n\
cryptolight Cryptonight-light\n\ cryptolight Cryptonight-light\n\
cryptonight cryptonote, Monero (XMR)\n\ cryptonight cryptonote, Monero (XMR)\n\
decred\n\ decred\n\
deep Deepcoin (DCN)\n\
drop Dropcoin\n\ drop Dropcoin\n\
fresh Fresh\n\ fresh Fresh\n\
groestl groestl\n\ groestl groestl\n\

util.c (8 lines changed)

@@ -1797,10 +1797,10 @@ static bool stratum_set_difficulty(struct stratum_ctx *sctx, json_t *params)
sctx->next_diff = diff; sctx->next_diff = diff;
pthread_mutex_unlock(&sctx->work_lock); pthread_mutex_unlock(&sctx->work_lock);
// /* store for api stats */ /* store for api stats */
// stratum_diff = diff; stratum_diff = diff;
//
// applog(LOG_WARNING, "Stratum difficulty set to %g", diff); applog(LOG_WARNING, "Stratum difficulty set to %g", diff);
return true; return true;
} }