This commit is contained in:
Jay D Dee
2017-03-10 11:38:58 -05:00
parent 38c6f23b66
commit f1f9e821a2
18 changed files with 139 additions and 342 deletions

View File

@@ -1,25 +1,23 @@
#
# Dockerfile for cpuminer
# usage: docker run creack/cpuminer --url xxxx --user xxxx --pass xxxx
# ex: docker run creack/cpuminer --url stratum+tcp://ltc.pool.com:80 --user creack.worker1 --pass abcdef
#
# Dockerfile for cpuminer-opt
# usage: docker build -t cpuminer-opt:latest .
# run: docker run -it --rm cpuminer-opt:latest [ARGS]
# ex: docker run -it --rm cpuminer-opt:latest -a cryptonight -o cryptonight.eu.nicehash.com:3355 -u 1MiningDW2GKzf4VQfmp4q2XoUvR6iy6PD.worker1 -p x -t 3
#
FROM ubuntu:12.10
MAINTAINER Guillaume J. Charmes <guillaume@charmes.net>
FROM ubuntu:16.04
RUN BUILD_DEPS="build-essential \
libssl-dev \
libgmp-dev \
libcurl4-openssl-dev \
libjansson-dev \
automake" && \
RUN apt-get update -qq
apt-get update && \
apt-get install -y ${BUILD_DEPS}
RUN apt-get install -qqy automake
RUN apt-get install -qqy libcurl4-openssl-dev
RUN apt-get install -qqy git
RUN apt-get install -qqy make
COPY . /app/
RUN cd /app/ && ./build.sh
RUN git clone https://github.com/pooler/cpuminer
RUN cd cpuminer && ./autogen.sh
RUN cd cpuminer && ./configure CFLAGS="-O3"
RUN cd cpuminer && make
WORKDIR /cpuminer
ENTRYPOINT ["./cpuminer"]
ENTRYPOINT ["/app/cpuminer"]
CMD ["-h"]

View File

@@ -30,7 +30,7 @@ Supported Algorithms
deep Deepcoin (DCN)
drop Dropcoin
fresh Fresh
groestl groestl
groestl dmd-gr, Groestl coin
heavy Heavy
hmq1725 Espers
hodl Hodlcoin
@@ -40,7 +40,7 @@ Supported Algorithms
lyra2re lyra2
lyra2rev2 lyrav2, Vertcoin
lyra2z Zcoin (XZC)
lyra2zoin Zoin (ZOI)
lyra2z330 Lyra2 330 rows, Zoin (ZOI)
m7m Magi (XMG)
myr-gr Myriad-Groestl
neoscrypt NeoScrypt(128, 2, 1)
@@ -52,7 +52,8 @@ Supported Algorithms
scrypt scrypt(1024, 1, 1) (default)
scrypt:N scrypt(N, 1, 1)
scryptjane:nf
sha256d SHA-256d
sha256d Double SHA-256
sha256t Triple SHA-256, Onecoin (OC)
shavite3 Shavite3
skein Skein+Sha (Skeincoin)
skein2 Double Skein (Woodcoin)
@@ -95,6 +96,11 @@ may work wallet mining but there are no guarantees.
Errata
------
AMD CPUs older than Piledriver, including Athlon x2 and Phenom II x4, are not
supported by cpuminer-opt due to an incompatible implementation of SSE2 on
these CPUs. Some algos may crash the miner with an invalid instruction.
Users of these CPUs are advised to use an unoptimized miner such as
cpuminer-multi.
cpuminer-opt does not work when mining the Decred algo at Nicehash; it
produces only "invalid extranonce2 size" rejects.

View File

@@ -3,6 +3,12 @@ Compile instruction for Linux and Windows are at the bottom of this file.
Change Log
----------
v3.5.13
Found more speed in Cubehash; the improvement for each algo depends on its
chain length: deep +8%, timetravel +1%, xevan +1%.
Fixed a getwork bug. Solo mining is not yet supported, but testing is encouraged.
v3.5.12
New algo sha256t for Onecoin (OC), 29% faster than ocminer version.

View File

@@ -119,11 +119,11 @@ void init_algo_gate( algo_gate_t* gate )
gate->stratum_gen_work = (void*)&std_stratum_gen_work;
gate->build_stratum_request = (void*)&std_le_build_stratum_request;
gate->set_target = (void*)&std_set_target;
gate->work_decode = (void*)&std_work_decode;
gate->submit_getwork_result = (void*)&std_submit_getwork_result;
gate->build_extraheader = (void*)&std_build_extraheader;
gate->set_work_data_endian = (void*)&do_nothing;
gate->calc_network_diff = (void*)&std_calc_network_diff;
// gate->prevent_dupes = (void*)&return_false;
gate->ready_to_mine = (void*)&std_ready_to_mine;
gate->resync_threads = (void*)&do_nothing;
gate->do_this_thread = (void*)&return_true;
@@ -273,6 +273,7 @@ const char* const algo_alias_map[][2] =
{ "blake256r8vnl", "vanilla" },
{ "sia", "blake2b" },
{ "blake256r14", "blake" },
{ "blake256r14dcr", "decred" },
{ "cryptonote", "cryptonight" },
{ "cryptonight-light", "cryptolight" },
{ "dmd-gr", "groestl" },

View File

@@ -45,7 +45,6 @@ int scanhash_blake2b( int thr_id, struct work *work, uint32_t max_nonce,
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
// const uint32_t first_nonce = pdata[19];
const uint32_t first_nonce = pdata[8];
uint32_t n = first_nonce;
@@ -60,7 +59,6 @@ int scanhash_blake2b( int thr_id, struct work *work, uint32_t max_nonce,
//memcpy(&s_ctx, &s_midstate, sizeof(blake2b_ctx));
do {
// be32enc(&endiandata[19], n);
be32enc(&endiandata[8], n);
//blake2b_hash_end(vhashcpu, endiandata);
blake2b_hash(vhashcpu, endiandata);
@@ -68,7 +66,6 @@ int scanhash_blake2b( int thr_id, struct work *work, uint32_t max_nonce,
if (vhashcpu[7] < Htarg && fulltest(vhashcpu, ptarget)) {
work_set_target_ratio(work, vhashcpu);
*hashes_done = n - first_nonce + 1;
// pdata[19] = n;
pdata[8] = n;
return 1;
}
@@ -76,7 +73,6 @@ int scanhash_blake2b( int thr_id, struct work *work, uint32_t max_nonce,
} while (n < max_nonce && !work_restart[thr_id].restart);
*hashes_done = n - first_nonce + 1;
// pdata[19] = n;
pdata[8] = n;
return 0;
@@ -174,8 +170,8 @@ void blake2b_get_new_work( struct work* work, struct work* g_work, int thr_id,
uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );
if ( memcmp( &work->data[ wkcmp_off ], &g_work->data[ wkcmp_off ], wkcmp_sz )
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr ) )
|| strcmp( work->job_id, g_work->job_id ) )
&& ( clean_job || ( *nonceptr >= *end_nonce_ptr )
|| strcmp( work->job_id, g_work->job_id ) ) )
{
work_free( work );
work_copy( work, g_work );

View File

@@ -5,6 +5,8 @@
#include <string.h>
#include <stdint.h>
#include <memory.h>
#include <unistd.h>
/*
#ifndef min
#define min(a,b) (a>b ? b : a)

View File

@@ -10,10 +10,7 @@
#endif
#include "cubehash_sse2.h"
#include "algo/sha3/sha3-defs.h"
//enum { SUCCESS = 0, FAIL = 1, BAD_HASHBITLEN = 2 };
//#if defined(OPTIMIZE_SSE2)
//#include "avxdefs.h"
static void transform( cubehashParam *sp )
{
@@ -143,72 +140,71 @@ int cubehashInit(cubehashParam *sp, int hashbitlen, int rounds, int blockbytes)
if ( blockbytes <= 0 || blockbytes >= 256)
blockbytes = CUBEHASH_BLOCKBYTES;
sp->hashbitlen = hashbitlen;
sp->rounds = rounds;
sp->blockbytes = blockbytes;
// all sizes of __m128i
sp->hashlen = hashbitlen/128;
sp->blocksize = blockbytes/16;
sp->rounds = rounds;
sp->pos = 0;
for ( i = 0; i < 8; ++i )
sp->x[i] = _mm_set_epi32(0, 0, 0, 0);
sp->x[0] = _mm_set_epi32(0, sp->rounds, sp->blockbytes, hashbitlen / 8);
sp->x[0] = _mm_set_epi32( 0, rounds, blockbytes, hashbitlen / 8 );
for ( i = 0; i < 10; ++i )
transform(sp);
sp->pos = 0;
// sp->pos = 0;
return SUCCESS;
}
int
cubehashReset(cubehashParam *sp)
{
return cubehashInit(sp, sp->hashbitlen, sp->rounds, sp->blockbytes);
}
int cubehashUpdate( cubehashParam *sp, const byte *data, size_t size )
{
uint64_t databitlen = 8 * size;
const int len = size / 16;
const __m128i* in = (__m128i*)data;
int i;
/* caller promises us that previous data had integral number of bytes */
/* so sp->pos is a multiple of 8 */
// It is assumed the data is aligned to 256 bits and its length is a multiple
// of 128 bits. In current usage the data is either 64 or 80 bytes.
while ( databitlen >= 8 )
for ( i = 0; i < len; i++ )
{
( (unsigned char *)sp->x )[sp->pos/8] ^= *data;
data += 1;
databitlen -= 8;
sp->pos += 8;
if ( sp->pos == 8 * sp->blockbytes )
sp->x[ sp->pos ] = _mm_xor_si128( sp->x[ sp->pos ], in[i] );
sp->pos++;
if ( sp->pos == sp->blocksize )
{
transform( sp );
sp->pos = 0;
}
}
if ( databitlen > 0 )
{
( (unsigned char *)sp->x )[sp->pos/8] ^= *data;
sp->pos += databitlen;
transform( sp );
sp->pos = 0;
}
}
return SUCCESS;
}
int cubehashDigest( cubehashParam *sp, byte *digest )
{
__m128i* hash = (__m128i*)digest;
int i;
( (unsigned char *)sp->x )[sp->pos/8] ^= ( 128 >> (sp->pos % 8) );
transform(sp);
// pos is zero for 64 byte data, 1 for 80 byte data.
sp->x[ sp->pos ] = _mm_xor_si128( sp->x[ sp->pos ],
_mm_set_epi8( 0,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0x80 ) );
transform( sp );
sp->x[7] = _mm_xor_si128(sp->x[7], _mm_set_epi32(1, 0, 0, 0));
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
sp->x[7] = _mm_xor_si128( sp->x[7], _mm_set_epi32( 1,0,0,0 ) );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
for ( i = 0; i < sp->hashbitlen / 8; ++i )
digest[i] = ((unsigned char *) sp->x)[i];
for ( i = 0; i < sp->hashlen; i++ )
hash[i] = sp->x[i];
return SUCCESS;
}
@@ -216,48 +212,45 @@ int cubehashDigest( cubehashParam *sp, byte *digest )
int cubehashUpdateDigest( cubehashParam *sp, byte *digest,
const byte *data, size_t size )
{
uint64_t databitlen = 8 * size;
int hashlen128 = sp->hashbitlen/128;
const int len = size / 16;
const __m128i* in = (__m128i*)data;
__m128i* hash = (__m128i*)digest;
int i;
/* caller promises us that previous data had integral number of bytes */
/* so sp->pos is a multiple of 8 */
// It is assumed the data is aligned to 256 bits and its length is a multiple
// of 128 bits. In current usage the data is either 64 or 80 bytes.
while ( databitlen >= 8 )
for ( i = 0; i < len; i++ )
{
( (unsigned char *)sp->x )[sp->pos/8] ^= *data;
data += 1;
databitlen -= 8;
sp->pos += 8;
if ( sp->pos == 8 * sp->blockbytes )
sp->x[ sp->pos ] = _mm_xor_si128( sp->x[ sp->pos ], in[i] );
sp->pos++;
if ( sp->pos == sp->blocksize )
{
transform(sp);
sp->pos = 0;
transform( sp );
sp->pos = 0;
}
}
if ( databitlen > 0 )
{
( (unsigned char *)sp->x )[sp->pos/8] ^= *data;
sp->pos += databitlen;
}
( (unsigned char *)sp->x )[sp->pos/8] ^= ( 128 >> (sp->pos % 8) );
// pos is zero for 64 byte data, 1 for 80 byte data.
sp->x[ sp->pos ] = _mm_xor_si128( sp->x[ sp->pos ],
_mm_set_epi8( 0,0,0,0, 0,0,0,0,
0,0,0,0, 0,0,0,0x80 ) );
transform( sp );
sp->x[7] = _mm_xor_si128( sp->x[7], _mm_set_epi32(1,0,0,0) );
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
transform(sp);
sp->x[7] = _mm_xor_si128( sp->x[7], _mm_set_epi32( 1,0,0,0 ) );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
transform( sp );
for ( i = 0; i < hashlen128; i++ )
( (__m128i*)digest )[i] = ( (__m128i*)sp->x )[i];
for ( i = 0; i < sp->hashlen; i++ )
hash[i] = sp->x[i];
return SUCCESS;
}

View File

@@ -4,57 +4,34 @@
#include "compat.h"
#include <stdint.h>
#include "algo/sha3/sha3-defs.h"
//#include <beecrypt/beecrypt.h>
//#if defined(__SSE2__)
#define OPTIMIZE_SSE2
//#endif
#if defined(OPTIMIZE_SSE2)
#include <emmintrin.h>
#endif
/*!\brief Holds all the parameters necessary for the CUBEHASH algorithm.
* \ingroup HASH_cubehash_m
*/
struct _cubehashParam
//#endif
{
int hashbitlen;
int hashlen; // __m128i
int rounds;
int blockbytes;
int pos; /* number of bits read into x from current block */
#if defined(OPTIMIZE_SSE2)
__m128i _ALIGN(256) x[8];
#else
uint32_t x[32];
#endif
int blocksize; // __m128i
int pos; // number of __m128i read into x from current block
__m128i _ALIGN(256) x[8]; // aligned for __m256i
};
//#ifndef __cplusplus
typedef struct _cubehashParam cubehashParam;
//#endif
#ifdef __cplusplus
extern "C" {
#endif
/*!\var cubehash256
* \brief Holds the full API description of the CUBEHASH algorithm.
*/
//extern BEECRYPTAPI const hashFunction cubehash256;
//BEECRYPTAPI
int cubehashInit(cubehashParam* sp, int hashbitlen, int rounds, int blockbytes);
//BEECRYPTAPI
int cubehashReset(cubehashParam* sp);
//BEECRYPTAPI
int cubehashUpdate(cubehashParam* sp, const byte *data, size_t size);
//BEECRYPTAPI
int cubehashDigest(cubehashParam* sp, byte *digest);
int cubehashUpdateDigest( cubehashParam *sp, byte *digest, const byte *data,

View File

@@ -23,22 +23,6 @@
#include "avxdefs.h"
#include "luffa_for_sse2.h"
#if defined (__AVX2__)
#define MULT256(a) \
a = _mm256_xor_si256( \
_mm256_and_si256( _mm256_srli_si256( a, 4 ), \
_mm256_set_epi32( \
0, 0xffffffff, 0xffffffff, 0xffffffff, \
0, 0xffffffff, 0xffffffff, 0xffffffff ) ), \
_mm256_permutevar8x32_epi32( \
_mm256_and_si256( _mm256_srli_si256( a, 4 ), \
_mm256_set_epi32( 0xffffffff, 0, 0, 0, \
0xffffffff, 0,0, 0 ) ), \
_mm256_set_epi32( 0, 0, 0, 0, 0, 0, 0, 0x00800800 ) ) )
#endif // __AVX2__
#define MULT2(a0,a1) do \
{ \
__m128i b = _mm_xor_si128( a0, _mm_shuffle_epi32( _mm_and_si128(a1,MASK), 16 ) ); \
@@ -46,17 +30,6 @@ _mm256_set_epi32( 0, 0, 0, 0, 0, 0, 0, 0x00800800 ) ) )
a1 = _mm_or_si128( _mm_srli_si128(a1,4), _mm_slli_si128(b,12) ); \
} while(0)
/*
#define MULT2(a0,a1) do \
{ \
__m128i b; \
a0 = _mm_xor_si128( a0, _mm_shuffle_epi32( _mm_and_si128(a1,MASK), 16 ) ); \
b = a0; \
a0 = _mm_or_si128( _mm_srli_si128(a0,4), _mm_slli_si128(a1,12) ); \
a1 = _mm_or_si128( _mm_srli_si128(a1,4), _mm_slli_si128(b,12) ); \
} while(0)
*/
#define STEP_PART(x,c,t)\
SUBCRUMB(*x,*(x+1),*(x+2),*(x+3),*t);\
SUBCRUMB(*(x+5),*(x+6),*(x+7),*(x+4),*t);\
@@ -213,17 +186,10 @@ _mm256_set_epi32( 0, 0, 0, 0, 0, 0, 0, 0x00800800 ) ) )
#define MIXTON1024(r0,r1,r2,r3,s0,s1,s2,s3,p0,p1,p2,p3,q0,q1,q2,q3)\
NMLTOM1024(r0,r1,r2,r3,s0,s1,s2,s3,p0,p1,p2,p3,q0,q1,q2,q3);
//#if defined (__AVX2__)
// static void rnd512( hashState_luffa *state, __m256i msg );
//#else
static void rnd512( hashState_luffa *state, __m128i msg1, __m128i msg0 );
//static void rnd512( hashState_luffa *state );
//#endif
static void rnd512( hashState_luffa *state, __m128i msg1, __m128i msg0 );
static void finalization512( hashState_luffa *state, uint32 *b );
/* initial values of chaining variables */
static const uint32 IV[40] __attribute((aligned(16))) = {
0xdbf78465,0x4eaa6fb4,0x44b051e0,0x6d251e69,
@@ -306,12 +272,8 @@ HashReturn update_luffa( hashState_luffa *state, const BitSequence *data,
// full blocks
for ( i = 0; i < blocks; i++ )
{
//#if defined (__AVX2__)
// rnd512( state, mm256_byteswap_epi32( cast_m256i( data ) ) ),
//#else
rnd512( state, mm_byteswap_epi32( casti_m128i( data, 1 ) ),
mm_byteswap_epi32( casti_m128i( data, 0 ) ) );
//#endif
data += MSG_BLOCK_BYTE_LEN;
}
@@ -335,23 +297,14 @@ HashReturn final_luffa(hashState_luffa *state, BitSequence *hashval)
if ( state->rembytes )
{
// not empty, data is in buffer
//#if defined (__AVX2__)
// rnd512( state, cast_m256i( state->buffer ) );
//#else
rnd512( state, casti_m128i( state->buffer, 1 ),
casti_m128i( state->buffer, 0 ) );
//#endif
}
else
{
// empty pad block, constant data
//#if defined (__AVX2__)
// rnd512( state, _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
// 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 ) );
//#else
rnd512( state, _mm_setzero_si128(),
_mm_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 ) );
//#endif
}
finalization512(state, (uint32*) hashval);
@@ -371,41 +324,23 @@ HashReturn update_and_final_luffa( hashState_luffa *state, BitSequence* output,
// full blocks
for ( i = 0; i < blocks; i++ )
{
//#if defined (__AVX2__)
// rnd512( state, mm256_byteswap_epi32( cast_m256i( data ) ) ),
//#else
rnd512( state, mm_byteswap_epi32( casti_m128i( data, 1 ) ),
mm_byteswap_epi32( casti_m128i( data, 0 ) ) );
//#endif
data += MSG_BLOCK_BYTE_LEN;
}
// 16 byte partial block exists for 80 byte len
if ( state->rembytes )
{
// remaining 16 data bytes + 16 bytes padding
//#if defined (__AVX2__)
// use buffer to manage 16 bytes of data in 32 byte world
// casti_m128i( state->buffer, 0 ) = mm_byteswap_epi32( cast_m128i( data ) );
// padding of partial block
// casti_m128i( state->buffer, 1 ) =
// _mm_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 );
// rnd512( state, cast_m256i( state->buffer ) );
//#else
rnd512( state, _mm_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 ),
mm_byteswap_epi32( cast_m128i( data ) ) );
//#endif
}
else
{
// empty pad block
//#if defined (__AVX2__)
// rnd512( state, _mm256_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
// 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 ) );
//#else
rnd512( state, _mm_setzero_si128(),
_mm_set_epi8( 0,0,0,0, 0,0,0,0, 0,0,0,0, 0x80,0,0,0 ) );
//#endif
}
finalization512( state, (uint32*) output );
@@ -419,109 +354,6 @@ HashReturn update_and_final_luffa( hashState_luffa *state, BitSequence* output,
/* Round function */
/* state: hash context */
/*
#if defined (__AVX2__)
// AVX2 only
static void rnd512( hashState_luffa *state, __m256i msg )
{
do
{
area256 t;
area256 *chainv;
chainv.v256 = (__m256i*)state->chainv;
area256 Msg;
Msg.v256 = Msg
// __m256i t;
// __m256i *chainv = (__m256i*)state->chainv;
t.v256 = chainv[0];
t.v256 = _mm256_xor_si256( t.v256, chainv.v256[1] );
t.v256 = _mm256_xor_si256( t.v256, chainv.v256[2] );
t.v256 = _mm256_xor_si256( t.v256, chainv.v256[3] );
t.v256 = _mm256_xor_si256( t.v256, chainv.v256[4] );
MULT2( t.v128[0], t.v128[1] );
// MULT256( t );
Msg.v256 = _mm256_shuffle_epi32( Msg.v256, 27 );
chainv.v256[0] = _mm256_xor_si256( chainv.v256[0], t.v256 );
chainv.v256[1] = _mm256_xor_si256( chainv.v256[1], t.v256 );
chainv.v256[2] = _mm256_xor_si256( chainv.v256[2], t.v256 );
chainv.v256[3] = _mm256_xor_si256( chainv.v256[3], t.v256 );
chainv.v256[4] = _mm256_xor_si256( chainv.v256[4], t.v256 );
t.v256 = chainv[0];
MULT2( chainv.v128[0], chainv.v128[1]);
// MULT256( chainv[0] );
chainv[0] = _mm256_xor_si256( chainv.v256[0], chainv.v256[1] );
MULT2( chainv.v128[2], chainv.v128[3]);
// MULT256( chainv[1] );
chainv.v256[1] = _mm256_xor_si256( chainv.v256[1], chainv.v256[2] );
MULT2( chainv.v128[4], chainv.v128[5]);
// MULT256( chainv[2] );
chainv.v256[2] = _mm256_xor_si256( chainv.v256[2], chainv.v256[3] );
MULT2( chainv.v128[6], chainv.v128[7]);
// MULT256( chainv[3] );
chainv.v256[3] = _mm256_xor_si256( chainv.v256[3], chainv.v256[4] );
MULT2( chainv.v128[8], chainv.v128[9]);
// MULT256( chainv[4] );
chainv.v256[4] = _mm256_xor_si256( chainv.v256[4], chainv.v256[5] );
t.v256 = chainv.v256[4];
MULT2( chainv.v128[8], chainv.v128[9]);
// MULT256( chainv[4] );
chainv.v256[4] = _mm256_xor_si256( chainv.v256[4], chainv.v256[3] );
MULT2( chainv.v128[6], chainv.v128[7]);
// MULT256( chainv[3] );
chainv.v256[3] = _mm256_xor_si256( chainv.v256[3], chainv.v256[2] );
MULT2( chainv.v128[4], chainv.v128[5]);
// MULT256( chainv[2] );
chainv.v256[2] = _mm256_xor_si256( chainv.v256[2], chainv.v256[1] );
MULT2( chainv.v128[2], chainv.v128[3]);
// MULT256( chainv[1] );
chainv.v256[1] = _mm256_xor_si256( chainv.v256[1], chainv.v256[0] );
MULT2( chainv.v128[0], chainv.v128[1]);
// MULT256( chainv[0] );
chainv.v256[0] = _mm256_xor_si256( _mm256_xor_si256( chainv.v256[0], t ), Msg.v256 );
MULT2( Msg.v128[0], Msg.v128[1] );
// MULT256( msg );
chainv.v256[1] = _mm256_xor_si256( chainv.v256[1], Msg.v256 );
MULT2( Msg.v128[0], Msg.v128[1] );
// MULT256( msg );
chainv.v256[2] = _mm256_xor_si256( chainv.v256[2], Msg.v256 );
MULT2( Msg.v128[0], Msg.v128[1] );
// MULT256( msg );
chainv.v256[3] = _mm256_xor_si256( chainv.v256[3], Msg.v256 );
MULT2( Msg.v128[0], Msg.v128[1] );
// MULT256( msg );
chainv.v256[4] = _mm256_xor_si256( chainv.v256[4], Msg.v256 );
MULT2( Msg.v128[0], Msg.v128[1] );
// MULT256( msg );
} while (0);
// new set of __m128i vars for the rest
__m128i t[2];
__m128i *chainv = state->chainv;
__m128i tmp[2];
__m128i x[8];
__m128i msg0 = Msg.v128[0];
__m128i msg1 = Msg.v128[1];
// remainder common with SSE2
#else
// SSE2 only
*/
static void rnd512( hashState_luffa *state, __m128i msg1, __m128i msg0 )
{
__m128i t[2];
@@ -635,10 +467,6 @@ static void rnd512( hashState_luffa *state, __m128i msg1, __m128i msg0 )
MULT2( msg0, msg1);
//#endif
// common to SSE2 and AVX2
chainv[3] = _mm_or_si128( _mm_slli_epi32(chainv[3], 1),
_mm_srli_epi32(chainv[3], 31) );
chainv[5] = _mm_or_si128( _mm_slli_epi32(chainv[5], 2),
@@ -693,7 +521,6 @@ static void rnd512( hashState_luffa *state, __m128i msg1, __m128i msg0 )
/* state: hash context */
/* b[8]: hash values */
//*
#if defined (__AVX2__)
static void finalization512( hashState_luffa *state, uint32 *b )
@@ -701,9 +528,9 @@ static void finalization512( hashState_luffa *state, uint32 *b )
uint32 hash[8] __attribute((aligned(64)));
__m256i* chainv = (__m256i*)state->chainv;
__m256i t;
const __m128i zero = _mm_setzero_si128();
rnd512( state, _mm_setzero_si128(), _mm_setzero_si128() );
// rnd512( state, _mm256_setzero_si256() );
rnd512( state, zero, zero );
t = chainv[0];
t = _mm256_xor_si256( t, chainv[1] );
@@ -717,8 +544,7 @@ static void finalization512( hashState_luffa *state, uint32 *b )
casti_m256i( b, 0 ) = mm256_byteswap_epi32( casti_m256i( hash, 0 ) );
rnd512( state, _mm_setzero_si128(), _mm_setzero_si128() );
// rnd512( state, _mm256_setzero_si256() );
rnd512( state, zero, zero );
t = chainv[0];
t = _mm256_xor_si256( t, chainv[1] );
@@ -734,17 +560,15 @@ static void finalization512( hashState_luffa *state, uint32 *b )
#else
static void finalization512( hashState_luffa *state, uint32 *b )
{
uint32 hash[8] __attribute((aligned(64)));
__m128i* chainv = state->chainv;
__m128i t[2];
const __m128i zero = _mm_setzero_si128();
/*---- blank round with m=0 ----*/
rnd512( state, _mm_setzero_si128(), _mm_setzero_si128() );
// _mm_prefetch( b, _MM_HINT_T0 );
rnd512( state, zero, zero );
t[0] = chainv[0];
t[1] = chainv[1];
@@ -766,7 +590,7 @@ static void finalization512( hashState_luffa *state, uint32 *b )
casti_m128i( b, 0 ) = mm_byteswap_epi32( casti_m128i( hash, 0 ) );
casti_m128i( b, 1 ) = mm_byteswap_epi32( casti_m128i( hash, 1 ) );
rnd512( state, _mm_setzero_si128(), _mm_setzero_si128() );
rnd512( state, zero, zero );
t[0] = chainv[0];
t[1] = chainv[1];

View File

@@ -55,13 +55,6 @@ void zoin_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
}
/*
bool zoin_get_work_height( struct work* work, struct stratum_ctx* sctx )
{
work->height = sctx->bloc_height;
return false;
}
*/
bool zoin_thread_init()
{
@@ -93,7 +86,6 @@ bool register_lyra2z330_algo( algo_gate_t* gate )
gate->hash_alt = (void*)&zoin_hash;
gate->get_max64 = (void*)&get_max64_0xffffLL;
gate->set_target = (void*)&zoin_set_target;
// gate->prevent_dupes = (void*)&zoin_get_work_height;
return true;
};

View File

@@ -323,14 +323,12 @@ void fft128_msg_final(short *a, const unsigned char *x) {
// v16 *Table = (v16*)FFT128_Final_Table;
v16 *A = (v16*) a;
int i;
v16 msg1 = v16_broadcast(x[0]>128?x[0]-257:x[0]);
v16 msg2 = v16_broadcast(x[1]>128?x[1]-257:x[1]);
// v16 msg2 = v16_broadcast(x[1]);
#if 0
int i;
for (i=0; i<16; i++) {
v16 tmp = v16_mul(FFT128_Final_Table[2*i].v16 , msg2);
v16 sum = v16_add(FFT128_Final_Table[2*i+1].v16, msg1);

View File

@@ -156,8 +156,8 @@ void timetravel_hash(void *output, const void *input)
}
else
{
sph_blake512( &ctx.blake, hashA, dataLen );
sph_blake512_close( &ctx.blake, hashB );
sph_blake512( &ctx.blake, hashA, dataLen );
sph_blake512_close( &ctx.blake, hashB );
}
break;
case 1:
@@ -187,6 +187,7 @@ void timetravel_hash(void *output, const void *input)
sph_groestl512_close( &ctx.groestl, hashB );
}
#else
// groestl midstate is slower
// if ( i == 0 )
// {
// memcpy( &ctx.groestl, &tt_mid.groestl, sizeof tt_mid.groestl );
@@ -243,8 +244,8 @@ void timetravel_hash(void *output, const void *input)
if ( i == 0 )
{
memcpy( &ctx.luffa, &tt_mid.luffa, sizeof tt_mid.luffa );
update_and_final_luffa( &ctx.luffa, hashB,
input + 64, 16 );
update_and_final_luffa( &ctx.luffa, (BitSequence*)hashB,
(const BitSequence *)input + 64, 16 );
}
else
{
@@ -320,6 +321,7 @@ int scanhash_timetravel( int thr_id, struct work *work, uint32_t max_nonce,
memcpy( &tt_mid.groestl, &tt_ctx.groestl, sizeof(tt_mid.groestl ) );
sph_groestl512( &tt_mid.groestl, endiandata, 64 );
#else
// groestl midstate is slower
// memcpy( &tt_mid.groestl, &tt_ctx.groestl, sizeof(tt_mid.groestl ) );
// update_groestl( &tt_mid.groestl, (char*)endiandata, 64*8 );
#endif

View File

@@ -37,7 +37,6 @@
#ifndef NO_AES_NI
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h"
#endif
#include "algo/jh/sse2/jh_sse2_opt64.h"

View File

@@ -43,7 +43,9 @@ uint8_t v8 [16];
// n = number of __m256i (32 bytes)
inline void memset_zero_m256i( __m256i *dst, int n )
{
for ( int i = 0; i < n; i++ ) dst[i] = _mm256_setzero_si256();
__m256i zero = _mm256_setzero_si256();
for ( int i = 0; i < n; i++ ) dst[i] = zero;
// for ( int i = 0; i < n; i++ ) dst[i] = _mm256_xor_si256( dst[i], dst[i] );
}
inline void memset_m256i( __m256i *dst, const __m256i a, int n )
@@ -293,7 +295,9 @@ inline __m256i mm256_byteswap_epi32( __m256i x )
inline void memset_zero_m128i( __m128i *dst, int n )
{
for ( int i = 0; i < n; i++ ) dst[i] = _mm_setzero_si128();
__m128i zero = _mm_setzero_si128();
for ( int i = 0; i < n; i++ ) dst[i] = zero;
// for ( int i = 0; i < n; i++ ) dst[i] = _mm_xor_si128( dst[i], dst[i] );
}
inline void memset_m128i( __m128i *dst, const __m128i a, int n )

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.5.12])
AC_INIT([cpuminer-opt], [3.5.13])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@@ -656,7 +656,6 @@ Options:\n\
bastion\n\
blake Blake-256 (SFR)\n\
blakecoin blake256r8\n\
"/* blake2b Sia\n*/"\
blake2s Blake-2 S\n\
bmw BMW 256\n\
c11 Chaincoin\n\
@@ -666,7 +665,7 @@ Options:\n\
deep Deepcoin (DCN)\n\
drop Dropcoin\n\
fresh Fresh\n\
groestl groestl\n\
groestl dmd-gr, Groestl coin\n\
heavy Heavy\n\
hmq1725 Espers\n\
hodl Hodlcoin\n\
@@ -676,7 +675,7 @@ Options:\n\
lyra2re lyra2\n\
lyra2rev2 lyrav2, Vertcoin\n\
lyra2z Zcoin (XZC)\n\
lyra2z330 Zoin (ZOI)\n\
lyra2z330 Lyra2 330 rows, Zoin (ZOI)\n\
m7m Magi (XMG)\n\
myr-gr Myriad-Groestl\n\
neoscrypt NeoScrypt(128, 2, 1)\n\

View File

@@ -182,7 +182,7 @@ void cpu_getmodelid(char *outbuf, size_t maxsz)
getenv("PROCESSOR_REVISION"), getenv("NUMBER_OF_PROCESSORS"));
#else
FILE *fd = fopen("/proc/cpuinfo", "rb");
char *buf = NULL, *p, *eol;
char *buf = NULL, *p;
int cpufam = 0, model = 0, stepping = 0;
size_t size = 0;
if (!fd) return;

4
util.c
View File

@@ -26,6 +26,7 @@
#include <curl/curl.h>
#include <time.h>
#include <sys/stat.h>
#include <math.h>
//#include <syslog.h>
#if defined(WIN32)
#include <winsock2.h>
@@ -1692,14 +1693,13 @@ static uint32_t getblocheight(struct stratum_ctx *sctx)
static bool stratum_notify(struct stratum_ctx *sctx, json_t *params)
{
const char *job_id, *prevhash, *coinb1, *coinb2, *version, *nbits, *stime;
const char *claim = NULL, *nreward = NULL;
const char *claim = NULL;
size_t coinb1_size, coinb2_size;
bool clean, ret = false;
int merkle_count, i, p = 0;
json_t *merkle_arr;
uchar **merkle = NULL;
bool has_claim = opt_algo == ALGO_LBRY;
int ntime;
job_id = json_string_value(json_array_get(params, p++));
prevhash = json_string_value(json_array_get(params, p++));
if ( has_claim )