This commit is contained in:
Jay D Dee
2022-04-02 12:44:57 -04:00
parent 5b678d2481
commit db76d3865f
6 changed files with 145 additions and 101 deletions

View File

@@ -65,6 +65,11 @@ If not what makes it happen or not happen?
Change Log Change Log
---------- ----------
v3.19.7
#369 Fixed time-limited mining, --time-limit.
Fixed a potential compile error when using optimization below -O3.
v3.19.6 v3.19.6
#363 Fixed a stratum bug where the first job may be ignored, delaying start of hashing #363 Fixed a stratum bug where the first job may be ignored, delaying start of hashing
@@ -76,7 +81,7 @@ v3.19.5
Enhanced stratum-keepalive preemptively resets the stratum connection Enhanced stratum-keepalive preemptively resets the stratum connection
before the server to avoid lost shares. before the server to avoid lost shares.
Added build-msys2.sh scrypt for easier compiling on Windows, see Wiki for details. Added build-msys2.sh shell script for easier compiling on Windows, see Wiki for details.
X16RT: eliminate unnecessary recalculations of the hash order. X16RT: eliminate unnecessary recalculations of the hash order.

View File

@@ -594,9 +594,6 @@ void bmw512_2way_close( bmw_2way_big_context *ctx, void *dst )
#define rb6(x) mm256_rol_64( x, 43 ) #define rb6(x) mm256_rol_64( x, 43 )
#define rb7(x) mm256_rol_64( x, 53 ) #define rb7(x) mm256_rol_64( x, 53 )
#define rol_off_64( M, j ) \
mm256_rol_64( M[ (j) & 0xF ], ( (j) & 0xF ) + 1 )
#define add_elt_b( mj0, mj3, mj10, h, K ) \ #define add_elt_b( mj0, mj3, mj10, h, K ) \
_mm256_xor_si256( h, _mm256_add_epi64( K, \ _mm256_xor_si256( h, _mm256_add_epi64( K, \
_mm256_sub_epi64( _mm256_add_epi64( mj0, mj3 ), mj10 ) ) ) _mm256_sub_epi64( _mm256_add_epi64( mj0, mj3 ), mj10 ) ) )
@@ -732,8 +729,23 @@ void compress_big( const __m256i *M, const __m256i H[16], __m256i dH[16] )
qt[15] = _mm256_add_epi64( sb0( Wb15), H[ 0] ); qt[15] = _mm256_add_epi64( sb0( Wb15), H[ 0] );
__m256i mj[16]; __m256i mj[16];
for ( i = 0; i < 16; i++ )
mj[i] = rol_off_64( M, i ); mj[ 0] = mm256_rol_64( M[ 0], 1 );
mj[ 1] = mm256_rol_64( M[ 1], 2 );
mj[ 2] = mm256_rol_64( M[ 2], 3 );
mj[ 3] = mm256_rol_64( M[ 3], 4 );
mj[ 4] = mm256_rol_64( M[ 4], 5 );
mj[ 5] = mm256_rol_64( M[ 5], 6 );
mj[ 6] = mm256_rol_64( M[ 6], 7 );
mj[ 7] = mm256_rol_64( M[ 7], 8 );
mj[ 8] = mm256_rol_64( M[ 8], 9 );
mj[ 9] = mm256_rol_64( M[ 9], 10 );
mj[10] = mm256_rol_64( M[10], 11 );
mj[11] = mm256_rol_64( M[11], 12 );
mj[12] = mm256_rol_64( M[12], 13 );
mj[13] = mm256_rol_64( M[13], 14 );
mj[14] = mm256_rol_64( M[14], 15 );
mj[15] = mm256_rol_64( M[15], 16 );
qt[16] = add_elt_b( mj[ 0], mj[ 3], mj[10], H[ 7], qt[16] = add_elt_b( mj[ 0], mj[ 3], mj[10], H[ 7],
(const __m256i)_mm256_set1_epi64x( 16 * 0x0555555555555555ULL ) ); (const __m256i)_mm256_set1_epi64x( 16 * 0x0555555555555555ULL ) );
@@ -1034,9 +1046,6 @@ bmw512_4way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
#define r8b6(x) mm512_rol_64( x, 43 ) #define r8b6(x) mm512_rol_64( x, 43 )
#define r8b7(x) mm512_rol_64( x, 53 ) #define r8b7(x) mm512_rol_64( x, 53 )
#define rol8w_off_64( M, j ) \
mm512_rol_64( M[ (j) & 0xF ], ( (j) & 0xF ) + 1 )
#define add_elt_b8( mj0, mj3, mj10, h, K ) \ #define add_elt_b8( mj0, mj3, mj10, h, K ) \
_mm512_xor_si512( h, _mm512_add_epi64( K, \ _mm512_xor_si512( h, _mm512_add_epi64( K, \
_mm512_sub_epi64( _mm512_add_epi64( mj0, mj3 ), mj10 ) ) ) _mm512_sub_epi64( _mm512_add_epi64( mj0, mj3 ), mj10 ) ) )
@@ -1171,41 +1180,73 @@ void compress_big_8way( const __m512i *M, const __m512i H[16],
qt[15] = _mm512_add_epi64( s8b0( W8b15), H[ 0] ); qt[15] = _mm512_add_epi64( s8b0( W8b15), H[ 0] );
__m512i mj[16]; __m512i mj[16];
for ( i = 0; i < 16; i++ ) uint64_t K = 16 * 0x0555555555555555ULL;
mj[i] = rol8w_off_64( M, i );
mj[ 0] = mm512_rol_64( M[ 0], 1 );
mj[ 1] = mm512_rol_64( M[ 1], 2 );
mj[ 2] = mm512_rol_64( M[ 2], 3 );
mj[ 3] = mm512_rol_64( M[ 3], 4 );
mj[ 4] = mm512_rol_64( M[ 4], 5 );
mj[ 5] = mm512_rol_64( M[ 5], 6 );
mj[ 6] = mm512_rol_64( M[ 6], 7 );
mj[ 7] = mm512_rol_64( M[ 7], 8 );
mj[ 8] = mm512_rol_64( M[ 8], 9 );
mj[ 9] = mm512_rol_64( M[ 9], 10 );
mj[10] = mm512_rol_64( M[10], 11 );
mj[11] = mm512_rol_64( M[11], 12 );
mj[12] = mm512_rol_64( M[12], 13 );
mj[13] = mm512_rol_64( M[13], 14 );
mj[14] = mm512_rol_64( M[14], 15 );
mj[15] = mm512_rol_64( M[15], 16 );
qt[16] = add_elt_b8( mj[ 0], mj[ 3], mj[10], H[ 7], qt[16] = add_elt_b8( mj[ 0], mj[ 3], mj[10], H[ 7],
(const __m512i)_mm512_set1_epi64( 16 * 0x0555555555555555ULL ) ); (const __m512i)_mm512_set1_epi64( K ) );
K += 0x0555555555555555ULL;
qt[17] = add_elt_b8( mj[ 1], mj[ 4], mj[11], H[ 8], qt[17] = add_elt_b8( mj[ 1], mj[ 4], mj[11], H[ 8],
(const __m512i)_mm512_set1_epi64( 17 * 0x0555555555555555ULL ) ); (const __m512i)_mm512_set1_epi64( K ) );
K += 0x0555555555555555ULL;
qt[18] = add_elt_b8( mj[ 2], mj[ 5], mj[12], H[ 9], qt[18] = add_elt_b8( mj[ 2], mj[ 5], mj[12], H[ 9],
(const __m512i)_mm512_set1_epi64( 18 * 0x0555555555555555ULL ) ); (const __m512i)_mm512_set1_epi64( K ) );
K += 0x0555555555555555ULL;
qt[19] = add_elt_b8( mj[ 3], mj[ 6], mj[13], H[10], qt[19] = add_elt_b8( mj[ 3], mj[ 6], mj[13], H[10],
(const __m512i)_mm512_set1_epi64( 19 * 0x0555555555555555ULL ) ); (const __m512i)_mm512_set1_epi64( K ) );
K += 0x0555555555555555ULL;
qt[20] = add_elt_b8( mj[ 4], mj[ 7], mj[14], H[11], qt[20] = add_elt_b8( mj[ 4], mj[ 7], mj[14], H[11],
(const __m512i)_mm512_set1_epi64( 20 * 0x0555555555555555ULL ) ); (const __m512i)_mm512_set1_epi64( K ) );
K += 0x0555555555555555ULL;
qt[21] = add_elt_b8( mj[ 5], mj[ 8], mj[15], H[12], qt[21] = add_elt_b8( mj[ 5], mj[ 8], mj[15], H[12],
(const __m512i)_mm512_set1_epi64( 21 * 0x0555555555555555ULL ) ); (const __m512i)_mm512_set1_epi64( K ) );
K += 0x0555555555555555ULL;
qt[22] = add_elt_b8( mj[ 6], mj[ 9], mj[ 0], H[13], qt[22] = add_elt_b8( mj[ 6], mj[ 9], mj[ 0], H[13],
(const __m512i)_mm512_set1_epi64( 22 * 0x0555555555555555ULL ) ); (const __m512i)_mm512_set1_epi64( K ) );
K += 0x0555555555555555ULL;
qt[23] = add_elt_b8( mj[ 7], mj[10], mj[ 1], H[14], qt[23] = add_elt_b8( mj[ 7], mj[10], mj[ 1], H[14],
(const __m512i)_mm512_set1_epi64( 23 * 0x0555555555555555ULL ) ); (const __m512i)_mm512_set1_epi64( K ) );
K += 0x0555555555555555ULL;
qt[24] = add_elt_b8( mj[ 8], mj[11], mj[ 2], H[15], qt[24] = add_elt_b8( mj[ 8], mj[11], mj[ 2], H[15],
(const __m512i)_mm512_set1_epi64( 24 * 0x0555555555555555ULL ) ); (const __m512i)_mm512_set1_epi64( K ) );
K += 0x0555555555555555ULL;
qt[25] = add_elt_b8( mj[ 9], mj[12], mj[ 3], H[ 0], qt[25] = add_elt_b8( mj[ 9], mj[12], mj[ 3], H[ 0],
(const __m512i)_mm512_set1_epi64( 25 * 0x0555555555555555ULL ) ); (const __m512i)_mm512_set1_epi64( K ) );
K += 0x0555555555555555ULL;
qt[26] = add_elt_b8( mj[10], mj[13], mj[ 4], H[ 1], qt[26] = add_elt_b8( mj[10], mj[13], mj[ 4], H[ 1],
(const __m512i)_mm512_set1_epi64( 26 * 0x0555555555555555ULL ) ); (const __m512i)_mm512_set1_epi64( K ) );
K += 0x0555555555555555ULL;
qt[27] = add_elt_b8( mj[11], mj[14], mj[ 5], H[ 2], qt[27] = add_elt_b8( mj[11], mj[14], mj[ 5], H[ 2],
(const __m512i)_mm512_set1_epi64( 27 * 0x0555555555555555ULL ) ); (const __m512i)_mm512_set1_epi64( K ) );
K += 0x0555555555555555ULL;
qt[28] = add_elt_b8( mj[12], mj[15], mj[ 6], H[ 3], qt[28] = add_elt_b8( mj[12], mj[15], mj[ 6], H[ 3],
(const __m512i)_mm512_set1_epi64( 28 * 0x0555555555555555ULL ) ); (const __m512i)_mm512_set1_epi64( K ) );
K += 0x0555555555555555ULL;
qt[29] = add_elt_b8( mj[13], mj[ 0], mj[ 7], H[ 4], qt[29] = add_elt_b8( mj[13], mj[ 0], mj[ 7], H[ 4],
(const __m512i)_mm512_set1_epi64( 29 * 0x0555555555555555ULL ) ); (const __m512i)_mm512_set1_epi64( K ) );
K += 0x0555555555555555ULL;
qt[30] = add_elt_b8( mj[14], mj[ 1], mj[ 8], H[ 5], qt[30] = add_elt_b8( mj[14], mj[ 1], mj[ 8], H[ 5],
(const __m512i)_mm512_set1_epi64( 30 * 0x0555555555555555ULL ) ); (const __m512i)_mm512_set1_epi64( K ) );
K += 0x0555555555555555ULL;
qt[31] = add_elt_b8( mj[15], mj[ 2], mj[ 9], H[ 6], qt[31] = add_elt_b8( mj[15], mj[ 2], mj[ 9], H[ 6],
(const __m512i)_mm512_set1_epi64( 31 * 0x0555555555555555ULL ) ); (const __m512i)_mm512_set1_epi64( K ) );
qt[16] = _mm512_add_epi64( qt[16], expand1_b8( qt, 16 ) ); qt[16] = _mm512_add_epi64( qt[16], expand1_b8( qt, 16 ) );
qt[17] = _mm512_add_epi64( qt[17], expand1_b8( qt, 17 ) ); qt[17] = _mm512_add_epi64( qt[17], expand1_b8( qt, 17 ) );

View File

@@ -261,7 +261,7 @@ inline void reducedDuplexRowSetup_2way( uint64_t *State, uint64_t *rowIn,
// overlap it's unified. // overlap it's unified.
// As a result normal is Nrows-2 / Nrows. // As a result normal is Nrows-2 / Nrows.
// for 4 rows: 1 unified, 2 overlap, 1 normal. // for 4 rows: 1 unified, 2 overlap, 1 normal.
// for 8 rows: 1 unified, 2 overlap, 56 normal. // for 8 rows: 1 unified, 2 overlap, 5 normal.
static inline void reducedDuplexRow_2way_normal( uint64_t *State, static inline void reducedDuplexRow_2way_normal( uint64_t *State,
uint64_t *rowIn, uint64_t *rowInOut0, uint64_t *rowInOut1, uint64_t *rowIn, uint64_t *rowInOut0, uint64_t *rowInOut1,
@@ -283,6 +283,15 @@ static inline void reducedDuplexRow_2way_normal( uint64_t *State,
for ( i = 0; i < nCols; i++ ) for ( i = 0; i < nCols; i++ )
{ {
//Absorbing "M[prev] [+] M[row*]" //Absorbing "M[prev] [+] M[row*]"
io0 = _mm512_load_si512( inout0 );
io1 = _mm512_load_si512( inout0 +1 );
io2 = _mm512_load_si512( inout0 +2 );
io0 = _mm512_mask_load_epi64( io0, 0xf0, inout1 );
io1 = _mm512_mask_load_epi64( io1, 0xf0, inout1 +1 );
io2 = _mm512_mask_load_epi64( io2, 0xf0, inout1 +2 );
/*
io0 = _mm512_mask_blend_epi64( 0xf0, io0 = _mm512_mask_blend_epi64( 0xf0,
_mm512_load_si512( (__m512i*)inout0 ), _mm512_load_si512( (__m512i*)inout0 ),
_mm512_load_si512( (__m512i*)inout1 ) ); _mm512_load_si512( (__m512i*)inout1 ) );
@@ -292,6 +301,7 @@ static inline void reducedDuplexRow_2way_normal( uint64_t *State,
io2 = _mm512_mask_blend_epi64( 0xf0, io2 = _mm512_mask_blend_epi64( 0xf0,
_mm512_load_si512( (__m512i*)inout0 +2 ), _mm512_load_si512( (__m512i*)inout0 +2 ),
_mm512_load_si512( (__m512i*)inout1 +2 ) ); _mm512_load_si512( (__m512i*)inout1 +2 ) );
*/
state0 = _mm512_xor_si512( state0, _mm512_add_epi64( in[0], io0 ) ); state0 = _mm512_xor_si512( state0, _mm512_add_epi64( in[0], io0 ) );
state1 = _mm512_xor_si512( state1, _mm512_add_epi64( in[1], io1 ) ); state1 = _mm512_xor_si512( state1, _mm512_add_epi64( in[1], io1 ) );
@@ -359,6 +369,15 @@ static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
for ( i = 0; i < nCols; i++ ) for ( i = 0; i < nCols; i++ )
{ {
//Absorbing "M[prev] [+] M[row*]" //Absorbing "M[prev] [+] M[row*]"
io0.v512 = _mm512_load_si512( inout0 );
io1.v512 = _mm512_load_si512( inout0 +1 );
io2.v512 = _mm512_load_si512( inout0 +2 );
io0.v512 = _mm512_mask_load_epi64( io0.v512, 0xf0, inout1 );
io1.v512 = _mm512_mask_load_epi64( io1.v512, 0xf0, inout1 +1 );
io2.v512 = _mm512_mask_load_epi64( io2.v512, 0xf0, inout1 +2 );
/*
io0.v512 = _mm512_mask_blend_epi64( 0xf0, io0.v512 = _mm512_mask_blend_epi64( 0xf0,
_mm512_load_si512( (__m512i*)inout0 ), _mm512_load_si512( (__m512i*)inout0 ),
_mm512_load_si512( (__m512i*)inout1 ) ); _mm512_load_si512( (__m512i*)inout1 ) );
@@ -368,27 +387,12 @@ static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
io2.v512 = _mm512_mask_blend_epi64( 0xf0, io2.v512 = _mm512_mask_blend_epi64( 0xf0,
_mm512_load_si512( (__m512i*)inout0 +2 ), _mm512_load_si512( (__m512i*)inout0 +2 ),
_mm512_load_si512( (__m512i*)inout1 +2 ) ); _mm512_load_si512( (__m512i*)inout1 +2 ) );
*/
state0 = _mm512_xor_si512( state0, _mm512_add_epi64( in[0], io0.v512 ) ); state0 = _mm512_xor_si512( state0, _mm512_add_epi64( in[0], io0.v512 ) );
state1 = _mm512_xor_si512( state1, _mm512_add_epi64( in[1], io1.v512 ) ); state1 = _mm512_xor_si512( state1, _mm512_add_epi64( in[1], io1.v512 ) );
state2 = _mm512_xor_si512( state2, _mm512_add_epi64( in[2], io2.v512 ) ); state2 = _mm512_xor_si512( state2, _mm512_add_epi64( in[2], io2.v512 ) );
/*
io.v512[0] = _mm512_mask_blend_epi64( 0xf0,
_mm512_load_si512( (__m512i*)inout0 ),
_mm512_load_si512( (__m512i*)inout1 ) );
io.v512[1] = _mm512_mask_blend_epi64( 0xf0,
_mm512_load_si512( (__m512i*)inout0 +1 ),
_mm512_load_si512( (__m512i*)inout1 +1 ) );
io.v512[2] = _mm512_mask_blend_epi64( 0xf0,
_mm512_load_si512( (__m512i*)inout0 +2 ),
_mm512_load_si512( (__m512i*)inout1 +2 ) );
state0 = _mm512_xor_si512( state0, _mm512_add_epi64( in[0], io.v512[0] ) );
state1 = _mm512_xor_si512( state1, _mm512_add_epi64( in[1], io.v512[1] ) );
state2 = _mm512_xor_si512( state2, _mm512_add_epi64( in[2], io.v512[2] ) );
*/
//Applies the reduced-round transformation f to the sponge's state //Applies the reduced-round transformation f to the sponge's state
LYRA_ROUND_2WAY_AVX512( state0, state1, state2, state3 ); LYRA_ROUND_2WAY_AVX512( state0, state1, state2, state3 );
@@ -415,22 +419,6 @@ static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
io2.v512 = _mm512_mask_blend_epi64( 0xf0, io2.v512, out[2] ); io2.v512 = _mm512_mask_blend_epi64( 0xf0, io2.v512, out[2] );
} }
/*
if ( rowOut == rowInOut0 )
{
io.v512[0] = _mm512_mask_blend_epi64( 0x0f, io.v512[0], out[0] );
io.v512[1] = _mm512_mask_blend_epi64( 0x0f, io.v512[1], out[1] );
io.v512[2] = _mm512_mask_blend_epi64( 0x0f, io.v512[2], out[2] );
}
if ( rowOut == rowInOut1 )
{
io.v512[0] = _mm512_mask_blend_epi64( 0xf0, io.v512[0], out[0] );
io.v512[1] = _mm512_mask_blend_epi64( 0xf0, io.v512[1], out[1] );
io.v512[2] = _mm512_mask_blend_epi64( 0xf0, io.v512[2], out[2] );
}
*/
//M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand) //M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
t0 = _mm512_permutex_epi64( state0, 0x93 ); t0 = _mm512_permutex_epi64( state0, 0x93 );
t1 = _mm512_permutex_epi64( state1, 0x93 ); t1 = _mm512_permutex_epi64( state1, 0x93 );
@@ -444,12 +432,23 @@ static inline void reducedDuplexRow_2way_overlap( uint64_t *State,
_mm512_mask_blend_epi64( 0x11, t2, t1 ) ); _mm512_mask_blend_epi64( 0x11, t2, t1 ) );
} }
/*
casti_m256i( inout0, 0 ) = _mm512_castsi512_si256( io0.v512 );
casti_m256i( inout0, 2 ) = _mm512_castsi512_si256( io1.v512 );
casti_m256i( inout0, 4 ) = _mm512_castsi512_si256( io2.v512 );
_mm512_mask_store_epi64( inout1, 0xf0, io0.v512 );
_mm512_mask_store_epi64( inout1 +1, 0xf0, io1.v512 );
_mm512_mask_store_epi64( inout1 +2, 0xf0, io2.v512 );
*/
casti_m256i( inout0, 0 ) = io0.v256lo; casti_m256i( inout0, 0 ) = io0.v256lo;
casti_m256i( inout1, 1 ) = io0.v256hi; casti_m256i( inout1, 1 ) = io0.v256hi;
casti_m256i( inout0, 2 ) = io1.v256lo; casti_m256i( inout0, 2 ) = io1.v256lo;
casti_m256i( inout1, 3 ) = io1.v256hi; casti_m256i( inout1, 3 ) = io1.v256hi;
casti_m256i( inout0, 4 ) = io2.v256lo; casti_m256i( inout0, 4 ) = io2.v256lo;
casti_m256i( inout1, 5 ) = io2.v256hi; casti_m256i( inout1, 5 ) = io2.v256hi;
/* /*
_mm512_mask_store_epi64( inout0, 0x0f, io.v512[0] ); _mm512_mask_store_epi64( inout0, 0x0f, io.v512[0] );
_mm512_mask_store_epi64( inout1, 0xf0, io.v512[0] ); _mm512_mask_store_epi64( inout1, 0xf0, io.v512[0] );

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.19.6. # Generated by GNU Autoconf 2.69 for cpuminer-opt 3.19.7.
# #
# #
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='cpuminer-opt' PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.19.6' PACKAGE_VERSION='3.19.7'
PACKAGE_STRING='cpuminer-opt 3.19.6' PACKAGE_STRING='cpuminer-opt 3.19.7'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures cpuminer-opt 3.19.6 to adapt to many kinds of systems. \`configure' configures cpuminer-opt 3.19.7 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.19.6:";; short | recursive ) echo "Configuration of cpuminer-opt 3.19.7:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
cpuminer-opt configure 3.19.6 cpuminer-opt configure 3.19.7
generated by GNU Autoconf 2.69 generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc. Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.19.6, which was It was created by cpuminer-opt $as_me 3.19.7, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@ $ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='cpuminer-opt' PACKAGE='cpuminer-opt'
VERSION='3.19.6' VERSION='3.19.7'
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by cpuminer-opt $as_me 3.19.6, which was This file was extended by cpuminer-opt $as_me 3.19.7, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\ ac_cs_version="\\
cpuminer-opt config.status 3.19.6 cpuminer-opt config.status 3.19.7
configured by $0, generated by GNU Autoconf 2.69, configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.19.6]) AC_INIT([cpuminer-opt], [3.19.7])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

View File

@@ -105,8 +105,9 @@ bool opt_randomize = false;
static int opt_retries = -1; static int opt_retries = -1;
static int opt_fail_pause = 10; static int opt_fail_pause = 10;
static int opt_time_limit = 0; static int opt_time_limit = 0;
static unsigned int time_limit_stop = 0;
int opt_timeout = 300; int opt_timeout = 300;
static int opt_scantime = 5; static int opt_scantime = 0;
const int min_scantime = 1; const int min_scantime = 1;
//static const bool opt_time = true; //static const bool opt_time = true;
enum algos opt_algo = ALGO_NULL; enum algos opt_algo = ALGO_NULL;
@@ -341,6 +342,7 @@ void get_currentalgo(char* buf, int sz)
void proper_exit(int reason) void proper_exit(int reason)
{ {
if (opt_debug) applog(LOG_INFO,"Program exit");
#ifdef WIN32 #ifdef WIN32
if (opt_background) { if (opt_background) {
HWND hcon = GetConsoleWindow(); HWND hcon = GetConsoleWindow();
@@ -2201,8 +2203,6 @@ static void *miner_thread( void *userdata )
// : 0; // : 0;
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20; uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20;
time_t firstwork_time = 0;
int i;
memset( &work, 0, sizeof(work) ); memset( &work, 0, sizeof(work) );
/* Set worker threads to nice 19 and then preferentially to SCHED_IDLE /* Set worker threads to nice 19 and then preferentially to SCHED_IDLE
@@ -2291,12 +2291,11 @@ static void *miner_thread( void *userdata )
} }
} }
} }
else // GBT or getwork else if ( !opt_benchmark ) // GBT or getwork
{ {
pthread_rwlock_wrlock( &g_work_lock ); pthread_rwlock_wrlock( &g_work_lock );
if ( ( ( time(NULL) - g_work_time ) if ( ( ( time(NULL) - g_work_time ) >= opt_scantime )
>= ( have_longpoll ? LP_SCANTIME : opt_scantime ) )
|| ( *nonceptr >= end_nonce ) ) || ( *nonceptr >= end_nonce ) )
{ {
if ( unlikely( !get_work( mythr, &g_work ) ) ) if ( unlikely( !get_work( mythr, &g_work ) ) )
@@ -2325,25 +2324,14 @@ static void *miner_thread( void *userdata )
if ( unlikely( !algo_gate.ready_to_mine( &work, &stratum, thr_id ) ) ) if ( unlikely( !algo_gate.ready_to_mine( &work, &stratum, thr_id ) ) )
continue; continue;
// LP_SCANTIME overrides opt_scantime option, is this right? // opt_scantime expressed in hashes
// adjust max_nonce to meet target scan time. Stratum and longpoll
// can go longer because they can rely on restart_threads to signal
// an early abort. get_work on the other hand can't rely on
// restart_threads so need a much shorter scantime
if ( have_stratum )
max64 = 60 * thr_hashrates[thr_id];
else if ( have_longpoll )
max64 = LP_SCANTIME * thr_hashrates[thr_id];
else // getwork inline
max64 = opt_scantime * thr_hashrates[thr_id]; max64 = opt_scantime * thr_hashrates[thr_id];
// time limit // time limit
if ( unlikely( opt_time_limit && firstwork_time ) ) if ( unlikely( opt_time_limit ) )
{ {
int passed = (int)( time(NULL) - firstwork_time ); unsigned int now = (unsigned int)time(NULL);
int remain = (int)( opt_time_limit - passed ); if ( now >= time_limit_stop )
if ( remain < 0 )
{ {
if ( thr_id != 0 ) if ( thr_id != 0 )
{ {
@@ -2355,14 +2343,16 @@ static void *miner_thread( void *userdata )
char rate[32]; char rate[32];
format_hashrate( global_hashrate, rate ); format_hashrate( global_hashrate, rate );
applog( LOG_NOTICE, "Benchmark: %s", rate ); applog( LOG_NOTICE, "Benchmark: %s", rate );
fprintf(stderr, "%llu\n", (unsigned long long)global_hashrate);
} }
else else
applog( LOG_NOTICE, applog( LOG_NOTICE, "Mining timeout of %ds reached, exiting...",
"Mining timeout of %ds reached, exiting...", opt_time_limit); opt_time_limit);
proper_exit(0); proper_exit(0);
} }
if ( remain < max64 ) max64 = remain; // else
if ( time_limit_stop - now < opt_scantime )
max64 = ( time_limit_stop - now ) * thr_hashrates[thr_id] ;
} }
// Select nonce range based on max64, the estimated number of hashes // Select nonce range based on max64, the estimated number of hashes
@@ -2378,8 +2368,6 @@ static void *miner_thread( void *userdata )
max_nonce = work_nonce + (uint32_t)max64; max_nonce = work_nonce + (uint32_t)max64;
// init time // init time
if ( firstwork_time == 0 )
firstwork_time = time(NULL);
hashes_done = 0; hashes_done = 0;
gettimeofday( (struct timeval *) &tv_start, NULL ); gettimeofday( (struct timeval *) &tv_start, NULL );
@@ -2452,7 +2440,7 @@ static void *miner_thread( void *userdata )
{ {
double hashrate = 0.; double hashrate = 0.;
pthread_mutex_lock( &stats_lock ); pthread_mutex_lock( &stats_lock );
for ( i = 0; i < opt_n_threads; i++ ) for ( int i = 0; i < opt_n_threads; i++ )
hashrate += thr_hashrates[i]; hashrate += thr_hashrates[i];
global_hashrate = hashrate; global_hashrate = hashrate;
pthread_mutex_unlock( &stats_lock ); pthread_mutex_unlock( &stats_lock );
@@ -3704,6 +3692,17 @@ int main(int argc, char *argv[])
show_usage_and_exit(1); show_usage_and_exit(1);
} }
if ( !opt_scantime )
{
if ( have_stratum ) opt_scantime = 30;
else if ( have_longpoll ) opt_scantime = LP_SCANTIME;
else opt_scantime = 5;
}
if ( opt_time_limit )
time_limit_stop = (unsigned int)time(NULL) + opt_time_limit;
// need to register to get algo optimizations for cpu capabilities // need to register to get algo optimizations for cpu capabilities
// but that causes registration logs before cpu capabilities is output. // but that causes registration logs before cpu capabilities is output.
// Would need to split register function into 2 parts. First part sets algo // Would need to split register function into 2 parts. First part sets algo