From 79164c24b53d8db6ebcef6161e59ac2bf6b529b8 Mon Sep 17 00:00:00 2001
From: Jay D Dee
Date: Sun, 17 Dec 2017 12:00:42 -0500
Subject: [PATCH] v3.7.7

---
 Makefile.am         |   2 +-
 RELEASE_NOTES       |   7 +-
 algo/lyra2/lyra2h.c |   2 +-
 avxdefs.h           | 247 +++++++++++++++++++++++++++++++++++++-------
 configure           |  20 ++--
 configure.ac        |   2 +-
 cpu-miner.c         |   6 +-
 7 files changed, 230 insertions(+), 56 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index ec74130..d70be78 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -138,7 +138,7 @@ cpuminer_SOURCES = \
   algo/skein/skein-hash-4way.c \
   algo/skein/skein.c \
   algo/skein/skein-4way.c \
-  algo/skein/skein-gate.c \
+  algo/skein/skein-gate.c \
   algo/skein/skein2.c \
   algo/skein/skein2-4way.c \
   algo/skein/skein2-gate.c \
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 16b3d50..e0a463f 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -164,9 +164,14 @@ Support for even older x86_64 without AES_NI or SSE2 is not available.
 Change Log
 ----------
 
+v3.7.7
+
+Fixed regression caused by 64 CPU support.
+Fixed lyra2h.
+
 v3.7.6
 
-Added kyra2h algo for Hppcoin.
+Added lyra2h algo for Hppcoin.
 Added support for more than 64 CPUs.
 Optimized shavite512 with AES, improves x11 etc.
diff --git a/algo/lyra2/lyra2h.c b/algo/lyra2/lyra2h.c
index 85b10a6..51579c3 100644
--- a/algo/lyra2/lyra2h.c
+++ b/algo/lyra2/lyra2h.c
@@ -31,7 +31,7 @@ void lyra2h_hash( void *state, const void *input )
    sph_blake256( &ctx_blake, input + 64, 16 );
    sph_blake256_close( &ctx_blake, hash );
 
-   LYRA2Z( lyra2h_matrix, hash, 32, hash, 32, hash, 32, 8, 8, 8);
+   LYRA2Z( lyra2h_matrix, hash, 32, hash, 32, hash, 32, 16, 16, 16 );
 
    memcpy(state, hash, 32);
 }
diff --git a/avxdefs.h b/avxdefs.h
index e531568..b7b686a 100644
--- a/avxdefs.h
+++ b/avxdefs.h
@@ -9,7 +9,7 @@
 #include 
 #include 
 
-
+//
 // 128 bit utilities and shortcuts
 
 // Constant zero
@@ -23,9 +23,51 @@
 // Unary negation (-a)
 #define mm_negate_64( a ) _mm_sub_epi64( mm_zero, a )
-
 #define mm_negate_32( a ) _mm_sub_epi32( mm_zero, a )
 
+//
+// Bit operations, functional but not very efficient
+
+// Return x with bit n set/clear in all elements.
+// Note: the 128 bit variants take n as a byte count because
+// _mm_slli_si128 and _mm_srli_si128 shift by bytes, not bits.
+#define mm_bitset_128( x, n ) \
+   _mm_or_si128( x, _mm_slli_si128( _mm_set_epi64x( 0ULL, 1ULL ), n ) )
+
+#define mm_bitclr_128( x, n ) \
+   _mm_and_si128( x, mm_not( _mm_slli_si128( \
+                                 _mm_set_epi64x( 0ULL, 1ULL ), n ) ) )
+
+#define mm_bitset_64( x, n ) \
+   _mm_or_si128( x, _mm_slli_epi64( _mm_set1_epi64x( 1ULL ), n ) )
+
+#define mm_bitclr_64( x, n ) \
+   _mm_and_si128( x, mm_not( _mm_slli_epi64( _mm_set1_epi64x( 1ULL ), n ) ) )
+
+#define mm_bitset_32( x, n ) \
+   _mm_or_si128( x, _mm_slli_epi32( _mm_set1_epi32( 1UL ), n ) )
+
+#define mm_bitclr_32( x, n ) \
+   _mm_and_si128( x, mm_not( _mm_slli_epi32( _mm_set1_epi32( 1UL ), n ) ) )
+
+#define mm_bitset_16( x, n ) \
+   _mm_or_si128( x, _mm_slli_epi16( _mm_set1_epi16( 1U ), n ) )
+
+#define mm_bitclr_16( x, n ) \
+   _mm_and_si128( x, mm_not( _mm_slli_epi16( _mm_set1_epi16( 1U ), n ) ) )
+
+// return vector of bool
+#define mm_bittest_128( x, n ) \
+   _mm_and_si128( _mm_srli_si128( x, n ), _mm_set_epi64x( 0ULL, 1ULL ) )
+
+#define mm_bittest_64( x, n ) \
+   _mm_and_si128( _mm_srli_epi64( x, n ), _mm_set1_epi64x( 1ULL ) )
+
+#define mm_bittest_32( x, n ) \
+   _mm_and_si128( _mm_srli_epi32( x, n ), _mm_set1_epi32( 1UL ) )
+
+#define mm_bittest_16( x, n ) \
+   _mm_and_si128( _mm_srli_epi16( x, n ), _mm_set1_epi16( 1U ) )
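
A quick sanity check of the 64 bit set/test idiom above. This is a minimal
standalone sketch assuming SSE2; the intrinsic forms are written out
directly so it compiles without this header:

    #include <emmintrin.h>   // SSE2
    #include <stdint.h>
    #include <stdio.h>

    int main()
    {
       // mm_bitset_64( zero, 5 ): set bit 5 in both 64 bit lanes.
       __m128i x = _mm_or_si128( _mm_setzero_si128(),
                      _mm_slli_epi64( _mm_set1_epi64x( 1ULL ), 5 ) );
       // mm_bittest_64( x, 5 ): per lane bool, 1 if bit 5 is set.
       __m128i t = _mm_and_si128( _mm_srli_epi64( x, 5 ),
                      _mm_set1_epi64x( 1ULL ) );
       uint64_t lane[2];
       _mm_storeu_si128( (__m128i*)lane, t );
       printf( "%llu %llu\n", (unsigned long long)lane[0],
                              (unsigned long long)lane[1] );   // expect: 1 1
       return 0;
    }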
+
+//
 // Memory functions
 // n = number of __m128i, bytes/16
 
@@ -51,6 +93,7 @@ inline void memcpy_64( uint64_t* dst, const uint64_t* src, int n )
    dst[i] = src[i];
 }
 
+//
 // Pointer cast
 
 // p = any aligned pointer
@@ -80,23 +123,29 @@
 #define mm_rotl_32( w, c ) _mm_or_si128( _mm_slli_epi32( w, c ), \
                                          _mm_srli_epi32( w, 32-c ) )
 
-// Rotate elements in vector
+#define mm_rotr_16( w, c ) _mm_or_si128( _mm_srli_epi16( w, c ), \
+                                         _mm_slli_epi16( w, 16-c ) )
+
+#define mm_rotl_16( w, c ) _mm_or_si128( _mm_slli_epi16( w, c ), \
+                                         _mm_srli_epi16( w, 16-c ) )
+
+//
+// Shuffle vector elements
 
 // Swap upper and lower 64 bits of 128 bit source vector
 #define mm_swap_64(s) _mm_shuffle_epi32( s, 0x4e )
 
 // Rotate 128 bit vector by one 32 bit element.
 #define mm_rotr_1x32( w ) _mm_shuffle_epi32( w, 0x39 )
-
 #define mm_rotl_1x32( w ) _mm_shuffle_epi32( w, 0x93 )
 
-// Rotate 256 bits through two 128 bit vectors
+// Shuffle elements across two 128 bit vectors
 
 // Swap 128 bit source vectors in place.
 // void mm128_swap128( __m128i, __m128i )
-#define mm_swap_128(s0, s1) s0 = _mm_xor_si128(s0, s1); \
-                            s1 = _mm_xor_si128(s0, s1); \
-                            s0 = _mm_xor_si128(s0, s1);
+#define mm_swap_128(hi, lo) hi = _mm_xor_si128(hi, lo); \
+                            lo = _mm_xor_si128(hi, lo); \
+                            hi = _mm_xor_si128(hi, lo);
 
 // Rotate two 128 bit vectors in place as one 256 bit vector by 1 element
 #define mm_rotl256_1x64( s0, s1 ) \
 do { \
@@ -119,6 +168,30 @@ do { \
    s0 = t; \
 } while(0)
 
+#define mm_rotl256_1x32( s0, s1 ) \
+do { \
+   __m128i t; \
+   s0 = mm_swap_64( s0 ); \
+   s1 = mm_swap_64( s1 ); \
+   t  = _mm_blendv_epi8( s0, s1, _mm_set_epi32( \
+               0xfffffffful, 0xfffffffful, 0xfffffffful, 0ul ) ); \
+   s1 = _mm_blendv_epi8( s0, s1, _mm_set_epi32( \
+               0ul, 0ul, 0ul, 0xfffffffful ) ); \
+   s0 = t; \
+} while(0)
+
+#define mm_rotr256_1x32( s0, s1 ) \
+do { \
+   __m128i t; \
+   s0 = mm_swap_64( s0 ); \
+   s1 = mm_swap_64( s1 ); \
+   t  = _mm_blendv_epi8( s0, s1, _mm_set_epi32( \
+               0ul, 0ul, 0ul, 0xfffffffful ) ); \
+   s1 = _mm_blendv_epi8( s0, s1, _mm_set_epi32( \
+               0xfffffffful, 0xfffffffful, 0xfffffffful, 0ul ) ); \
+   s0 = t; \
+} while(0)
+
 // Older slower
 #define mm_rotl256_1x64x( s0, s1 ) \
 do { \
@@ -148,10 +221,14 @@ do { \
    s0 = t; \
 } while(0)
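
For a full one element rotation of two registers treated as one 256 bit
value, SSSE3's _mm_alignr_epi8 produces each output register in a single
instruction. A minimal sketch; the convention that s0 holds the low 128
bits is an assumption matching the hi/lo naming of mm_swap_128 above:

    #include <immintrin.h>   // SSSE3 for _mm_alignr_epi8
    #include <stdint.h>
    #include <stdio.h>

    // Rotate the 256 bit value (s1:s0) right by one 32 bit element.
    static inline void rotr256_1x32_alignr( __m128i *s0, __m128i *s1 )
    {
       __m128i t0 = _mm_alignr_epi8( *s1, *s0, 4 );   // new low  lane
       __m128i t1 = _mm_alignr_epi8( *s0, *s1, 4 );   // new high lane
       *s0 = t0;  *s1 = t1;
    }

    int main()
    {
       uint32_t v[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
       __m128i s0 = _mm_loadu_si128( (__m128i*)v );
       __m128i s1 = _mm_loadu_si128( (__m128i*)(v + 4) );
       rotr256_1x32_alignr( &s0, &s1 );
       _mm_storeu_si128( (__m128i*)v, s0 );
       _mm_storeu_si128( (__m128i*)(v + 4), s1 );
       for ( int i = 0; i < 8; i++ )
          printf( "%u ", (unsigned)v[i] );   // 1 2 3 4 5 6 7 0
       printf( "\n" );
       return 0;
    }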
 
-// Rotate 256 bits through two 128 bit vectors by n*32 bits and return
-// the rotated s0.
+// Return s0 with elements shifted right/left and the low/high element of
+// s1 shifted into the vacated high/low element of s0. In effect a partial
+// rotation of two 128 bit vectors treated as one 256 bit vector, returning
+// only the rotated s0.
 // Similar to mm_rotr256_1x32 but only a partial rotation as s1 is not
 // completed. It's faster than a full rotation.
+
 inline __m128i mm_rotr256_32( __m128i s0, __m128i s1, int n )
 {
    return _mm_or_si128( _mm_srli_si128( s0, n<<2 ),
                         _mm_slli_si128( s1, 16 - (n<<2) ) );
 }
@@ -164,8 +241,25 @@ inline __m128i mm_rotl256_32( __m128i s0, __m128i s1, int n )
                         _mm_srli_si128( s1, 16 - (n<<2) ) );
 }
 
+//
 // Swap bytes in vector elements
-inline __m128i mm_byteswap_32( __m128i x )
+
+inline __m128i mm_byteswap_32( __m128i x )
+{
+   return _mm_shuffle_epi8( x, _mm_set_epi8(
+               0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b,
+               0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03 ) );
+}
+
+inline __m128i mm_byteswap_64( __m128i x )
+{
+   return _mm_shuffle_epi8( x, _mm_set_epi8(
+               0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+               0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 ) );
+}
+
+// older slower
+inline __m128i mm_byteswap_32x( __m128i x )
 {
    __m128i x1 = _mm_and_si128( x, _mm_set1_epi32( 0x0000ff00 ) );
    __m128i x2 = _mm_and_si128( x, _mm_set1_epi32( 0x00ff0000 ) );
@@ -176,7 +270,7 @@ inline __m128i mm_byteswap_32( __m128i x )
    return _mm_or_si128( _mm_or_si128( x0, x1 ), _mm_or_si128( x2, x3 ) );
 }
 
-inline __m128i mm_byteswap_64( __m128i x )
+inline __m128i mm_byteswap_64x( __m128i x )
 {
    x = _mm_or_si128( _mm_srli_epi64( x, 32 ), _mm_slli_epi64( x, 32 ));
 
@@ -191,10 +285,10 @@
              _mm_set1_epi64x( 0x00FF00FF00FF00FF ) ), 8 ));
 }
 
-
 #if defined (__AVX2__)
 
-// Utilities and Shortcuts
+//
+// 256 bit utilities and shortcuts
 
 // Constant zero
 #define mm256_zero _mm256_setzero_si256()
@@ -207,19 +301,55 @@
 // Unary negation ( -a )
 #define mm256_negate_64( a ) _mm256_sub_epi64( mm256_zero, a )
-
 #define mm256_negate_32( a ) _mm256_sub_epi32( mm256_zero, a )
 
-// Pack/Unpack two 128 bit vectors into/from one 256 bit vector
-// usefulness tbd
-#define mm256_pack_2x128( hi, lo ) \
-   _mm256_inserti128_si256( _mm256_castsi128_si256( lo ), hi, 0 ) \
-
-#define mm256_unpack_2x128( hi, lo, src ) \
-   lo = _mm256_castsi256_si128( src ); \
-   hi = _mm256_castsi256_si128( mm256_swap_128( src ) );
+//
+// Bit operations
 
+// Return x with bit n set/clear in all elements.
+// Note: the 128 bit variants take n as a byte count and operate within
+// each 128 bit lane, because _mm256_slli_si256 and _mm256_srli_si256
+// shift by bytes per lane.
+#define mm256_bitset_128( x, n ) \
+   _mm256_or_si256( x, _mm256_slli_si256( \
+                    _mm256_set_epi64x( 0ULL, 1ULL, 0ULL, 1ULL ), n ) )
+
+#define mm256_bitclr_128( x, n ) \
+   _mm256_and_si256( x, mm256_not( _mm256_slli_si256( \
+                    _mm256_set_epi64x( 0ULL, 1ULL, 0ULL, 1ULL ), n ) ) )
+
+#define mm256_bitset_64( x, n ) \
+   _mm256_or_si256( x, _mm256_set1_epi64x( 1ULL << n ) )
+
+#define mm256_bitclr_64( x, n ) \
+   _mm256_and_si256( x, mm256_not( _mm256_set1_epi64x( 1ULL << n ) ) )
+
+#define mm256_bitset_32( x, n ) \
+   _mm256_or_si256( x, _mm256_set1_epi32( 1UL << n ) )
+
+#define mm256_bitclr_32( x, n ) \
+   _mm256_and_si256( x, mm256_not( _mm256_set1_epi32( 1UL << n ) ) )
+
+#define mm256_bitset_16( x, n ) \
+   _mm256_or_si256( x, _mm256_set1_epi16( 1U << n ) )
+
+#define mm256_bitclr_16( x, n ) \
+   _mm256_and_si256( x, mm256_not( _mm256_set1_epi16( 1U << n ) ) )
+
+// return vector of bool
+#define mm256_bittest_128( x, n ) \
+   _mm256_and_si256( _mm256_srli_si256( x, n ), \
+                     _mm256_set_epi64x( 0ULL, 1ULL, 0ULL, 1ULL ) )
+
+#define mm256_bittest_64( x, n ) \
+   _mm256_and_si256( _mm256_srli_epi64( x, n ), _mm256_set1_epi64x( 1ULL ) )
+
+#define mm256_bittest_32( x, n ) \
+   _mm256_and_si256( _mm256_srli_epi32( x, n ), _mm256_set1_epi32( 1UL ) )
+
+#define mm256_bittest_16( x, n ) \
+   _mm256_and_si256( _mm256_srli_epi16( x, n ), _mm256_set1_epi16( 1U ) )
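
The same check for the 256 bit macros. A minimal sketch assuming AVX2,
again with the intrinsic form written out; every lane of the result
should equal x | (1 << n):

    #include <immintrin.h>
    #include <stdint.h>
    #include <stdio.h>

    int main()
    {
       const int n = 7;
       uint32_t in[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }, out[8];
       __m256i x = _mm256_loadu_si256( (__m256i*)in );
       // mm256_bitset_32( x, n ): set bit n in all 32 bit lanes.
       x = _mm256_or_si256( x, _mm256_set1_epi32( 1UL << n ) );
       _mm256_storeu_si256( (__m256i*)out, x );
       for ( int i = 0; i < 8; i++ )
          if ( out[i] != ( in[i] | ( 1UL << n ) ) )
             { printf( "lane %d bad\n", i );  return 1; }
       printf( "ok\n" );
       return 0;
    }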
+
+//
 // Memory functions
 // n = number of 256 bit (32 byte) vectors
 
@@ -238,6 +368,7 @@ inline void memcpy_256( __m256i *dst, const __m256i *src, int n )
    for ( int i = 0; i < n; i ++ )
       dst[i] = src[i];
 }
 
+//
 // Pointer casting
 
 // p = any aligned pointer
@@ -252,10 +383,11 @@
 // returns p[i]
 #define casti_m256i(p,i) (((__m256i*)(p))[(i)])
 
+//
 // Rotate bits in vector elements
 
 // Rotate bits in 64 bit elements
-// w = packed 64 bit data, n= number of bits to rotate
+// w = packed 64 bit data, c = number of bits to rotate
 #define mm256_rotr_64( w, c ) \
    _mm256_or_si256( _mm256_srli_epi64(w, c), _mm256_slli_epi64(w, 64 - c) )
@@ -271,22 +403,43 @@
 // Rotate elements in vector
 
+// Swap 128 bit elements (aka rotate by two 64 bit or four 32 bit elements)
+// Identical functionality, but "f" is AVX while "x" is AVX2 and likely
+// faster.
+#define mm256_swap_128( w ) _mm256_permute2x128_si256( w, w, 1 )
+//#define mm256_swap_128( w ) _mm256_permute2f128_si256( w, w, 1 )
+
 // Rotate vector by one 64 bit element (aka two 32 bit elements)
 //__m256i mm256_rotl256_1x64( _mm256i, int )
 #define mm256_rotl256_1x64( w ) _mm256_permute4x64_epi64( w, 0x93 )
-
 #define mm256_rotr256_1x64( w ) _mm256_permute4x64_epi64( w, 0x39 )
 
-// Swap 128 bit elements (aka rotate by two 64 bit, four 32 bit elements))
-#define mm256_swap_128( w ) _mm256_permute2f128_si256( w, w, 1 )
-
 // Rotate by one 32 bit element (aka two 16 bit elements)
 #define mm256_rotl256_1x32( w ) _mm256_shuffle_epi32( w, 0x93 )
-
 #define mm256_rotr256_1x32( w ) _mm256_shuffle_epi32( w, 0x39 )
 
+//
 // Swap bytes in vector elements
+
+inline __m256i mm256_byteswap_64( __m256i x )
+{
+   return _mm256_shuffle_epi8( x, _mm256_set_epi8(
+               0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+               0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+               0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+               0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 ) );
+}
+
 inline __m256i mm256_byteswap_32( __m256i x )
+{
+   return _mm256_shuffle_epi8( x, _mm256_set_epi8(
+               0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b,
+               0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
+               0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b,
+               0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03 ) );
+}
+
+// older, slower
+inline __m256i mm256_byteswap_32x( __m256i x )
 {
    __m256i x1 = _mm256_and_si256( x, _mm256_set1_epi32( 0x0000ff00 ) );
    __m256i x2 = _mm256_and_si256( x, _mm256_set1_epi32( 0x00ff0000 ) );
@@ -298,7 +451,7 @@ inline __m256i mm256_byteswap_32( __m256i x )
                        _mm256_or_si256( x2, x3 ) );
 }
 
-inline __m256i mm256_byteswap_64( __m256i x )
+inline __m256i mm256_byteswap_64x( __m256i x )
 {
    x = _mm256_or_si256( _mm256_srli_epi64( x, 32 ), _mm256_slli_epi64( x, 32 ));
 
@@ -313,8 +466,19 @@
              _mm256_set1_epi64x( 0x00FF00FF00FF00FF ) ), 8 ));
 }
 
-// Pseudo parallel aes
+
+// Pack/Unpack two 128 bit vectors into/from one 256 bit vector
+// usefulness tbd
+#define mm256_pack_2x128( hi, lo ) \
+   _mm256_inserti128_si256( _mm256_castsi128_si256( lo ), hi, 1 )
+
+#define mm256_unpack_2x128( hi, lo, src ) \
+   lo = _mm256_castsi256_si128( src ); \
+   hi = _mm256_castsi256_si128( mm256_swap_128( src ) );
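
A round trip sketch for the 2x128 pack/unpack pair above, assuming AVX2.
_mm256_extracti128_si256 is used as an independent reference for the high
lane instead of the swap-and-cast idiom:

    #include <immintrin.h>
    #include <stdio.h>

    int main()
    {
       __m128i lo = _mm_set_epi64x( 2, 1 ), hi = _mm_set_epi64x( 4, 3 );
       // mm256_pack_2x128( hi, lo ): hi goes into the upper lane.
       __m256i v   = _mm256_inserti128_si256( _mm256_castsi128_si256( lo ),
                                              hi, 1 );
       __m128i lo2 = _mm256_castsi256_si128( v );        // lower lane
       __m128i hi2 = _mm256_extracti128_si256( v, 1 );   // upper lane
       __m128i eq  = _mm_and_si128( _mm_cmpeq_epi64( lo, lo2 ),
                                    _mm_cmpeq_epi64( hi, hi2 ) );
       printf( "%s\n", _mm_movemask_epi8( eq ) == 0xffff ? "ok" : "bad" );
       return 0;
    }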
+
+// Pseudo parallel AES
 // Probably noticeably slower than using pure 128 bit vectors
+// More efficient if one key is used for both lanes.
 inline __m256i mm256_aesenc_2x128( __m256i x, __m256i k )
 {
    __m128i hi, lo, khi, klo;
 
@@ -370,7 +534,8 @@ inline __m256i mm256_aesenc_nokey_2x128( __m256i x )
 
 // interleave 4 arrays of 32 bit elements for 128 bit processing
 // bit_len must be 256, 512 or 640 bits.
 // Vector indexing doesn't work with 32 bit data.
-inline void mm_interleave_4x32x( void *dst, const void *src0, const void *src1,
+// Note: no vector indexing is used here.
+inline void mm_interleave_4x32( void *dst, const void *src0, const void *src1,
                      const void *src2, const void *src3, int bit_len )
 {
    uint32_t *s0 = (uint32_t*)src0;
@@ -408,7 +573,7 @@ inline void mm_interleave_4x32x( void *dst, void *src0, void *src1,
 }
 
 // bit_len must be multiple of 32
-inline void mm_interleave_4x32( void *dst, void *src0, void *src1,
+inline void mm_interleave_4x32x( void *dst, void *src0, void *src1,
                      void *src2, void *src3, int bit_len )
 {
    uint32_t *d = (uint32_t*)dst;
@@ -427,7 +592,8 @@ inline void mm_interleave_4x32( void *dst, void *src0, void *src1,
 }
 
 // doesn't work with 32 bit elements
-inline void mm_deinterleave_4x32x( void *dst0, void *dst1, void *dst2,
+// Note: no vector indexing is used here.
+inline void mm_deinterleave_4x32( void *dst0, void *dst1, void *dst2,
                      void *dst3, const void *src, int bit_len )
 {
    uint32_t *s = (uint32_t*)src;
@@ -469,7 +635,7 @@ inline void mm_deinterleave_4x32x( void *dst0, void *dst1, void *dst2,
 
 // deinterleave 4 arrays into individual buffers for scalar processing
 // bit_len must be multiple of 32
-inline void mm_deinterleave_4x32( void *dst0, void *dst1, void *dst2,
+inline void mm_deinterleave_4x32x( void *dst0, void *dst1, void *dst2,
                      void *dst3, const void *src, int bit_len )
 {
    uint32_t *s = (uint32_t*)src;
@@ -539,6 +705,8 @@ inline void mm256_interleave_4x64x( void *dst, void *src0, void *src1,
 }
 
 // Deinterleave 4 buffers of 64 bit data from the source buffer.
+// bit_len must be 256, 512 or 640 bits.
+// Requires overrun padding for 640 bit len.
 inline void mm256_deinterleave_4x64( void *dst0, void *dst1, void *dst2,
                      void *dst3, const void *src, int bit_len )
 {
@@ -591,8 +759,7 @@ inline void mm256_deinterleave_4x64x( void *dst0, void *dst1, void *dst2,
 
 // Interleave 8 source buffers containing 32 bit data into the destination
 // vector
-// Doesn't work, vecror indexing doesn't work for 32 bit elements
-inline void mm256_interleave_8x32x( void *dst, const void *src0,
+inline void mm256_interleave_8x32( void *dst, const void *src0,
      const void *src1, const void *src2, const void *src3, const void *src4,
      const void *src5, const void *src6, const void *src7, int bit_len )
 {
@@ -654,9 +821,10 @@ inline void mm256_interleave_8x32x( void *dst, const void *src0,
                 s3[19], s2[19], s1[19], s0[19] );
 }
 
+// probably obsolete with double pack 2x32->64, 4x64->256.
 // Slower but it works with 32 bit data
 // bit_len must be multiple of 32
-inline void mm256_interleave_8x32( uint32_t *dst, uint32_t *src0,
+inline void mm256_interleave_8x32x( uint32_t *dst, uint32_t *src0,
      uint32_t *src1, uint32_t *src2, uint32_t *src3, uint32_t *src4,
      uint32_t *src5, uint32_t *src6, uint32_t *src7, int bit_len )
 {
@@ -675,7 +843,7 @@ inline void mm256_interleave_8x32( uint32_t *dst, uint32_t *src0,
 }
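
The layout all of the 4 way interleave variants produce can be stated in
one line of scalar code: interleaved word 4*i + j is word i of source j.
A minimal scalar sketch of that mapping, assuming nothing beyond
standard C:

    #include <stdint.h>
    #include <stdio.h>

    int main()
    {
       uint32_t s0[4] = { 0x00, 0x01, 0x02, 0x03 },
                s1[4] = { 0x10, 0x11, 0x12, 0x13 },
                s2[4] = { 0x20, 0x21, 0x22, 0x23 },
                s3[4] = { 0x30, 0x31, 0x32, 0x33 };
       uint32_t *s[4] = { s0, s1, s2, s3 }, d[16];
       for ( int i = 0; i < 4; i++ )      // word index
          for ( int j = 0; j < 4; j++ )   // lane (source) index
             d[ 4*i + j ] = s[j][i];
       for ( int k = 0; k < 8; k++ )
          printf( "%02x ", (unsigned)d[k] );
       printf( "\n" );                    // 00 10 20 30 01 11 21 31
       return 0;
    }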
 
 // Deinterleave 8 buffers of 32 bit data from the source buffer.
-inline void mm256_deinterleave_8x32x( void *dst0, void *dst1, void *dst2,
+inline void mm256_deinterleave_8x32( void *dst0, void *dst1, void *dst2,
                  void *dst3, void *dst4, void *dst5, void *dst6,
                  void *dst7, const void *src, int bit_len )
 {
@@ -758,7 +926,7 @@ inline void mm256_deinterleave_8x32x( void *dst0, void *dst1, void *dst2,
 
 // Deinterleave 8 arrays into individual buffers for scalar processing
 // bit_len must be multiple of 32
-inline void mm256_deinterleave_8x32( uint32_t *dst0, uint32_t *dst1,
+inline void mm256_deinterleave_8x32x( uint32_t *dst0, uint32_t *dst1,
                  uint32_t *dst2, uint32_t *dst3, uint32_t *dst4,
                  uint32_t *dst5, uint32_t *dst6, uint32_t *dst7,
                  uint32_t *src, int bit_len )
 {
@@ -776,6 +944,7 @@ inline void mm256_deinterleave_8x32( uint32_t *dst0, uint32_t *dst1,
    }
 }
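
Deinterleaving is the inverse mapping: word i of stream j is interleaved
word 8*i + j in the 8 way layout. A scalar sketch under the same layout
convention as above:

    #include <stdint.h>
    #include <stdio.h>

    int main()
    {
       uint32_t d[16];                     // 2 words x 8 lanes, interleaved
       for ( int i = 0; i < 2; i++ )
          for ( int j = 0; j < 8; j++ )
             d[ 8*i + j ] = 0x10*j + i;    // lane j, word i
       uint32_t s3[2];                     // recover stream 3
       for ( int i = 0; i < 2; i++ )
          s3[i] = d[ 8*i + 3 ];
       printf( "%02x %02x\n", (unsigned)s3[0], (unsigned)s3[1] );  // 30 31
       return 0;
    }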
 
+// likely of no use.
 // convert 4x32 byte (128 bit) vectors to 4x64 (256 bit) vectors for AVX2
 // bit_len must be multiple of 64
 inline void mm256_reinterleave_4x64( uint64_t *dst, uint32_t *src,
diff --git a/configure b/configure
index eb3f86a..6795280 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.7.6.
+# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.7.7.
 #
 #
 # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='cpuminer-opt'
 PACKAGE_TARNAME='cpuminer-opt'
-PACKAGE_VERSION='3.7.6'
-PACKAGE_STRING='cpuminer-opt 3.7.6'
+PACKAGE_VERSION='3.7.7'
+PACKAGE_STRING='cpuminer-opt 3.7.7'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''
@@ -1321,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures cpuminer-opt 3.7.6 to adapt to many kinds of systems.
+\`configure' configures cpuminer-opt 3.7.7 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1392,7 +1392,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of cpuminer-opt 3.7.6:";;
+     short | recursive ) echo "Configuration of cpuminer-opt 3.7.7:";;
    esac
   cat <<\_ACEOF
@@ -1497,7 +1497,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-cpuminer-opt configure 3.7.6
+cpuminer-opt configure 3.7.7
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by cpuminer-opt $as_me 3.7.6, which was
+It was created by cpuminer-opt $as_me 3.7.7, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -2981,7 +2981,7 @@ fi
 # Define the identity of the package.
  PACKAGE='cpuminer-opt'
- VERSION='3.7.6'
+ VERSION='3.7.7'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -6677,7 +6677,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by cpuminer-opt $as_me 3.7.6, which was
+This file was extended by cpuminer-opt $as_me 3.7.7, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -6743,7 +6743,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-cpuminer-opt config.status 3.7.6
+cpuminer-opt config.status 3.7.7
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
diff --git a/configure.ac b/configure.ac
index aaadbee..a6c2c10 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([cpuminer-opt], [3.7.6])
+AC_INIT([cpuminer-opt], [3.7.7])
 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
diff --git a/cpu-miner.c b/cpu-miner.c
index ab10b29..e26731c 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -204,7 +204,7 @@ static void affine_to_cpu_mask( int id, unsigned long long mask )
    for ( uint8_t i = 0; i < ncpus; i++ )
    {
       // cpu mask
-      if( (ncpus > 64) || ( mask & (1UL << i) ) ) CPU_SET( i, &set );
+      if( (ncpus > 64) || ( mask & (1ULL << i) ) ) CPU_SET( i, &set );
    }
    if ( id == -1 )
    {
@@ -1690,9 +1690,9 @@ static void *miner_thread( void *userdata )
       {
          if (opt_debug)
             applog( LOG_DEBUG, "Binding thread %d to cpu %d (mask %x)",
-                    thr_id, thr_id % num_cpus, ( 1 << (thr_id % num_cpus) ) );
+                    thr_id, thr_id % num_cpus, ( 1ULL << (thr_id % num_cpus) ) );
 
-         affine_to_cpu_mask( thr_id, 1 << (thr_id % num_cpus) );
+         affine_to_cpu_mask( thr_id, 1ULL << (thr_id % num_cpus) );
       }
       else
       if (opt_affinity != -1)
       {
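
The cpu-miner.c hunk above is the regression fix called out in the release
notes: a plain 1 << i is evaluated at int width, so the affinity mask
silently loses bits once a thread or CPU index passes 31. Building the
mask with 1ULL keeps all 64 bit positions, matching the unsigned long long
mask parameter. A minimal sketch of the corrected form, assuming a 64 bit
unsigned long long:

    #include <stdio.h>

    int main()
    {
       for ( int cpu = 30; cpu <= 34; cpu++ )
       {
          // 64 bit shift, valid for cpu 0..63 as used by CPU_SET.
          unsigned long long mask = 1ULL << ( cpu % 64 );
          printf( "cpu %2d  mask %016llx\n", cpu, mask );
       }
       return 0;
    }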