This commit is contained in:
Jay D Dee
2017-12-17 12:00:42 -05:00
parent 7a1389998b
commit 79164c24b5
7 changed files with 230 additions and 56 deletions

View File

@@ -138,7 +138,7 @@ cpuminer_SOURCES = \
algo/skein/skein-hash-4way.c \
algo/skein/skein.c \
algo/skein/skein-4way.c \
algo/skein/skein-gate.c \
algo/skein/skein2.c \
algo/skein/skein2-4way.c \
algo/skein/skein2-gate.c \

View File

@@ -164,9 +164,14 @@ Support for even older x86_64 without AES_NI or SSE2 is not available.
Change Log
----------
v3.7.7
Fixed regression caused by 64 CPU support.
Fixed lyra2h.
v3.7.6
Added kyra2h algo for Hppcoin.
Added lyra2h algo for Hppcoin.
Added support for more than 64 CPUs.
Optimized shavite512 with AES, improves x11 etc.

View File

@@ -31,7 +31,7 @@ void lyra2h_hash( void *state, const void *input )
sph_blake256( &ctx_blake, input + 64, 16 );
sph_blake256_close( &ctx_blake, hash );
LYRA2Z( lyra2h_matrix, hash, 32, hash, 32, hash, 32, 8, 8, 8);
LYRA2Z( lyra2h_matrix, hash, 32, hash, 32, hash, 32, 16, 16, 16 );
memcpy(state, hash, 32);
}

247
avxdefs.h
View File

@@ -9,7 +9,7 @@
#include <immintrin.h>
#include <memory.h>
//
// 128 bit utilities and shortcuts
// Constant zero
@@ -23,9 +23,51 @@
// Unary negation (-a)
#define mm_negate_64( a ) _mm_sub_epi64( mm_zero, a )
#define mm_negate_32( a ) _mm_sub_epi32( mm_zero, a )
//
// Bit operations, functional but not very efficient
// Return x with bit n set/clear in all elements
#define mm_bitset_128( x, n ) \
   _mm_or_si128( x, _mm_slli_si128( _mm_set_epi64x( 0ULL, 1ULL ), n ) )
#define mm_bitclr_128( x, n ) \
_mm_and_si128( x, mm_not( _mm_slli_si128( \
_mm_set_epi64x( 0ULL, 1ULL ), n ) ) )
#define mm_bitset_64( x, n ) \
   _mm_or_si128( x, _mm_slli_epi64( _mm_set1_epi64x( 1ULL ), n ) )
#define mm_bitclr_64( x, n ) \
_mm_and_si128( x, mm_not( _mm_slli_epi64( _mm_set1_epi64x( 1ULL ), n ) ) )
#define mm_bitset_32( x, n ) \
   _mm_or_si128( x, _mm_slli_epi32( _mm_set1_epi32( 1UL ), n ) )
#define mm_bitclr_32( x, n ) \
_mm_and_si128( x, mm_not( _mm_slli_epi32( _mm_set1_epi32( 1UL ), n ) ) )
#define mm_bitset_16( x, n ) \
   _mm_or_si128( x, _mm_slli_epi16( _mm_set1_epi16( 1U ), n ) )
#define mm_bitclr_16( x, n ) \
_mm_and_si128( x, mm_not( _mm_slli_epi16( _mm_set1_epi16( 1U ), n ) ) )
// return vector of bool
#define mm_bittest_128( x, n ) \
   _mm_and_si128( _mm_srli_si128( x, n ), _mm_set_epi64x( 0ULL, 1ULL ) )
#define mm_bittest_64( x, n ) \
   _mm_and_si128( _mm_srli_epi64( x, n ), _mm_set1_epi64x( 1ULL ) )
#define mm_bittest_32( x, n ) \
   _mm_and_si128( _mm_srli_epi32( x, n ), _mm_set1_epi32( 1UL ) )
#define mm_bittest_16( x, n ) \
   _mm_and_si128( _mm_srli_epi16( x, n ), _mm_set1_epi16( 1U ) )
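A minimal usage sketch for the bit set/test macros above, assuming avxdefs.h is on the include path and SSE2 is enabled:
#include "avxdefs.h"   // assumed location of the macros above
#include <stdint.h>
#include <stdio.h>
int main()
{
   // Set bit 5 in every 32 bit lane, then test it.
   __m128i v = mm_bitset_32( _mm_setzero_si128(), 5 );
   uint32_t r[4];
   _mm_storeu_si128( (__m128i*)r, mm_bittest_32( v, 5 ) );
   printf( "%u %u %u %u\n", r[0], r[1], r[2], r[3] );   // prints 1 1 1 1
   return 0;
}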
//
// Memory functions
// n = number of __m128i, bytes/16
@@ -51,6 +93,7 @@ inline void memcpy_64( uint64_t* dst, const uint64_t* src, int n )
dst[i] = src[i];
}
//
// Pointer cast
// p = any aligned pointer
@@ -80,23 +123,29 @@ inline void memcpy_64( uint64_t* dst, const uint64_t* src, int n )
#define mm_rotl_32( w, c ) _mm_or_si128( _mm_slli_epi32( w, c ), \
_mm_srli_epi32( w, 32-c ) )
// Rotate elements in vector
#define mm_rotr_16( w, c ) _mm_or_si128( _mm_srli_epi16( w, c ), \
_mm_slli_epi16( w, 16-c ) )
#define mm_rotl_16( w, c ) _mm_or_si128( _mm_slli_epi16( w, c ), \
_mm_srli_epi16( w, 16-c ) )
//
// Shuffle vector elements
// Swap upper and lower 64 bits of 128 bit source vector
#define mm_swap_64(s) _mm_shuffle_epi32( s, 0x4e )
// Rotate 128 bit vector by one 32 bit element.
#define mm_rotr_1x32( w ) _mm_shuffle_epi32( w, 0x39 )
#define mm_rotl_1x32( w ) _mm_shuffle_epi32( w, 0x93 )
// Rotate 256 bits through two 128 bit vectors
// Shuffle elements across two 128 bit vectors
// Swap 128 bit source vectors in place.
// void mm128_swap128( __m128i, __m128i )
#define mm_swap_128(s0, s1) s0 = _mm_xor_si128(s0, s1); \
s1 = _mm_xor_si128(s0, s1); \
s0 = _mm_xor_si128(s0, s1);
#define mm_swap_128(hi, lo) hi = _mm_xor_si128(hi, lo); \
lo = _mm_xor_si128(hi, lo); \
hi = _mm_xor_si128(hi, lo);
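The three XOR exchange above saves a temporary register at the cost of a serially dependent chain; note that if both macro arguments name the same variable it zeroes them instead of leaving them unchanged. A scalar sketch of the same identity:
#include <stdint.h>
#include <stdio.h>
int main()
{
   uint64_t a = 1, b = 2;
   a ^= b;   // a = a^b
   b ^= a;   // b = b^(a^b) = old a
   a ^= b;   // a = (a^b)^a = old b
   printf( "%llu %llu\n", (unsigned long long)a, (unsigned long long)b ); // 2 1
   return 0;
}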
// Rotate two 128 bit vectors in place as one 256 vector by 1 element
#define mm_rotl256_1x64( s0, s1 ) \
@@ -119,6 +168,30 @@ do { \
s0 = t; \
} while(0)
#define mm_rotl256_1x32( s0, s1 ) \
do { \
__m128i t; \
s0 = mm_swap_64( s0 ); \
s1 = mm_swap_64( s1 ); \
t = _mm_blendv_epi8( s0, s1, _mm_set_epi32( \
0xfffffffful, 0xfffffffful, 0xfffffffful, 0ul )); \
s1 = _mm_blendv_epi8( s0, s1, _mm_set_epi32( \
0ul, 0ul, 0ul, 0xfffffffful )); \
s0 = t; \
} while(0)
#define mm_rotr256_1x32( s0, s1 ) \
do { \
__m128i t; \
s0 = mm_swap_64( s0 ); \
s1 = mm_swap_64( s1 ); \
t = _mm_blendv_epi8( s0, s1, _mm_set_epi32( \
0ul, 0ul, 0ul, 0xfffffffful )); \
s1 = _mm_blendv_epi8( s0, s1, _mm_set_epi32( \
0xfffffffful, 0xfffffffful, 0xfffffffful, 0ul )); \
s0 = t; \
} while(0)
// Older slower
#define mm_rotl256_1x64x( s0, s1 ) \
do { \
@@ -148,10 +221,14 @@ do { \
s0 = t; \
} while(0)
// Rotate 256 bits through two 128 bit vectors by n*32 bits and return
// the rotated s0.
// need a better name, not rot, poke? step?
// Return s0 with elements shifted right/left and low/high element from
// s1 shifted into the vacated high/low element of s0.
// Partially rotate elements in two 128 bit vectors as one 256 bit vector
// and return the rotated s0.
// Similar to mm_rotr256_1x32 but only a partial rotation: s1 is left
// unchanged, which makes it faster than a full rotation.
inline __m128i mm_rotr256_32( __m128i s0, __m128i s1, int n )
{
return _mm_or_si128( _mm_srli_si128( s0, n<<2 ),
@@ -164,8 +241,25 @@ inline __m128i mm_rotl256_32( __m128i s0, __m128i s1, int n )
_mm_srli_si128( s1, 16 - (n<<2) ) );
}
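A quick check of the shift and merge trick, assuming element 0 is the lowest 32 bits. Since _mm_srli_si128 and _mm_slli_si128 take immediate byte counts, n must reduce to a compile time constant once the function is inlined.
#include "avxdefs.h"   // assumed location of mm_rotr256_32
#include <stdint.h>
#include <stdio.h>
int main()
{
   __m128i s0 = _mm_set_epi32( 3, 2, 1, 0 );
   __m128i s1 = _mm_set_epi32( 7, 6, 5, 4 );
   uint32_t r[4];
   // Shift s0 right one element; element 4 from s1 fills the top.
   _mm_storeu_si128( (__m128i*)r, mm_rotr256_32( s0, s1, 1 ) );
   printf( "%u %u %u %u\n", r[0], r[1], r[2], r[3] );   // prints 1 2 3 4
   return 0;
}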
//
// Swap bytes in vector elements
inline __m128i mm_byteswap_32( __m128i x )
{
return _mm_shuffle_epi8( x, _mm_set_epi8(
0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b,
0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03 ) );
}
inline __m128i mm_byteswap_64( __m128i x )
{
return _mm_shuffle_epi8( x, _mm_set_epi8(
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 ) );
}
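The pshufb based swaps above reverse the bytes of every element in one instruction (SSSE3 required). A sketch converting big endian words, e.g. when normalizing hash output:
#include "avxdefs.h"   // assumed location of mm_byteswap_32
#include <stdint.h>
#include <stdio.h>
int main()
{
   __m128i v = _mm_set1_epi32( 0x11223344 );
   uint32_t r[4];
   _mm_storeu_si128( (__m128i*)r, mm_byteswap_32( v ) );
   printf( "%08x\n", r[0] );   // prints 44332211
   return 0;
}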
// older slower
inline __m128i mm_byteswap_32x( __m128i x )
{
__m128i x1 = _mm_and_si128( x, _mm_set1_epi32( 0x0000ff00 ) );
__m128i x2 = _mm_and_si128( x, _mm_set1_epi32( 0x00ff0000 ) );
@@ -176,7 +270,7 @@ inline __m128i mm_byteswap_32( __m128i x )
return _mm_or_si128( _mm_or_si128( x0, x1 ), _mm_or_si128( x2, x3 ) );
}
inline __m128i mm_byteswap_64( __m128i x )
inline __m128i mm_byteswap_64x( __m128i x )
{
x = _mm_or_si128( _mm_srli_epi64( x, 32 ), _mm_slli_epi64( x, 32 ));
@@ -191,10 +285,10 @@ inline __m128i mm_byteswap_64( __m128i x )
_mm_set1_epi64x( 0x00FF00FF00FF00FF ) ), 8 ));
}
#if defined (__AVX2__)
// Utilities and Shortcuts
//
// 256 bit utilities and Shortcuts
// Constant zero
#define mm256_zero _mm256_setzero_si256()
@@ -207,19 +301,55 @@ inline __m128i mm_byteswap_64( __m128i x )
// Unary negation ( -a )
#define mm256_negate_64( a ) _mm256_sub_epi64( mm256_zero, a )
#define mm256_negate_32( a ) _mm256_sub_epi32( mm256_zero, a )
// Pack/Unpack two 128 bit vectors into/from one 256 bit vector
// usefulness tbd
#define mm256_pack_2x128( hi, lo ) \
_mm256_inserti128_si256( _mm256_castsi128_si256( lo ), hi, 0 ) \
//
// Bit operations
#define mm256_unpack_2x128( hi, lo, src ) \
lo = _mm256_castsi256_si128( src ); \
hi = _mm256_castsi256_si128( mm256_swap_128( src ) );
// Return x with bit n set/clear in all elements
#define mm256_bitset_128( x, n ) \
   _mm256_or_si256( x, _mm256_slli_si256( \
                  _mm256_set_epi64x( 0ULL, 1ULL, 0ULL, 1ULL ), n ) )
#define mm256_bitclr_128( x, n ) \
   _mm256_and_si256( x, mm256_not( _mm256_slli_si256( \
                  _mm256_set_epi64x( 0ULL, 1ULL, 0ULL, 1ULL ), n ) ) )
#define mm256_bitset_64( x, n ) \
_mm256_or_si256( x, _mm256_set1_epi64x( 1ULL << n ) )
#define mm256_bitclr_64( x, n ) \
_mm256_and_si256( x, mm256_not( _mm256_set1_epi64x( 1ULL << n ) ) )
#define mm256_bitset_32( x, n ) \
_mm256_or_si256( x, _mm256_set1_epi32( 1UL << n ) )
#define mm256_bitclr_32( x, n ) \
_mm256_and_si256( x, mm256_not( _mm256_set1_epi32( 1UL << n ) ) )
#define mm256_bitset_16( x, n ) \
_mm256_or_si256( x, _mm256_set1_epi16( 1U << n ) )
#define mm256_bitclr_16( x, n ) \
_mm256_and_si256( x, mm256_not( _mm256_set1_epi16( 1U << n ) ) )
// return vector of bool
// the shift puts bit n at position 0, so mask with 1
#define mm256_bittest_128( x, n ) \
   _mm256_and_si256( _mm256_srli_si256( x, n ), \
                     _mm256_set_epi64x( 0ULL, 1ULL, 0ULL, 1ULL ) )
#define mm256_bittest_64( x, n ) \
   _mm256_and_si256( _mm256_srli_epi64( x, n ), _mm256_set1_epi64x( 1ULL ) )
#define mm256_bittest_32( x, n ) \
   _mm256_and_si256( _mm256_srli_epi32( x, n ), _mm256_set1_epi32( 1UL ) )
#define mm256_bittest_16( x, n ) \
   _mm256_and_si256( _mm256_srli_epi16( x, n ), _mm256_set1_epi16( 1U ) )
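The same pattern one level up, as a sketch assuming AVX2 and the corrected macros above: set then test bit 63 of every 64 bit lane.
#include "avxdefs.h"   // assumed location of the 256 bit macros
#include <stdint.h>
#include <stdio.h>
int main()
{
   __m256i v = mm256_bitset_64( _mm256_setzero_si256(), 63 );
   uint64_t r[4];
   _mm256_storeu_si256( (__m256i*)r, mm256_bittest_64( v, 63 ) );
   printf( "%llu\n", (unsigned long long)r[0] );   // prints 1
   return 0;
}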
//
// Memory functions
// n = number of 256 bit (32 byte) vectors
@@ -238,6 +368,7 @@ inline void memcpy_256( __m256i *dst, const __m256i *src, int n )
for ( int i = 0; i < n; i ++ ) dst[i] = src[i];
}
//
// Pointer casting
// p = any aligned pointer
@@ -252,10 +383,11 @@ inline void memcpy_256( __m256i *dst, const __m256i *src, int n )
// returns p[i]
#define casti_m256i(p,i) (((__m256i*)(p))[(i)])
//
// Rotate bits in vector elements
// Rotate bits in 64 bit elements
// w = packed 64 bit data, n= number of bits to rotate
// w = packed 64 bit data, c = number of bits to rotate
#define mm256_rotr_64( w, c ) \
_mm256_or_si256( _mm256_srli_epi64(w, c), _mm256_slli_epi64(w, 64 - c) )
@@ -271,22 +403,43 @@ inline void memcpy_256( __m256i *dst, const __m256i *src, int n )
// Rotate elements in vector
// Swap 128 bit elements (aka rotate by two 64 bit, four 32 bit elements)
// Identical functionality but "f" is AVX and "x" is AVX2, likely faster.
#define mm256_swap_128( w ) _mm256_permute2x128_si256( w, w, 1 )
//#define mm256_swap_128( w ) _mm256_permute2f128_si256( w, w, 1 )
// Rotate vector by one 64 bit element (aka two 32 bit elements)
//__m256i mm256_rotl256_1x64( __m256i, int )
#define mm256_rotl256_1x64( w ) _mm256_permute4x64_epi64( w, 0x93 )
#define mm256_rotr256_1x64( w ) _mm256_permute4x64_epi64( w, 0x39 )
// Swap 128 bit elements (aka rotate by two 64 bit, four 32 bit elements)
#define mm256_swap_128( w ) _mm256_permute2f128_si256( w, w, 1 )
// Rotate by one 32 bit element (aka two 16 bit elements)
#define mm256_rotl256_1x32( w ) _mm256_shuffle_epi32( w, 0x93 )
#define mm256_rotr256_1x32( w ) _mm256_shuffle_epi32( w, 0x39 )
//
// Swap bytes in vector elements
inline __m256i mm256_byteswap_64( __m256i x )
{
return _mm256_shuffle_epi8( x, _mm256_set_epi8(
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 ) );
}
inline __m256i mm256_byteswap_32( __m256i x )
{
return _mm256_shuffle_epi8( x, _mm256_set_epi8(
0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b,
0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b,
0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03 ) );
}
// older, slower
inline __m256i mm256_byteswap_32x( __m256i x )
{
__m256i x1 = _mm256_and_si256( x, _mm256_set1_epi32( 0x0000ff00 ) );
__m256i x2 = _mm256_and_si256( x, _mm256_set1_epi32( 0x00ff0000 ) );
@@ -298,7 +451,7 @@ inline __m256i mm256_byteswap_32( __m256i x )
_mm256_or_si256( x2, x3 ) );
}
inline __m256i mm256_byteswap_64( __m256i x )
inline __m256i mm256_byteswap_64x( __m256i x )
{
x = _mm256_or_si256( _mm256_srli_epi64( x, 32 ), _mm256_slli_epi64( x, 32 ));
@@ -313,8 +466,19 @@ inline __m256i mm256_byteswap_64( __m256i x )
_mm256_set1_epi64x( 0x00FF00FF00FF00FF ) ), 8 ));
}
// Pseudo parallel aes
// Pack/Unpack two 128 bit vectors into/from one 256 bit vector
// usefulness tbd
#define mm256_pack_2x128( hi, lo ) \
_mm256_inserti128_si256( _mm256_castsi128_si256( lo ), hi, 1 ) \
#define mm256_unpack_2x128( hi, lo, src ) \
lo = _mm256_castsi256_si128( src ); \
hi = _mm256_castsi256_si128( mm256_swap_128( src ) );
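A round trip sketch of the pack/unpack pair (AVX2 assumed): lo lands in the low 128 bit lane, hi in the high lane, and unpack recovers both.
#include "avxdefs.h"   // assumed location of the pack/unpack macros
int main()
{
   __m128i lo = _mm_set1_epi32( 1 ), hi = _mm_set1_epi32( 2 );
   __m256i v  = mm256_pack_2x128( hi, lo );
   __m128i h2, l2;
   mm256_unpack_2x128( h2, l2, v );   // h2 == hi, l2 == lo
   return 0;
}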
// Pseudo parallel AES
// Probably noticeably slower than using pure 128 bit vectors
// More efficient if one key for both lanes.
inline __m256i mm256_aesenc_2x128( __m256i x, __m256i k )
{
__m128i hi, lo, khi, klo;
@@ -370,7 +534,8 @@ inline __m256i mm256_aesenc_nokey_2x128( __m256i x )
// interleave 4 arrays of 32 bit elements for 128 bit processing
// bit_len must be 256, 512 or 640 bits.
// Vector indexing doesn't work with 32 bit data.
inline void mm_interleave_4x32x( void *dst, const void *src0, const void *src1,
// There's no vector indexing here.
inline void mm_interleave_4x32( void *dst, const void *src0, const void *src1,
const void *src2, const void *src3, int bit_len )
{
uint32_t *s0 = (uint32_t*)src0;
@@ -408,7 +573,7 @@ inline void mm_interleave_4x32x( void *dst, const void *src0, const void *src1,
}
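A usage sketch of the 4 way interleave, assuming the usual lane layout where destination word 4*j+i holds word j of source i:
#include "avxdefs.h"   // assumed location of mm_interleave_4x32
#include <stdint.h>
int main()
{
   uint32_t m0[8], m1[8], m2[8], m3[8];               // four 256 bit inputs
   uint32_t vdst[32] __attribute__ ((aligned (16)));  // interleaved output
   for ( int j = 0; j < 8; j++ )
      { m0[j] = j; m1[j] = 10+j; m2[j] = 20+j; m3[j] = 30+j; }
   mm_interleave_4x32( vdst, m0, m1, m2, m3, 256 );
   // vdst[0..3] == { 0, 10, 20, 30 }: word 0 of each input, ready for
   // one __m128i load feeding a 4 way hash round.
   return 0;
}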
// bit_len must be multiple of 32
inline void mm_interleave_4x32( void *dst, void *src0, void *src1,
inline void mm_interleave_4x32x( void *dst, void *src0, void *src1,
void *src2, void *src3, int bit_len )
{
uint32_t *d = (uint32_t*)dst;
@@ -427,7 +592,8 @@ inline void mm_interleave_4x32( void *dst, void *src0, void *src1,
}
// doesn't work with 32 bit elements
inline void mm_deinterleave_4x32x( void *dst0, void *dst1, void *dst2,
// no vector indexing here?
inline void mm_deinterleave_4x32( void *dst0, void *dst1, void *dst2,
void *dst3, const void *src, int bit_len )
{
uint32_t *s = (uint32_t*)src;
@@ -469,7 +635,7 @@ inline void mm_deinterleave_4x32x( void *dst0, void *dst1, void *dst2,
// deinterleave 4 arrays into individual buffers for scalar processing
// bit_len must be multiple of 32
inline void mm_deinterleave_4x32( void *dst0, void *dst1, void *dst2,
inline void mm_deinterleave_4x32x( void *dst0, void *dst1, void *dst2,
void *dst3, const void *src, int bit_len )
{
uint32_t *s = (uint32_t*)src;
@@ -539,6 +705,8 @@ inline void mm256_interleave_4x64x( void *dst, void *src0, void *src1,
}
// Deinterleave 4 buffers of 64 bit data from the source buffer.
// bit_len must be 256, 512 or 640 bits.
// Requires overrun padding for 640 bit len.
inline void mm256_deinterleave_4x64( void *dst0, void *dst1, void *dst2,
void *dst3, const void *src, int bit_len )
{
@@ -591,8 +759,7 @@ inline void mm256_deinterleave_4x64x( void *dst0, void *dst1, void *dst2,
// Interleave 8 source buffers containing 32 bit data into the destination
// vector
// Doesn't work, vector indexing doesn't work for 32 bit elements
inline void mm256_interleave_8x32x( void *dst, const void *src0,
inline void mm256_interleave_8x32( void *dst, const void *src0,
const void *src1, const void *src2, const void *src3, const void *src4,
const void *src5, const void *src6, const void *src7, int bit_len )
{
@@ -654,9 +821,10 @@ inline void mm256_interleave_8x32x( void *dst, const void *src0,
s3[19], s2[19], s1[19], s0[19] );
}
// probably obsolete with double pack 2x32->64, 4x64->256.
// Slower but it works with 32 bit data
// bit_len must be multiple of 32
inline void mm256_interleave_8x32( uint32_t *dst, uint32_t *src0,
inline void mm256_interleave_8x32x( uint32_t *dst, uint32_t *src0,
uint32_t *src1, uint32_t *src2, uint32_t *src3, uint32_t *src4,
uint32_t *src5, uint32_t *src6, uint32_t *src7, int bit_len )
{
@@ -675,7 +843,7 @@ inline void mm256_interleave_8x32( uint32_t *dst, uint32_t *src0,
}
// Deinterleave 8 buffers of 32 bit data from the source buffer.
inline void mm256_deinterleave_8x32x( void *dst0, void *dst1, void *dst2,
inline void mm256_deinterleave_8x32( void *dst0, void *dst1, void *dst2,
void *dst3, void *dst4, void *dst5, void *dst6, void *dst7,
const void *src, int bit_len )
{
@@ -758,7 +926,7 @@ inline void mm256_deinterleave_8x32x( void *dst0, void *dst1, void *dst2,
// Deinterleave 8 arrays into individual buffers for scalar processing
// bit_len must be multiple of 32
inline void mm256_deinterleave_8x32( uint32_t *dst0, uint32_t *dst1,
inline void mm256_deinterleave_8x32x( uint32_t *dst0, uint32_t *dst1,
uint32_t *dst2,uint32_t *dst3, uint32_t *dst4, uint32_t *dst5,
uint32_t *dst6,uint32_t *dst7,uint32_t *src, int bit_len )
{
@@ -776,6 +944,7 @@ inline void mm256_deinterleave_8x32( uint32_t *dst0, uint32_t *dst1,
}
}
// likely of no use.
// convert 4x32 byte (128 bit) vectors to 4x64 (256 bit) vectors for AVX2
// bit_len must be multiple of 64
inline void mm256_reinterleave_4x64( uint64_t *dst, uint32_t *src,

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.7.6.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.7.7.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.7.6'
PACKAGE_STRING='cpuminer-opt 3.7.6'
PACKAGE_VERSION='3.7.7'
PACKAGE_STRING='cpuminer-opt 3.7.7'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1321,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cpuminer-opt 3.7.6 to adapt to many kinds of systems.
\`configure' configures cpuminer-opt 3.7.7 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1392,7 +1392,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.7.6:";;
short | recursive ) echo "Configuration of cpuminer-opt 3.7.7:";;
esac
cat <<\_ACEOF
@@ -1497,7 +1497,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cpuminer-opt configure 3.7.6
cpuminer-opt configure 3.7.7
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2000,7 +2000,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.7.6, which was
It was created by cpuminer-opt $as_me 3.7.7, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2981,7 +2981,7 @@ fi
# Define the identity of the package.
PACKAGE='cpuminer-opt'
VERSION='3.7.6'
VERSION='3.7.7'
cat >>confdefs.h <<_ACEOF
@@ -6677,7 +6677,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cpuminer-opt $as_me 3.7.6, which was
This file was extended by cpuminer-opt $as_me 3.7.7, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6743,7 +6743,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cpuminer-opt config.status 3.7.6
cpuminer-opt config.status 3.7.7
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.7.6])
AC_INIT([cpuminer-opt], [3.7.7])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

View File

@@ -204,7 +204,7 @@ static void affine_to_cpu_mask( int id, unsigned long long mask )
for ( uint8_t i = 0; i < ncpus; i++ )
{
// cpu mask
if( (ncpus > 64) || ( mask & (1UL << i) ) ) CPU_SET( i, &set );
if( (ncpus > 64) || ( mask & (1ULL << i) ) ) CPU_SET( i, &set );
}
if ( id == -1 )
{
@@ -1690,9 +1690,9 @@ static void *miner_thread( void *userdata )
{
if (opt_debug)
applog( LOG_DEBUG, "Binding thread %d to cpu %d (mask %x)",
thr_id, thr_id % num_cpus, ( 1 << (thr_id % num_cpus) ) );
thr_id, thr_id % num_cpus, ( 1ULL << (thr_id % num_cpus) ) );
affine_to_cpu_mask( thr_id, 1 << (thr_id % num_cpus) );
affine_to_cpu_mask( thr_id, 1ULL << (thr_id % num_cpus) );
}
else if (opt_affinity != -1)
{
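The 1UL to 1ULL changes above are the substance of the >64 CPU regression fix: unsigned long is 32 bits on LLP64 targets such as 64 bit Windows, and plain 1 << i is a 32 bit int shift everywhere, so bits past 31 are lost or undefined and the affinity mask drops those CPUs. A minimal sketch of the difference:
#include <stdio.h>
int main()
{
   int cpu = 40;
   unsigned long long mask = 1ULL << cpu;   // bit 40 set, works everywhere
   // With 1UL on a 32 bit long, the same shift is undefined behaviour.
   printf( "%016llx\n", mask );             // prints 0000010000000000
   return 0;
}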