This commit is contained in:
Jay D Dee
2021-11-10 21:33:44 -05:00
parent 1a234cbe53
commit e6fd9b1d69
13 changed files with 1198 additions and 829 deletions

View File

@@ -40,7 +40,7 @@ $ mkdir $HOME/usr/lib
version available in the repositories. version available in the repositories.
Download the following source code packages from their respective and Download the following source code packages from their respective and
respected download locations, copy them to ~/usr/lib/ and uncompress them. respected download locations, copy them to $HOME/usr/lib/ and uncompress them.
openssl: https://github.com/openssl/openssl/releases openssl: https://github.com/openssl/openssl/releases
@@ -149,85 +149,10 @@ Copy cpuminer.exe to the release directory, compress and copy the release direct
Run cpuminer Run cpuminer
In a command windows change directories to the unzipped release folder. to get a list of all options: In a command windows change directories to the unzipped release folder. To get a list of all options:
cpuminer.exe --help cpuminer.exe --help
Command options are specific to where you mine. Refer to the pool's instructions on how to set them. Command options are specific to where you mine. Refer to the pool's instructions on how to set them.
Create a link to the locally compiled version of gmp.h
$ ln -s $LOCAL_LIB/gmp-version/gmp.h ./gmp.h
Edit configure.ac to fix the libpthread package name.
sed -i 's/"-lpthread"/"-lpthreadGC2"/g' configure.ac
7. Compile
You can use the default compile if you intend to use cpuminer-opt on the
same CPU and the virtual machine supports that architecture.
./build.sh
Otherwise you can compile manually while setting options in CFLAGS.
Some common options:
To compile for a specific CPU architecture:
CFLAGS="-O3 -march=znver1 -Wall" ./configure --with-curl
This will compile for AMD Ryzen.
You can compile more generically for a set of specific CPU features
if you know what features you want:
CFLAGS="-O3 -maes -msse4.2 -Wall" ./configure --with-curl
This will compile for an older CPU that does not have AVX.
You can find several examples in build-allarch.sh
If you have a CPU with more than 64 threads and Windows 7 or higher you
can enable the CPU Groups feature:
-D_WIN32_WINNT=0x0601
Once you have run configure successfully, run make with n CPU threads:
make -j n
Copy cpuminer.exe to the release directory, compress and copy the release
directory to a Windows system and run cpuminer.exe from the command line.
Run cpuminer
In a command window, change directories to the unzipped release folder.
To get a list of all options:
cpuminer.exe --help
Command options are specific to where you mine. Refer to the pool's
instructions on how to set them.

View File

@@ -65,7 +65,22 @@ If not what makes it happen or not happen?
Change Log Change Log
---------- ----------
v3.8.2 v3.19.0
Windows binaries now built with support for CPU groups, requires Windows 7.
Changes to cpu-affinity:
- PR#346: Fixed incorrect CPU affinity on Windows built for CPU groups,
- added support for CPU affinity for up to 256 threads or CPUs,
- streamlined code for more efficient initialization of miner threads,
- precise affining of each miner thread to a specific CPU,
- added an option to disable CPU affinity with "--cpu-affinity 0"
Faster sha256t with AVX512 & AVX2.
Added stratum error count to stats log, reported only when non-zero.
v3.18.2
Issue #342, fixed Groestl AES on Windows, broken in v3.18.0. Issue #342, fixed Groestl AES on Windows, broken in v3.18.0.

View File

@@ -62,6 +62,12 @@ void sha256_4way_transform_le( __m128i *state_out, const __m128i *data,
const __m128i *state_in ); const __m128i *state_in );
void sha256_4way_transform_be( __m128i *state_out, const __m128i *data, void sha256_4way_transform_be( __m128i *state_out, const __m128i *data,
const __m128i *state_in ); const __m128i *state_in );
void sha256_4way_prehash_3rounds( __m128i *state_mid, __m128i *X,
const __m128i *W, const __m128i *state_in );
void sha256_4way_final_rounds( __m128i *state_out, const __m128i *data,
const __m128i *state_in, const __m128i *state_mid, const __m128i *X );
int sha256_4way_transform_le_short( __m128i *state_out, const __m128i *data,
const __m128i *state_in );
#endif // SSE2 #endif // SSE2
@@ -84,10 +90,12 @@ void sha256_8way_transform_le( __m256i *state_out, const __m256i *data,
void sha256_8way_transform_be( __m256i *state_out, const __m256i *data, void sha256_8way_transform_be( __m256i *state_out, const __m256i *data,
const __m256i *state_in ); const __m256i *state_in );
void sha256_8way_prehash_3rounds( __m256i *state_mid, const __m256i *W, void sha256_8way_prehash_3rounds( __m256i *state_mid, __m256i *X,
const __m256i *state_in ); const __m256i *W, const __m256i *state_in );
void sha256_8way_final_rounds( __m256i *state_out, const __m256i *data, void sha256_8way_final_rounds( __m256i *state_out, const __m256i *data,
const __m256i *state_in, const __m256i *state_mid ); const __m256i *state_in, const __m256i *state_mid, const __m256i *X );
int sha256_8way_transform_le_short( __m256i *state_out, const __m256i *data,
const __m256i *state_in );
#endif // AVX2 #endif // AVX2
@@ -109,10 +117,13 @@ void sha256_16way_transform_le( __m512i *state_out, const __m512i *data,
const __m512i *state_in ); const __m512i *state_in );
void sha256_16way_transform_be( __m512i *state_out, const __m512i *data, void sha256_16way_transform_be( __m512i *state_out, const __m512i *data,
const __m512i *state_in ); const __m512i *state_in );
void sha256_16way_prehash_3rounds( __m512i *state_mid, const __m512i *W, void sha256_16way_prehash_3rounds( __m512i *state_mid, __m512i *X,
const __m512i *state_in ); const __m512i *W, const __m512i *state_in );
void sha256_16way_final_rounds( __m512i *state_out, const __m512i *data, void sha256_16way_final_rounds( __m512i *state_out, const __m512i *data,
const __m512i *state_in, const __m512i *state_mid ); const __m512i *state_in, const __m512i *state_mid, const __m512i *X );
int sha256_16way_transform_le_short( __m512i *state_out, const __m512i *data,
const __m512i *state_in );
#endif // AVX512 #endif // AVX512

View File

@@ -611,11 +611,11 @@ static inline int scanhash_sha256d_8way_pooler( struct work *work,
#endif /* HAVE_SHA256_8WAY */ #endif /* HAVE_SHA256_8WAY */
int scanhash_sha256d_pooler( struct work *work, int scanhash_sha256d_pooler( struct work *work, uint32_t max_nonce,
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr ) uint64_t *hashes_done, struct thr_info *mythr )
{ {
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
uint32_t *ptarget = work->target; uint32_t *ptarget = work->target;
uint32_t _ALIGN(128) data[64]; uint32_t _ALIGN(128) data[64];
uint32_t _ALIGN(32) hash[8]; uint32_t _ALIGN(32) hash[8];
uint32_t _ALIGN(32) midstate[8]; uint32_t _ALIGN(32) midstate[8];
@@ -626,12 +626,12 @@ int scanhash_sha256d_pooler( struct work *work,
int thr_id = mythr->id; // thr_id arg is deprecated int thr_id = mythr->id; // thr_id arg is deprecated
#ifdef HAVE_SHA256_8WAY #ifdef HAVE_SHA256_8WAY
if (sha256_use_8way()) if ( sha256_use_8way() )
return scanhash_sha256d_8way_pooler( work, max_nonce, hashes_done, mythr ); return scanhash_sha256d_8way_pooler( work, max_nonce, hashes_done, mythr );
#endif #endif
#ifdef HAVE_SHA256_4WAY #ifdef HAVE_SHA256_4WAY
if (sha256_use_4way()) if ( sha256_use_4way() )
return scanhash_sha256d_4way_pooler( work, max_nonce, hashes_done, mythr ); return scanhash_sha256d_4way_pooler( work, max_nonce, hashes_done, mythr );
#endif #endif
memcpy(data, pdata + 16, 64); memcpy(data, pdata + 16, 64);
@@ -695,8 +695,11 @@ bool register_sha256d_algo( algo_gate_t* gate )
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT; gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
#if defined(SHA256D_16WAY) #if defined(SHA256D_16WAY)
gate->scanhash = (void*)&scanhash_sha256d_16way; gate->scanhash = (void*)&scanhash_sha256d_16way;
//#elif defined(SHA256D_8WAY)
// gate->scanhash = (void*)&scanhash_sha256d_8way;
#else #else
gate->scanhash = (void*)&scanhash_sha256d_pooler; gate->scanhash = (void*)&scanhash_sha256d_pooler;
// gate->scanhash = (void*)&scanhash_sha256d_4way;
#endif #endif
// gate->hash = (void*)&sha256d; // gate->hash = (void*)&sha256d;
return true; return true;

File diff suppressed because it is too large Load Diff

View File

@@ -10,13 +10,14 @@
int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce, int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ) uint64_t *hashes_done, struct thr_info *mythr )
{ {
__m512i vdata[32] __attribute__ ((aligned (128)));
__m512i block[16] __attribute__ ((aligned (64))); __m512i block[16] __attribute__ ((aligned (64)));
__m512i hash32[8] __attribute__ ((aligned (32))); __m512i hash32[8] __attribute__ ((aligned (64)));
__m512i initstate[8] __attribute__ ((aligned (32))); __m512i initstate[8] __attribute__ ((aligned (64)));
__m512i midstate1[8] __attribute__ ((aligned (32))); __m512i midstate1[8] __attribute__ ((aligned (64)));
__m512i midstate2[8] __attribute__ ((aligned (32))); __m512i midstate2[8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); __m512i mexp_pre[16] __attribute__ ((aligned (64)));
__m512i vdata[20] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] ); uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target; const uint32_t *ptarget = work->target;
@@ -36,6 +37,14 @@ int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8, *noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n ); n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
vdata[16+4] = last_byte;
memset_zero_512( vdata+16 + 5, 10 );
vdata[16+15] = m512_const1_32( 80*8 ); // bit count
block[ 8] = last_byte;
memset_zero_512( block + 9, 6 );
block[15] = m512_const1_32( 32*8 ); // bit count
// initialize state // initialize state
initstate[0] = m512_const1_64( 0x6A09E6676A09E667 ); initstate[0] = m512_const1_64( 0x6A09E6676A09E667 );
initstate[1] = m512_const1_64( 0xBB67AE85BB67AE85 ); initstate[1] = m512_const1_64( 0xBB67AE85BB67AE85 );
@@ -49,39 +58,33 @@ int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
sha256_16way_transform_le( midstate1, vdata, initstate ); sha256_16way_transform_le( midstate1, vdata, initstate );
// Do 3 rounds on the first 12 bytes of the next block // Do 3 rounds on the first 12 bytes of the next block
sha256_16way_prehash_3rounds( midstate2, vdata + 16, midstate1 ); sha256_16way_prehash_3rounds( midstate2, mexp_pre, vdata+16, midstate1 );
do do
{ {
// 1. final 16 bytes of data, with padding // 1. final 16 bytes of data, with padding
memcpy_512( block, vdata + 16, 4 ); sha256_16way_final_rounds( block, vdata+16, midstate1, midstate2,
block[ 4] = last_byte; mexp_pre );
memset_zero_512( block + 5, 10 );
block[15] = m512_const1_32( 80*8 ); // bit count
sha256_16way_final_rounds( hash32, block, midstate1, midstate2 );
// 2. 32 byte hash from 1. // 2. 32 byte hash from 1.
memcpy_512( block, hash32, 8 ); if ( sha256_16way_transform_le_short( hash32, block, initstate ) )
block[ 8] = last_byte;
memset_zero_512( block + 9, 6 );
block[15] = m512_const1_32( 32*8 ); // bit count
sha256_16way_transform_le( hash32, block, initstate );
// byte swap final hash for testing
mm512_block_bswap_32( hash32, hash32 );
for ( int lane = 0; lane < 16; lane++ )
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
{ {
extr_lane_16x32( lane_hash, hash32, lane, 256 ); // byte swap final hash for testing
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) ) mm512_block_bswap_32( hash32, hash32 );
for ( int lane = 0; lane < 16; lane++ )
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
{ {
pdata[19] = n + lane; extr_lane_16x32( lane_hash, hash32, lane, 256 );
submit_solution( work, lane_hash, mythr ); if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr );
}
} }
} }
*noncev = _mm512_add_epi32( *noncev, sixteen ); *noncev = _mm512_add_epi32( *noncev, sixteen );
n += 16; n += 16;
} while ( (n < last_nonce) && !work_restart[thr_id].restart ); } while ( (n < last_nonce) && !work_restart[thr_id].restart );
pdata[19] = n; pdata[19] = n;
*hashes_done = n - first_nonce; *hashes_done = n - first_nonce;
@@ -95,13 +98,14 @@ int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce, int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ) uint64_t *hashes_done, struct thr_info *mythr )
{ {
__m256i block[16] __attribute__ ((aligned (64))); __m256i vdata[32] __attribute__ ((aligned (64)));
__m256i block[16] __attribute__ ((aligned (32)));
__m256i hash32[8] __attribute__ ((aligned (32))); __m256i hash32[8] __attribute__ ((aligned (32)));
__m256i initstate[8] __attribute__ ((aligned (32))); __m256i initstate[8] __attribute__ ((aligned (32)));
__m256i midstate1[8] __attribute__ ((aligned (32))); __m256i midstate1[8] __attribute__ ((aligned (32)));
__m256i midstate2[8] __attribute__ ((aligned (32))); __m256i midstate2[8] __attribute__ ((aligned (32)));
__m256i mexp_pre[16] __attribute__ ((aligned (32)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
__m256i vdata[20] __attribute__ ((aligned (32)));
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] ); uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target; const uint32_t *ptarget = work->target;
@@ -120,6 +124,14 @@ int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
*noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n ); *noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
vdata[16+4] = last_byte;
memset_zero_256( vdata+16 + 5, 10 );
vdata[16+15] = m256_const1_32( 80*8 ); // bit count
block[ 8] = last_byte;
memset_zero_256( block + 9, 6 );
block[15] = m256_const1_32( 32*8 ); // bit count
// initialize state // initialize state
initstate[0] = m256_const1_64( 0x6A09E6676A09E667 ); initstate[0] = m256_const1_64( 0x6A09E6676A09E667 );
initstate[1] = m256_const1_64( 0xBB67AE85BB67AE85 ); initstate[1] = m256_const1_64( 0xBB67AE85BB67AE85 );
@@ -133,35 +145,30 @@ int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
sha256_8way_transform_le( midstate1, vdata, initstate ); sha256_8way_transform_le( midstate1, vdata, initstate );
// Do 3 rounds on the first 12 bytes of the next block // Do 3 rounds on the first 12 bytes of the next block
sha256_8way_prehash_3rounds( midstate2, vdata + 16, midstate1 ); sha256_8way_prehash_3rounds( midstate2, mexp_pre, vdata + 16, midstate1 );
do do
{ {
// 1. final 16 bytes of data, with padding // 1. final 16 bytes of data, with padding
memcpy_256( block, vdata + 16, 4 ); sha256_8way_final_rounds( block, vdata+16, midstate1, midstate2,
block[ 4] = last_byte; mexp_pre );
memset_zero_256( block + 5, 10 );
block[15] = m256_const1_32( 80*8 ); // bit count
sha256_8way_final_rounds( hash32, block, midstate1, midstate2 );
// 2. 32 byte hash from 1. // 2. 32 byte hash from 1.
memcpy_256( block, hash32, 8 ); if ( unlikely(
block[ 8] = last_byte; sha256_8way_transform_le_short( hash32, block, initstate ) ) )
memset_zero_256( block + 9, 6 );
block[15] = m256_const1_32( 32*8 ); // bit count
sha256_8way_transform_le( hash32, block, initstate );
// byte swap final hash for testing
mm256_block_bswap_32( hash32, hash32 );
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
{ {
extr_lane_8x32( lane_hash, hash32, lane, 256 ); // byte swap final hash for testing
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) ) mm256_block_bswap_32( hash32, hash32 );
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
{ {
pdata[19] = n + lane; extr_lane_8x32( lane_hash, hash32, lane, 256 );
submit_solution( work, lane_hash, mythr ); if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr );
}
} }
} }
*noncev = _mm256_add_epi32( *noncev, eight ); *noncev = _mm256_add_epi32( *noncev, eight );
@@ -179,12 +186,14 @@ int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
int scanhash_sha256d_4way( struct work *work, const uint32_t max_nonce, int scanhash_sha256d_4way( struct work *work, const uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ) uint64_t *hashes_done, struct thr_info *mythr )
{ {
__m128i block[16] __attribute__ ((aligned (64))); __m128i vdata[32] __attribute__ ((aligned (64)));
__m128i hash32[8] __attribute__ ((aligned (32))); __m128i block[16] __attribute__ ((aligned (32)));
__m128i initstate[8] __attribute__ ((aligned (32))); __m128i hash32[8] __attribute__ ((aligned (32)));
__m128i midstate[8] __attribute__ ((aligned (32))); __m128i initstate[8] __attribute__ ((aligned (32)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); __m128i midstate1[8] __attribute__ ((aligned (32)));
__m128i vdata[20] __attribute__ ((aligned (32))); __m128i midstate2[8] __attribute__ ((aligned (32)));
__m128i mexp_pre[16] __attribute__ ((aligned (32)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] ); uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target; const uint32_t *ptarget = work->target;
@@ -203,6 +212,14 @@ int scanhash_sha256d_4way( struct work *work, const uint32_t max_nonce,
*noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n ); *noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n );
vdata[16+4] = last_byte;
memset_zero_128( vdata+16 + 5, 10 );
vdata[16+15] = m128_const1_32( 80*8 ); // bit count
block[ 8] = last_byte;
memset_zero_128( block + 9, 6 );
block[15] = m128_const1_32( 32*8 ); // bit count
// initialize state // initialize state
initstate[0] = m128_const1_64( 0x6A09E6676A09E667 ); initstate[0] = m128_const1_64( 0x6A09E6676A09E667 );
initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 ); initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 );
@@ -214,39 +231,36 @@ int scanhash_sha256d_4way( struct work *work, const uint32_t max_nonce,
initstate[7] = m128_const1_64( 0x5BE0CD195BE0CD19 ); initstate[7] = m128_const1_64( 0x5BE0CD195BE0CD19 );
// hash first 64 bytes of data // hash first 64 bytes of data
sha256_4way_transform_le( midstate, vdata, initstate ); sha256_4way_transform_le( midstate1, vdata, initstate );
// Do 3 rounds on the first 12 bytes of the next block
sha256_4way_prehash_3rounds( midstate2, mexp_pre, vdata + 16, midstate1 );
do do
{ {
// 1. final 16 bytes of data, with padding // 1. final 16 bytes of data, with padding
memcpy_128( block, vdata + 16, 4 ); sha256_4way_final_rounds( block, vdata+16, midstate1, midstate2,
block[ 4] = last_byte; mexp_pre );
memset_zero_128( block + 5, 10 );
block[15] = m128_const1_32( 80*8 ); // bit count
sha256_4way_transform_le( hash32, block, midstate );
// 2. 32 byte hash from 1. // 2. 32 byte hash from 1.
memcpy_128( block, hash32, 8 ); if ( unlikely(
block[ 8] = last_byte; sha256_4way_transform_le_short( hash32, block, initstate ) ) )
memset_zero_128( block + 9, 6 );
block[15] = m128_const1_32( 32*8 ); // bit count
sha256_4way_transform_le( hash32, block, initstate );
// byte swap final hash for testing
mm128_block_bswap_32( hash32, hash32 );
for ( int lane = 0; lane < 4; lane++ )
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
{ {
extr_lane_4x32( lane_hash, hash32, lane, 256 ); // byte swap final hash for testing
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) ) mm128_block_bswap_32( hash32, hash32 );
for ( int lane = 0; lane < 4; lane++ )
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
{ {
pdata[19] = n + lane; extr_lane_4x32( lane_hash, hash32, lane, 256 );
submit_solution( work, lane_hash, mythr ); if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr );
}
} }
} }
*noncev = _mm_add_epi32( *noncev, four ); *noncev = _mm_add_epi32( *noncev, four );
n += 4; n += 4;
} while ( (n < last_nonce) && !work_restart[thr_id].restart ); } while ( (n < last_nonce) && !work_restart[thr_id].restart );
pdata[19] = n; pdata[19] = n;
*hashes_done = n - first_nonce; *hashes_done = n - first_nonce;

View File

@@ -6,12 +6,10 @@
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__) #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
#define SHA256D_16WAY 1 #define SHA256D_16WAY 1
/*
#elif defined(__AVX2__) #elif defined(__AVX2__)
#define SHA256D_8WAY 1 #define SHA256D_8WAY 1
#else #else
#define SHA256D_4WAY 1 #define SHA256D_4WAY 1
*/
#endif #endif
bool register_sha256d_algo( algo_gate_t* gate ); bool register_sha256d_algo( algo_gate_t* gate );
@@ -21,7 +19,7 @@ bool register_sha256d_algo( algo_gate_t* gate );
int scanhash_sha256d_16way( struct work *work, uint32_t max_nonce, int scanhash_sha256d_16way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ); uint64_t *hashes_done, struct thr_info *mythr );
#endif #endif
/*
#if defined(SHA256D_8WAY) #if defined(SHA256D_8WAY)
int scanhash_sha256d_8way( struct work *work, uint32_t max_nonce, int scanhash_sha256d_8way( struct work *work, uint32_t max_nonce,
@@ -33,7 +31,7 @@ int scanhash_sha256d_8way( struct work *work, uint32_t max_nonce,
int scanhash_sha256d_4way( struct work *work, uint32_t max_nonce, int scanhash_sha256d_4way( struct work *work, uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ); uint64_t *hashes_done, struct thr_info *mythr );
#endif #endif
*/
/* /*
#if defined(__SHA__) #if defined(__SHA__)

View File

@@ -10,13 +10,14 @@
int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce, int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ) uint64_t *hashes_done, struct thr_info *mythr )
{ {
__m512i vdata[32] __attribute__ ((aligned (128)));
__m512i block[16] __attribute__ ((aligned (64))); __m512i block[16] __attribute__ ((aligned (64)));
__m512i hash32[8] __attribute__ ((aligned (32))); __m512i hash32[8] __attribute__ ((aligned (64)));
__m512i initstate[8] __attribute__ ((aligned (32))); __m512i initstate[8] __attribute__ ((aligned (64)));
__m512i midstate1[8] __attribute__ ((aligned (32))); __m512i midstate1[8] __attribute__ ((aligned (64)));
__m512i midstate2[8] __attribute__ ((aligned (32))); __m512i midstate2[8] __attribute__ ((aligned (64)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); __m512i mexp_pre[16] __attribute__ ((aligned (64)));
__m512i vdata[20] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (64)));
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] ); uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target; const uint32_t *ptarget = work->target;
@@ -36,7 +37,14 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8, *noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n ); n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
// initialize state vdata[16+4] = last_byte;
memset_zero_512( vdata+16 + 5, 10 );
vdata[16+15] = m512_const1_32( 80*8 ); // bit count
block[ 8] = last_byte;
memset_zero_512( block + 9, 6 );
block[15] = m512_const1_32( 32*8 ); // bit count
initstate[0] = m512_const1_64( 0x6A09E6676A09E667 ); initstate[0] = m512_const1_64( 0x6A09E6676A09E667 );
initstate[1] = m512_const1_64( 0xBB67AE85BB67AE85 ); initstate[1] = m512_const1_64( 0xBB67AE85BB67AE85 );
initstate[2] = m512_const1_64( 0x3C6EF3723C6EF372 ); initstate[2] = m512_const1_64( 0x3C6EF3723C6EF372 );
@@ -49,43 +57,37 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
sha256_16way_transform_le( midstate1, vdata, initstate ); sha256_16way_transform_le( midstate1, vdata, initstate );
// Do 3 rounds on the first 12 bytes of the next block // Do 3 rounds on the first 12 bytes of the next block
sha256_16way_prehash_3rounds( midstate2, vdata + 16, midstate1 ); sha256_16way_prehash_3rounds( midstate2, mexp_pre, vdata+16, midstate1 );
do do
{ {
// 1. final 16 bytes of data, with padding // 1. final 16 bytes of data, pre-padded
memcpy_512( block, vdata + 16, 4 ); sha256_16way_final_rounds( block, vdata+16, midstate1, midstate2,
block[ 4] = last_byte; mexp_pre );
memset_zero_512( block + 5, 10 );
block[15] = m512_const1_32( 80*8 ); // bit count
sha256_16way_final_rounds( hash32, block, midstate1, midstate2 );
// 2. 32 byte hash from 1. // 2. 32 byte hash from 1.
memcpy_512( block, hash32, 8 ); sha256_16way_transform_le( block, block, initstate );
block[ 8] = last_byte;
memset_zero_512( block + 9, 6 );
block[15] = m512_const1_32( 32*8 ); // bit count
sha256_16way_transform_le( hash32, block, initstate );
// 3. 32 byte hash from 2. // 3. 32 byte hash from 2.
memcpy_512( block, hash32, 8 ); if ( unlikely(
sha256_16way_transform_le( hash32, block, initstate ); sha256_16way_transform_le_short( hash32, block, initstate ) ) )
// byte swap final hash for testing
mm512_block_bswap_32( hash32, hash32 );
for ( int lane = 0; lane < 16; lane++ )
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
{ {
extr_lane_16x32( lane_hash, hash32, lane, 256 ); // byte swap final hash for testing
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) ) mm512_block_bswap_32( hash32, hash32 );
for ( int lane = 0; lane < 16; lane++ )
if ( hash32_d7[ lane ] <= targ32_d7 )
{ {
pdata[19] = n + lane; extr_lane_16x32( lane_hash, hash32, lane, 256 );
submit_solution( work, lane_hash, mythr ); if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr );
}
} }
} }
*noncev = _mm512_add_epi32( *noncev, sixteen ); *noncev = _mm512_add_epi32( *noncev, sixteen );
n += 16; n += 16;
} while ( (n < last_nonce) && !work_restart[thr_id].restart ); } while ( (n < last_nonce) && !work_restart[thr_id].restart );
pdata[19] = n; pdata[19] = n;
*hashes_done = n - first_nonce; *hashes_done = n - first_nonce;
@@ -100,13 +102,14 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce, int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ) uint64_t *hashes_done, struct thr_info *mythr )
{ {
__m256i block[16] __attribute__ ((aligned (64))); __m256i vdata[32] __attribute__ ((aligned (64)));
__m256i block[16] __attribute__ ((aligned (32)));
__m256i hash32[8] __attribute__ ((aligned (32))); __m256i hash32[8] __attribute__ ((aligned (32)));
__m256i initstate[8] __attribute__ ((aligned (32))); __m256i initstate[8] __attribute__ ((aligned (32)));
__m256i midstate1[8] __attribute__ ((aligned (32))); __m256i midstate1[8] __attribute__ ((aligned (32)));
__m256i midstate2[8] __attribute__ ((aligned (32))); __m256i midstate2[8] __attribute__ ((aligned (32)));
__m256i mexp_pre[16] __attribute__ ((aligned (32)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
__m256i vdata[20] __attribute__ ((aligned (32)));
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] ); uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target; const uint32_t *ptarget = work->target;
@@ -125,6 +128,14 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
*noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n ); *noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
vdata[16+4] = last_byte;
memset_zero_256( vdata+16 + 5, 10 );
vdata[16+15] = m256_const1_32( 80*8 ); // bit count
block[ 8] = last_byte;
memset_zero_256( block + 9, 6 );
block[15] = m256_const1_32( 32*8 ); // bit count
// initialize state // initialize state
initstate[0] = m256_const1_64( 0x6A09E6676A09E667 ); initstate[0] = m256_const1_64( 0x6A09E6676A09E667 );
initstate[1] = m256_const1_64( 0xBB67AE85BB67AE85 ); initstate[1] = m256_const1_64( 0xBB67AE85BB67AE85 );
@@ -138,43 +149,37 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
sha256_8way_transform_le( midstate1, vdata, initstate ); sha256_8way_transform_le( midstate1, vdata, initstate );
// Do 3 rounds on the first 12 bytes of the next block // Do 3 rounds on the first 12 bytes of the next block
sha256_8way_prehash_3rounds( midstate2, vdata + 16, midstate1 ); sha256_8way_prehash_3rounds( midstate2, mexp_pre, vdata + 16, midstate1 );
do do
{ {
// 1. final 16 bytes of data, with padding // 1. final 16 bytes of data, with padding
memcpy_256( block, vdata + 16, 4 ); sha256_8way_final_rounds( block, vdata+16, midstate1, midstate2,
block[ 4] = last_byte; mexp_pre );
memset_zero_256( block + 5, 10 );
block[15] = m256_const1_32( 80*8 ); // bit count
sha256_8way_final_rounds( hash32, block, midstate1, midstate2 );
// 2. 32 byte hash from 1. // 2. 32 byte hash from 1.
memcpy_256( block, hash32, 8 ); sha256_8way_transform_le( block, block, initstate );
block[ 8] = last_byte;
memset_zero_256( block + 9, 6 );
block[15] = m256_const1_32( 32*8 ); // bit count
sha256_8way_transform_le( hash32, block, initstate );
// 3. 32 byte hash from 2. // 3. 32 byte hash from 2.
memcpy_256( block, hash32, 8 ); if ( unlikely(
sha256_8way_transform_le( hash32, block, initstate ); sha256_8way_transform_le_short( hash32, block, initstate ) ) )
// byte swap final hash for testing
mm256_block_bswap_32( hash32, hash32 );
for ( int lane = 0; lane < 8; lane++ )
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
{ {
extr_lane_8x32( lane_hash, hash32, lane, 256 ); // byte swap final hash for testing
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) ) mm256_block_bswap_32( hash32, hash32 );
for ( int lane = 0; lane < 8; lane++ )
if ( hash32_d7[ lane ] <= targ32_d7 )
{ {
pdata[19] = n + lane; extr_lane_8x32( lane_hash, hash32, lane, 256 );
submit_solution( work, lane_hash, mythr ); if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr );
}
} }
} }
*noncev = _mm256_add_epi32( *noncev, eight ); *noncev = _mm256_add_epi32( *noncev, eight );
n += 8; n += 8;
} while ( (n < last_nonce) && !work_restart[thr_id].restart ); } while ( (n < last_nonce) && !work_restart[thr_id].restart );
pdata[19] = n; pdata[19] = n;
*hashes_done = n - first_nonce; *hashes_done = n - first_nonce;
@@ -183,17 +188,110 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
#endif #endif
#if defined(SHA256T_4WAY) #if defined(SHA256T_4WAY)
// Optimizations are slower with AVX/SSE2
// https://github.com/JayDDee/cpuminer-opt/issues/344
/*
// Disabled 4-lane (__m128i) scanhash for triple SHA-256 over an 80-byte
// input. It hashes the first 64 bytes once, pre-computes part of the second
// block outside the loop, then tests 4 consecutive nonces per iteration.
// Kept only for reference: it lives inside a block comment, so use
// line comments only when annotating it.
//
// work        work item; data[] is read, data[19] (nonce) is written
// max_nonce   upper bound of the nonce range to scan
// hashes_done receives the number of nonces tried (n - first_nonce)
// mythr       calling thread info; id indexes work_restart[]
// Always returns 0.
int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr )
{
__m128i vdata[32] __attribute__ ((aligned (64)));
__m128i block[16] __attribute__ ((aligned (32)));
__m128i hash32[8] __attribute__ ((aligned (32)));
__m128i initstate[8] __attribute__ ((aligned (32)));
__m128i midstate1[8] __attribute__ ((aligned (32)));
__m128i midstate2[8] __attribute__ ((aligned (32)));
__m128i mexp_pre[16] __attribute__ ((aligned (32)));
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
// View of hash word 7 as four 32-bit values, one per lane.
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target;
const uint32_t targ32_d7 = ptarget[7];
const uint32_t first_nonce = pdata[19];
// Each iteration consumes 4 nonces, so stop 4 short of max_nonce.
const uint32_t last_nonce = max_nonce - 4;
uint32_t n = first_nonce;
// Word 19 of the vectorized data holds the per-lane nonces.
__m128i *noncev = vdata + 19;
const int thr_id = mythr->id;
const bool bench = opt_benchmark;
const __m128i last_byte = m128_const1_32( 0x80000000 );
const __m128i four = m128_const1_32( 4 );
// Copy data words 0..18 into vdata, one vector per word.
// NOTE(review): m128_const1_32 presumably replicates the value into all
// 4 lanes -- confirm against its definition.
for ( int i = 0; i < 19; i++ )
vdata[i] = m128_const1_32( pdata[i] );
// Lanes 0..3 get nonces n..n+3 (the last argument of _mm_set_epi32 is the
// lowest element), matching the "n + lane" used when submitting.
*noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n );
// Constant tail of the second 64-byte block of the 80-byte input:
// 0x80000000 marker word, zero fill, then the length in bits.
vdata[16+4] = last_byte;
memset_zero_128( vdata+16 + 5, 10 );
vdata[16+15] = m128_const1_32( 80*8 ); // bit count
// Constant tail of the block used when hashing a 32-byte digest; words
// 0..7 are filled inside the loop.
block[ 8] = last_byte;
memset_zero_128( block + 9, 6 );
block[15] = m128_const1_32( 32*8 ); // bit count
// initialize state
// Each 64-bit constant repeats one 32-bit SHA-256 initial hash word in
// both halves (presumably so every 32-bit lane holds it -- confirm
// m128_const1_64 semantics).
initstate[0] = m128_const1_64( 0x6A09E6676A09E667 );
initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 );
initstate[2] = m128_const1_64( 0x3C6EF3723C6EF372 );
initstate[3] = m128_const1_64( 0xA54FF53AA54FF53A );
initstate[4] = m128_const1_64( 0x510E527F510E527F );
initstate[5] = m128_const1_64( 0x9B05688C9B05688C );
initstate[6] = m128_const1_64( 0x1F83D9AB1F83D9AB );
initstate[7] = m128_const1_64( 0x5BE0CD195BE0CD19 );
// hash first 64 bytes of data
sha256_4way_transform_le( midstate1, vdata, initstate );
// Do 3 rounds on the first 12 bytes of the next block
sha256_4way_prehash_3rounds( midstate2, mexp_pre, vdata + 16, midstate1 );
do
{
// 1. final 16 bytes of data, with padding
sha256_4way_final_rounds( block, vdata+16, midstate1, midstate2,
mexp_pre );
// 2. 32 byte hash from 1.
sha256_4way_transform_le( block, block, initstate );
// 3. 32 byte hash from 2.
// NOTE(review): the return value of the _short variant gates the target
// test below; presumably nonzero means some lane may meet the target --
// confirm against its definition.
if ( unlikely(
sha256_4way_transform_le_short( hash32, block, initstate ) ) )
{
// byte swap final hash for testing
mm128_block_bswap_32( hash32, hash32 );
for ( int lane = 0; lane < 4; lane++ )
// Pre-filter on hash word 7 before extracting the lane and running
// the full valid_hash check.
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
{
extr_lane_4x32( lane_hash, hash32, lane, 256 );
// Nothing is submitted when opt_benchmark is set.
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
{
pdata[19] = n + lane;
submit_solution( work, lane_hash, mythr );
}
}
}
// Advance every lane's nonce by 4 for the next iteration.
*noncev = _mm_add_epi32( *noncev, four );
n += 4;
// Stop at the end of the range or when a restart is flagged for this thread.
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
// Report the next untested nonce and how many nonces were tried.
pdata[19] = n;
*hashes_done = n - first_nonce;
return 0;
}
*/
int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce, int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
uint64_t *hashes_done, struct thr_info *mythr ) uint64_t *hashes_done, struct thr_info *mythr )
{ {
__m128i block[16] __attribute__ ((aligned (64))); __m128i vdata[32] __attribute__ ((aligned (64)));
__m128i block[16] __attribute__ ((aligned (32)));
__m128i hash32[8] __attribute__ ((aligned (32))); __m128i hash32[8] __attribute__ ((aligned (32)));
__m128i initstate[8] __attribute__ ((aligned (32))); __m128i initstate[8] __attribute__ ((aligned (32)));
__m128i midstate[8] __attribute__ ((aligned (32))); __m128i midstate[8] __attribute__ ((aligned (32)));
uint32_t lane_hash[8] __attribute__ ((aligned (32))); uint32_t lane_hash[8] __attribute__ ((aligned (32)));
__m128i vdata[20] __attribute__ ((aligned (32)));
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] ); uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
uint32_t *pdata = work->data; uint32_t *pdata = work->data;
const uint32_t *ptarget = work->target; const uint32_t *ptarget = work->target;
@@ -212,6 +310,14 @@ int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
*noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n ); *noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n );
vdata[16+4] = last_byte;
memset_zero_128( vdata+16 + 5, 10 );
vdata[16+15] = m128_const1_32( 80*8 ); // bit count
block[ 8] = last_byte;
memset_zero_128( block + 9, 6 );
block[15] = m128_const1_32( 32*8 ); // bit count
// initialize state // initialize state
initstate[0] = m128_const1_64( 0x6A09E6676A09E667 ); initstate[0] = m128_const1_64( 0x6A09E6676A09E667 );
initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 ); initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 );
@@ -227,25 +333,9 @@ int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
do do
{ {
// 1. final 16 bytes of data, with padding sha256_4way_transform_le( block, vdata+16, midstate );
memcpy_128( block, vdata + 16, 4 ); sha256_4way_transform_le( block, block, initstate );
block[ 4] = last_byte; sha256_4way_transform_le( hash32, block, initstate );
memset_zero_128( block + 5, 10 );
block[15] = m128_const1_32( 80*8 ); // bit count
sha256_4way_transform_le( hash32, block, midstate );
// 2. 32 byte hash from 1.
memcpy_128( block, hash32, 8 );
block[ 8] = last_byte;
memset_zero_128( block + 9, 6 );
block[15] = m128_const1_32( 32*8 ); // bit count
sha256_4way_transform_le( hash32, block, initstate );
// 3. 32 byte hash from 2.
memcpy_128( block, hash32, 8 );
sha256_4way_transform_le( hash32, block, initstate );
// byte swap final hash for testing
mm128_block_bswap_32( hash32, hash32 ); mm128_block_bswap_32( hash32, hash32 );
for ( int lane = 0; lane < 4; lane++ ) for ( int lane = 0; lane < 4; lane++ )
@@ -266,5 +356,6 @@ int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
return 0; return 0;
} }
#endif #endif

View File

@@ -3,6 +3,10 @@
#ifdef WIN32 #ifdef WIN32
#if _WIN32_WINNT==0x0601 // Windows 7
#define WINDOWS_CPU_GROUPS_ENABLED 1
#endif
#include <windows.h> #include <windows.h>
#include <time.h> #include <time.h>

20
configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.18.2. # Generated by GNU Autoconf 2.69 for cpuminer-opt 3.18.3.
# #
# #
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='cpuminer-opt' PACKAGE_NAME='cpuminer-opt'
PACKAGE_TARNAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt'
PACKAGE_VERSION='3.18.2' PACKAGE_VERSION='3.18.3'
PACKAGE_STRING='cpuminer-opt 3.18.2' PACKAGE_STRING='cpuminer-opt 3.18.3'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures cpuminer-opt 3.18.2 to adapt to many kinds of systems. \`configure' configures cpuminer-opt 3.18.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1404,7 +1404,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of cpuminer-opt 3.18.2:";; short | recursive ) echo "Configuration of cpuminer-opt 3.18.3:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
cpuminer-opt configure 3.18.2 cpuminer-opt configure 3.18.3
generated by GNU Autoconf 2.69 generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc. Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by cpuminer-opt $as_me 3.18.2, which was It was created by cpuminer-opt $as_me 3.18.3, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@ $ $0 $@
@@ -2993,7 +2993,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='cpuminer-opt' PACKAGE='cpuminer-opt'
VERSION='3.18.2' VERSION='3.18.3'
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by cpuminer-opt $as_me 3.18.2, which was This file was extended by cpuminer-opt $as_me 3.18.3, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\ ac_cs_version="\\
cpuminer-opt config.status 3.18.2 cpuminer-opt config.status 3.18.3
configured by $0, generated by GNU Autoconf 2.69, configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.18.2]) AC_INIT([cpuminer-opt], [3.19.0])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

View File

@@ -3,7 +3,7 @@
* Copyright 2012-2014 pooler * Copyright 2012-2014 pooler
* Copyright 2014 Lucas Jones * Copyright 2014 Lucas Jones
* Copyright 2014-2016 Tanguy Pruvot * Copyright 2014-2016 Tanguy Pruvot
* Copyright 2016-2020 Jay D Dee * Copyright 2016-2021 Jay D Dee
* *
* This program is free software; you can redistribute it and/or modify it * This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free * under the terms of the GNU General Public License as published by the Free
@@ -115,22 +115,12 @@ int opt_param_n = 0;
int opt_param_r = 0; int opt_param_r = 0;
int opt_n_threads = 0; int opt_n_threads = 0;
bool opt_sapling = false; bool opt_sapling = false;
static uint64_t opt_affinity = 0xFFFFFFFFFFFFFFFFULL; // default, use all cores
// Windows doesn't support 128 bit affinity mask.
// Need compile time and run time test.
#if defined(__linux) && defined(GCC_INT128)
#define AFFINITY_USES_UINT128 1
static uint128_t opt_affinity = -1;
static bool affinity_uses_uint128 = true;
#else
static uint64_t opt_affinity = -1;
static bool affinity_uses_uint128 = false;
#endif
int opt_priority = 0; // deprecated int opt_priority = 0; // deprecated
int num_cpus = 1; int num_cpus = 1;
int num_cpugroups = 1; int num_cpugroups = 1; // For Windows
char *rpc_url = NULL;; #define max_cpus 256 // max for affinity
char *rpc_url = NULL;
char *rpc_userpass = NULL; char *rpc_userpass = NULL;
char *rpc_user, *rpc_pass; char *rpc_user, *rpc_pass;
char *short_url = NULL; char *short_url = NULL;
@@ -166,6 +156,7 @@ uint32_t accepted_share_count = 0;
uint32_t rejected_share_count = 0; uint32_t rejected_share_count = 0;
uint32_t stale_share_count = 0; uint32_t stale_share_count = 0;
uint32_t solved_block_count = 0; uint32_t solved_block_count = 0;
uint32_t stratum_errors = 0;
double *thr_hashrates; double *thr_hashrates;
double global_hashrate = 0.; double global_hashrate = 0.;
double total_hashes = 0.; double total_hashes = 0.;
@@ -227,18 +218,21 @@ char* lp_id;
static void workio_cmd_free(struct workio_cmd *wc); static void workio_cmd_free(struct workio_cmd *wc);
static void format_affinity_map( char *map_str, uint64_t map ) // array mapping thread to cpu
static uint8_t thread_affinity_map[ max_cpus ];
// display affinity mask graphically
static void format_affinity_mask( char *mask_str, uint64_t mask )
{ {
int n = num_cpus < 64 ? num_cpus : 64; int n = num_cpus < 64 ? num_cpus : 64;
int i; int i;
for ( i = 0; i < n; i++ ) for ( i = 0; i < n; i++ )
{ {
if ( map & 1 ) map_str[i] = '!'; if ( mask & 1 ) mask_str[i] = '!';
else map_str[i] = '.'; else mask_str[i] = '.';
map >>= 1; mask >>= 1;
} }
memset( &map_str[i], 0, 64 - i ); memset( &mask_str[i], 0, 64 - i );
} }
#ifdef __linux /* Linux specific policy and affinity management */ #ifdef __linux /* Linux specific policy and affinity management */
@@ -260,93 +254,70 @@ static inline void drop_policy(void)
#define pthread_setaffinity_np(tid,sz,s) {} /* only do process affinity */ #define pthread_setaffinity_np(tid,sz,s) {} /* only do process affinity */
#endif #endif
// Linux affinity can use int128. static void affine_to_cpu( struct thr_info *thr )
#if AFFINITY_USES_UINT128
static void affine_to_cpu_mask( int id, uint128_t mask )
#else
static void affine_to_cpu_mask( int id, uint64_t mask )
#endif
{ {
int thread = thr->id;
cpu_set_t set; cpu_set_t set;
CPU_ZERO( &set ); CPU_ZERO( &set );
uint8_t ncpus = (num_cpus > 256) ? 256 : num_cpus; CPU_SET( thread_affinity_map[ thread ], &set );
if ( opt_debug )
for ( uint8_t i = 0; i < ncpus; i++ ) applog( LOG_INFO, "Binding thread %d to cpu %d",
{ thread, thread_affinity_map[ thread ] );
// cpu mask pthread_setaffinity_np( thr->pth, sizeof(set), &set );
#if AFFINITY_USES_UINT128
if( ( mask & ( (uint128_t)1 << i ) ) ) CPU_SET( i, &set );
#else
if( (ncpus > 64) || ( mask & (1 << i) ) ) CPU_SET( i, &set );
#endif
}
if ( id == -1 )
{
// process affinity
sched_setaffinity(0, sizeof(&set), &set);
}
else
{
// thread only
pthread_setaffinity_np(thr_info[id].pth, sizeof(&set), &set);
}
} }
#elif defined(WIN32) /* Windows */ #elif defined(WIN32) /* Windows */
static inline void drop_policy(void) { } static inline void drop_policy(void) { }
// Windows CPU groups to manage more than 64 CPUs. // Windows CPU groups to manage more than 64 CPUs.
static void affine_to_cpu_mask( int id, uint64_t mask ) // mask arg is ignored
static void affine_to_cpu( struct thr_info *thr )
{ {
bool success; int thread = thr->id;
unsigned long last_error; unsigned long last_error;
// BOOL success; bool ok;
// DWORD last_error;
if ( id == -1 ) #if defined(WINDOWS_CPU_GROUPS_ENABLED)
success = SetProcessAffinityMask( GetCurrentProcess(), mask ); unsigned long group_size = GetActiveProcessorCount( 0 );
unsigned long group = thread / group_size;
unsigned long cpu = thread_affinity_map[ thread % group_size ];
// Are Windows CPU Groups supported? GROUP_AFFINITY affinity;
#if _WIN32_WINNT==0x0601 affinity.Group = group;
else if ( num_cpugroups == 1 ) affinity.Mask = 1ULL << cpu;
success = SetThreadAffinityMask( GetCurrentThread(), mask );
else
{
// Find the correct cpu group
int cpu = id % num_cpus;
int group;
for( group = 0; group < num_cpugroups; group++ )
{
int cpus = GetActiveProcessorCount( group );
if ( cpu < cpus ) break;
cpu -= cpus;
}
if (opt_debug) if ( opt_debug )
applog(LOG_DEBUG, "Binding thread %d to cpu %d on cpu group %d (mask %x)", applog( LOG_INFO, "Binding thread %d to cpu %d in cpu group %d",
id, cpu, group, (1ULL << cpu)); thread, cpu, group );
ok = SetThreadGroupAffinity( GetCurrentThread(), &affinity, NULL );
GROUP_AFFINITY affinity;
affinity.Group = group;
affinity.Mask = 1ULL << cpu;
success = SetThreadGroupAffinity( GetCurrentThread(), &affinity, NULL );
}
#else #else
else
success = SetThreadAffinityMask( GetCurrentThread(), mask ); unsigned long cpu = thread_affinity_map[ thread ];
uint64_t mask = 1ULL << cpu;
if ( opt_debug )
applog( LOG_INFO, "Binding thread %d to cpu %d", thread, cpu );
ok = SetThreadAffinityMask( GetCurrentThread(), mask );
#endif #endif
if (!success) if ( !ok )
{ {
last_error = GetLastError(); last_error = GetLastError();
applog(LOG_WARNING, "affine_to_cpu_mask for %u returned %x", applog( LOG_WARNING, "affine_to_cpu_mask for %u returned 0x%x",
id, last_error); thread, last_error );
} }
} }
#else #else
static inline void drop_policy(void) { } static inline void drop_policy(void) { }
static void affine_to_cpu_mask(int id, unsigned long mask) { } static void affine_to_cpu( struct thr_info *thr ) { }
#endif #endif
// not very useful, just index the array directly. // not very useful, just index the array directly.
@@ -1159,17 +1130,23 @@ void report_summary_log( bool force )
applog2( prio, "Blocks Solved %7d %7d", applog2( prio, "Blocks Solved %7d %7d",
solved, solved_block_count ); solved, solved_block_count );
} }
if ( stratum_errors )
applog2( LOG_INFO, "Stratum errors %7d", stratum_errors );
applog2( LOG_INFO, "Hi/Lo Share Diff %.5g / %.5g", applog2( LOG_INFO, "Hi/Lo Share Diff %.5g / %.5g",
highest_share, lowest_share ); highest_share, lowest_share );
int mismatch = submitted_share_count int mismatch = submitted_share_count
- ( accepted_share_count + stale_share_count + rejected_share_count ); - ( accepted_share_count + stale_share_count + rejected_share_count );
if ( mismatch ) if ( mismatch )
{ {
if ( mismatch != 1 ) if ( stratum_errors )
applog2(LOG_MINR, "Count mismatch: %d, stats may be inaccurate", mismatch ); applog2( LOG_MINR, "Count mismatch: %d, stats may be inaccurate",
else mismatch );
applog2(LOG_INFO, CL_LBL "Count mismatch, submitted share may still be pending" CL_N ); else if ( !opt_quiet )
applog2( LOG_INFO, CL_LBL
"Count mismatch, submitted share may still be pending" CL_N );
} }
} }
@@ -2241,49 +2218,9 @@ static void *miner_thread( void *userdata )
if ( opt_priority == 0 ) if ( opt_priority == 0 )
drop_policy(); drop_policy();
} }
// CPU thread affinity // CPU thread affinity
if ( num_cpus > 1 ) if ( opt_affinity && num_cpus > 1 ) affine_to_cpu( mythr );
{
#if AFFINITY_USES_UINT128
// Default affinity
if ( (opt_affinity == (uint128_t)(-1) ) && opt_n_threads > 1 )
{
affine_to_cpu_mask( thr_id, (uint128_t)1 << (thr_id % num_cpus) );
if ( opt_debug )
applog( LOG_INFO, "Binding thread %d to cpu %d.",
thr_id, thr_id % num_cpus,
u128_hi64( (uint128_t)1 << (thr_id % num_cpus) ),
u128_lo64( (uint128_t)1 << (thr_id % num_cpus) ) );
}
#else
if ( ( opt_affinity == -1 ) && ( opt_n_threads > 1 ) )
{
affine_to_cpu_mask( thr_id, 1 << (thr_id % num_cpus) );
if (opt_debug)
applog( LOG_DEBUG, "Binding thread %d to cpu %d.",
thr_id, thr_id % num_cpus, 1 << (thr_id % num_cpus)) ;
}
#endif
else // Custom affinity
{
affine_to_cpu_mask( thr_id, opt_affinity );
if ( opt_debug )
{
#if AFFINITY_USES_UINT128
if ( num_cpus > 64 )
applog( LOG_INFO, "Binding thread %d to mask %016llx %016llx",
thr_id, u128_hi64( opt_affinity ),
u128_lo64( opt_affinity ) );
else
applog( LOG_INFO, "Binding thread %d to mask %016llx",
thr_id, opt_affinity );
#else
applog( LOG_INFO, "Binding thread %d to mask %016llx",
thr_id, opt_affinity );
#endif
}
}
} // num_cpus > 1
if ( !algo_gate.miner_thread_init( thr_id ) ) if ( !algo_gate.miner_thread_init( thr_id ) )
{ {
@@ -2792,6 +2729,7 @@ static void *stratum_thread(void *userdata )
{ {
stratum_need_reset = false; stratum_need_reset = false;
stratum_down = true; stratum_down = true;
stratum_errors++;
stratum_disconnect( &stratum ); stratum_disconnect( &stratum );
if ( strcmp( stratum.url, rpc_url ) ) if ( strcmp( stratum.url, rpc_url ) )
{ {
@@ -2809,6 +2747,7 @@ static void *stratum_thread(void *userdata )
while ( !stratum.curl ) while ( !stratum.curl )
{ {
stratum_down = true; stratum_down = true;
restart_threads();
pthread_rwlock_wrlock( &g_work_lock ); pthread_rwlock_wrlock( &g_work_lock );
g_work_time = 0; g_work_time = 0;
pthread_rwlock_unlock( &g_work_lock ); pthread_rwlock_unlock( &g_work_lock );
@@ -2830,7 +2769,6 @@ static void *stratum_thread(void *userdata )
else else
{ {
stratum_down = false; stratum_down = false;
restart_threads();
applog(LOG_BLUE,"Stratum connection established" ); applog(LOG_BLUE,"Stratum connection established" );
} }
} }
@@ -3137,7 +3075,7 @@ void parse_arg(int key, char *arg )
{ {
char *p; char *p;
int v, i; int v, i;
uint64_t ul; // uint64_t ul;
double d; double d;
switch( key ) switch( key )
@@ -3448,21 +3386,10 @@ void parse_arg(int key, char *arg )
break; break;
#endif #endif
case 1020: // cpu-affinity case 1020: // cpu-affinity
p = strstr(arg, "0x"); p = strstr( arg, "0x" );
if ( p ) opt_affinity = p ? strtoull( p, NULL, 16 )
ul = strtoull( p, NULL, 16 ); : atoll( arg );
else break;
ul = atoll( arg );
#if AFFINITY_USES_UINT128
// replicate the low 64 bits to make a full 128 bit mask if there are more
// than 64 CPUs, otherwise zero extend the upper half.
opt_affinity = (uint128_t)ul;
if ( num_cpus > 64 )
opt_affinity |= opt_affinity << 64;
#else
opt_affinity = ul;
#endif
break;
case 1021: // cpu-priority case 1021: // cpu-priority
v = atoi(arg); v = atoi(arg);
if (v < 0 || v > 5) /* sanity check */ if (v < 0 || v > 5) /* sanity check */
@@ -3565,20 +3492,18 @@ static void parse_cmdline(int argc, char *argv[])
while (1) while (1)
{ {
#if HAVE_GETOPT_LONG #if HAVE_GETOPT_LONG
key = getopt_long(argc, argv, short_options, options, NULL); key = getopt_long(argc, argv, short_options, options, NULL);
#else #else
key = getopt(argc, argv, short_options); key = getopt(argc, argv, short_options);
#endif #endif
if (key < 0) if ( key < 0 ) break;
break; parse_arg( key, optarg );
parse_arg(key, optarg);
} }
if (optind < argc) if ( optind < argc )
{ {
fprintf(stderr, "%s: unsupported non-option argument -- '%s'\n", fprintf( stderr, "%s: unsupported non-option argument -- '%s'\n",
argv[0], argv[optind]); argv[0], argv[optind]);
show_usage_and_exit(1); show_usage_and_exit(1);
} }
} }
@@ -3642,26 +3567,21 @@ int main(int argc, char *argv[])
rpc_user = strdup(""); rpc_user = strdup("");
rpc_pass = strdup(""); rpc_pass = strdup("");
parse_cmdline(argc, argv);
#if defined(WIN32) #if defined(WIN32)
// SYSTEM_INFO sysinfo;
// GetSystemInfo(&sysinfo);
// num_cpus = sysinfo.dwNumberOfProcessors;
// What happens if GetActiveProcessorGroupCount called if groups not enabled?
// Are Windows CPU Groups supported? // Are Windows CPU Groups supported?
#if _WIN32_WINNT==0x0601 #if defined(WINDOWS_CPU_GROUPS_ENABLED)
num_cpus = 0; num_cpus = 0;
num_cpugroups = GetActiveProcessorGroupCount(); num_cpugroups = GetActiveProcessorGroupCount();
for( i = 0; i < num_cpugroups; i++ ) for( i = 0; i < num_cpugroups; i++ )
{ {
int cpus = GetActiveProcessorCount(i); int cpus = GetActiveProcessorCount( i );
num_cpus += cpus; num_cpus += cpus;
if (opt_debug) if (opt_debug)
applog(LOG_DEBUG, "Found %d cpus on cpu group %d", cpus, i); applog( LOG_INFO, "Found %d CPUs in CPU group %d", cpus, i );
} }
#else #else
SYSTEM_INFO sysinfo; SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo); GetSystemInfo(&sysinfo);
@@ -3677,21 +3597,20 @@ int main(int argc, char *argv[])
#else #else
num_cpus = 1; num_cpus = 1;
#endif #endif
if (num_cpus < 1)
num_cpus = 1;
if (!opt_n_threads) if ( num_cpus < 1 ) num_cpus = 1;
opt_n_threads = num_cpus;
parse_cmdline( argc, argv );
if ( opt_algo == ALGO_NULL ) if ( opt_algo == ALGO_NULL )
{ {
fprintf(stderr, "%s: no algo supplied\n", argv[0]); fprintf( stderr, "%s: No algo parameter specified\n", argv[0] );
show_usage_and_exit(1); show_usage_and_exit(1);
} }
// need to register to get algo optimizations for cpu capabilities // need to register to get algo optimizations for cpu capabilities
// but that causes register logs before cpu capabilities is output. // but that causes registration logs before cpu capabilities is output.
// Would need to split register into 2 parts. First part sets algo // Would need to split register function into 2 parts. First part sets algo
// optimizations but no logging, second part does any logging. // optimizations but no logging, second part does any logging.
if ( !register_algo_gate( opt_algo, &algo_gate ) ) exit(1); if ( !register_algo_gate( opt_algo, &algo_gate ) ) exit(1);
@@ -3735,9 +3654,6 @@ int main(int argc, char *argv[])
return 1; return 1;
} }
// All options must be set before starting the gate
// if ( !register_algo_gate( opt_algo, &algo_gate ) ) exit(1);
if ( coinbase_address ) if ( coinbase_address )
{ {
pk_script_size = address_to_script( pk_script, pk_buffer_size, pk_script_size = address_to_script( pk_script, pk_buffer_size,
@@ -3749,8 +3665,6 @@ int main(int argc, char *argv[])
} }
} }
// if ( !check_cpu_capability() ) exit(1);
pthread_mutex_init( &stats_lock, NULL ); pthread_mutex_init( &stats_lock, NULL );
pthread_rwlock_init( &g_work_lock, NULL ); pthread_rwlock_init( &g_work_lock, NULL );
pthread_mutex_init( &stratum.sock_lock, NULL ); pthread_mutex_init( &stratum.sock_lock, NULL );
@@ -3820,44 +3734,31 @@ int main(int argc, char *argv[])
} }
#endif #endif
// To be confirmed with more than 64 cpus if ( ( opt_n_threads == 0 ) || ( opt_n_threads > num_cpus ) )
if ( opt_affinity != -1 ) opt_n_threads = num_cpus;
{
if ( !affinity_uses_uint128 && num_cpus > 64 )
{
applog(LOG_WARNING,"Setting CPU affinity with more than 64 CPUs is only");
applog(LOG_WARNING,"available on Linux. Using default affinity.");
opt_affinity = -1;
}
/*
else
{
affine_to_cpu_mask( -1, opt_affinity );
if ( !opt_quiet )
{
#if AFFINITY_USES_UINT128
if ( num_cpus > 64 )
applog(LOG_DEBUG, "Binding process to cpu mask %x",
u128_hi64( opt_affinity ), u128_lo64( opt_affinity ) );
else
applog(LOG_DEBUG, "Binding process to cpu mask %x",
opt_affinity );
#else
applog(LOG_DEBUG, "Binding process to cpu mask %x",
opt_affinity );
#endif
}
}
*/
}
if ( !opt_quiet && ( opt_n_threads < num_cpus ) ) if ( opt_affinity && num_cpus > max_cpus )
{ {
char affinity_map[64]; applog( LOG_WARNING, "More than %d CPUs, CPU affinity is disabled",
format_affinity_map( affinity_map, opt_affinity ); max_cpus );
applog( LOG_INFO, "CPU affinity [%s]", affinity_map ); opt_affinity = 0ULL;
} }
if ( opt_affinity )
{
for ( int thr = 0, cpu = 0; thr < opt_n_threads; thr++, cpu++ )
{
while ( !( ( opt_affinity >> ( cpu&63 ) ) & 1ULL ) ) cpu++;
thread_affinity_map[ thr ] = cpu % num_cpus;
}
if ( !opt_quiet )
{
char affinity_mask[64];
format_affinity_mask( affinity_mask, opt_affinity );
applog( LOG_INFO, "CPU affinity [%s]", affinity_mask );
}
}
#ifdef HAVE_SYSLOG_H #ifdef HAVE_SYSLOG_H
if (use_syslog) if (use_syslog)
openlog("cpuminer", LOG_PID, LOG_USER); openlog("cpuminer", LOG_PID, LOG_USER);
@@ -3955,7 +3856,7 @@ int main(int argc, char *argv[])
return 1; return 1;
} }
if ( !opt_quiet ) if ( !opt_quiet )
applog( LOG_INFO,"API listnening to %s:%d", opt_api_allow, applog( LOG_INFO,"API listening to %s:%d", opt_api_allow,
opt_api_listen ); opt_api_listen );
} }

View File

@@ -16,13 +16,13 @@ export MINGW_LIB="/usr/x86_64-w64-mingw32/lib"
export GCC_MINGW_LIB="/usr/lib/gcc/x86_64-w64-mingw32/9.3-win32" export GCC_MINGW_LIB="/usr/lib/gcc/x86_64-w64-mingw32/9.3-win32"
# used by GCC # used by GCC
export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs -L$LOCAL_LIB/openssl" export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs -L$LOCAL_LIB/openssl"
# support for Windows CPU groups
export DEFAULT_CFLAGS="-O3 -Wall -D_WIN32_WINNT=0x0601"
#export DEFAULT_CFLAGS="-O3 -Wall"
# make link to local gmp header file. # make link to local gmp header file.
ln -s $LOCAL_LIB/gmp/gmp.h ./gmp.h ln -s $LOCAL_LIB/gmp/gmp.h ./gmp.h
# edit configure to fix pthread lib name for Windows.
#sed -i 's/"-lpthread"/"-lpthreadGC2"/g' configure.ac
# make release directory and copy selected DLLs. # make release directory and copy selected DLLs.
rm -rf release > /dev/null rm -rf release > /dev/null
@@ -45,7 +45,7 @@ cp $LOCAL_LIB/curl/lib/.libs/libcurl-4.dll release/
./clean-all.sh || echo clean ./clean-all.sh || echo clean
rm -f config.status rm -f config.status
./autogen.sh || echo done ./autogen.sh || echo done
CFLAGS="-O3 -march=icelake-client -Wall" ./configure $CONFIGURE_ARGS CFLAGS="$DEFAULT_CFLAGS -march=icelake-client" ./configure $CONFIGURE_ARGS
make -j 8 make -j 8
strip -s cpuminer.exe strip -s cpuminer.exe
mv cpuminer.exe release/cpuminer-avx512-sha-vaes.exe mv cpuminer.exe release/cpuminer-avx512-sha-vaes.exe
@@ -53,8 +53,8 @@ mv cpuminer.exe release/cpuminer-avx512-sha-vaes.exe
# Rocketlake AVX512 SHA AES # Rocketlake AVX512 SHA AES
make clean || echo clean make clean || echo clean
rm -f config.status rm -f config.status
CFLAGS="-O3 -march=cascadelake -msha -Wall" ./configure $CONFIGURE_ARGS CFLAGS="$DEFAULT_CFLAGS -march=cascadelake -msha" ./configure $CONFIGURE_ARGS
#CFLAGS="-O3 -march=rocketlake -Wall" ./configure $CONFIGURE_ARGS #CFLAGS="$DEFAULT_CFLAGS -march=rocketlake" ./configure $CONFIGURE_ARGS
make -j 8 make -j 8
strip -s cpuminer.exe strip -s cpuminer.exe
mv cpuminer.exe release/cpuminer-avx512-sha.exe mv cpuminer.exe release/cpuminer-avx512-sha.exe
@@ -62,7 +62,7 @@ mv cpuminer.exe release/cpuminer-avx512-sha.exe
# Zen1 AVX2 AES SHA # Zen1 AVX2 AES SHA
make clean || echo clean make clean || echo clean
rm -f config.status rm -f config.status
CFLAGS="-O3 -march=znver1 -Wall" ./configure $CONFIGURE_ARGS CFLAGS="$DEFAULT_CFLAGS -march=znver1" ./configure $CONFIGURE_ARGS
make -j 8 make -j 8
strip -s cpuminer.exe strip -s cpuminer.exe
mv cpuminer.exe release/cpuminer-zen.exe mv cpuminer.exe release/cpuminer-zen.exe
@@ -70,8 +70,8 @@ mv cpuminer.exe release/cpuminer-zen.exe
# Zen3 AVX2 SHA VAES # Zen3 AVX2 SHA VAES
make clean || echo clean make clean || echo clean
rm -f config.status rm -f config.status
CFLAGS="-O3 -march=znver2 -mvaes -Wall" ./configure $CONFIGURE_ARGS CFLAGS="$DEFAULT_CFLAGS -march=znver2 -mvaes" ./configure $CONFIGURE_ARGS
# CFLAGS="-O3 -march=znver3 -Wall" ./configure $CONFIGURE_ARGS # CFLAGS="$DEFAULT_CFLAGS -march=znver3" ./configure $CONFIGURE_ARGS
make -j 8 make -j 8
strip -s cpuminer.exe strip -s cpuminer.exe
mv cpuminer.exe release/cpuminer-zen3.exe mv cpuminer.exe release/cpuminer-zen3.exe
@@ -80,7 +80,7 @@ mv cpuminer.exe release/cpuminer-zen3.exe
# mingw won't compile avx512 without -fno-asynchronous-unwind-tables # mingw won't compile avx512 without -fno-asynchronous-unwind-tables
make clean || echo clean make clean || echo clean
rm -f config.status rm -f config.status
CFLAGS="-O3 -march=skylake-avx512 -Wall" ./configure $CONFIGURE_ARGS CFLAGS="$DEFAULT_CFLAGS -march=skylake-avx512" ./configure $CONFIGURE_ARGS
#CFLAGS="-O3 -march=skylake-avx512 -Wall -fno-asynchronous-unwind-tables" ./configure $CONFIGURE_ARGS #CFLAGS="-O3 -march=skylake-avx512 -Wall -fno-asynchronous-unwind-tables" ./configure $CONFIGURE_ARGS
make -j 8 make -j 8
strip -s cpuminer.exe strip -s cpuminer.exe
@@ -90,7 +90,7 @@ mv cpuminer.exe release/cpuminer-avx512.exe
make clean || echo clean make clean || echo clean
rm -f config.status rm -f config.status
# GCC 9 doesn't include AES in -march=core-avx2 # GCC 9 doesn't include AES in -march=core-avx2
CFLAGS="-O3 -march=core-avx2 -maes -Wall" ./configure $CONFIGURE_ARGS CFLAGS="$DEFAULT_CFLAGS -march=core-avx2 -maes" ./configure $CONFIGURE_ARGS
make -j 8 make -j 8
strip -s cpuminer.exe strip -s cpuminer.exe
mv cpuminer.exe release/cpuminer-avx2.exe mv cpuminer.exe release/cpuminer-avx2.exe
@@ -99,7 +99,7 @@ mv cpuminer.exe release/cpuminer-avx2.exe
make clean || echo clean make clean || echo clean
rm -f config.status rm -f config.status
# -march=corei7-avx still includes aes, but just in case # -march=corei7-avx still includes aes, but just in case
CFLAGS="-O3 -march=corei7-avx -maes -Wall" ./configure $CONFIGURE_ARGS CFLAGS="$DEFAULT_CFLAGS -march=corei7-avx -maes" ./configure $CONFIGURE_ARGS
make -j 8 make -j 8
strip -s cpuminer.exe strip -s cpuminer.exe
mv cpuminer.exe release/cpuminer-avx.exe mv cpuminer.exe release/cpuminer-avx.exe
@@ -107,7 +107,7 @@ mv cpuminer.exe release/cpuminer-avx.exe
# Westmere SSE4.2 AES # Westmere SSE4.2 AES
make clean || echo clean make clean || echo clean
rm -f config.status rm -f config.status
CFLAGS="-O3 -march=westmere -maes -Wall" ./configure $CONFIGURE_ARGS CFLAGS="$DEFAULT_CFLAGS -march=westmere -maes" ./configure $CONFIGURE_ARGS
#CFLAGS="-O3 -maes -msse4.2 -Wall" ./configure $CONFIGURE_ARGS #CFLAGS="-O3 -maes -msse4.2 -Wall" ./configure $CONFIGURE_ARGS
make -j 8 make -j 8
strip -s cpuminer.exe strip -s cpuminer.exe
@@ -116,7 +116,7 @@ mv cpuminer.exe release/cpuminer-aes-sse42.exe
# Nehalem SSE4.2 # Nehalem SSE4.2
#make clean || echo clean #make clean || echo clean
#rm -f config.status #rm -f config.status
#CFLAGS="-O3 -march=corei7 -Wall" ./configure $CONFIGURE_ARGS #CFLAGS="$DEFAULT_CFLAGS -march=corei7" ./configure $CONFIGURE_ARGS
#make #make
#strip -s cpuminer.exe #strip -s cpuminer.exe
#mv cpuminer.exe release/cpuminer-sse42.exe #mv cpuminer.exe release/cpuminer-sse42.exe
@@ -124,7 +124,7 @@ mv cpuminer.exe release/cpuminer-aes-sse42.exe
# Core2 SSSE3 # Core2 SSSE3
#make clean || echo clean #make clean || echo clean
#rm -f config.status #rm -f config.status
#CFLAGS="-O3 -march=core2 -Wall" ./configure $CONFIGURE_ARGS #CFLAGS="$DEFAULT_CFLAGS -march=core2" ./configure $CONFIGURE_ARGS
#make #make
#strip -s cpuminer.exe #strip -s cpuminer.exe
#mv cpuminer.exe release/cpuminer-ssse3.exe #mv cpuminer.exe release/cpuminer-ssse3.exe
@@ -133,7 +133,7 @@ mv cpuminer.exe release/cpuminer-aes-sse42.exe
# Generic SSE2 # Generic SSE2
make clean || echo clean make clean || echo clean
rm -f config.status rm -f config.status
CFLAGS="-O3 -msse2 -Wall" ./configure $CONFIGURE_ARGS CFLAGS="$DEFAULT_CFLAGS -msse2" ./configure $CONFIGURE_ARGS
make -j 8 make -j 8
strip -s cpuminer.exe strip -s cpuminer.exe
mv cpuminer.exe release/cpuminer-sse2.exe mv cpuminer.exe release/cpuminer-sse2.exe