mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.19.0
This commit is contained in:
@@ -40,7 +40,7 @@ $ mkdir $HOME/usr/lib
|
|||||||
version available in the repositories.
|
version available in the repositories.
|
||||||
|
|
||||||
Download the following source code packages from their respective and
|
Download the following source code packages from their respective and
|
||||||
respected download locations, copy them to ~/usr/lib/ and uncompress them.
|
respected download locations, copy them to $HOME/usr/lib/ and uncompress them.
|
||||||
|
|
||||||
openssl: https://github.com/openssl/openssl/releases
|
openssl: https://github.com/openssl/openssl/releases
|
||||||
|
|
||||||
@@ -149,85 +149,10 @@ Copy cpuminer.exe to the release directory, compress and copy the release direct
|
|||||||
|
|
||||||
Run cpuminer
|
Run cpuminer
|
||||||
|
|
||||||
In a command windows change directories to the unzipped release folder. to get a list of all options:
|
In a command window, change directories to the unzipped release folder. To get a list of all options:
|
||||||
|
|
||||||
cpuminer.exe --help
|
cpuminer.exe --help
|
||||||
|
|
||||||
Command options are specific to where you mine. Refer to the pool's instructions on how to set them.
|
Command options are specific to where you mine. Refer to the pool's instructions on how to set them.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Create a link to the locally compiled version of gmp.h
|
|
||||||
|
|
||||||
$ ln -s $LOCAL_LIB/gmp-version/gmp.h ./gmp.h
|
|
||||||
|
|
||||||
Edit configure.ac to fix libpthread package name.
|
|
||||||
|
|
||||||
sed -i 's/"-lpthread"/"-lpthreadGC2"/g' configure.ac
|
|
||||||
|
|
||||||
|
|
||||||
7. Compile
|
|
||||||
|
|
||||||
you can use the default compile if you intend to use cpuminer-opt on the
|
|
||||||
same CPU and the virtual machine supports that architecture.
|
|
||||||
|
|
||||||
./build.sh
|
|
||||||
|
|
||||||
Otherwise you can compile manually while setting options in CFLAGS.
|
|
||||||
|
|
||||||
Some common options:
|
|
||||||
|
|
||||||
To compile for a specific CPU architecture:
|
|
||||||
|
|
||||||
CFLAGS="-O3 -march=znver1 -Wall" ./configure --with-curl
|
|
||||||
|
|
||||||
This will compile for AMD Ryzen.
|
|
||||||
|
|
||||||
You can compile more generically for a set of specific CPU features
|
|
||||||
if you know what features you want:
|
|
||||||
|
|
||||||
CFLAGS="-O3 -maes -msse4.2 -Wall" ./configure --with-curl
|
|
||||||
|
|
||||||
This will compile for an older CPU that does not have AVX.
|
|
||||||
|
|
||||||
You can find several examples in build-allarch.sh
|
|
||||||
|
|
||||||
If you have a CPU with more than 64 threads and Windows 7 or higher you
|
|
||||||
can enable the CPU Groups feature:
|
|
||||||
|
|
||||||
-D_WIN32_WINNT=0x0601
|
|
||||||
|
|
||||||
Once you have run configure successfully run make with n CPU threads:
|
|
||||||
|
|
||||||
make -j n
|
|
||||||
|
|
||||||
Copy cpuminer.exe to the release directory, compress and copy the release
|
|
||||||
directory to a Windows system and run cpuminer.exe from the command line.
|
|
||||||
|
|
||||||
Run cpuminer
|
|
||||||
|
|
||||||
In a command windows change directories to the unzipped release folder.
|
|
||||||
to get a list of all options:
|
|
||||||
|
|
||||||
cpuminer.exe --help
|
|
||||||
|
|
||||||
Command options are specific to where you mine. Refer to the pool's
|
|
||||||
instructions on how to set them.
|
|
||||||
|
@@ -65,7 +65,22 @@ If not what makes it happen or not happen?
|
|||||||
Change Log
|
Change Log
|
||||||
----------
|
----------
|
||||||
|
|
||||||
v3.8.2
|
v3.19.0
|
||||||
|
|
||||||
|
Windows binaries now built with support for CPU groups, requires Windows 7.
|
||||||
|
|
||||||
|
Changes to cpu-affinity:
|
||||||
|
- PR#346: Fixed incorrect CPU affinity on Windows built for CPU groups,
|
||||||
|
- added support for CPU affinity for up to 256 threads or CPUs,
|
||||||
|
- streamlined code for more efficient initialization of miner threads,
|
||||||
|
- precise affining of each miner thread to a specific CPU,
|
||||||
|
- added an option to disable CPU affinity with "--cpu-affinity 0"
|
||||||
|
|
||||||
|
Faster sha256t with AVX512 & AVX2.
|
||||||
|
|
||||||
|
Added stratum error count to stats log, reported only when non-zero.
|
||||||
|
|
||||||
|
v3.18.2
|
||||||
|
|
||||||
Issue #342, fixed Groestl AES on Windows, broken in v3.18.0.
|
Issue #342, fixed Groestl AES on Windows, broken in v3.18.0.
|
||||||
|
|
||||||
|
@@ -62,6 +62,12 @@ void sha256_4way_transform_le( __m128i *state_out, const __m128i *data,
|
|||||||
const __m128i *state_in );
|
const __m128i *state_in );
|
||||||
void sha256_4way_transform_be( __m128i *state_out, const __m128i *data,
|
void sha256_4way_transform_be( __m128i *state_out, const __m128i *data,
|
||||||
const __m128i *state_in );
|
const __m128i *state_in );
|
||||||
|
void sha256_4way_prehash_3rounds( __m128i *state_mid, __m128i *X,
|
||||||
|
const __m128i *W, const __m128i *state_in );
|
||||||
|
void sha256_4way_final_rounds( __m128i *state_out, const __m128i *data,
|
||||||
|
const __m128i *state_in, const __m128i *state_mid, const __m128i *X );
|
||||||
|
int sha256_4way_transform_le_short( __m128i *state_out, const __m128i *data,
|
||||||
|
const __m128i *state_in );
|
||||||
|
|
||||||
#endif // SSE2
|
#endif // SSE2
|
||||||
|
|
||||||
@@ -84,10 +90,12 @@ void sha256_8way_transform_le( __m256i *state_out, const __m256i *data,
|
|||||||
void sha256_8way_transform_be( __m256i *state_out, const __m256i *data,
|
void sha256_8way_transform_be( __m256i *state_out, const __m256i *data,
|
||||||
const __m256i *state_in );
|
const __m256i *state_in );
|
||||||
|
|
||||||
void sha256_8way_prehash_3rounds( __m256i *state_mid, const __m256i *W,
|
void sha256_8way_prehash_3rounds( __m256i *state_mid, __m256i *X,
|
||||||
const __m256i *state_in );
|
const __m256i *W, const __m256i *state_in );
|
||||||
void sha256_8way_final_rounds( __m256i *state_out, const __m256i *data,
|
void sha256_8way_final_rounds( __m256i *state_out, const __m256i *data,
|
||||||
const __m256i *state_in, const __m256i *state_mid );
|
const __m256i *state_in, const __m256i *state_mid, const __m256i *X );
|
||||||
|
int sha256_8way_transform_le_short( __m256i *state_out, const __m256i *data,
|
||||||
|
const __m256i *state_in );
|
||||||
|
|
||||||
#endif // AVX2
|
#endif // AVX2
|
||||||
|
|
||||||
@@ -109,10 +117,13 @@ void sha256_16way_transform_le( __m512i *state_out, const __m512i *data,
|
|||||||
const __m512i *state_in );
|
const __m512i *state_in );
|
||||||
void sha256_16way_transform_be( __m512i *state_out, const __m512i *data,
|
void sha256_16way_transform_be( __m512i *state_out, const __m512i *data,
|
||||||
const __m512i *state_in );
|
const __m512i *state_in );
|
||||||
void sha256_16way_prehash_3rounds( __m512i *state_mid, const __m512i *W,
|
void sha256_16way_prehash_3rounds( __m512i *state_mid, __m512i *X,
|
||||||
const __m512i *state_in );
|
const __m512i *W, const __m512i *state_in );
|
||||||
void sha256_16way_final_rounds( __m512i *state_out, const __m512i *data,
|
void sha256_16way_final_rounds( __m512i *state_out, const __m512i *data,
|
||||||
const __m512i *state_in, const __m512i *state_mid );
|
const __m512i *state_in, const __m512i *state_mid, const __m512i *X );
|
||||||
|
|
||||||
|
int sha256_16way_transform_le_short( __m512i *state_out, const __m512i *data,
|
||||||
|
const __m512i *state_in );
|
||||||
|
|
||||||
#endif // AVX512
|
#endif // AVX512
|
||||||
|
|
||||||
|
@@ -611,11 +611,11 @@ static inline int scanhash_sha256d_8way_pooler( struct work *work,
|
|||||||
|
|
||||||
#endif /* HAVE_SHA256_8WAY */
|
#endif /* HAVE_SHA256_8WAY */
|
||||||
|
|
||||||
int scanhash_sha256d_pooler( struct work *work,
|
int scanhash_sha256d_pooler( struct work *work, uint32_t max_nonce,
|
||||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
uint32_t _ALIGN(128) data[64];
|
uint32_t _ALIGN(128) data[64];
|
||||||
uint32_t _ALIGN(32) hash[8];
|
uint32_t _ALIGN(32) hash[8];
|
||||||
uint32_t _ALIGN(32) midstate[8];
|
uint32_t _ALIGN(32) midstate[8];
|
||||||
@@ -626,12 +626,12 @@ int scanhash_sha256d_pooler( struct work *work,
|
|||||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||||
|
|
||||||
#ifdef HAVE_SHA256_8WAY
|
#ifdef HAVE_SHA256_8WAY
|
||||||
if (sha256_use_8way())
|
if ( sha256_use_8way() )
|
||||||
return scanhash_sha256d_8way_pooler( work, max_nonce, hashes_done, mythr );
|
return scanhash_sha256d_8way_pooler( work, max_nonce, hashes_done, mythr );
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAVE_SHA256_4WAY
|
#ifdef HAVE_SHA256_4WAY
|
||||||
if (sha256_use_4way())
|
if ( sha256_use_4way() )
|
||||||
return scanhash_sha256d_4way_pooler( work, max_nonce, hashes_done, mythr );
|
return scanhash_sha256d_4way_pooler( work, max_nonce, hashes_done, mythr );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
memcpy(data, pdata + 16, 64);
|
memcpy(data, pdata + 16, 64);
|
||||||
@@ -695,8 +695,11 @@ bool register_sha256d_algo( algo_gate_t* gate )
|
|||||||
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
|
||||||
#if defined(SHA256D_16WAY)
|
#if defined(SHA256D_16WAY)
|
||||||
gate->scanhash = (void*)&scanhash_sha256d_16way;
|
gate->scanhash = (void*)&scanhash_sha256d_16way;
|
||||||
|
//#elif defined(SHA256D_8WAY)
|
||||||
|
// gate->scanhash = (void*)&scanhash_sha256d_8way;
|
||||||
#else
|
#else
|
||||||
gate->scanhash = (void*)&scanhash_sha256d_pooler;
|
gate->scanhash = (void*)&scanhash_sha256d_pooler;
|
||||||
|
// gate->scanhash = (void*)&scanhash_sha256d_4way;
|
||||||
#endif
|
#endif
|
||||||
// gate->hash = (void*)&sha256d;
|
// gate->hash = (void*)&sha256d;
|
||||||
return true;
|
return true;
|
||||||
|
File diff suppressed because it is too large
Load Diff
@@ -10,13 +10,14 @@
|
|||||||
int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
|
int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
|
__m512i vdata[32] __attribute__ ((aligned (128)));
|
||||||
__m512i block[16] __attribute__ ((aligned (64)));
|
__m512i block[16] __attribute__ ((aligned (64)));
|
||||||
__m512i hash32[8] __attribute__ ((aligned (32)));
|
__m512i hash32[8] __attribute__ ((aligned (64)));
|
||||||
__m512i initstate[8] __attribute__ ((aligned (32)));
|
__m512i initstate[8] __attribute__ ((aligned (64)));
|
||||||
__m512i midstate1[8] __attribute__ ((aligned (32)));
|
__m512i midstate1[8] __attribute__ ((aligned (64)));
|
||||||
__m512i midstate2[8] __attribute__ ((aligned (32)));
|
__m512i midstate2[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
__m512i mexp_pre[16] __attribute__ ((aligned (64)));
|
||||||
__m512i vdata[20] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
const uint32_t *ptarget = work->target;
|
const uint32_t *ptarget = work->target;
|
||||||
@@ -36,6 +37,14 @@ int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
|
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
|
||||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||||
|
|
||||||
|
vdata[16+4] = last_byte;
|
||||||
|
memset_zero_512( vdata+16 + 5, 10 );
|
||||||
|
vdata[16+15] = m512_const1_32( 80*8 ); // bit count
|
||||||
|
|
||||||
|
block[ 8] = last_byte;
|
||||||
|
memset_zero_512( block + 9, 6 );
|
||||||
|
block[15] = m512_const1_32( 32*8 ); // bit count
|
||||||
|
|
||||||
// initialize state
|
// initialize state
|
||||||
initstate[0] = m512_const1_64( 0x6A09E6676A09E667 );
|
initstate[0] = m512_const1_64( 0x6A09E6676A09E667 );
|
||||||
initstate[1] = m512_const1_64( 0xBB67AE85BB67AE85 );
|
initstate[1] = m512_const1_64( 0xBB67AE85BB67AE85 );
|
||||||
@@ -49,39 +58,33 @@ int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
sha256_16way_transform_le( midstate1, vdata, initstate );
|
sha256_16way_transform_le( midstate1, vdata, initstate );
|
||||||
|
|
||||||
// Do 3 rounds on the first 12 bytes of the next block
|
// Do 3 rounds on the first 12 bytes of the next block
|
||||||
sha256_16way_prehash_3rounds( midstate2, vdata + 16, midstate1 );
|
sha256_16way_prehash_3rounds( midstate2, mexp_pre, vdata+16, midstate1 );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
// 1. final 16 bytes of data, with padding
|
// 1. final 16 bytes of data, with padding
|
||||||
memcpy_512( block, vdata + 16, 4 );
|
sha256_16way_final_rounds( block, vdata+16, midstate1, midstate2,
|
||||||
block[ 4] = last_byte;
|
mexp_pre );
|
||||||
memset_zero_512( block + 5, 10 );
|
|
||||||
block[15] = m512_const1_32( 80*8 ); // bit count
|
|
||||||
sha256_16way_final_rounds( hash32, block, midstate1, midstate2 );
|
|
||||||
|
|
||||||
// 2. 32 byte hash from 1.
|
// 2. 32 byte hash from 1.
|
||||||
memcpy_512( block, hash32, 8 );
|
if ( sha256_16way_transform_le_short( hash32, block, initstate ) )
|
||||||
block[ 8] = last_byte;
|
|
||||||
memset_zero_512( block + 9, 6 );
|
|
||||||
block[15] = m512_const1_32( 32*8 ); // bit count
|
|
||||||
sha256_16way_transform_le( hash32, block, initstate );
|
|
||||||
|
|
||||||
// byte swap final hash for testing
|
|
||||||
mm512_block_bswap_32( hash32, hash32 );
|
|
||||||
|
|
||||||
for ( int lane = 0; lane < 16; lane++ )
|
|
||||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
|
||||||
{
|
{
|
||||||
extr_lane_16x32( lane_hash, hash32, lane, 256 );
|
// byte swap final hash for testing
|
||||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
mm512_block_bswap_32( hash32, hash32 );
|
||||||
|
|
||||||
|
for ( int lane = 0; lane < 16; lane++ )
|
||||||
|
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
extr_lane_16x32( lane_hash, hash32, lane, 256 );
|
||||||
submit_solution( work, lane_hash, mythr );
|
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = n + lane;
|
||||||
|
submit_solution( work, lane_hash, mythr );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*noncev = _mm512_add_epi32( *noncev, sixteen );
|
*noncev = _mm512_add_epi32( *noncev, sixteen );
|
||||||
n += 16;
|
n += 16;
|
||||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
*hashes_done = n - first_nonce;
|
*hashes_done = n - first_nonce;
|
||||||
@@ -95,13 +98,14 @@ int scanhash_sha256d_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
|
int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
__m256i block[16] __attribute__ ((aligned (64)));
|
__m256i vdata[32] __attribute__ ((aligned (64)));
|
||||||
|
__m256i block[16] __attribute__ ((aligned (32)));
|
||||||
__m256i hash32[8] __attribute__ ((aligned (32)));
|
__m256i hash32[8] __attribute__ ((aligned (32)));
|
||||||
__m256i initstate[8] __attribute__ ((aligned (32)));
|
__m256i initstate[8] __attribute__ ((aligned (32)));
|
||||||
__m256i midstate1[8] __attribute__ ((aligned (32)));
|
__m256i midstate1[8] __attribute__ ((aligned (32)));
|
||||||
__m256i midstate2[8] __attribute__ ((aligned (32)));
|
__m256i midstate2[8] __attribute__ ((aligned (32)));
|
||||||
|
__m256i mexp_pre[16] __attribute__ ((aligned (32)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
__m256i vdata[20] __attribute__ ((aligned (32)));
|
|
||||||
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
const uint32_t *ptarget = work->target;
|
const uint32_t *ptarget = work->target;
|
||||||
@@ -120,6 +124,14 @@ int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
|
|
||||||
*noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
*noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||||
|
|
||||||
|
vdata[16+4] = last_byte;
|
||||||
|
memset_zero_256( vdata+16 + 5, 10 );
|
||||||
|
vdata[16+15] = m256_const1_32( 80*8 ); // bit count
|
||||||
|
|
||||||
|
block[ 8] = last_byte;
|
||||||
|
memset_zero_256( block + 9, 6 );
|
||||||
|
block[15] = m256_const1_32( 32*8 ); // bit count
|
||||||
|
|
||||||
// initialize state
|
// initialize state
|
||||||
initstate[0] = m256_const1_64( 0x6A09E6676A09E667 );
|
initstate[0] = m256_const1_64( 0x6A09E6676A09E667 );
|
||||||
initstate[1] = m256_const1_64( 0xBB67AE85BB67AE85 );
|
initstate[1] = m256_const1_64( 0xBB67AE85BB67AE85 );
|
||||||
@@ -133,35 +145,30 @@ int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
sha256_8way_transform_le( midstate1, vdata, initstate );
|
sha256_8way_transform_le( midstate1, vdata, initstate );
|
||||||
|
|
||||||
// Do 3 rounds on the first 12 bytes of the next block
|
// Do 3 rounds on the first 12 bytes of the next block
|
||||||
sha256_8way_prehash_3rounds( midstate2, vdata + 16, midstate1 );
|
sha256_8way_prehash_3rounds( midstate2, mexp_pre, vdata + 16, midstate1 );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
// 1. final 16 bytes of data, with padding
|
// 1. final 16 bytes of data, with padding
|
||||||
memcpy_256( block, vdata + 16, 4 );
|
sha256_8way_final_rounds( block, vdata+16, midstate1, midstate2,
|
||||||
block[ 4] = last_byte;
|
mexp_pre );
|
||||||
memset_zero_256( block + 5, 10 );
|
|
||||||
block[15] = m256_const1_32( 80*8 ); // bit count
|
|
||||||
sha256_8way_final_rounds( hash32, block, midstate1, midstate2 );
|
|
||||||
|
|
||||||
// 2. 32 byte hash from 1.
|
// 2. 32 byte hash from 1.
|
||||||
memcpy_256( block, hash32, 8 );
|
if ( unlikely(
|
||||||
block[ 8] = last_byte;
|
sha256_8way_transform_le_short( hash32, block, initstate ) ) )
|
||||||
memset_zero_256( block + 9, 6 );
|
|
||||||
block[15] = m256_const1_32( 32*8 ); // bit count
|
|
||||||
sha256_8way_transform_le( hash32, block, initstate );
|
|
||||||
|
|
||||||
// byte swap final hash for testing
|
|
||||||
mm256_block_bswap_32( hash32, hash32 );
|
|
||||||
|
|
||||||
for ( int lane = 0; lane < 8; lane++ )
|
|
||||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
|
||||||
{
|
{
|
||||||
extr_lane_8x32( lane_hash, hash32, lane, 256 );
|
// byte swap final hash for testing
|
||||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
mm256_block_bswap_32( hash32, hash32 );
|
||||||
|
|
||||||
|
for ( int lane = 0; lane < 8; lane++ )
|
||||||
|
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
extr_lane_8x32( lane_hash, hash32, lane, 256 );
|
||||||
submit_solution( work, lane_hash, mythr );
|
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = n + lane;
|
||||||
|
submit_solution( work, lane_hash, mythr );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*noncev = _mm256_add_epi32( *noncev, eight );
|
*noncev = _mm256_add_epi32( *noncev, eight );
|
||||||
@@ -179,12 +186,14 @@ int scanhash_sha256d_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
int scanhash_sha256d_4way( struct work *work, const uint32_t max_nonce,
|
int scanhash_sha256d_4way( struct work *work, const uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
__m128i block[16] __attribute__ ((aligned (64)));
|
__m128i vdata[32] __attribute__ ((aligned (64)));
|
||||||
__m128i hash32[8] __attribute__ ((aligned (32)));
|
__m128i block[16] __attribute__ ((aligned (32)));
|
||||||
__m128i initstate[8] __attribute__ ((aligned (32)));
|
__m128i hash32[8] __attribute__ ((aligned (32)));
|
||||||
__m128i midstate[8] __attribute__ ((aligned (32)));
|
__m128i initstate[8] __attribute__ ((aligned (32)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
__m128i midstate1[8] __attribute__ ((aligned (32)));
|
||||||
__m128i vdata[20] __attribute__ ((aligned (32)));
|
__m128i midstate2[8] __attribute__ ((aligned (32)));
|
||||||
|
__m128i mexp_pre[16] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
const uint32_t *ptarget = work->target;
|
const uint32_t *ptarget = work->target;
|
||||||
@@ -203,6 +212,14 @@ int scanhash_sha256d_4way( struct work *work, const uint32_t max_nonce,
|
|||||||
|
|
||||||
*noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n );
|
*noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n );
|
||||||
|
|
||||||
|
vdata[16+4] = last_byte;
|
||||||
|
memset_zero_128( vdata+16 + 5, 10 );
|
||||||
|
vdata[16+15] = m128_const1_32( 80*8 ); // bit count
|
||||||
|
|
||||||
|
block[ 8] = last_byte;
|
||||||
|
memset_zero_128( block + 9, 6 );
|
||||||
|
block[15] = m128_const1_32( 32*8 ); // bit count
|
||||||
|
|
||||||
// initialize state
|
// initialize state
|
||||||
initstate[0] = m128_const1_64( 0x6A09E6676A09E667 );
|
initstate[0] = m128_const1_64( 0x6A09E6676A09E667 );
|
||||||
initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 );
|
initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 );
|
||||||
@@ -214,39 +231,36 @@ int scanhash_sha256d_4way( struct work *work, const uint32_t max_nonce,
|
|||||||
initstate[7] = m128_const1_64( 0x5BE0CD195BE0CD19 );
|
initstate[7] = m128_const1_64( 0x5BE0CD195BE0CD19 );
|
||||||
|
|
||||||
// hash first 64 bytes of data
|
// hash first 64 bytes of data
|
||||||
sha256_4way_transform_le( midstate, vdata, initstate );
|
sha256_4way_transform_le( midstate1, vdata, initstate );
|
||||||
|
// Do 3 rounds on the first 12 bytes of the next block
|
||||||
|
sha256_4way_prehash_3rounds( midstate2, mexp_pre, vdata + 16, midstate1 );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
// 1. final 16 bytes of data, with padding
|
// 1. final 16 bytes of data, with padding
|
||||||
memcpy_128( block, vdata + 16, 4 );
|
sha256_4way_final_rounds( block, vdata+16, midstate1, midstate2,
|
||||||
block[ 4] = last_byte;
|
mexp_pre );
|
||||||
memset_zero_128( block + 5, 10 );
|
|
||||||
block[15] = m128_const1_32( 80*8 ); // bit count
|
|
||||||
sha256_4way_transform_le( hash32, block, midstate );
|
|
||||||
|
|
||||||
// 2. 32 byte hash from 1.
|
// 2. 32 byte hash from 1.
|
||||||
memcpy_128( block, hash32, 8 );
|
if ( unlikely(
|
||||||
block[ 8] = last_byte;
|
sha256_4way_transform_le_short( hash32, block, initstate ) ) )
|
||||||
memset_zero_128( block + 9, 6 );
|
|
||||||
block[15] = m128_const1_32( 32*8 ); // bit count
|
|
||||||
sha256_4way_transform_le( hash32, block, initstate );
|
|
||||||
|
|
||||||
// byte swap final hash for testing
|
|
||||||
mm128_block_bswap_32( hash32, hash32 );
|
|
||||||
|
|
||||||
for ( int lane = 0; lane < 4; lane++ )
|
|
||||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
|
||||||
{
|
{
|
||||||
extr_lane_4x32( lane_hash, hash32, lane, 256 );
|
// byte swap final hash for testing
|
||||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
mm128_block_bswap_32( hash32, hash32 );
|
||||||
|
|
||||||
|
for ( int lane = 0; lane < 4; lane++ )
|
||||||
|
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
extr_lane_4x32( lane_hash, hash32, lane, 256 );
|
||||||
submit_solution( work, lane_hash, mythr );
|
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = n + lane;
|
||||||
|
submit_solution( work, lane_hash, mythr );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*noncev = _mm_add_epi32( *noncev, four );
|
*noncev = _mm_add_epi32( *noncev, four );
|
||||||
n += 4;
|
n += 4;
|
||||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
*hashes_done = n - first_nonce;
|
*hashes_done = n - first_nonce;
|
||||||
|
@@ -6,12 +6,10 @@
|
|||||||
|
|
||||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||||
#define SHA256D_16WAY 1
|
#define SHA256D_16WAY 1
|
||||||
/*
|
|
||||||
#elif defined(__AVX2__)
|
#elif defined(__AVX2__)
|
||||||
#define SHA256D_8WAY 1
|
#define SHA256D_8WAY 1
|
||||||
#else
|
#else
|
||||||
#define SHA256D_4WAY 1
|
#define SHA256D_4WAY 1
|
||||||
*/
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool register_sha256d_algo( algo_gate_t* gate );
|
bool register_sha256d_algo( algo_gate_t* gate );
|
||||||
@@ -21,7 +19,7 @@ bool register_sha256d_algo( algo_gate_t* gate );
|
|||||||
int scanhash_sha256d_16way( struct work *work, uint32_t max_nonce,
|
int scanhash_sha256d_16way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
#endif
|
#endif
|
||||||
/*
|
|
||||||
#if defined(SHA256D_8WAY)
|
#if defined(SHA256D_8WAY)
|
||||||
|
|
||||||
int scanhash_sha256d_8way( struct work *work, uint32_t max_nonce,
|
int scanhash_sha256d_8way( struct work *work, uint32_t max_nonce,
|
||||||
@@ -33,7 +31,7 @@ int scanhash_sha256d_8way( struct work *work, uint32_t max_nonce,
|
|||||||
int scanhash_sha256d_4way( struct work *work, uint32_t max_nonce,
|
int scanhash_sha256d_4way( struct work *work, uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr );
|
uint64_t *hashes_done, struct thr_info *mythr );
|
||||||
#endif
|
#endif
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
#if defined(__SHA__)
|
#if defined(__SHA__)
|
||||||
|
@@ -10,13 +10,14 @@
|
|||||||
int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
|
__m512i vdata[32] __attribute__ ((aligned (128)));
|
||||||
__m512i block[16] __attribute__ ((aligned (64)));
|
__m512i block[16] __attribute__ ((aligned (64)));
|
||||||
__m512i hash32[8] __attribute__ ((aligned (32)));
|
__m512i hash32[8] __attribute__ ((aligned (64)));
|
||||||
__m512i initstate[8] __attribute__ ((aligned (32)));
|
__m512i initstate[8] __attribute__ ((aligned (64)));
|
||||||
__m512i midstate1[8] __attribute__ ((aligned (32)));
|
__m512i midstate1[8] __attribute__ ((aligned (64)));
|
||||||
__m512i midstate2[8] __attribute__ ((aligned (32)));
|
__m512i midstate2[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
__m512i mexp_pre[16] __attribute__ ((aligned (64)));
|
||||||
__m512i vdata[20] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||||
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
const uint32_t *ptarget = work->target;
|
const uint32_t *ptarget = work->target;
|
||||||
@@ -36,7 +37,14 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
|
*noncev = _mm512_set_epi32( n+15, n+14, n+13, n+12, n+11, n+10, n+9, n+8,
|
||||||
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||||
|
|
||||||
// initialize state
|
vdata[16+4] = last_byte;
|
||||||
|
memset_zero_512( vdata+16 + 5, 10 );
|
||||||
|
vdata[16+15] = m512_const1_32( 80*8 ); // bit count
|
||||||
|
|
||||||
|
block[ 8] = last_byte;
|
||||||
|
memset_zero_512( block + 9, 6 );
|
||||||
|
block[15] = m512_const1_32( 32*8 ); // bit count
|
||||||
|
|
||||||
initstate[0] = m512_const1_64( 0x6A09E6676A09E667 );
|
initstate[0] = m512_const1_64( 0x6A09E6676A09E667 );
|
||||||
initstate[1] = m512_const1_64( 0xBB67AE85BB67AE85 );
|
initstate[1] = m512_const1_64( 0xBB67AE85BB67AE85 );
|
||||||
initstate[2] = m512_const1_64( 0x3C6EF3723C6EF372 );
|
initstate[2] = m512_const1_64( 0x3C6EF3723C6EF372 );
|
||||||
@@ -49,43 +57,37 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
sha256_16way_transform_le( midstate1, vdata, initstate );
|
sha256_16way_transform_le( midstate1, vdata, initstate );
|
||||||
|
|
||||||
// Do 3 rounds on the first 12 bytes of the next block
|
// Do 3 rounds on the first 12 bytes of the next block
|
||||||
sha256_16way_prehash_3rounds( midstate2, vdata + 16, midstate1 );
|
sha256_16way_prehash_3rounds( midstate2, mexp_pre, vdata+16, midstate1 );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
// 1. final 16 bytes of data, with padding
|
// 1. final 16 bytes of data, pre-padded
|
||||||
memcpy_512( block, vdata + 16, 4 );
|
sha256_16way_final_rounds( block, vdata+16, midstate1, midstate2,
|
||||||
block[ 4] = last_byte;
|
mexp_pre );
|
||||||
memset_zero_512( block + 5, 10 );
|
|
||||||
block[15] = m512_const1_32( 80*8 ); // bit count
|
|
||||||
sha256_16way_final_rounds( hash32, block, midstate1, midstate2 );
|
|
||||||
|
|
||||||
// 2. 32 byte hash from 1.
|
// 2. 32 byte hash from 1.
|
||||||
memcpy_512( block, hash32, 8 );
|
sha256_16way_transform_le( block, block, initstate );
|
||||||
block[ 8] = last_byte;
|
|
||||||
memset_zero_512( block + 9, 6 );
|
|
||||||
block[15] = m512_const1_32( 32*8 ); // bit count
|
|
||||||
sha256_16way_transform_le( hash32, block, initstate );
|
|
||||||
|
|
||||||
// 3. 32 byte hash from 2.
|
// 3. 32 byte hash from 2.
|
||||||
memcpy_512( block, hash32, 8 );
|
if ( unlikely(
|
||||||
sha256_16way_transform_le( hash32, block, initstate );
|
sha256_16way_transform_le_short( hash32, block, initstate ) ) )
|
||||||
|
|
||||||
// byte swap final hash for testing
|
|
||||||
mm512_block_bswap_32( hash32, hash32 );
|
|
||||||
|
|
||||||
for ( int lane = 0; lane < 16; lane++ )
|
|
||||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
|
||||||
{
|
{
|
||||||
extr_lane_16x32( lane_hash, hash32, lane, 256 );
|
// byte swap final hash for testing
|
||||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
mm512_block_bswap_32( hash32, hash32 );
|
||||||
|
|
||||||
|
for ( int lane = 0; lane < 16; lane++ )
|
||||||
|
if ( hash32_d7[ lane ] <= targ32_d7 )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
extr_lane_16x32( lane_hash, hash32, lane, 256 );
|
||||||
submit_solution( work, lane_hash, mythr );
|
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = n + lane;
|
||||||
|
submit_solution( work, lane_hash, mythr );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*noncev = _mm512_add_epi32( *noncev, sixteen );
|
*noncev = _mm512_add_epi32( *noncev, sixteen );
|
||||||
n += 16;
|
n += 16;
|
||||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
*hashes_done = n - first_nonce;
|
*hashes_done = n - first_nonce;
|
||||||
@@ -100,13 +102,14 @@ int scanhash_sha256t_16way( struct work *work, const uint32_t max_nonce,
|
|||||||
int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
__m256i block[16] __attribute__ ((aligned (64)));
|
__m256i vdata[32] __attribute__ ((aligned (64)));
|
||||||
|
__m256i block[16] __attribute__ ((aligned (32)));
|
||||||
__m256i hash32[8] __attribute__ ((aligned (32)));
|
__m256i hash32[8] __attribute__ ((aligned (32)));
|
||||||
__m256i initstate[8] __attribute__ ((aligned (32)));
|
__m256i initstate[8] __attribute__ ((aligned (32)));
|
||||||
__m256i midstate1[8] __attribute__ ((aligned (32)));
|
__m256i midstate1[8] __attribute__ ((aligned (32)));
|
||||||
__m256i midstate2[8] __attribute__ ((aligned (32)));
|
__m256i midstate2[8] __attribute__ ((aligned (32)));
|
||||||
|
__m256i mexp_pre[16] __attribute__ ((aligned (32)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
__m256i vdata[20] __attribute__ ((aligned (32)));
|
|
||||||
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
const uint32_t *ptarget = work->target;
|
const uint32_t *ptarget = work->target;
|
||||||
@@ -125,6 +128,14 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
|
|
||||||
*noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
*noncev = _mm256_set_epi32( n+ 7, n+ 6, n+ 5, n+ 4, n+ 3, n+ 2, n+1, n );
|
||||||
|
|
||||||
|
vdata[16+4] = last_byte;
|
||||||
|
memset_zero_256( vdata+16 + 5, 10 );
|
||||||
|
vdata[16+15] = m256_const1_32( 80*8 ); // bit count
|
||||||
|
|
||||||
|
block[ 8] = last_byte;
|
||||||
|
memset_zero_256( block + 9, 6 );
|
||||||
|
block[15] = m256_const1_32( 32*8 ); // bit count
|
||||||
|
|
||||||
// initialize state
|
// initialize state
|
||||||
initstate[0] = m256_const1_64( 0x6A09E6676A09E667 );
|
initstate[0] = m256_const1_64( 0x6A09E6676A09E667 );
|
||||||
initstate[1] = m256_const1_64( 0xBB67AE85BB67AE85 );
|
initstate[1] = m256_const1_64( 0xBB67AE85BB67AE85 );
|
||||||
@@ -138,43 +149,37 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
sha256_8way_transform_le( midstate1, vdata, initstate );
|
sha256_8way_transform_le( midstate1, vdata, initstate );
|
||||||
|
|
||||||
// Do 3 rounds on the first 12 bytes of the next block
|
// Do 3 rounds on the first 12 bytes of the next block
|
||||||
sha256_8way_prehash_3rounds( midstate2, vdata + 16, midstate1 );
|
sha256_8way_prehash_3rounds( midstate2, mexp_pre, vdata + 16, midstate1 );
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
// 1. final 16 bytes of data, with padding
|
// 1. final 16 bytes of data, with padding
|
||||||
memcpy_256( block, vdata + 16, 4 );
|
sha256_8way_final_rounds( block, vdata+16, midstate1, midstate2,
|
||||||
block[ 4] = last_byte;
|
mexp_pre );
|
||||||
memset_zero_256( block + 5, 10 );
|
|
||||||
block[15] = m256_const1_32( 80*8 ); // bit count
|
|
||||||
sha256_8way_final_rounds( hash32, block, midstate1, midstate2 );
|
|
||||||
|
|
||||||
// 2. 32 byte hash from 1.
|
// 2. 32 byte hash from 1.
|
||||||
memcpy_256( block, hash32, 8 );
|
sha256_8way_transform_le( block, block, initstate );
|
||||||
block[ 8] = last_byte;
|
|
||||||
memset_zero_256( block + 9, 6 );
|
|
||||||
block[15] = m256_const1_32( 32*8 ); // bit count
|
|
||||||
sha256_8way_transform_le( hash32, block, initstate );
|
|
||||||
|
|
||||||
// 3. 32 byte hash from 2.
|
// 3. 32 byte hash from 2.
|
||||||
memcpy_256( block, hash32, 8 );
|
if ( unlikely(
|
||||||
sha256_8way_transform_le( hash32, block, initstate );
|
sha256_8way_transform_le_short( hash32, block, initstate ) ) )
|
||||||
|
|
||||||
// byte swap final hash for testing
|
|
||||||
mm256_block_bswap_32( hash32, hash32 );
|
|
||||||
|
|
||||||
for ( int lane = 0; lane < 8; lane++ )
|
|
||||||
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
|
||||||
{
|
{
|
||||||
extr_lane_8x32( lane_hash, hash32, lane, 256 );
|
// byte swap final hash for testing
|
||||||
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
mm256_block_bswap_32( hash32, hash32 );
|
||||||
|
|
||||||
|
for ( int lane = 0; lane < 8; lane++ )
|
||||||
|
if ( hash32_d7[ lane ] <= targ32_d7 )
|
||||||
{
|
{
|
||||||
pdata[19] = n + lane;
|
extr_lane_8x32( lane_hash, hash32, lane, 256 );
|
||||||
submit_solution( work, lane_hash, mythr );
|
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = n + lane;
|
||||||
|
submit_solution( work, lane_hash, mythr );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*noncev = _mm256_add_epi32( *noncev, eight );
|
*noncev = _mm256_add_epi32( *noncev, eight );
|
||||||
n += 8;
|
n += 8;
|
||||||
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||||
pdata[19] = n;
|
pdata[19] = n;
|
||||||
*hashes_done = n - first_nonce;
|
*hashes_done = n - first_nonce;
|
||||||
@@ -183,17 +188,110 @@ int scanhash_sha256t_8way( struct work *work, const uint32_t max_nonce,
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(SHA256T_4WAY)
|
#if defined(SHA256T_4WAY)
|
||||||
|
|
||||||
|
// Optimizations are slower with AVX/SSE2
|
||||||
|
// https://github.com/JayDDee/cpuminer-opt/issues/344
|
||||||
|
/*
|
||||||
|
int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
|
||||||
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
|
{
|
||||||
|
__m128i vdata[32] __attribute__ ((aligned (64)));
|
||||||
|
__m128i block[16] __attribute__ ((aligned (32)));
|
||||||
|
__m128i hash32[8] __attribute__ ((aligned (32)));
|
||||||
|
__m128i initstate[8] __attribute__ ((aligned (32)));
|
||||||
|
__m128i midstate1[8] __attribute__ ((aligned (32)));
|
||||||
|
__m128i midstate2[8] __attribute__ ((aligned (32)));
|
||||||
|
__m128i mexp_pre[16] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
|
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||||
|
uint32_t *pdata = work->data;
|
||||||
|
const uint32_t *ptarget = work->target;
|
||||||
|
const uint32_t targ32_d7 = ptarget[7];
|
||||||
|
const uint32_t first_nonce = pdata[19];
|
||||||
|
const uint32_t last_nonce = max_nonce - 4;
|
||||||
|
uint32_t n = first_nonce;
|
||||||
|
__m128i *noncev = vdata + 19;
|
||||||
|
const int thr_id = mythr->id;
|
||||||
|
const bool bench = opt_benchmark;
|
||||||
|
const __m128i last_byte = m128_const1_32( 0x80000000 );
|
||||||
|
const __m128i four = m128_const1_32( 4 );
|
||||||
|
|
||||||
|
for ( int i = 0; i < 19; i++ )
|
||||||
|
vdata[i] = m128_const1_32( pdata[i] );
|
||||||
|
|
||||||
|
*noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n );
|
||||||
|
|
||||||
|
vdata[16+4] = last_byte;
|
||||||
|
memset_zero_128( vdata+16 + 5, 10 );
|
||||||
|
vdata[16+15] = m128_const1_32( 80*8 ); // bit count
|
||||||
|
|
||||||
|
block[ 8] = last_byte;
|
||||||
|
memset_zero_128( block + 9, 6 );
|
||||||
|
block[15] = m128_const1_32( 32*8 ); // bit count
|
||||||
|
|
||||||
|
// initialize state
|
||||||
|
initstate[0] = m128_const1_64( 0x6A09E6676A09E667 );
|
||||||
|
initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 );
|
||||||
|
initstate[2] = m128_const1_64( 0x3C6EF3723C6EF372 );
|
||||||
|
initstate[3] = m128_const1_64( 0xA54FF53AA54FF53A );
|
||||||
|
initstate[4] = m128_const1_64( 0x510E527F510E527F );
|
||||||
|
initstate[5] = m128_const1_64( 0x9B05688C9B05688C );
|
||||||
|
initstate[6] = m128_const1_64( 0x1F83D9AB1F83D9AB );
|
||||||
|
initstate[7] = m128_const1_64( 0x5BE0CD195BE0CD19 );
|
||||||
|
|
||||||
|
// hash first 64 bytes of data
|
||||||
|
sha256_4way_transform_le( midstate1, vdata, initstate );
|
||||||
|
|
||||||
|
// Do 3 rounds on the first 12 bytes of the next block
|
||||||
|
sha256_4way_prehash_3rounds( midstate2, mexp_pre, vdata + 16, midstate1 );
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
// 1. final 16 bytes of data, with padding
|
||||||
|
sha256_4way_final_rounds( block, vdata+16, midstate1, midstate2,
|
||||||
|
mexp_pre );
|
||||||
|
|
||||||
|
// 2. 32 byte hash from 1.
|
||||||
|
sha256_4way_transform_le( block, block, initstate );
|
||||||
|
|
||||||
|
// 3. 32 byte hash from 2.
|
||||||
|
if ( unlikely(
|
||||||
|
sha256_4way_transform_le_short( hash32, block, initstate ) ) )
|
||||||
|
{
|
||||||
|
// byte swap final hash for testing
|
||||||
|
mm128_block_bswap_32( hash32, hash32 );
|
||||||
|
|
||||||
|
for ( int lane = 0; lane < 4; lane++ )
|
||||||
|
if ( unlikely( hash32_d7[ lane ] <= targ32_d7 ) )
|
||||||
|
{
|
||||||
|
extr_lane_4x32( lane_hash, hash32, lane, 256 );
|
||||||
|
if ( likely( valid_hash( lane_hash, ptarget ) && !bench ) )
|
||||||
|
{
|
||||||
|
pdata[19] = n + lane;
|
||||||
|
submit_solution( work, lane_hash, mythr );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*noncev = _mm_add_epi32( *noncev, four );
|
||||||
|
n += 4;
|
||||||
|
} while ( (n < last_nonce) && !work_restart[thr_id].restart );
|
||||||
|
pdata[19] = n;
|
||||||
|
*hashes_done = n - first_nonce;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
|
int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
|
||||||
uint64_t *hashes_done, struct thr_info *mythr )
|
uint64_t *hashes_done, struct thr_info *mythr )
|
||||||
{
|
{
|
||||||
__m128i block[16] __attribute__ ((aligned (64)));
|
__m128i vdata[32] __attribute__ ((aligned (64)));
|
||||||
|
__m128i block[16] __attribute__ ((aligned (32)));
|
||||||
__m128i hash32[8] __attribute__ ((aligned (32)));
|
__m128i hash32[8] __attribute__ ((aligned (32)));
|
||||||
__m128i initstate[8] __attribute__ ((aligned (32)));
|
__m128i initstate[8] __attribute__ ((aligned (32)));
|
||||||
__m128i midstate[8] __attribute__ ((aligned (32)));
|
__m128i midstate[8] __attribute__ ((aligned (32)));
|
||||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||||
__m128i vdata[20] __attribute__ ((aligned (32)));
|
|
||||||
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
uint32_t *hash32_d7 = (uint32_t*)&( hash32[7] );
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
const uint32_t *ptarget = work->target;
|
const uint32_t *ptarget = work->target;
|
||||||
@@ -212,6 +310,14 @@ int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
|
|||||||
|
|
||||||
*noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n );
|
*noncev = _mm_set_epi32( n+ 3, n+ 2, n+1, n );
|
||||||
|
|
||||||
|
vdata[16+4] = last_byte;
|
||||||
|
memset_zero_128( vdata+16 + 5, 10 );
|
||||||
|
vdata[16+15] = m128_const1_32( 80*8 ); // bit count
|
||||||
|
|
||||||
|
block[ 8] = last_byte;
|
||||||
|
memset_zero_128( block + 9, 6 );
|
||||||
|
block[15] = m128_const1_32( 32*8 ); // bit count
|
||||||
|
|
||||||
// initialize state
|
// initialize state
|
||||||
initstate[0] = m128_const1_64( 0x6A09E6676A09E667 );
|
initstate[0] = m128_const1_64( 0x6A09E6676A09E667 );
|
||||||
initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 );
|
initstate[1] = m128_const1_64( 0xBB67AE85BB67AE85 );
|
||||||
@@ -227,25 +333,9 @@ int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
|
|||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
// 1. final 16 bytes of data, with padding
|
sha256_4way_transform_le( block, vdata+16, midstate );
|
||||||
memcpy_128( block, vdata + 16, 4 );
|
sha256_4way_transform_le( block, block, initstate );
|
||||||
block[ 4] = last_byte;
|
sha256_4way_transform_le( hash32, block, initstate );
|
||||||
memset_zero_128( block + 5, 10 );
|
|
||||||
block[15] = m128_const1_32( 80*8 ); // bit count
|
|
||||||
sha256_4way_transform_le( hash32, block, midstate );
|
|
||||||
|
|
||||||
// 2. 32 byte hash from 1.
|
|
||||||
memcpy_128( block, hash32, 8 );
|
|
||||||
block[ 8] = last_byte;
|
|
||||||
memset_zero_128( block + 9, 6 );
|
|
||||||
block[15] = m128_const1_32( 32*8 ); // bit count
|
|
||||||
sha256_4way_transform_le( hash32, block, initstate );
|
|
||||||
|
|
||||||
// 3. 32 byte hash from 2.
|
|
||||||
memcpy_128( block, hash32, 8 );
|
|
||||||
sha256_4way_transform_le( hash32, block, initstate );
|
|
||||||
|
|
||||||
// byte swap final hash for testing
|
|
||||||
mm128_block_bswap_32( hash32, hash32 );
|
mm128_block_bswap_32( hash32, hash32 );
|
||||||
|
|
||||||
for ( int lane = 0; lane < 4; lane++ )
|
for ( int lane = 0; lane < 4; lane++ )
|
||||||
@@ -266,5 +356,6 @@ int scanhash_sha256t_4way( struct work *work, const uint32_t max_nonce,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
4
compat.h
4
compat.h
@@ -3,6 +3,10 @@
|
|||||||
|
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
|
|
||||||
|
#if _WIN32_WINNT==0x0601 // Windows 7
|
||||||
|
#define WINDOWS_CPU_GROUPS_ENABLED 1
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
|
20
configure
vendored
20
configure
vendored
@@ -1,6 +1,6 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.18.2.
|
# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.18.3.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||||
@@ -577,8 +577,8 @@ MAKEFLAGS=
|
|||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='cpuminer-opt'
|
PACKAGE_NAME='cpuminer-opt'
|
||||||
PACKAGE_TARNAME='cpuminer-opt'
|
PACKAGE_TARNAME='cpuminer-opt'
|
||||||
PACKAGE_VERSION='3.18.2'
|
PACKAGE_VERSION='3.18.3'
|
||||||
PACKAGE_STRING='cpuminer-opt 3.18.2'
|
PACKAGE_STRING='cpuminer-opt 3.18.3'
|
||||||
PACKAGE_BUGREPORT=''
|
PACKAGE_BUGREPORT=''
|
||||||
PACKAGE_URL=''
|
PACKAGE_URL=''
|
||||||
|
|
||||||
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
|
|||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
\`configure' configures cpuminer-opt 3.18.2 to adapt to many kinds of systems.
|
\`configure' configures cpuminer-opt 3.18.3 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
@@ -1404,7 +1404,7 @@ fi
|
|||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of cpuminer-opt 3.18.2:";;
|
short | recursive ) echo "Configuration of cpuminer-opt 3.18.3:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
@@ -1509,7 +1509,7 @@ fi
|
|||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
cpuminer-opt configure 3.18.2
|
cpuminer-opt configure 3.18.3
|
||||||
generated by GNU Autoconf 2.69
|
generated by GNU Autoconf 2.69
|
||||||
|
|
||||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||||
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
|
|||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by cpuminer-opt $as_me 3.18.2, which was
|
It was created by cpuminer-opt $as_me 3.18.3, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
$ $0 $@
|
$ $0 $@
|
||||||
@@ -2993,7 +2993,7 @@ fi
|
|||||||
|
|
||||||
# Define the identity of the package.
|
# Define the identity of the package.
|
||||||
PACKAGE='cpuminer-opt'
|
PACKAGE='cpuminer-opt'
|
||||||
VERSION='3.18.2'
|
VERSION='3.18.3'
|
||||||
|
|
||||||
|
|
||||||
cat >>confdefs.h <<_ACEOF
|
cat >>confdefs.h <<_ACEOF
|
||||||
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by cpuminer-opt $as_me 3.18.2, which was
|
This file was extended by cpuminer-opt $as_me 3.18.3, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
@@ -6756,7 +6756,7 @@ _ACEOF
|
|||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
cpuminer-opt config.status 3.18.2
|
cpuminer-opt config.status 3.18.3
|
||||||
configured by $0, generated by GNU Autoconf 2.69,
|
configured by $0, generated by GNU Autoconf 2.69,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
AC_INIT([cpuminer-opt], [3.18.2])
|
AC_INIT([cpuminer-opt], [3.19.0])
|
||||||
|
|
||||||
AC_PREREQ([2.59c])
|
AC_PREREQ([2.59c])
|
||||||
AC_CANONICAL_SYSTEM
|
AC_CANONICAL_SYSTEM
|
||||||
|
331
cpu-miner.c
331
cpu-miner.c
@@ -3,7 +3,7 @@
|
|||||||
* Copyright 2012-2014 pooler
|
* Copyright 2012-2014 pooler
|
||||||
* Copyright 2014 Lucas Jones
|
* Copyright 2014 Lucas Jones
|
||||||
* Copyright 2014-2016 Tanguy Pruvot
|
* Copyright 2014-2016 Tanguy Pruvot
|
||||||
* Copyright 2016-2020 Jay D Dee
|
* Copyright 2016-2021 Jay D Dee
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify it
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
* under the terms of the GNU General Public License as published by the Free
|
* under the terms of the GNU General Public License as published by the Free
|
||||||
@@ -115,22 +115,12 @@ int opt_param_n = 0;
|
|||||||
int opt_param_r = 0;
|
int opt_param_r = 0;
|
||||||
int opt_n_threads = 0;
|
int opt_n_threads = 0;
|
||||||
bool opt_sapling = false;
|
bool opt_sapling = false;
|
||||||
|
static uint64_t opt_affinity = 0xFFFFFFFFFFFFFFFFULL; // default, use all cores
|
||||||
// Windows doesn't support 128 bit affinity mask.
|
|
||||||
// Need compile time and run time test.
|
|
||||||
#if defined(__linux) && defined(GCC_INT128)
|
|
||||||
#define AFFINITY_USES_UINT128 1
|
|
||||||
static uint128_t opt_affinity = -1;
|
|
||||||
static bool affinity_uses_uint128 = true;
|
|
||||||
#else
|
|
||||||
static uint64_t opt_affinity = -1;
|
|
||||||
static bool affinity_uses_uint128 = false;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int opt_priority = 0; // deprecated
|
int opt_priority = 0; // deprecated
|
||||||
int num_cpus = 1;
|
int num_cpus = 1;
|
||||||
int num_cpugroups = 1;
|
int num_cpugroups = 1; // For Windows
|
||||||
char *rpc_url = NULL;;
|
#define max_cpus 256 // max for affinity
|
||||||
|
char *rpc_url = NULL;
|
||||||
char *rpc_userpass = NULL;
|
char *rpc_userpass = NULL;
|
||||||
char *rpc_user, *rpc_pass;
|
char *rpc_user, *rpc_pass;
|
||||||
char *short_url = NULL;
|
char *short_url = NULL;
|
||||||
@@ -166,6 +156,7 @@ uint32_t accepted_share_count = 0;
|
|||||||
uint32_t rejected_share_count = 0;
|
uint32_t rejected_share_count = 0;
|
||||||
uint32_t stale_share_count = 0;
|
uint32_t stale_share_count = 0;
|
||||||
uint32_t solved_block_count = 0;
|
uint32_t solved_block_count = 0;
|
||||||
|
uint32_t stratum_errors = 0;
|
||||||
double *thr_hashrates;
|
double *thr_hashrates;
|
||||||
double global_hashrate = 0.;
|
double global_hashrate = 0.;
|
||||||
double total_hashes = 0.;
|
double total_hashes = 0.;
|
||||||
@@ -227,18 +218,21 @@ char* lp_id;
|
|||||||
|
|
||||||
static void workio_cmd_free(struct workio_cmd *wc);
|
static void workio_cmd_free(struct workio_cmd *wc);
|
||||||
|
|
||||||
static void format_affinity_map( char *map_str, uint64_t map )
|
// array mapping thread to cpu
|
||||||
|
static uint8_t thread_affinity_map[ max_cpus ];
|
||||||
|
|
||||||
|
// display affinity mask graphically
|
||||||
|
static void format_affinity_mask( char *mask_str, uint64_t mask )
|
||||||
{
|
{
|
||||||
int n = num_cpus < 64 ? num_cpus : 64;
|
int n = num_cpus < 64 ? num_cpus : 64;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for ( i = 0; i < n; i++ )
|
for ( i = 0; i < n; i++ )
|
||||||
{
|
{
|
||||||
if ( map & 1 ) map_str[i] = '!';
|
if ( mask & 1 ) mask_str[i] = '!';
|
||||||
else map_str[i] = '.';
|
else mask_str[i] = '.';
|
||||||
map >>= 1;
|
mask >>= 1;
|
||||||
}
|
}
|
||||||
memset( &map_str[i], 0, 64 - i );
|
memset( &mask_str[i], 0, 64 - i );
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __linux /* Linux specific policy and affinity management */
|
#ifdef __linux /* Linux specific policy and affinity management */
|
||||||
@@ -260,93 +254,70 @@ static inline void drop_policy(void)
|
|||||||
#define pthread_setaffinity_np(tid,sz,s) {} /* only do process affinity */
|
#define pthread_setaffinity_np(tid,sz,s) {} /* only do process affinity */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Linux affinity can use int128.
|
static void affine_to_cpu( struct thr_info *thr )
|
||||||
#if AFFINITY_USES_UINT128
|
|
||||||
static void affine_to_cpu_mask( int id, uint128_t mask )
|
|
||||||
#else
|
|
||||||
static void affine_to_cpu_mask( int id, uint64_t mask )
|
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
|
int thread = thr->id;
|
||||||
cpu_set_t set;
|
cpu_set_t set;
|
||||||
CPU_ZERO( &set );
|
CPU_ZERO( &set );
|
||||||
uint8_t ncpus = (num_cpus > 256) ? 256 : num_cpus;
|
CPU_SET( thread_affinity_map[ thread ], &set );
|
||||||
|
if ( opt_debug )
|
||||||
for ( uint8_t i = 0; i < ncpus; i++ )
|
applog( LOG_INFO, "Binding thread %d to cpu %d",
|
||||||
{
|
thread, thread_affinity_map[ thread ] );
|
||||||
// cpu mask
|
pthread_setaffinity_np( thr->pth, sizeof(set), &set );
|
||||||
#if AFFINITY_USES_UINT128
|
|
||||||
if( ( mask & ( (uint128_t)1 << i ) ) ) CPU_SET( i, &set );
|
|
||||||
#else
|
|
||||||
if( (ncpus > 64) || ( mask & (1 << i) ) ) CPU_SET( i, &set );
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
if ( id == -1 )
|
|
||||||
{
|
|
||||||
// process affinity
|
|
||||||
sched_setaffinity(0, sizeof(&set), &set);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// thread only
|
|
||||||
pthread_setaffinity_np(thr_info[id].pth, sizeof(&set), &set);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif defined(WIN32) /* Windows */
|
#elif defined(WIN32) /* Windows */
|
||||||
|
|
||||||
static inline void drop_policy(void) { }
|
static inline void drop_policy(void) { }
|
||||||
|
|
||||||
// Windows CPU groups to manage more than 64 CPUs.
|
// Windows CPU groups to manage more than 64 CPUs.
|
||||||
static void affine_to_cpu_mask( int id, uint64_t mask )
|
// mask arg is ignored
|
||||||
|
static void affine_to_cpu( struct thr_info *thr )
|
||||||
{
|
{
|
||||||
bool success;
|
int thread = thr->id;
|
||||||
unsigned long last_error;
|
unsigned long last_error;
|
||||||
// BOOL success;
|
bool ok;
|
||||||
// DWORD last_error;
|
|
||||||
|
|
||||||
if ( id == -1 )
|
#if defined(WINDOWS_CPU_GROUPS_ENABLED)
|
||||||
success = SetProcessAffinityMask( GetCurrentProcess(), mask );
|
unsigned long group_size = GetActiveProcessorCount( 0 );
|
||||||
|
unsigned long group = thread / group_size;
|
||||||
|
unsigned long cpu = thread_affinity_map[ thread % group_size ];
|
||||||
|
|
||||||
// Are Windows CPU Groups supported?
|
GROUP_AFFINITY affinity;
|
||||||
#if _WIN32_WINNT==0x0601
|
affinity.Group = group;
|
||||||
else if ( num_cpugroups == 1 )
|
affinity.Mask = 1ULL << cpu;
|
||||||
success = SetThreadAffinityMask( GetCurrentThread(), mask );
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Find the correct cpu group
|
|
||||||
int cpu = id % num_cpus;
|
|
||||||
int group;
|
|
||||||
for( group = 0; group < num_cpugroups; group++ )
|
|
||||||
{
|
|
||||||
int cpus = GetActiveProcessorCount( group );
|
|
||||||
if ( cpu < cpus ) break;
|
|
||||||
cpu -= cpus;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (opt_debug)
|
if ( opt_debug )
|
||||||
applog(LOG_DEBUG, "Binding thread %d to cpu %d on cpu group %d (mask %x)",
|
applog( LOG_INFO, "Binding thread %d to cpu %d in cpu group %d",
|
||||||
id, cpu, group, (1ULL << cpu));
|
thread, cpu, group );
|
||||||
|
|
||||||
|
ok = SetThreadGroupAffinity( GetCurrentThread(), &affinity, NULL );
|
||||||
|
|
||||||
GROUP_AFFINITY affinity;
|
|
||||||
affinity.Group = group;
|
|
||||||
affinity.Mask = 1ULL << cpu;
|
|
||||||
success = SetThreadGroupAffinity( GetCurrentThread(), &affinity, NULL );
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
else
|
|
||||||
success = SetThreadAffinityMask( GetCurrentThread(), mask );
|
unsigned long cpu = thread_affinity_map[ thread ];
|
||||||
|
uint64_t mask = 1ULL << cpu;
|
||||||
|
|
||||||
|
if ( opt_debug )
|
||||||
|
applog( LOG_INFO, "Binding thread %d to cpu %d", thread, cpu );
|
||||||
|
|
||||||
|
ok = SetThreadAffinityMask( GetCurrentThread(), mask );
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (!success)
|
if ( !ok )
|
||||||
{
|
{
|
||||||
last_error = GetLastError();
|
last_error = GetLastError();
|
||||||
applog(LOG_WARNING, "affine_to_cpu_mask for %u returned %x",
|
applog( LOG_WARNING, "affine_to_cpu_mask for %u returned 0x%x",
|
||||||
id, last_error);
|
thread, last_error );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
static inline void drop_policy(void) { }
|
static inline void drop_policy(void) { }
|
||||||
static void affine_to_cpu_mask(int id, unsigned long mask) { }
|
static void affine_to_cpu( struct thr_info *thr ) { }
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// not very useful, just index the arrray directly.
|
// not very useful, just index the arrray directly.
|
||||||
@@ -1159,17 +1130,23 @@ void report_summary_log( bool force )
|
|||||||
applog2( prio, "Blocks Solved %7d %7d",
|
applog2( prio, "Blocks Solved %7d %7d",
|
||||||
solved, solved_block_count );
|
solved, solved_block_count );
|
||||||
}
|
}
|
||||||
|
if ( stratum_errors )
|
||||||
|
applog2( LOG_INFO, "Stratum errors %7d", stratum_errors );
|
||||||
|
|
||||||
applog2( LOG_INFO, "Hi/Lo Share Diff %.5g / %.5g",
|
applog2( LOG_INFO, "Hi/Lo Share Diff %.5g / %.5g",
|
||||||
highest_share, lowest_share );
|
highest_share, lowest_share );
|
||||||
|
|
||||||
int mismatch = submitted_share_count
|
int mismatch = submitted_share_count
|
||||||
- ( accepted_share_count + stale_share_count + rejected_share_count );
|
- ( accepted_share_count + stale_share_count + rejected_share_count );
|
||||||
|
|
||||||
if ( mismatch )
|
if ( mismatch )
|
||||||
{
|
{
|
||||||
if ( mismatch != 1 )
|
if ( stratum_errors )
|
||||||
applog2(LOG_MINR, "Count mismatch: %d, stats may be inaccurate", mismatch );
|
applog2( LOG_MINR, "Count mismatch: %d, stats may be inaccurate",
|
||||||
else
|
mismatch );
|
||||||
applog2(LOG_INFO, CL_LBL "Count mismatch, submitted share may still be pending" CL_N );
|
else if ( !opt_quiet )
|
||||||
|
applog2( LOG_INFO, CL_LBL
|
||||||
|
"Count mismatch, submitted share may still be pending" CL_N );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2241,49 +2218,9 @@ static void *miner_thread( void *userdata )
|
|||||||
if ( opt_priority == 0 )
|
if ( opt_priority == 0 )
|
||||||
drop_policy();
|
drop_policy();
|
||||||
}
|
}
|
||||||
|
|
||||||
// CPU thread affinity
|
// CPU thread affinity
|
||||||
if ( num_cpus > 1 )
|
if ( opt_affinity && num_cpus > 1 ) affine_to_cpu( mythr );
|
||||||
{
|
|
||||||
#if AFFINITY_USES_UINT128
|
|
||||||
// Default affinity
|
|
||||||
if ( (opt_affinity == (uint128_t)(-1) ) && opt_n_threads > 1 )
|
|
||||||
{
|
|
||||||
affine_to_cpu_mask( thr_id, (uint128_t)1 << (thr_id % num_cpus) );
|
|
||||||
if ( opt_debug )
|
|
||||||
applog( LOG_INFO, "Binding thread %d to cpu %d.",
|
|
||||||
thr_id, thr_id % num_cpus,
|
|
||||||
u128_hi64( (uint128_t)1 << (thr_id % num_cpus) ),
|
|
||||||
u128_lo64( (uint128_t)1 << (thr_id % num_cpus) ) );
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
if ( ( opt_affinity == -1 ) && ( opt_n_threads > 1 ) )
|
|
||||||
{
|
|
||||||
affine_to_cpu_mask( thr_id, 1 << (thr_id % num_cpus) );
|
|
||||||
if (opt_debug)
|
|
||||||
applog( LOG_DEBUG, "Binding thread %d to cpu %d.",
|
|
||||||
thr_id, thr_id % num_cpus, 1 << (thr_id % num_cpus)) ;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
else // Custom affinity
|
|
||||||
{
|
|
||||||
affine_to_cpu_mask( thr_id, opt_affinity );
|
|
||||||
if ( opt_debug )
|
|
||||||
{
|
|
||||||
#if AFFINITY_USES_UINT128
|
|
||||||
if ( num_cpus > 64 )
|
|
||||||
applog( LOG_INFO, "Binding thread %d to mask %016llx %016llx",
|
|
||||||
thr_id, u128_hi64( opt_affinity ),
|
|
||||||
u128_lo64( opt_affinity ) );
|
|
||||||
else
|
|
||||||
applog( LOG_INFO, "Binding thread %d to mask %016llx",
|
|
||||||
thr_id, opt_affinity );
|
|
||||||
#else
|
|
||||||
applog( LOG_INFO, "Binding thread %d to mask %016llx",
|
|
||||||
thr_id, opt_affinity );
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} // num_cpus > 1
|
|
||||||
|
|
||||||
if ( !algo_gate.miner_thread_init( thr_id ) )
|
if ( !algo_gate.miner_thread_init( thr_id ) )
|
||||||
{
|
{
|
||||||
@@ -2792,6 +2729,7 @@ static void *stratum_thread(void *userdata )
|
|||||||
{
|
{
|
||||||
stratum_need_reset = false;
|
stratum_need_reset = false;
|
||||||
stratum_down = true;
|
stratum_down = true;
|
||||||
|
stratum_errors++;
|
||||||
stratum_disconnect( &stratum );
|
stratum_disconnect( &stratum );
|
||||||
if ( strcmp( stratum.url, rpc_url ) )
|
if ( strcmp( stratum.url, rpc_url ) )
|
||||||
{
|
{
|
||||||
@@ -2809,6 +2747,7 @@ static void *stratum_thread(void *userdata )
|
|||||||
while ( !stratum.curl )
|
while ( !stratum.curl )
|
||||||
{
|
{
|
||||||
stratum_down = true;
|
stratum_down = true;
|
||||||
|
restart_threads();
|
||||||
pthread_rwlock_wrlock( &g_work_lock );
|
pthread_rwlock_wrlock( &g_work_lock );
|
||||||
g_work_time = 0;
|
g_work_time = 0;
|
||||||
pthread_rwlock_unlock( &g_work_lock );
|
pthread_rwlock_unlock( &g_work_lock );
|
||||||
@@ -2830,7 +2769,6 @@ static void *stratum_thread(void *userdata )
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
stratum_down = false;
|
stratum_down = false;
|
||||||
restart_threads();
|
|
||||||
applog(LOG_BLUE,"Stratum connection established" );
|
applog(LOG_BLUE,"Stratum connection established" );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -3137,7 +3075,7 @@ void parse_arg(int key, char *arg )
|
|||||||
{
|
{
|
||||||
char *p;
|
char *p;
|
||||||
int v, i;
|
int v, i;
|
||||||
uint64_t ul;
|
// uint64_t ul;
|
||||||
double d;
|
double d;
|
||||||
|
|
||||||
switch( key )
|
switch( key )
|
||||||
@@ -3448,21 +3386,10 @@ void parse_arg(int key, char *arg )
|
|||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
case 1020: // cpu-affinity
|
case 1020: // cpu-affinity
|
||||||
p = strstr(arg, "0x");
|
p = strstr( arg, "0x" );
|
||||||
if ( p )
|
opt_affinity = p ? strtoull( p, NULL, 16 )
|
||||||
ul = strtoull( p, NULL, 16 );
|
: atoll( arg );
|
||||||
else
|
break;
|
||||||
ul = atoll( arg );
|
|
||||||
#if AFFINITY_USES_UINT128
|
|
||||||
// replicate the low 64 bits to make a full 128 bit mask if there are more
|
|
||||||
// than 64 CPUs, otherwise zero extend the upper half.
|
|
||||||
opt_affinity = (uint128_t)ul;
|
|
||||||
if ( num_cpus > 64 )
|
|
||||||
opt_affinity |= opt_affinity << 64;
|
|
||||||
#else
|
|
||||||
opt_affinity = ul;
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
case 1021: // cpu-priority
|
case 1021: // cpu-priority
|
||||||
v = atoi(arg);
|
v = atoi(arg);
|
||||||
if (v < 0 || v > 5) /* sanity check */
|
if (v < 0 || v > 5) /* sanity check */
|
||||||
@@ -3565,20 +3492,18 @@ static void parse_cmdline(int argc, char *argv[])
|
|||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
#if HAVE_GETOPT_LONG
|
#if HAVE_GETOPT_LONG
|
||||||
key = getopt_long(argc, argv, short_options, options, NULL);
|
key = getopt_long(argc, argv, short_options, options, NULL);
|
||||||
#else
|
#else
|
||||||
key = getopt(argc, argv, short_options);
|
key = getopt(argc, argv, short_options);
|
||||||
#endif
|
#endif
|
||||||
if (key < 0)
|
if ( key < 0 ) break;
|
||||||
break;
|
parse_arg( key, optarg );
|
||||||
|
|
||||||
parse_arg(key, optarg);
|
|
||||||
}
|
}
|
||||||
if (optind < argc)
|
if ( optind < argc )
|
||||||
{
|
{
|
||||||
fprintf(stderr, "%s: unsupported non-option argument -- '%s'\n",
|
fprintf( stderr, "%s: unsupported non-option argument -- '%s'\n",
|
||||||
argv[0], argv[optind]);
|
argv[0], argv[optind]);
|
||||||
show_usage_and_exit(1);
|
show_usage_and_exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3642,26 +3567,21 @@ int main(int argc, char *argv[])
|
|||||||
rpc_user = strdup("");
|
rpc_user = strdup("");
|
||||||
rpc_pass = strdup("");
|
rpc_pass = strdup("");
|
||||||
|
|
||||||
parse_cmdline(argc, argv);
|
|
||||||
|
|
||||||
#if defined(WIN32)
|
#if defined(WIN32)
|
||||||
// SYSTEM_INFO sysinfo;
|
|
||||||
// GetSystemInfo(&sysinfo);
|
|
||||||
// num_cpus = sysinfo.dwNumberOfProcessors;
|
|
||||||
// What happens if GetActiveProcessorGroupCount called if groups not enabled?
|
|
||||||
|
|
||||||
// Are Windows CPU Groups supported?
|
// Are Windows CPU Groups supported?
|
||||||
#if _WIN32_WINNT==0x0601
|
#if defined(WINDOWS_CPU_GROUPS_ENABLED)
|
||||||
num_cpus = 0;
|
num_cpus = 0;
|
||||||
num_cpugroups = GetActiveProcessorGroupCount();
|
num_cpugroups = GetActiveProcessorGroupCount();
|
||||||
for( i = 0; i < num_cpugroups; i++ )
|
for( i = 0; i < num_cpugroups; i++ )
|
||||||
{
|
{
|
||||||
int cpus = GetActiveProcessorCount(i);
|
int cpus = GetActiveProcessorCount( i );
|
||||||
num_cpus += cpus;
|
num_cpus += cpus;
|
||||||
|
|
||||||
if (opt_debug)
|
if (opt_debug)
|
||||||
applog(LOG_DEBUG, "Found %d cpus on cpu group %d", cpus, i);
|
applog( LOG_INFO, "Found %d CPUs in CPU group %d", cpus, i );
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
SYSTEM_INFO sysinfo;
|
SYSTEM_INFO sysinfo;
|
||||||
GetSystemInfo(&sysinfo);
|
GetSystemInfo(&sysinfo);
|
||||||
@@ -3677,21 +3597,20 @@ int main(int argc, char *argv[])
|
|||||||
#else
|
#else
|
||||||
num_cpus = 1;
|
num_cpus = 1;
|
||||||
#endif
|
#endif
|
||||||
if (num_cpus < 1)
|
|
||||||
num_cpus = 1;
|
|
||||||
|
|
||||||
if (!opt_n_threads)
|
if ( num_cpus < 1 ) num_cpus = 1;
|
||||||
opt_n_threads = num_cpus;
|
|
||||||
|
parse_cmdline( argc, argv );
|
||||||
|
|
||||||
if ( opt_algo == ALGO_NULL )
|
if ( opt_algo == ALGO_NULL )
|
||||||
{
|
{
|
||||||
fprintf(stderr, "%s: no algo supplied\n", argv[0]);
|
fprintf( stderr, "%s: No algo parameter specified\n", argv[0] );
|
||||||
show_usage_and_exit(1);
|
show_usage_and_exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// need to register to get algo optimizations for cpu capabilities
|
// need to register to get algo optimizations for cpu capabilities
|
||||||
// but that causes register logs before cpu capabilities is output.
|
// but that causes registration logs before cpu capabilities is output.
|
||||||
// Would need to split register into 2 parts. First part sets algo
|
// Would need to split register function into 2 parts. First part sets algo
|
||||||
// optimizations but no logging, second part does any logging.
|
// optimizations but no logging, second part does any logging.
|
||||||
if ( !register_algo_gate( opt_algo, &algo_gate ) ) exit(1);
|
if ( !register_algo_gate( opt_algo, &algo_gate ) ) exit(1);
|
||||||
|
|
||||||
@@ -3735,9 +3654,6 @@ int main(int argc, char *argv[])
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// All options must be set before starting the gate
|
|
||||||
// if ( !register_algo_gate( opt_algo, &algo_gate ) ) exit(1);
|
|
||||||
|
|
||||||
if ( coinbase_address )
|
if ( coinbase_address )
|
||||||
{
|
{
|
||||||
pk_script_size = address_to_script( pk_script, pk_buffer_size,
|
pk_script_size = address_to_script( pk_script, pk_buffer_size,
|
||||||
@@ -3749,8 +3665,6 @@ int main(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// if ( !check_cpu_capability() ) exit(1);
|
|
||||||
|
|
||||||
pthread_mutex_init( &stats_lock, NULL );
|
pthread_mutex_init( &stats_lock, NULL );
|
||||||
pthread_rwlock_init( &g_work_lock, NULL );
|
pthread_rwlock_init( &g_work_lock, NULL );
|
||||||
pthread_mutex_init( &stratum.sock_lock, NULL );
|
pthread_mutex_init( &stratum.sock_lock, NULL );
|
||||||
@@ -3820,44 +3734,31 @@ int main(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// To be confirmed with more than 64 cpus
|
if ( ( opt_n_threads == 0 ) || ( opt_n_threads > num_cpus ) )
|
||||||
if ( opt_affinity != -1 )
|
opt_n_threads = num_cpus;
|
||||||
{
|
|
||||||
if ( !affinity_uses_uint128 && num_cpus > 64 )
|
|
||||||
{
|
|
||||||
applog(LOG_WARNING,"Setting CPU affinity with more than 64 CPUs is only");
|
|
||||||
applog(LOG_WARNING,"available on Linux. Using default affinity.");
|
|
||||||
opt_affinity = -1;
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
else
|
|
||||||
{
|
|
||||||
affine_to_cpu_mask( -1, opt_affinity );
|
|
||||||
if ( !opt_quiet )
|
|
||||||
{
|
|
||||||
#if AFFINITY_USES_UINT128
|
|
||||||
if ( num_cpus > 64 )
|
|
||||||
applog(LOG_DEBUG, "Binding process to cpu mask %x",
|
|
||||||
u128_hi64( opt_affinity ), u128_lo64( opt_affinity ) );
|
|
||||||
else
|
|
||||||
applog(LOG_DEBUG, "Binding process to cpu mask %x",
|
|
||||||
opt_affinity );
|
|
||||||
#else
|
|
||||||
applog(LOG_DEBUG, "Binding process to cpu mask %x",
|
|
||||||
opt_affinity );
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( !opt_quiet && ( opt_n_threads < num_cpus ) )
|
if ( opt_affinity && num_cpus > max_cpus )
|
||||||
{
|
{
|
||||||
char affinity_map[64];
|
applog( LOG_WARNING, "More than %d CPUs, CPU affinity is disabled",
|
||||||
format_affinity_map( affinity_map, opt_affinity );
|
max_cpus );
|
||||||
applog( LOG_INFO, "CPU affinity [%s]", affinity_map );
|
opt_affinity = 0ULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( opt_affinity )
|
||||||
|
{
|
||||||
|
for ( int thr = 0, cpu = 0; thr < opt_n_threads; thr++, cpu++ )
|
||||||
|
{
|
||||||
|
while ( !( ( opt_affinity >> ( cpu&63 ) ) & 1ULL ) ) cpu++;
|
||||||
|
thread_affinity_map[ thr ] = cpu % num_cpus;
|
||||||
|
}
|
||||||
|
if ( !opt_quiet )
|
||||||
|
{
|
||||||
|
char affinity_mask[64];
|
||||||
|
format_affinity_mask( affinity_mask, opt_affinity );
|
||||||
|
applog( LOG_INFO, "CPU affinity [%s]", affinity_mask );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef HAVE_SYSLOG_H
|
#ifdef HAVE_SYSLOG_H
|
||||||
if (use_syslog)
|
if (use_syslog)
|
||||||
openlog("cpuminer", LOG_PID, LOG_USER);
|
openlog("cpuminer", LOG_PID, LOG_USER);
|
||||||
@@ -3955,7 +3856,7 @@ int main(int argc, char *argv[])
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if ( !opt_quiet )
|
if ( !opt_quiet )
|
||||||
applog( LOG_INFO,"API listnening to %s:%d", opt_api_allow,
|
applog( LOG_INFO,"API listening to %s:%d", opt_api_allow,
|
||||||
opt_api_listen );
|
opt_api_listen );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -16,13 +16,13 @@ export MINGW_LIB="/usr/x86_64-w64-mingw32/lib"
|
|||||||
export GCC_MINGW_LIB="/usr/lib/gcc/x86_64-w64-mingw32/9.3-win32"
|
export GCC_MINGW_LIB="/usr/lib/gcc/x86_64-w64-mingw32/9.3-win32"
|
||||||
# used by GCC
|
# used by GCC
|
||||||
export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs -L$LOCAL_LIB/openssl"
|
export LDFLAGS="-L$LOCAL_LIB/curl/lib/.libs -L$LOCAL_LIB/gmp/.libs -L$LOCAL_LIB/openssl"
|
||||||
|
# support for Windows CPU groups
|
||||||
|
export DEFAULT_CFLAGS="-O3 -Wall -D_WIN32_WINNT=0x0601"
|
||||||
|
#export DEFAULT_CFLAGS="-O3 -Wall"
|
||||||
|
|
||||||
# make link to local gmp header file.
|
# make link to local gmp header file.
|
||||||
ln -s $LOCAL_LIB/gmp/gmp.h ./gmp.h
|
ln -s $LOCAL_LIB/gmp/gmp.h ./gmp.h
|
||||||
|
|
||||||
# edit configure to fix pthread lib name for Windows.
|
|
||||||
#sed -i 's/"-lpthread"/"-lpthreadGC2"/g' configure.ac
|
|
||||||
|
|
||||||
# make release directory and copy selected DLLs.
|
# make release directory and copy selected DLLs.
|
||||||
|
|
||||||
rm -rf release > /dev/null
|
rm -rf release > /dev/null
|
||||||
@@ -45,7 +45,7 @@ cp $LOCAL_LIB/curl/lib/.libs/libcurl-4.dll release/
|
|||||||
./clean-all.sh || echo clean
|
./clean-all.sh || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
./autogen.sh || echo done
|
./autogen.sh || echo done
|
||||||
CFLAGS="-O3 -march=icelake-client -Wall" ./configure $CONFIGURE_ARGS
|
CFLAGS="$DEFAULT_CFLAGS -march=icelake-client" ./configure $CONFIGURE_ARGS
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
strip -s cpuminer.exe
|
||||||
mv cpuminer.exe release/cpuminer-avx512-sha-vaes.exe
|
mv cpuminer.exe release/cpuminer-avx512-sha-vaes.exe
|
||||||
@@ -53,8 +53,8 @@ mv cpuminer.exe release/cpuminer-avx512-sha-vaes.exe
|
|||||||
# Rocketlake AVX512 SHA AES
|
# Rocketlake AVX512 SHA AES
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
CFLAGS="-O3 -march=cascadelake -msha -Wall" ./configure $CONFIGURE_ARGS
|
CFLAGS="$DEFAULT_CFLAGS -march=cascadelake -msha" ./configure $CONFIGURE_ARGS
|
||||||
#CFLAGS="-O3 -march=rocketlake -Wall" ./configure $CONFIGURE_ARGS
|
#CFLAGS="$DEFAULT_CFLAGS -march=rocketlake" ./configure $CONFIGURE_ARGS
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
strip -s cpuminer.exe
|
||||||
mv cpuminer.exe release/cpuminer-avx512-sha.exe
|
mv cpuminer.exe release/cpuminer-avx512-sha.exe
|
||||||
@@ -62,7 +62,7 @@ mv cpuminer.exe release/cpuminer-avx512-sha.exe
|
|||||||
# Zen1 AVX2 AES SHA
|
# Zen1 AVX2 AES SHA
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
CFLAGS="-O3 -march=znver1 -Wall" ./configure $CONFIGURE_ARGS
|
CFLAGS="$DEFAULT_CFLAGS -march=znver1" ./configure $CONFIGURE_ARGS
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
strip -s cpuminer.exe
|
||||||
mv cpuminer.exe release/cpuminer-zen.exe
|
mv cpuminer.exe release/cpuminer-zen.exe
|
||||||
@@ -70,8 +70,8 @@ mv cpuminer.exe release/cpuminer-zen.exe
|
|||||||
# Zen3 AVX2 SHA VAES
|
# Zen3 AVX2 SHA VAES
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
CFLAGS="-O3 -march=znver2 -mvaes -Wall" ./configure $CONFIGURE_ARGS
|
CFLAGS="$DEFAULT_CFLAGS -march=znver2 -mvaes" ./configure $CONFIGURE_ARGS
|
||||||
# CFLAGS="-O3 -march=znver3 -Wall" ./configure $CONFIGURE_ARGS
|
# CFLAGS="$DEFAULT_CFLAGS -march=znver3" ./configure $CONFIGURE_ARGS
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
strip -s cpuminer.exe
|
||||||
mv cpuminer.exe release/cpuminer-zen3.exe
|
mv cpuminer.exe release/cpuminer-zen3.exe
|
||||||
@@ -80,7 +80,7 @@ mv cpuminer.exe release/cpuminer-zen3.exe
|
|||||||
# mingw won't compile avx512 without -fno-asynchronous-unwind-tables
|
# mingw won't compile avx512 without -fno-asynchronous-unwind-tables
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
CFLAGS="-O3 -march=skylake-avx512 -Wall" ./configure $CONFIGURE_ARGS
|
CFLAGS="$DEFAULT_CFLAGS -march=skylake-avx512" ./configure $CONFIGURE_ARGS
|
||||||
#CFLAGS="-O3 -march=skylake-avx512 -Wall -fno-asynchronous-unwind-tables" ./configure $CONFIGURE_ARGS
|
#CFLAGS="-O3 -march=skylake-avx512 -Wall -fno-asynchronous-unwind-tables" ./configure $CONFIGURE_ARGS
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
strip -s cpuminer.exe
|
||||||
@@ -90,7 +90,7 @@ mv cpuminer.exe release/cpuminer-avx512.exe
|
|||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
# GCC 9 doesn't include AES in -march=core-avx2
|
# GCC 9 doesn't include AES in -march=core-avx2
|
||||||
CFLAGS="-O3 -march=core-avx2 -maes -Wall" ./configure $CONFIGURE_ARGS
|
CFLAGS="$DEFAULT_CFLAGS -march=core-avx2 -maes" ./configure $CONFIGURE_ARGS
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
strip -s cpuminer.exe
|
||||||
mv cpuminer.exe release/cpuminer-avx2.exe
|
mv cpuminer.exe release/cpuminer-avx2.exe
|
||||||
@@ -99,7 +99,7 @@ mv cpuminer.exe release/cpuminer-avx2.exe
|
|||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
# -march=corei7-avx still includes aes, but just in case
|
# -march=corei7-avx still includes aes, but just in case
|
||||||
CFLAGS="-O3 -march=corei7-avx -maes -Wall" ./configure $CONFIGURE_ARGS
|
CFLAGS="$DEFAULT_CFLAGS -march=corei7-avx -maes" ./configure $CONFIGURE_ARGS
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
strip -s cpuminer.exe
|
||||||
mv cpuminer.exe release/cpuminer-avx.exe
|
mv cpuminer.exe release/cpuminer-avx.exe
|
||||||
@@ -107,7 +107,7 @@ mv cpuminer.exe release/cpuminer-avx.exe
|
|||||||
# Westmere SSE4.2 AES
|
# Westmere SSE4.2 AES
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
CFLAGS="-O3 -march=westmere -maes -Wall" ./configure $CONFIGURE_ARGS
|
CFLAGS="$DEFAULT_CFLAGS -march=westmere -maes" ./configure $CONFIGURE_ARGS
|
||||||
#CFLAGS="-O3 -maes -msse4.2 -Wall" ./configure $CONFIGURE_ARGS
|
#CFLAGS="-O3 -maes -msse4.2 -Wall" ./configure $CONFIGURE_ARGS
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
strip -s cpuminer.exe
|
||||||
@@ -116,7 +116,7 @@ mv cpuminer.exe release/cpuminer-aes-sse42.exe
|
|||||||
# Nehalem SSE4.2
|
# Nehalem SSE4.2
|
||||||
#make clean || echo clean
|
#make clean || echo clean
|
||||||
#rm -f config.status
|
#rm -f config.status
|
||||||
#CFLAGS="-O3 -march=corei7 -Wall" ./configure $CONFIGURE_ARGS
|
#CFLAGS="$DEFAULT_CFLAGS -march=corei7" ./configure $CONFIGURE_ARGS
|
||||||
#make
|
#make
|
||||||
#strip -s cpuminer.exe
|
#strip -s cpuminer.exe
|
||||||
#mv cpuminer.exe release/cpuminer-sse42.exe
|
#mv cpuminer.exe release/cpuminer-sse42.exe
|
||||||
@@ -124,7 +124,7 @@ mv cpuminer.exe release/cpuminer-aes-sse42.exe
|
|||||||
# Core2 SSSE3
|
# Core2 SSSE3
|
||||||
#make clean || echo clean
|
#make clean || echo clean
|
||||||
#rm -f config.status
|
#rm -f config.status
|
||||||
#CFLAGS="-O3 -march=core2 -Wall" ./configure $CONFIGURE_ARGS
|
#CFLAGS="$DEFAULT_CFLAGS -march=core2" ./configure $CONFIGURE_ARGS
|
||||||
#make
|
#make
|
||||||
#strip -s cpuminer.exe
|
#strip -s cpuminer.exe
|
||||||
#mv cpuminer.exe release/cpuminer-ssse3.exe
|
#mv cpuminer.exe release/cpuminer-ssse3.exe
|
||||||
@@ -133,7 +133,7 @@ mv cpuminer.exe release/cpuminer-aes-sse42.exe
|
|||||||
# Generic SSE2
|
# Generic SSE2
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
CFLAGS="-O3 -msse2 -Wall" ./configure $CONFIGURE_ARGS
|
CFLAGS="$DEFAULT_CFLAGS -msse2" ./configure $CONFIGURE_ARGS
|
||||||
make -j 8
|
make -j 8
|
||||||
strip -s cpuminer.exe
|
strip -s cpuminer.exe
|
||||||
mv cpuminer.exe release/cpuminer-sse2.exe
|
mv cpuminer.exe release/cpuminer-sse2.exe
|
||||||
|
Reference in New Issue
Block a user