mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.10.5
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -2,7 +2,10 @@
|
||||
|
||||
bool register_hmq1725_algo( algo_gate_t* gate )
|
||||
{
|
||||
#if defined(HMQ1725_4WAY)
|
||||
#if defined(HMQ1725_8WAY)
|
||||
gate->scanhash = (void*)&scanhash_hmq1725_8way;
|
||||
gate->hash = (void*)&hmq1725_8way_hash;
|
||||
#elif defined(HMQ1725_4WAY)
|
||||
gate->scanhash = (void*)&scanhash_hmq1725_4way;
|
||||
gate->hash = (void*)&hmq1725_4way_hash;
|
||||
#else
|
||||
@@ -10,7 +13,7 @@ bool register_hmq1725_algo( algo_gate_t* gate )
|
||||
gate->scanhash = (void*)&scanhash_hmq1725;
|
||||
gate->hash = (void*)&hmq1725hash;
|
||||
#endif
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT;
|
||||
gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
|
||||
opt_target_factor = 65536.0;
|
||||
return true;
|
||||
};
|
||||
|
||||
@@ -4,13 +4,21 @@
|
||||
#include "algo-gate-api.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX2__) && defined(__AES__)
|
||||
// #define HMQ1725_4WAY 1
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#define HMQ1725_8WAY 1
|
||||
#elif defined(__AVX2__) && defined(__AES__)
|
||||
#define HMQ1725_4WAY 1
|
||||
#endif
|
||||
|
||||
bool register_hmq1725_algo( algo_gate_t* gate );
|
||||
|
||||
#if defined(HMQ1725_4WAY)
|
||||
#if defined(HMQ1725_8WAY)
|
||||
|
||||
void hmq1725_8way_hash( void *state, const void *input );
|
||||
int scanhash_hmq1725_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined(HMQ1725_4WAY)
|
||||
|
||||
void hmq1725_4way_hash( void *state, const void *input );
|
||||
int scanhash_hmq1725_4way( struct work *work, uint32_t max_nonce,
|
||||
|
||||
@@ -333,6 +333,7 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||
if (((hash64[7]&0xFFFFFFFF)==0) &&
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
@@ -346,6 +347,7 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||
if (((hash64[7]&0xFFFFFFF0)==0) &&
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
@@ -359,6 +361,7 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||
if (((hash64[7]&0xFFFFFF00)==0) &&
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
@@ -372,6 +375,7 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||
if (((hash64[7]&0xFFFFF000)==0) &&
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
@@ -386,6 +390,7 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||
if (((hash64[7]&0xFFFF0000)==0) &&
|
||||
fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
@@ -399,6 +404,7 @@ int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||
hmq1725hash(hash64, endiandata);
|
||||
if (fulltest(hash64, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
work_set_target_ratio( work, hash64 );
|
||||
return true;
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
@@ -63,20 +63,6 @@ void quark_8way_hash( void *state, const void *input )
|
||||
bmw512_8way_update( &ctx.bmw, vhash, 64 );
|
||||
bmw512_8way_close( &ctx.bmw, vhash );
|
||||
|
||||
// AVX 512 cmpeq returns a bit mask instead of a vector mask.
|
||||
// This should simplify things but the logic doesn't seem to be working.
|
||||
// The problem appears to be related to the test to skip a hash if it isn't
|
||||
// to be used. Skipping the test for all 8 way hashes seems to have
|
||||
// fixed it. The hash selection blending works if the hash is produced
|
||||
// but the hash wasn't being produced when it should.
|
||||
// Both decisions are based on the same data, the __mmask8. It works
|
||||
// as a blend mask but not in a logical comparison, maybe the type is the
|
||||
// problem. Maybe a cast to int or movm is needed to make it work.
|
||||
// It's now moot because the hash can only be skipped 1 in 256 iterations
|
||||
// when hashing parallel 8 ways.
|
||||
// The performance impact of the workaround should be negligible.
|
||||
// It's a problem for another day.
|
||||
|
||||
vh_mask = _mm512_cmpeq_epi64_mask( _mm512_and_si512( vh[0], bit3_mask ),
|
||||
zero );
|
||||
|
||||
|
||||
Reference in New Issue
Block a user