mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
Compare commits
5 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
e2d5762ef2 | ||
![]() |
e625ed5420 | ||
![]() |
9abc19a30a | ||
![]() |
0d769ee0fe | ||
![]() |
0d48d573ce |
@@ -134,14 +134,12 @@ cpuminer_SOURCES = \
|
||||
algo/lyra2/phi2-4way.c \
|
||||
algo/lyra2/phi2.c \
|
||||
algo/m7m.c \
|
||||
algo/neoscrypt/neoscrypt.c \
|
||||
algo/nist5/nist5-gate.c \
|
||||
algo/nist5/nist5-4way.c \
|
||||
algo/nist5/nist5.c \
|
||||
algo/nist5/zr5.c \
|
||||
algo/panama/sph_panama.c \
|
||||
algo/radiogatun/sph_radiogatun.c \
|
||||
algo/pluck.c \
|
||||
algo/quark/quark-gate.c \
|
||||
algo/quark/quark.c \
|
||||
algo/quark/quark-4way.c \
|
||||
@@ -162,7 +160,9 @@ cpuminer_SOURCES = \
|
||||
algo/ripemd/lbry-gate.c \
|
||||
algo/ripemd/lbry.c \
|
||||
algo/ripemd/lbry-4way.c \
|
||||
algo/scrypt.c \
|
||||
algo/scrypt/scrypt.c \
|
||||
algo/scrypt/neoscrypt.c \
|
||||
algo/scrypt/pluck.c \
|
||||
algo/scryptjane/scrypt-jane.c \
|
||||
algo/sha/sph_sha2.c \
|
||||
algo/sha/sph_sha2big.c \
|
||||
@@ -198,7 +198,6 @@ cpuminer_SOURCES = \
|
||||
algo/whirlpool/sph_whirlpool.c \
|
||||
algo/whirlpool/whirlpool-hash-4way.c \
|
||||
algo/whirlpool/whirlpool-gate.c \
|
||||
algo/whirlpool/whirlpool-4way.c \
|
||||
algo/whirlpool/whirlpool.c \
|
||||
algo/whirlpool/whirlpoolx.c \
|
||||
algo/x11/x11-gate.c \
|
||||
|
@@ -59,9 +59,6 @@ Supported Algorithms
|
||||
blake2s Blake-2 S
|
||||
bmw BMW 256
|
||||
c11 Chaincoin
|
||||
cryptolight Cryptonight-light
|
||||
cryptonight
|
||||
cryptonightv7 Monero (XMR)
|
||||
decred
|
||||
deep Deepcoin (DCN)
|
||||
dmd-gr Diamond-Groestl
|
||||
@@ -78,9 +75,9 @@ Supported Algorithms
|
||||
luffa Luffa
|
||||
lyra2h Hppcoin
|
||||
lyra2re lyra2
|
||||
lyra2rev2 lyra2v2, Vertcoin
|
||||
lyra2rev2 lyra2v2
|
||||
lyra2rev3 lyrav2v3, Vertcoin
|
||||
lyra2z Zcoin (XZC)
|
||||
lyra2z
|
||||
lyra2z330 Lyra2 330 rows, Zoin (ZOI)
|
||||
m7m Magi (XMG)
|
||||
myr-gr Myriad-Groestl
|
||||
@@ -97,6 +94,7 @@ Supported Algorithms
|
||||
scrypt:N scrypt(N, 1, 1)
|
||||
scryptjane:nf
|
||||
sha256d Double SHA-256
|
||||
sha256q Quad SHA-256, Pyrite (PYE)
|
||||
sha256t Triple SHA-256, Onecoin (OC)
|
||||
shavite3 Shavite3
|
||||
skein Skein+Sha (Skeincoin)
|
||||
|
@@ -38,6 +38,51 @@ supported.
|
||||
Change Log
|
||||
----------
|
||||
|
||||
v3.9.5.4
|
||||
|
||||
Fixed sha256q AVX2 poor performance.
|
||||
Fixed skein2 buffer overflow and restored bswap-interleave optimization.
|
||||
More restructuring.
|
||||
|
||||
v3.9.5.3
|
||||
|
||||
Fix crash mining hodl with aes-sse42.
|
||||
More restructuring and share report tweaks.
|
||||
|
||||
v3.9.5.2
|
||||
|
||||
Revert bswap-interleave optimization for causing crashes on Windows.
|
||||
|
||||
v3.9.5.1
|
||||
|
||||
Fixed skein2 crash on Windows.
|
||||
|
||||
Fixed CPU temperature reading on Ubuntu 19.04.
|
||||
|
||||
Realigned log message colours, blue is used to report normal activity and
|
||||
yellow is only used to report abnormal activity.
|
||||
|
||||
Changed stats colours, yellow now means below average, white is average
|
||||
range. Tweaked colour thresholds.
|
||||
|
||||
Changed colour of stratum difficulty change messages to blue to match other
|
||||
normal protocol messages. Blue messages (block, stratum, submit) will no
|
||||
longer be displayed when using -q option.
|
||||
|
||||
Added job id to new block, share submit, and share result messages and added
|
||||
new nessage when a new job is received for an existing block. This will for
|
||||
better troubleshooting of invalid job id rejects seen at zergpool.
|
||||
|
||||
Some more restructuring.
|
||||
|
||||
v3.9.5
|
||||
|
||||
New share reporting information includes calculation of equivalent hashrate
|
||||
based on share difficulty, network latency, 5 minute summary.
|
||||
Per-thread hash rate reports are disabled by default.
|
||||
New command line option --hash-meter added to enable per-thread hash rates.
|
||||
|
||||
|
||||
v3.9.4
|
||||
|
||||
Faster AVX2 for lyra2v3, quark, anime.
|
||||
|
223
algo-gate-api.c
223
algo-gate-api.c
@@ -71,7 +71,6 @@ bool return_false () { return false; }
|
||||
void *return_null () { return NULL; }
|
||||
void call_error () { printf("ERR: Uninitialized function pointer\n"); }
|
||||
|
||||
|
||||
void algo_not_tested()
|
||||
{
|
||||
applog( LOG_WARNING,"Algo %s has not been tested live. It may not work",
|
||||
@@ -149,111 +148,110 @@ void init_algo_gate( algo_gate_t* gate )
|
||||
// called by each thread that uses the gate
|
||||
bool register_algo_gate( int algo, algo_gate_t *gate )
|
||||
{
|
||||
if ( NULL == gate )
|
||||
{
|
||||
applog(LOG_ERR,"FAIL: algo_gate registration failed, NULL gate\n");
|
||||
return false;
|
||||
}
|
||||
if ( NULL == gate )
|
||||
{
|
||||
applog(LOG_ERR,"FAIL: algo_gate registration failed, NULL gate\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
init_algo_gate( gate );
|
||||
init_algo_gate( gate );
|
||||
|
||||
switch (algo)
|
||||
{
|
||||
case ALGO_ALLIUM: register_allium_algo ( gate ); break;
|
||||
case ALGO_ANIME: register_anime_algo ( gate ); break;
|
||||
case ALGO_ARGON2: register_argon2_algo ( gate ); break;
|
||||
case ALGO_ARGON2D250: register_argon2d_crds_algo ( gate ); break;
|
||||
case ALGO_ARGON2D500: register_argon2d_dyn_algo ( gate ); break;
|
||||
case ALGO_ARGON2D4096: register_argon2d4096_algo ( gate ); break;
|
||||
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
|
||||
case ALGO_BASTION: register_bastion_algo ( gate ); break;
|
||||
case ALGO_BLAKE: register_blake_algo ( gate ); break;
|
||||
case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break;
|
||||
// case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break;
|
||||
case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break;
|
||||
case ALGO_C11: register_c11_algo ( gate ); break;
|
||||
case ALGO_CRYPTOLIGHT: register_cryptolight_algo ( gate ); break;
|
||||
case ALGO_CRYPTONIGHT: register_cryptonight_algo ( gate ); break;
|
||||
case ALGO_CRYPTONIGHTV7:register_cryptonightv7_algo( gate ); break;
|
||||
case ALGO_DECRED: register_decred_algo ( gate ); break;
|
||||
case ALGO_DEEP: register_deep_algo ( gate ); break;
|
||||
case ALGO_DMD_GR: register_dmd_gr_algo ( gate ); break;
|
||||
case ALGO_DROP: register_drop_algo ( gate ); break;
|
||||
case ALGO_FRESH: register_fresh_algo ( gate ); break;
|
||||
case ALGO_GROESTL: register_groestl_algo ( gate ); break;
|
||||
case ALGO_HEAVY: register_heavy_algo ( gate ); break;
|
||||
case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break;
|
||||
case ALGO_HODL: register_hodl_algo ( gate ); break;
|
||||
case ALGO_JHA: register_jha_algo ( gate ); break;
|
||||
case ALGO_KECCAK: register_keccak_algo ( gate ); break;
|
||||
case ALGO_KECCAKC: register_keccakc_algo ( gate ); break;
|
||||
case ALGO_LBRY: register_lbry_algo ( gate ); break;
|
||||
case ALGO_LUFFA: register_luffa_algo ( gate ); break;
|
||||
case ALGO_LYRA2H: register_lyra2h_algo ( gate ); break;
|
||||
case ALGO_LYRA2RE: register_lyra2re_algo ( gate ); break;
|
||||
case ALGO_LYRA2REV2: register_lyra2rev2_algo ( gate ); break;
|
||||
case ALGO_LYRA2REV3: register_lyra2rev3_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z: register_lyra2z_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z330: register_lyra2z330_algo ( gate ); break;
|
||||
case ALGO_M7M: register_m7m_algo ( gate ); break;
|
||||
case ALGO_MYR_GR: register_myriad_algo ( gate ); break;
|
||||
case ALGO_NEOSCRYPT: register_neoscrypt_algo ( gate ); break;
|
||||
case ALGO_NIST5: register_nist5_algo ( gate ); break;
|
||||
case ALGO_PENTABLAKE: register_pentablake_algo ( gate ); break;
|
||||
case ALGO_PHI1612: register_phi1612_algo ( gate ); break;
|
||||
case ALGO_PHI2: register_phi2_algo ( gate ); break;
|
||||
case ALGO_PLUCK: register_pluck_algo ( gate ); break;
|
||||
case ALGO_POLYTIMOS: register_polytimos_algo ( gate ); break;
|
||||
case ALGO_QUARK: register_quark_algo ( gate ); break;
|
||||
case ALGO_QUBIT: register_qubit_algo ( gate ); break;
|
||||
case ALGO_SCRYPT: register_scrypt_algo ( gate ); break;
|
||||
case ALGO_SCRYPTJANE: register_scryptjane_algo ( gate ); break;
|
||||
case ALGO_SHA256D: register_sha256d_algo ( gate ); break;
|
||||
case ALGO_SHA256T: register_sha256t_algo ( gate ); break;
|
||||
case ALGO_SHA256Q: register_sha256q_algo ( gate ); break;
|
||||
case ALGO_SHAVITE3: register_shavite_algo ( gate ); break;
|
||||
case ALGO_SKEIN: register_skein_algo ( gate ); break;
|
||||
case ALGO_SKEIN2: register_skein2_algo ( gate ); break;
|
||||
case ALGO_SKUNK: register_skunk_algo ( gate ); break;
|
||||
case ALGO_SONOA: register_sonoa_algo ( gate ); break;
|
||||
case ALGO_TIMETRAVEL: register_timetravel_algo ( gate ); break;
|
||||
case ALGO_TIMETRAVEL10: register_timetravel10_algo ( gate ); break;
|
||||
case ALGO_TRIBUS: register_tribus_algo ( gate ); break;
|
||||
case ALGO_VANILLA: register_vanilla_algo ( gate ); break;
|
||||
case ALGO_VELTOR: register_veltor_algo ( gate ); break;
|
||||
case ALGO_WHIRLPOOL: register_whirlpool_algo ( gate ); break;
|
||||
case ALGO_WHIRLPOOLX: register_whirlpoolx_algo ( gate ); break;
|
||||
case ALGO_X11: register_x11_algo ( gate ); break;
|
||||
case ALGO_X11EVO: register_x11evo_algo ( gate ); break;
|
||||
case ALGO_X11GOST: register_x11gost_algo ( gate ); break;
|
||||
case ALGO_X12: register_x12_algo ( gate ); break;
|
||||
case ALGO_X13: register_x13_algo ( gate ); break;
|
||||
case ALGO_X13SM3: register_x13sm3_algo ( gate ); break;
|
||||
case ALGO_X14: register_x14_algo ( gate ); break;
|
||||
case ALGO_X15: register_x15_algo ( gate ); break;
|
||||
case ALGO_X16R: register_x16r_algo ( gate ); break;
|
||||
case ALGO_X16S: register_x16s_algo ( gate ); break;
|
||||
case ALGO_X17: register_x17_algo ( gate ); break;
|
||||
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
|
||||
switch (algo)
|
||||
{
|
||||
case ALGO_ALLIUM: register_allium_algo ( gate ); break;
|
||||
case ALGO_ANIME: register_anime_algo ( gate ); break;
|
||||
case ALGO_ARGON2: register_argon2_algo ( gate ); break;
|
||||
case ALGO_ARGON2D250: register_argon2d_crds_algo ( gate ); break;
|
||||
case ALGO_ARGON2D500: register_argon2d_dyn_algo ( gate ); break;
|
||||
case ALGO_ARGON2D4096: register_argon2d4096_algo ( gate ); break;
|
||||
case ALGO_AXIOM: register_axiom_algo ( gate ); break;
|
||||
case ALGO_BASTION: register_bastion_algo ( gate ); break;
|
||||
case ALGO_BLAKE: register_blake_algo ( gate ); break;
|
||||
case ALGO_BLAKECOIN: register_blakecoin_algo ( gate ); break;
|
||||
// case ALGO_BLAKE2B: register_blake2b_algo ( gate ); break;
|
||||
case ALGO_BLAKE2S: register_blake2s_algo ( gate ); break;
|
||||
case ALGO_C11: register_c11_algo ( gate ); break;
|
||||
case ALGO_CRYPTOLIGHT: register_cryptolight_algo ( gate ); break;
|
||||
case ALGO_CRYPTONIGHT: register_cryptonight_algo ( gate ); break;
|
||||
case ALGO_CRYPTONIGHTV7: register_cryptonightv7_algo ( gate ); break;
|
||||
case ALGO_DECRED: register_decred_algo ( gate ); break;
|
||||
case ALGO_DEEP: register_deep_algo ( gate ); break;
|
||||
case ALGO_DMD_GR: register_dmd_gr_algo ( gate ); break;
|
||||
case ALGO_DROP: register_drop_algo ( gate ); break;
|
||||
case ALGO_FRESH: register_fresh_algo ( gate ); break;
|
||||
case ALGO_GROESTL: register_groestl_algo ( gate ); break;
|
||||
case ALGO_HEAVY: register_heavy_algo ( gate ); break;
|
||||
case ALGO_HMQ1725: register_hmq1725_algo ( gate ); break;
|
||||
case ALGO_HODL: register_hodl_algo ( gate ); break;
|
||||
case ALGO_JHA: register_jha_algo ( gate ); break;
|
||||
case ALGO_KECCAK: register_keccak_algo ( gate ); break;
|
||||
case ALGO_KECCAKC: register_keccakc_algo ( gate ); break;
|
||||
case ALGO_LBRY: register_lbry_algo ( gate ); break;
|
||||
case ALGO_LUFFA: register_luffa_algo ( gate ); break;
|
||||
case ALGO_LYRA2H: register_lyra2h_algo ( gate ); break;
|
||||
case ALGO_LYRA2RE: register_lyra2re_algo ( gate ); break;
|
||||
case ALGO_LYRA2REV2: register_lyra2rev2_algo ( gate ); break;
|
||||
case ALGO_LYRA2REV3: register_lyra2rev3_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z: register_lyra2z_algo ( gate ); break;
|
||||
case ALGO_LYRA2Z330: register_lyra2z330_algo ( gate ); break;
|
||||
case ALGO_M7M: register_m7m_algo ( gate ); break;
|
||||
case ALGO_MYR_GR: register_myriad_algo ( gate ); break;
|
||||
case ALGO_NEOSCRYPT: register_neoscrypt_algo ( gate ); break;
|
||||
case ALGO_NIST5: register_nist5_algo ( gate ); break;
|
||||
case ALGO_PENTABLAKE: register_pentablake_algo ( gate ); break;
|
||||
case ALGO_PHI1612: register_phi1612_algo ( gate ); break;
|
||||
case ALGO_PHI2: register_phi2_algo ( gate ); break;
|
||||
case ALGO_PLUCK: register_pluck_algo ( gate ); break;
|
||||
case ALGO_POLYTIMOS: register_polytimos_algo ( gate ); break;
|
||||
case ALGO_QUARK: register_quark_algo ( gate ); break;
|
||||
case ALGO_QUBIT: register_qubit_algo ( gate ); break;
|
||||
case ALGO_SCRYPT: register_scrypt_algo ( gate ); break;
|
||||
case ALGO_SCRYPTJANE: register_scryptjane_algo ( gate ); break;
|
||||
case ALGO_SHA256D: register_sha256d_algo ( gate ); break;
|
||||
case ALGO_SHA256Q: register_sha256q_algo ( gate ); break;
|
||||
case ALGO_SHA256T: register_sha256t_algo ( gate ); break;
|
||||
case ALGO_SHAVITE3: register_shavite_algo ( gate ); break;
|
||||
case ALGO_SKEIN: register_skein_algo ( gate ); break;
|
||||
case ALGO_SKEIN2: register_skein2_algo ( gate ); break;
|
||||
case ALGO_SKUNK: register_skunk_algo ( gate ); break;
|
||||
case ALGO_SONOA: register_sonoa_algo ( gate ); break;
|
||||
case ALGO_TIMETRAVEL: register_timetravel_algo ( gate ); break;
|
||||
case ALGO_TIMETRAVEL10: register_timetravel10_algo ( gate ); break;
|
||||
case ALGO_TRIBUS: register_tribus_algo ( gate ); break;
|
||||
case ALGO_VANILLA: register_vanilla_algo ( gate ); break;
|
||||
case ALGO_VELTOR: register_veltor_algo ( gate ); break;
|
||||
case ALGO_WHIRLPOOL: register_whirlpool_algo ( gate ); break;
|
||||
case ALGO_WHIRLPOOLX: register_whirlpoolx_algo ( gate ); break;
|
||||
case ALGO_X11: register_x11_algo ( gate ); break;
|
||||
case ALGO_X11EVO: register_x11evo_algo ( gate ); break;
|
||||
case ALGO_X11GOST: register_x11gost_algo ( gate ); break;
|
||||
case ALGO_X12: register_x12_algo ( gate ); break;
|
||||
case ALGO_X13: register_x13_algo ( gate ); break;
|
||||
case ALGO_X13SM3: register_x13sm3_algo ( gate ); break;
|
||||
case ALGO_X14: register_x14_algo ( gate ); break;
|
||||
case ALGO_X15: register_x15_algo ( gate ); break;
|
||||
case ALGO_X16R: register_x16r_algo ( gate ); break;
|
||||
case ALGO_X16S: register_x16s_algo ( gate ); break;
|
||||
case ALGO_X17: register_x17_algo ( gate ); break;
|
||||
case ALGO_XEVAN: register_xevan_algo ( gate ); break;
|
||||
/* case ALGO_YESCRYPT: register_yescrypt_05_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8: register_yescryptr8_05_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR16: register_yescryptr16_05_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR32: register_yescryptr32_05_algo ( gate ); break;
|
||||
*/
|
||||
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
|
||||
case ALGO_YESCRYPT: register_yescrypt_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR8: register_yescryptr8_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR16: register_yescryptr16_algo ( gate ); break;
|
||||
case ALGO_YESCRYPTR32: register_yescryptr32_algo ( gate ); break;
|
||||
case ALGO_YESPOWER: register_yespower_algo ( gate ); break;
|
||||
case ALGO_YESPOWERR16: register_yespowerr16_algo ( gate ); break;
|
||||
case ALGO_ZR5: register_zr5_algo ( gate ); break;
|
||||
default:
|
||||
applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] );
|
||||
return false;
|
||||
} // switch
|
||||
|
||||
case ALGO_YESPOWER: register_yespower_algo ( gate ); break;
|
||||
case ALGO_YESPOWERR16: register_yespowerr16_algo ( gate ); break;
|
||||
case ALGO_ZR5: register_zr5_algo ( gate ); break;
|
||||
default:
|
||||
applog(LOG_ERR,"FAIL: algo_gate registration failed, unknown algo %s.\n", algo_names[opt_algo] );
|
||||
return false;
|
||||
} // switch
|
||||
|
||||
// ensure required functions were defined.
|
||||
// ensure required functions were defined.
|
||||
if ( gate->scanhash == (void*)&null_scanhash )
|
||||
{
|
||||
applog(LOG_ERR, "FAIL: Required algo_gate functions undefined\n");
|
||||
@@ -363,16 +361,16 @@ void get_algo_alias( char** algo_or_alias )
|
||||
#undef ALIAS
|
||||
#undef PROPER
|
||||
|
||||
// only for parallel when there are lanes.
|
||||
bool submit_solution( struct work *work, void *hash,
|
||||
struct thr_info *thr, int lane )
|
||||
struct thr_info *thr )
|
||||
{
|
||||
work_set_target_ratio( work, hash );
|
||||
if ( submit_work( thr, work ) )
|
||||
{
|
||||
applog( LOG_NOTICE, "Share %d submitted by thread %d, lane %d.",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
thr->id, lane );
|
||||
if ( !opt_quiet )
|
||||
applog( LOG_BLUE, "Share %d submitted by thread %d, job %s.",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
thr->id, work->job_id );
|
||||
return true;
|
||||
}
|
||||
else
|
||||
@@ -380,4 +378,23 @@ bool submit_solution( struct work *work, void *hash,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool submit_lane_solution( struct work *work, void *hash,
|
||||
struct thr_info *thr, int lane )
|
||||
{
|
||||
work_set_target_ratio( work, hash );
|
||||
if ( submit_work( thr, work ) )
|
||||
{
|
||||
if ( !opt_quiet )
|
||||
// applog( LOG_BLUE, "Share %d submitted by thread %d, lane %d.",
|
||||
// accepted_share_count + rejected_share_count + 1,
|
||||
// thr->id, lane );
|
||||
applog( LOG_BLUE, "Share %d submitted by thread %d, lane %d, job %s.",
|
||||
accepted_share_count + rejected_share_count + 1, thr->id,
|
||||
lane, work->job_id );
|
||||
return true;
|
||||
}
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@@ -116,7 +116,7 @@ typedef struct
|
||||
// Added a 5th arg for the thread_info structure to replace the int thr id
|
||||
// in the first arg. Both will co-exist during the trasition.
|
||||
//int ( *scanhash ) ( int, struct work*, uint32_t, uint64_t* );
|
||||
int ( *scanhash ) ( int, struct work*, uint32_t, uint64_t*, struct thr_info* );
|
||||
int ( *scanhash ) ( struct work*, uint32_t, uint64_t*, struct thr_info* );
|
||||
|
||||
// optional unsafe, must be overwritten if algo uses function
|
||||
void ( *hash ) ( void*, const void*, uint32_t ) ;
|
||||
@@ -153,7 +153,6 @@ int ntime_index;
|
||||
int nbits_index;
|
||||
int nonce_index; // use with caution, see warning below
|
||||
int work_cmp_size;
|
||||
|
||||
} algo_gate_t;
|
||||
|
||||
extern algo_gate_t algo_gate;
|
||||
@@ -194,9 +193,12 @@ void four_way_not_tested();
|
||||
// allways returns failure
|
||||
int null_scanhash();
|
||||
|
||||
// The one and only, a callback for scanhash.
|
||||
// Allow algos to submit from scanhash loop.
|
||||
bool submit_solution( struct work *work, void *hash,
|
||||
struct thr_info *thr, int lane );
|
||||
struct thr_info *thr );
|
||||
bool submit_lane_solution( struct work *work, void *hash,
|
||||
struct thr_info *thr, int lane );
|
||||
|
||||
|
||||
bool submit_work( struct thr_info *thr, const struct work *work_in );
|
||||
|
||||
|
@@ -42,12 +42,14 @@ void argon2hash(void *output, const void *input)
|
||||
(unsigned char *)output);
|
||||
}
|
||||
|
||||
int scanhash_argon2(int thr_id, struct work* work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_argon2( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
@@ -33,13 +33,14 @@ void argon2d_crds_hash( void *output, const void *input )
|
||||
argon2_ctx( &context, Argon2_d );
|
||||
}
|
||||
|
||||
int scanhash_argon2d_crds( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_argon2d_crds( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
@@ -103,13 +104,14 @@ void argon2d_dyn_hash( void *output, const void *input )
|
||||
argon2_ctx( &context, Argon2_d );
|
||||
}
|
||||
|
||||
int scanhash_argon2d_dyn( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_argon2d_dyn( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
@@ -147,8 +149,8 @@ bool register_argon2d_dyn_algo( algo_gate_t* gate )
|
||||
|
||||
// Unitus
|
||||
|
||||
int scanhash_argon2d4096( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
int scanhash_argon2d4096( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) vhash[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
@@ -157,7 +159,7 @@ int scanhash_argon2d4096( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t t_cost = 1; // 1 iteration
|
||||
uint32_t m_cost = 4096; // use 4MB
|
||||
uint32_t parallelism = 1; // 1 thread, 2 lanes
|
||||
|
@@ -9,23 +9,23 @@ bool register_argon2d_crds_algo( algo_gate_t* gate );
|
||||
|
||||
void argon2d_crds_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_argon2d_crds( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_argon2d_crds( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
// Dynamic: version = 0x10, m_cost = 500.
|
||||
bool register_argon2d_dyn_algo( algo_gate_t* gate );
|
||||
|
||||
void argon2d_dyn_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_argon2d_dyn( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_argon2d_dyn( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
|
||||
// Unitus: version = 0x13, m_cost = 4096.
|
||||
bool register_argon2d4096_algo( algo_gate_t* gate );
|
||||
|
||||
int scanhash_argon2d4096( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_argon2d4096( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -15,11 +15,11 @@ void blakehash_4way(void *state, const void *input)
|
||||
memcpy( &ctx, &blake_4w_ctx, sizeof ctx );
|
||||
blake256r14_4way( &ctx, input + (64<<2), 16 );
|
||||
blake256r14_4way_close( &ctx, vhash );
|
||||
mm128_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blake_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
@@ -27,43 +27,34 @@ int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if (opt_benchmark)
|
||||
HTarget = 0x7f;
|
||||
|
||||
// we need big endian data...
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
blake256r14_4way_init( &blake_4w_ctx );
|
||||
blake256r14_4way( &blake_4w_ctx, vdata, 64 );
|
||||
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
|
||||
blakehash_4way( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
|
||||
if ( (hash+(i<<3))[7] <= HTarget )
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
|
||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -79,13 +70,13 @@ void blakehash_8way( void *state, const void *input )
|
||||
memcpy( &ctx, &blake_8w_ctx, sizeof ctx );
|
||||
blake256r14_8way( &ctx, input + (64<<3), 16 );
|
||||
blake256r14_8way_close( &ctx, vhash );
|
||||
mm256_deinterleave_8x32( state, state+ 32, state+ 64, state+ 96,
|
||||
state+128, state+160, state+192, state+224,
|
||||
vhash, 256 );
|
||||
_dintrlv_8x32( state, state+ 32, state+ 64, state+ 96,
|
||||
state+128, state+160, state+192, state+224,
|
||||
vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blake_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
@@ -93,33 +84,21 @@ int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if (opt_benchmark)
|
||||
HTarget = 0x7f;
|
||||
|
||||
// we need big endian data...
|
||||
swab32_array( edata, pdata, 20 );
|
||||
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
|
||||
blake256r14_8way_init( &blake_8w_ctx );
|
||||
blake256r14_8way( &blake_8w_ctx, vdata, 64 );
|
||||
|
||||
uint32_t *noncep = vdata + 152; // 19*8
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
be32enc( noncep +4, n+4 );
|
||||
be32enc( noncep +5, n+5 );
|
||||
be32enc( noncep +6, n+6 );
|
||||
be32enc( noncep +7, n+7 );
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
n+3, n+2, n+1, n ) );
|
||||
pdata[19] = n;
|
||||
|
||||
blakehash_8way( hash, vdata );
|
||||
@@ -128,17 +107,14 @@ int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( (hash+i)[7] <= HTarget && fulltest( hash+i, ptarget ) )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
num_found++;
|
||||
nonces[i] = n+i;
|
||||
work_set_target_ratio( work, hash+1 );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 8;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( (n < max_nonce) !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -10,12 +10,12 @@
|
||||
|
||||
#if defined (BLAKE_4WAY)
|
||||
void blakehash_4way(void *state, const void *input);
|
||||
int scanhash_blake_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_blake_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
void blakehash( void *state, const void *input );
|
||||
int scanhash_blake( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_blake( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
@@ -39,8 +39,8 @@ void blakehash(void *state, const void *input)
|
||||
|
||||
}
|
||||
|
||||
int scanhash_blake( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blake( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
@@ -49,6 +49,7 @@ int scanhash_blake( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t _ALIGN(32) hash64[8];
|
||||
uint32_t _ALIGN(32) endiandata[20];
|
||||
uint32_t n = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
ctx_midstate_done = false;
|
||||
|
||||
|
@@ -412,34 +412,16 @@ do { \
|
||||
V5 = H5; \
|
||||
V6 = H6; \
|
||||
V7 = H7; \
|
||||
V8 = _mm_xor_si128( S0, _mm_set_epi32( CS0, CS0, CS0, CS0 ) ); \
|
||||
V9 = _mm_xor_si128( S1, _mm_set_epi32( CS1, CS1, CS1, CS1 ) ); \
|
||||
VA = _mm_xor_si128( S2, _mm_set_epi32( CS2, CS2, CS2, CS2 ) ); \
|
||||
VB = _mm_xor_si128( S3, _mm_set_epi32( CS3, CS3, CS3, CS3 ) ); \
|
||||
VC = _mm_xor_si128( _mm_set_epi32( T0, T0, T0, T0 ), \
|
||||
_mm_set_epi32( CS4, CS4, CS4, CS4 ) ); \
|
||||
VD = _mm_xor_si128( _mm_set_epi32( T0, T0, T0, T0 ), \
|
||||
_mm_set_epi32( CS5, CS5, CS5, CS5 ) ); \
|
||||
VE = _mm_xor_si128( _mm_set_epi32( T1, T1, T1, T1 ) \
|
||||
, _mm_set_epi32( CS6, CS6, CS6, CS6 ) ); \
|
||||
VF = _mm_xor_si128( _mm_set_epi32( T1, T1, T1, T1 ), \
|
||||
_mm_set_epi32( CS7, CS7, CS7, CS7 ) ); \
|
||||
M[0x0] = mm128_bswap_32( *(buf + 0) ); \
|
||||
M[0x1] = mm128_bswap_32( *(buf + 1) ); \
|
||||
M[0x2] = mm128_bswap_32( *(buf + 2) ); \
|
||||
M[0x3] = mm128_bswap_32( *(buf + 3) ); \
|
||||
M[0x4] = mm128_bswap_32( *(buf + 4) ); \
|
||||
M[0x5] = mm128_bswap_32( *(buf + 5) ); \
|
||||
M[0x6] = mm128_bswap_32( *(buf + 6) ); \
|
||||
M[0x7] = mm128_bswap_32( *(buf + 7) ); \
|
||||
M[0x8] = mm128_bswap_32( *(buf + 8) ); \
|
||||
M[0x9] = mm128_bswap_32( *(buf + 9) ); \
|
||||
M[0xA] = mm128_bswap_32( *(buf + 10) ); \
|
||||
M[0xB] = mm128_bswap_32( *(buf + 11) ); \
|
||||
M[0xC] = mm128_bswap_32( *(buf + 12) ); \
|
||||
M[0xD] = mm128_bswap_32( *(buf + 13) ); \
|
||||
M[0xE] = mm128_bswap_32( *(buf + 14) ); \
|
||||
M[0xF] = mm128_bswap_32( *(buf + 15) ); \
|
||||
V8 = _mm_xor_si128( S0, _mm_set1_epi32( CS0 ) ); \
|
||||
V9 = _mm_xor_si128( S1, _mm_set1_epi32( CS1 ) ); \
|
||||
VA = _mm_xor_si128( S2, _mm_set1_epi32( CS2 ) ); \
|
||||
VB = _mm_xor_si128( S3, _mm_set1_epi32( CS3 ) ); \
|
||||
VC = _mm_xor_si128( _mm_set1_epi32( T0 ), _mm_set1_epi32( CS4 ) ); \
|
||||
VD = _mm_xor_si128( _mm_set1_epi32( T0 ), _mm_set1_epi32( CS5 ) ); \
|
||||
VE = _mm_xor_si128( _mm_set1_epi32( T1 ), _mm_set1_epi32( CS6 ) ); \
|
||||
VF = _mm_xor_si128( _mm_set1_epi32( T1 ), _mm_set1_epi32( CS7 ) ); \
|
||||
mm128_block_bswap_32( M, buf ); \
|
||||
mm128_block_bswap_32( M+8, buf+8 ); \
|
||||
for (r = 0; r < rounds; r ++) \
|
||||
ROUND_S_4WAY(r); \
|
||||
H0 = _mm_xor_si128( _mm_xor_si128( \
|
||||
@@ -464,6 +446,54 @@ do { \
|
||||
|
||||
// current impl
|
||||
|
||||
#if defined(__SSSE3__)
|
||||
|
||||
#define BLAKE256_4WAY_BLOCK_BSWAP32 do \
|
||||
{ \
|
||||
__m128i shuf_bswap32 = _mm_set_epi64x( 0x0c0d0e0f08090a0b, \
|
||||
0x0405060700010203 ); \
|
||||
M0 = _mm_shuffle_epi8( buf[ 0], shuf_bswap32 ); \
|
||||
M1 = _mm_shuffle_epi8( buf[ 1], shuf_bswap32 ); \
|
||||
M2 = _mm_shuffle_epi8( buf[ 2], shuf_bswap32 ); \
|
||||
M3 = _mm_shuffle_epi8( buf[ 3], shuf_bswap32 ); \
|
||||
M4 = _mm_shuffle_epi8( buf[ 4], shuf_bswap32 ); \
|
||||
M5 = _mm_shuffle_epi8( buf[ 5], shuf_bswap32 ); \
|
||||
M6 = _mm_shuffle_epi8( buf[ 6], shuf_bswap32 ); \
|
||||
M7 = _mm_shuffle_epi8( buf[ 7], shuf_bswap32 ); \
|
||||
M8 = _mm_shuffle_epi8( buf[ 8], shuf_bswap32 ); \
|
||||
M9 = _mm_shuffle_epi8( buf[ 9], shuf_bswap32 ); \
|
||||
MA = _mm_shuffle_epi8( buf[10], shuf_bswap32 ); \
|
||||
MB = _mm_shuffle_epi8( buf[11], shuf_bswap32 ); \
|
||||
MC = _mm_shuffle_epi8( buf[12], shuf_bswap32 ); \
|
||||
MD = _mm_shuffle_epi8( buf[13], shuf_bswap32 ); \
|
||||
ME = _mm_shuffle_epi8( buf[14], shuf_bswap32 ); \
|
||||
MF = _mm_shuffle_epi8( buf[15], shuf_bswap32 ); \
|
||||
} while(0)
|
||||
|
||||
#else // SSE2
|
||||
|
||||
#define BLAKE256_4WAY_BLOCK_BSWAP32 do \
|
||||
{ \
|
||||
M0 = mm128_bswap_32( buf[0] ); \
|
||||
M1 = mm128_bswap_32( buf[1] ); \
|
||||
M2 = mm128_bswap_32( buf[2] ); \
|
||||
M3 = mm128_bswap_32( buf[3] ); \
|
||||
M4 = mm128_bswap_32( buf[4] ); \
|
||||
M5 = mm128_bswap_32( buf[5] ); \
|
||||
M6 = mm128_bswap_32( buf[6] ); \
|
||||
M7 = mm128_bswap_32( buf[7] ); \
|
||||
M8 = mm128_bswap_32( buf[8] ); \
|
||||
M9 = mm128_bswap_32( buf[9] ); \
|
||||
MA = mm128_bswap_32( buf[10] ); \
|
||||
MB = mm128_bswap_32( buf[11] ); \
|
||||
MC = mm128_bswap_32( buf[12] ); \
|
||||
MD = mm128_bswap_32( buf[13] ); \
|
||||
ME = mm128_bswap_32( buf[14] ); \
|
||||
MF = mm128_bswap_32( buf[15] ); \
|
||||
} while(0)
|
||||
|
||||
#endif // SSSE3 else SSE2
|
||||
|
||||
#define COMPRESS32_4WAY( rounds ) \
|
||||
do { \
|
||||
__m128i M0, M1, M2, M3, M4, M5, M6, M7; \
|
||||
@@ -486,22 +516,7 @@ do { \
|
||||
VD = _mm_xor_si128( _mm_set1_epi32( T0 ), _mm_set1_epi32( CS5 ) ); \
|
||||
VE = _mm_xor_si128( _mm_set1_epi32( T1 ), _mm_set1_epi32( CS6 ) ); \
|
||||
VF = _mm_xor_si128( _mm_set1_epi32( T1 ), _mm_set1_epi32( CS7 ) ); \
|
||||
M0 = mm128_bswap_32( buf[ 0] ); \
|
||||
M1 = mm128_bswap_32( buf[ 1] ); \
|
||||
M2 = mm128_bswap_32( buf[ 2] ); \
|
||||
M3 = mm128_bswap_32( buf[ 3] ); \
|
||||
M4 = mm128_bswap_32( buf[ 4] ); \
|
||||
M5 = mm128_bswap_32( buf[ 5] ); \
|
||||
M6 = mm128_bswap_32( buf[ 6] ); \
|
||||
M7 = mm128_bswap_32( buf[ 7] ); \
|
||||
M8 = mm128_bswap_32( buf[ 8] ); \
|
||||
M9 = mm128_bswap_32( buf[ 9] ); \
|
||||
MA = mm128_bswap_32( buf[10] ); \
|
||||
MB = mm128_bswap_32( buf[11] ); \
|
||||
MC = mm128_bswap_32( buf[12] ); \
|
||||
MD = mm128_bswap_32( buf[13] ); \
|
||||
ME = mm128_bswap_32( buf[14] ); \
|
||||
MF = mm128_bswap_32( buf[15] ); \
|
||||
BLAKE256_4WAY_BLOCK_BSWAP32; \
|
||||
ROUND_S_4WAY(0); \
|
||||
ROUND_S_4WAY(1); \
|
||||
ROUND_S_4WAY(2); \
|
||||
@@ -519,14 +534,14 @@ do { \
|
||||
ROUND_S_4WAY(2); \
|
||||
ROUND_S_4WAY(3); \
|
||||
} \
|
||||
H0 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( V8, V0 ), S0 ), H0 ); \
|
||||
H1 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( V9, V1 ), S1 ), H1 ); \
|
||||
H2 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( VA, V2 ), S2 ), H2 ); \
|
||||
H3 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( VB, V3 ), S3 ), H3 ); \
|
||||
H4 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( VC, V4 ), S0 ), H4 ); \
|
||||
H5 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( VD, V5 ), S1 ), H5 ); \
|
||||
H6 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( VE, V6 ), S2 ), H6 ); \
|
||||
H7 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( VF, V7 ), S3 ), H7 ); \
|
||||
H0 = mm128_xor4( V8, V0, S0, H0 ); \
|
||||
H1 = mm128_xor4( V9, V1, S1, H1 ); \
|
||||
H2 = mm128_xor4( VA, V2, S2, H2 ); \
|
||||
H3 = mm128_xor4( VB, V3, S3, H3 ); \
|
||||
H4 = mm128_xor4( VC, V4, S0, H4 ); \
|
||||
H5 = mm128_xor4( VD, V5, S1, H5 ); \
|
||||
H6 = mm128_xor4( VE, V6, S2, H6 ); \
|
||||
H7 = mm128_xor4( VF, V7, S3, H7 ); \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
@@ -607,6 +622,7 @@ do { \
|
||||
__m256i M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||
__m256i V0, V1, V2, V3, V4, V5, V6, V7; \
|
||||
__m256i V8, V9, VA, VB, VC, VD, VE, VF; \
|
||||
__m256i shuf_bswap32; \
|
||||
V0 = H0; \
|
||||
V1 = H1; \
|
||||
V2 = H2; \
|
||||
@@ -623,22 +639,24 @@ do { \
|
||||
VD = _mm256_xor_si256( _mm256_set1_epi32( T0 ), _mm256_set1_epi32( CS5 ) ); \
|
||||
VE = _mm256_xor_si256( _mm256_set1_epi32( T1 ), _mm256_set1_epi32( CS6 ) ); \
|
||||
VF = _mm256_xor_si256( _mm256_set1_epi32( T1 ), _mm256_set1_epi32( CS7 ) ); \
|
||||
M0 = mm256_bswap_32( * buf ); \
|
||||
M1 = mm256_bswap_32( *(buf+1) ); \
|
||||
M2 = mm256_bswap_32( *(buf+2) ); \
|
||||
M3 = mm256_bswap_32( *(buf+3) ); \
|
||||
M4 = mm256_bswap_32( *(buf+4) ); \
|
||||
M5 = mm256_bswap_32( *(buf+5) ); \
|
||||
M6 = mm256_bswap_32( *(buf+6) ); \
|
||||
M7 = mm256_bswap_32( *(buf+7) ); \
|
||||
M8 = mm256_bswap_32( *(buf+8) ); \
|
||||
M9 = mm256_bswap_32( *(buf+9) ); \
|
||||
MA = mm256_bswap_32( *(buf+10) ); \
|
||||
MB = mm256_bswap_32( *(buf+11) ); \
|
||||
MC = mm256_bswap_32( *(buf+12) ); \
|
||||
MD = mm256_bswap_32( *(buf+13) ); \
|
||||
ME = mm256_bswap_32( *(buf+14) ); \
|
||||
MF = mm256_bswap_32( *(buf+15) ); \
|
||||
shuf_bswap32 = _mm256_set_epi64x( 0x0c0d0e0f08090a0b, 0x0405060700010203, \
|
||||
0x0c0d0e0f08090a0b, 0x0405060700010203 ); \
|
||||
M0 = _mm256_shuffle_epi8( * buf , shuf_bswap32 ); \
|
||||
M1 = _mm256_shuffle_epi8( *(buf+ 1), shuf_bswap32 ); \
|
||||
M2 = _mm256_shuffle_epi8( *(buf+ 2), shuf_bswap32 ); \
|
||||
M3 = _mm256_shuffle_epi8( *(buf+ 3), shuf_bswap32 ); \
|
||||
M4 = _mm256_shuffle_epi8( *(buf+ 4), shuf_bswap32 ); \
|
||||
M5 = _mm256_shuffle_epi8( *(buf+ 5), shuf_bswap32 ); \
|
||||
M6 = _mm256_shuffle_epi8( *(buf+ 6), shuf_bswap32 ); \
|
||||
M7 = _mm256_shuffle_epi8( *(buf+ 7), shuf_bswap32 ); \
|
||||
M8 = _mm256_shuffle_epi8( *(buf+ 8), shuf_bswap32 ); \
|
||||
M9 = _mm256_shuffle_epi8( *(buf+ 9), shuf_bswap32 ); \
|
||||
MA = _mm256_shuffle_epi8( *(buf+10), shuf_bswap32 ); \
|
||||
MB = _mm256_shuffle_epi8( *(buf+11), shuf_bswap32 ); \
|
||||
MC = _mm256_shuffle_epi8( *(buf+12), shuf_bswap32 ); \
|
||||
MD = _mm256_shuffle_epi8( *(buf+13), shuf_bswap32 ); \
|
||||
ME = _mm256_shuffle_epi8( *(buf+14), shuf_bswap32 ); \
|
||||
MF = _mm256_shuffle_epi8( *(buf+15), shuf_bswap32 ); \
|
||||
ROUND_S_8WAY(0); \
|
||||
ROUND_S_8WAY(1); \
|
||||
ROUND_S_8WAY(2); \
|
||||
@@ -656,22 +674,14 @@ do { \
|
||||
ROUND_S_8WAY(2); \
|
||||
ROUND_S_8WAY(3); \
|
||||
} \
|
||||
H0 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( V8, V0 ), \
|
||||
S0 ), H0 ); \
|
||||
H1 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( V9, V1 ), \
|
||||
S1 ), H1 ); \
|
||||
H2 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VA, V2 ), \
|
||||
S2 ), H2 ); \
|
||||
H3 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VB, V3 ), \
|
||||
S3 ), H3 ); \
|
||||
H4 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VC, V4 ), \
|
||||
S0 ), H4 ); \
|
||||
H5 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VD, V5 ), \
|
||||
S1 ), H5 ); \
|
||||
H6 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VE, V6 ), \
|
||||
S2 ), H6 ); \
|
||||
H7 = _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( VF, V7 ), \
|
||||
S3 ), H7 ); \
|
||||
H0 = mm256_xor4( V8, V0, S0, H0 ); \
|
||||
H1 = mm256_xor4( V9, V1, S1, H1 ); \
|
||||
H2 = mm256_xor4( VA, V2, S2, H2 ); \
|
||||
H3 = mm256_xor4( VB, V3, S3, H3 ); \
|
||||
H4 = mm256_xor4( VC, V4, S0, H4 ); \
|
||||
H5 = mm256_xor4( VD, V5, S1, H5 ); \
|
||||
H6 = mm256_xor4( VE, V6, S2, H6 ); \
|
||||
H7 = mm256_xor4( VF, V7, S3, H7 ); \
|
||||
} while (0)
|
||||
|
||||
|
||||
@@ -685,6 +695,7 @@ static void
|
||||
blake32_4way_init( blake_4way_small_context *ctx, const uint32_t *iv,
|
||||
const uint32_t *salt, int rounds )
|
||||
{
|
||||
__m128i zero = m128_zero;
|
||||
casti_m128i( ctx->H, 0 ) = _mm_set1_epi32( iv[0] );
|
||||
casti_m128i( ctx->H, 1 ) = _mm_set1_epi32( iv[1] );
|
||||
casti_m128i( ctx->H, 2 ) = _mm_set1_epi32( iv[2] );
|
||||
@@ -694,16 +705,10 @@ blake32_4way_init( blake_4way_small_context *ctx, const uint32_t *iv,
|
||||
casti_m128i( ctx->H, 6 ) = _mm_set1_epi32( iv[6] );
|
||||
casti_m128i( ctx->H, 7 ) = _mm_set1_epi32( iv[7] );
|
||||
|
||||
casti_m128i( ctx->S, 0 ) = m128_zero;
|
||||
casti_m128i( ctx->S, 1 ) = m128_zero;
|
||||
casti_m128i( ctx->S, 2 ) = m128_zero;
|
||||
casti_m128i( ctx->S, 3 ) = m128_zero;
|
||||
/*
|
||||
sc->S[0] = _mm_set1_epi32( salt[0] );
|
||||
sc->S[1] = _mm_set1_epi32( salt[1] );
|
||||
sc->S[2] = _mm_set1_epi32( salt[2] );
|
||||
sc->S[3] = _mm_set1_epi32( salt[3] );
|
||||
*/
|
||||
casti_m128i( ctx->S, 0 ) = zero;
|
||||
casti_m128i( ctx->S, 1 ) = zero;
|
||||
casti_m128i( ctx->S, 2 ) = zero;
|
||||
casti_m128i( ctx->S, 3 ) = zero;
|
||||
ctx->T0 = ctx->T1 = 0;
|
||||
ctx->ptr = 0;
|
||||
ctx->rounds = rounds;
|
||||
@@ -796,14 +801,7 @@ blake32_4way_close( blake_4way_small_context *ctx, unsigned ub, unsigned n,
|
||||
blake32_4way( ctx, buf, 64 );
|
||||
}
|
||||
|
||||
casti_m128i( dst, 0 ) = mm128_bswap_32( casti_m128i( ctx->H, 0 ) );
|
||||
casti_m128i( dst, 1 ) = mm128_bswap_32( casti_m128i( ctx->H, 1 ) );
|
||||
casti_m128i( dst, 2 ) = mm128_bswap_32( casti_m128i( ctx->H, 2 ) );
|
||||
casti_m128i( dst, 3 ) = mm128_bswap_32( casti_m128i( ctx->H, 3 ) );
|
||||
casti_m128i( dst, 4 ) = mm128_bswap_32( casti_m128i( ctx->H, 4 ) );
|
||||
casti_m128i( dst, 5 ) = mm128_bswap_32( casti_m128i( ctx->H, 5 ) );
|
||||
casti_m128i( dst, 6 ) = mm128_bswap_32( casti_m128i( ctx->H, 6 ) );
|
||||
casti_m128i( dst, 7 ) = mm128_bswap_32( casti_m128i( ctx->H, 7 ) );
|
||||
mm128_block_bswap_32( (__m128i*)dst, (__m128i*)ctx->H );
|
||||
}
|
||||
|
||||
#if defined (__AVX2__)
|
||||
@@ -816,11 +814,21 @@ static void
|
||||
blake32_8way_init( blake_8way_small_context *sc, const sph_u32 *iv,
|
||||
const sph_u32 *salt, int rounds )
|
||||
{
|
||||
int i;
|
||||
for ( i = 0; i < 8; i++ )
|
||||
sc->H[i] = _mm256_set1_epi32( iv[i] );
|
||||
for ( i = 0; i < 4; i++ )
|
||||
sc->S[i] = _mm256_set1_epi32( salt[i] );
|
||||
__m256i zero = m256_zero;
|
||||
casti_m256i( sc->H, 0 ) = _mm256_set1_epi32( iv[0] );
|
||||
casti_m256i( sc->H, 1 ) = _mm256_set1_epi32( iv[1] );
|
||||
casti_m256i( sc->H, 2 ) = _mm256_set1_epi32( iv[2] );
|
||||
casti_m256i( sc->H, 3 ) = _mm256_set1_epi32( iv[3] );
|
||||
casti_m256i( sc->H, 4 ) = _mm256_set1_epi32( iv[4] );
|
||||
casti_m256i( sc->H, 5 ) = _mm256_set1_epi32( iv[5] );
|
||||
casti_m256i( sc->H, 6 ) = _mm256_set1_epi32( iv[6] );
|
||||
casti_m256i( sc->H, 7 ) = _mm256_set1_epi32( iv[7] );
|
||||
|
||||
casti_m256i( sc->S, 0 ) = zero;
|
||||
casti_m256i( sc->S, 1 ) = zero;
|
||||
casti_m256i( sc->S, 2 ) = zero;
|
||||
casti_m256i( sc->S, 3 ) = zero;
|
||||
|
||||
sc->T0 = sc->T1 = 0;
|
||||
sc->ptr = 0;
|
||||
sc->rounds = rounds;
|
||||
@@ -872,14 +880,10 @@ static void
|
||||
blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w32 )
|
||||
{
|
||||
// union {
|
||||
__m256i buf[16];
|
||||
// sph_u32 dummy;
|
||||
// } u;
|
||||
size_t ptr, k;
|
||||
__m256i buf[16];
|
||||
size_t ptr;
|
||||
unsigned bit_len;
|
||||
sph_u32 th, tl;
|
||||
__m256i *out;
|
||||
|
||||
ptr = sc->ptr;
|
||||
bit_len = ((unsigned)ptr << 3);
|
||||
@@ -923,9 +927,7 @@ blake32_8way_close( blake_8way_small_context *sc, unsigned ub, unsigned n,
|
||||
*(buf+(60>>2)) = mm256_bswap_32( _mm256_set1_epi32( tl ) );
|
||||
blake32_8way( sc, buf, 64 );
|
||||
}
|
||||
out = (__m256i*)dst;
|
||||
for ( k = 0; k < out_size_w32; k++ )
|
||||
out[k] = mm256_bswap_32( sc->H[k] );
|
||||
mm256_block_bswap_32( (__m256i*)dst, (__m256i*)sc->H );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -35,13 +35,14 @@ static void blake2b_hash_end(uint32_t *output, const uint32_t *input)
|
||||
}
|
||||
*/
|
||||
|
||||
int scanhash_blake2b( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blake2b( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(A) vhashcpu[8];
|
||||
uint32_t _ALIGN(A) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[8];
|
||||
|
@@ -16,60 +16,49 @@ void blake2s_8way_hash( void *output, const void *input )
|
||||
blake2s_8way_update( &ctx, input + (64<<3), 16 );
|
||||
blake2s_8way_final( &ctx, vhash, BLAKE2S_OUTBYTES );
|
||||
|
||||
mm256_deinterleave_8x32( output, output+ 32, output+ 64, output+ 96,
|
||||
output+128, output+160, output+192, output+224,
|
||||
vhash, 256 );
|
||||
dintrlv_8x32( output, output+ 32, output+ 64, output+ 96,
|
||||
output+128, output+160, output+192, output+224,
|
||||
vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blake2s_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blake2s_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 152; // 19*8
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
blake2s_8way_init( &blake2s_8w_ctx, BLAKE2S_OUTBYTES );
|
||||
blake2s_8way_update( &blake2s_8w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
be32enc( noncep +4, n+4 );
|
||||
be32enc( noncep +5, n+5 );
|
||||
be32enc( noncep +6, n+6 );
|
||||
be32enc( noncep +7, n+7 );
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
n+3, n+2, n+1, n ) );
|
||||
pdata[19] = n;
|
||||
|
||||
blake2s_8way_hash( hash, vdata );
|
||||
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
if ( (hash+(i<<3))[7] <= Htarg )
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 8;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif defined(BLAKE2S_4WAY)
|
||||
@@ -85,53 +74,46 @@ void blake2s_4way_hash( void *output, const void *input )
|
||||
blake2s_4way_update( &ctx, input + (64<<2), 16 );
|
||||
blake2s_4way_final( &ctx, vhash, BLAKE2S_OUTBYTES );
|
||||
|
||||
mm128_deinterleave_4x32( output, output+32, output+64, output+96,
|
||||
dintrlv_4x32( output, output+32, output+64, output+96,
|
||||
vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blake2s_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blake2s_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
blake2s_4way_init( &blake2s_4w_ctx, BLAKE2S_OUTBYTES );
|
||||
blake2s_4way_update( &blake2s_4w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
pdata[19] = n;
|
||||
|
||||
blake2s_4way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
if ( (hash+(i<<3))[7] <= Htarg )
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -16,19 +16,19 @@ bool register_blake2s_algo( algo_gate_t* gate );
|
||||
#if defined(BLAKE2S_8WAY)
|
||||
|
||||
void blake2s_8way_hash( void *state, const void *input );
|
||||
int scanhash_blake2s_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_blake2s_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#elif defined (BLAKE2S_4WAY)
|
||||
|
||||
void blake2s_4way_hash( void *state, const void *input );
|
||||
int scanhash_blake2s_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_blake2s_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#else
|
||||
|
||||
void blake2s_hash( void *state, const void *input );
|
||||
int scanhash_blake2s( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_blake2s( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -32,14 +32,15 @@ static void blake2s_hash_end(uint32_t *output, const uint32_t *input)
|
||||
blake2s_final(&s_ctx, (uint8_t*) output, BLAKE2S_OUTBYTES);
|
||||
}
|
||||
*/
|
||||
int scanhash_blake2s(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_blake2s( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
uint32_t _ALIGN(64) hash64[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
@@ -412,18 +412,18 @@ static const sph_u64 CB[16] = {
|
||||
V5 = H5; \
|
||||
V6 = H6; \
|
||||
V7 = H7; \
|
||||
V8 = _mm256_xor_si256( S0, _mm256_set_epi64x( CB0, CB0, CB0, CB0 ) ); \
|
||||
V9 = _mm256_xor_si256( S1, _mm256_set_epi64x( CB1, CB1, CB1, CB1 ) ); \
|
||||
VA = _mm256_xor_si256( S2, _mm256_set_epi64x( CB2, CB2, CB2, CB2 ) ); \
|
||||
VB = _mm256_xor_si256( S3, _mm256_set_epi64x( CB3, CB3, CB3, CB3 ) ); \
|
||||
VC = _mm256_xor_si256( _mm256_set_epi64x( T0, T0, T0, T0 ), \
|
||||
_mm256_set_epi64x( CB4, CB4, CB4, CB4 ) ); \
|
||||
VD = _mm256_xor_si256( _mm256_set_epi64x( T0, T0, T0, T0 ), \
|
||||
_mm256_set_epi64x( CB5, CB5, CB5, CB5 ) ); \
|
||||
VE = _mm256_xor_si256( _mm256_set_epi64x( T1, T1, T1, T1 ), \
|
||||
_mm256_set_epi64x( CB6, CB6, CB6, CB6 ) ); \
|
||||
VF = _mm256_xor_si256( _mm256_set_epi64x( T1, T1, T1, T1 ), \
|
||||
_mm256_set_epi64x( CB7, CB7, CB7, CB7 ) ); \
|
||||
V8 = _mm256_xor_si256( S0, _mm256_set_epi64x( CB0, CB0, CB0, CB0 ) ); \
|
||||
V9 = _mm256_xor_si256( S1, _mm256_set_epi64x( CB1, CB1, CB1, CB1 ) ); \
|
||||
VA = _mm256_xor_si256( S2, _mm256_set_epi64x( CB2, CB2, CB2, CB2 ) ); \
|
||||
VB = _mm256_xor_si256( S3, _mm256_set_epi64x( CB3, CB3, CB3, CB3 ) ); \
|
||||
VC = _mm256_xor_si256( _mm256_set_epi64x( T0, T0, T0, T0 ), \
|
||||
_mm256_set_epi64x( CB4, CB4, CB4, CB4 ) ); \
|
||||
VD = _mm256_xor_si256( _mm256_set_epi64x( T0, T0, T0, T0 ), \
|
||||
_mm256_set_epi64x( CB5, CB5, CB5, CB5 ) ); \
|
||||
VE = _mm256_xor_si256( _mm256_set_epi64x( T1, T1, T1, T1 ), \
|
||||
_mm256_set_epi64x( CB6, CB6, CB6, CB6 ) ); \
|
||||
VF = _mm256_xor_si256( _mm256_set_epi64x( T1, T1, T1, T1 ), \
|
||||
_mm256_set_epi64x( CB7, CB7, CB7, CB7 ) ); \
|
||||
M[0x0] = mm256_bswap_64( *(buf+0) ); \
|
||||
M[0x1] = mm256_bswap_64( *(buf+1) ); \
|
||||
M[0x2] = mm256_bswap_64( *(buf+2) ); \
|
||||
@@ -464,80 +464,76 @@ static const sph_u64 CB[16] = {
|
||||
|
||||
//current impl
|
||||
|
||||
#define COMPRESS64_4WAY do { \
|
||||
__m256i M0, M1, M2, M3, M4, M5, M6, M7; \
|
||||
__m256i M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||
__m256i V0, V1, V2, V3, V4, V5, V6, V7; \
|
||||
__m256i V8, V9, VA, VB, VC, VD, VE, VF; \
|
||||
V0 = H0; \
|
||||
V1 = H1; \
|
||||
V2 = H2; \
|
||||
V3 = H3; \
|
||||
V4 = H4; \
|
||||
V5 = H5; \
|
||||
V6 = H6; \
|
||||
V7 = H7; \
|
||||
V8 = _mm256_xor_si256( S0, _mm256_set_epi64x( CB0, CB0, CB0, CB0 ) ); \
|
||||
V9 = _mm256_xor_si256( S1, _mm256_set_epi64x( CB1, CB1, CB1, CB1 ) ); \
|
||||
VA = _mm256_xor_si256( S2, _mm256_set_epi64x( CB2, CB2, CB2, CB2 ) ); \
|
||||
VB = _mm256_xor_si256( S3, _mm256_set_epi64x( CB3, CB3, CB3, CB3 ) ); \
|
||||
VC = _mm256_xor_si256( _mm256_set_epi64x( T0, T0, T0, T0 ), \
|
||||
_mm256_set_epi64x( CB4, CB4, CB4, CB4 ) ); \
|
||||
VD = _mm256_xor_si256( _mm256_set_epi64x( T0, T0, T0, T0 ), \
|
||||
_mm256_set_epi64x( CB5, CB5, CB5, CB5 ) ); \
|
||||
VE = _mm256_xor_si256( _mm256_set_epi64x( T1, T1, T1, T1 ), \
|
||||
_mm256_set_epi64x( CB6, CB6, CB6, CB6 ) ); \
|
||||
VF = _mm256_xor_si256( _mm256_set_epi64x( T1, T1, T1, T1 ), \
|
||||
_mm256_set_epi64x( CB7, CB7, CB7, CB7 ) ); \
|
||||
M0 = mm256_bswap_64( *(buf + 0) ); \
|
||||
M1 = mm256_bswap_64( *(buf + 1) ); \
|
||||
M2 = mm256_bswap_64( *(buf + 2) ); \
|
||||
M3 = mm256_bswap_64( *(buf + 3) ); \
|
||||
M4 = mm256_bswap_64( *(buf + 4) ); \
|
||||
M5 = mm256_bswap_64( *(buf + 5) ); \
|
||||
M6 = mm256_bswap_64( *(buf + 6) ); \
|
||||
M7 = mm256_bswap_64( *(buf + 7) ); \
|
||||
M8 = mm256_bswap_64( *(buf + 8) ); \
|
||||
M9 = mm256_bswap_64( *(buf + 9) ); \
|
||||
MA = mm256_bswap_64( *(buf + 10) ); \
|
||||
MB = mm256_bswap_64( *(buf + 11) ); \
|
||||
MC = mm256_bswap_64( *(buf + 12) ); \
|
||||
MD = mm256_bswap_64( *(buf + 13) ); \
|
||||
ME = mm256_bswap_64( *(buf + 14) ); \
|
||||
MF = mm256_bswap_64( *(buf + 15) ); \
|
||||
ROUND_B_4WAY(0); \
|
||||
ROUND_B_4WAY(1); \
|
||||
ROUND_B_4WAY(2); \
|
||||
ROUND_B_4WAY(3); \
|
||||
ROUND_B_4WAY(4); \
|
||||
ROUND_B_4WAY(5); \
|
||||
ROUND_B_4WAY(6); \
|
||||
ROUND_B_4WAY(7); \
|
||||
ROUND_B_4WAY(8); \
|
||||
ROUND_B_4WAY(9); \
|
||||
ROUND_B_4WAY(0); \
|
||||
ROUND_B_4WAY(1); \
|
||||
ROUND_B_4WAY(2); \
|
||||
ROUND_B_4WAY(3); \
|
||||
ROUND_B_4WAY(4); \
|
||||
ROUND_B_4WAY(5); \
|
||||
H0 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S0, V0 ), V8 ), H0 ); \
|
||||
H1 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S1, V1 ), V9 ), H1 ); \
|
||||
H2 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S2, V2 ), VA ), H2 ); \
|
||||
H3 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S3, V3 ), VB ), H3 ); \
|
||||
H4 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S0, V4 ), VC ), H4 ); \
|
||||
H5 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S1, V5 ), VD ), H5 ); \
|
||||
H6 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S2, V6 ), VE ), H6 ); \
|
||||
H7 = _mm256_xor_si256( _mm256_xor_si256( \
|
||||
_mm256_xor_si256( S3, V7 ), VF ), H7 ); \
|
||||
} while (0)
|
||||
#define COMPRESS64_4WAY do \
|
||||
{ \
|
||||
__m256i M0, M1, M2, M3, M4, M5, M6, M7; \
|
||||
__m256i M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||
__m256i V0, V1, V2, V3, V4, V5, V6, V7; \
|
||||
__m256i V8, V9, VA, VB, VC, VD, VE, VF; \
|
||||
__m256i shuf_bswap64; \
|
||||
V0 = H0; \
|
||||
V1 = H1; \
|
||||
V2 = H2; \
|
||||
V3 = H3; \
|
||||
V4 = H4; \
|
||||
V5 = H5; \
|
||||
V6 = H6; \
|
||||
V7 = H7; \
|
||||
V8 = _mm256_xor_si256( S0, _mm256_set1_epi64x( CB0 ) ); \
|
||||
V9 = _mm256_xor_si256( S1, _mm256_set1_epi64x( CB1 ) ); \
|
||||
VA = _mm256_xor_si256( S2, _mm256_set1_epi64x( CB2 ) ); \
|
||||
VB = _mm256_xor_si256( S3, _mm256_set1_epi64x( CB3 ) ); \
|
||||
VC = _mm256_xor_si256( _mm256_set1_epi64x( T0 ), \
|
||||
_mm256_set1_epi64x( CB4 ) ); \
|
||||
VD = _mm256_xor_si256( _mm256_set1_epi64x( T0 ), \
|
||||
_mm256_set1_epi64x( CB5 ) ); \
|
||||
VE = _mm256_xor_si256( _mm256_set1_epi64x( T1 ), \
|
||||
_mm256_set1_epi64x( CB6 ) ); \
|
||||
VF = _mm256_xor_si256( _mm256_set1_epi64x( T1 ), \
|
||||
_mm256_set1_epi64x( CB7 ) ); \
|
||||
shuf_bswap64 = _mm256_set_epi64x( 0x08090a0b0c0d0e0f, 0x0001020304050607, \
|
||||
0x08090a0b0c0d0e0f, 0x0001020304050607 ); \
|
||||
M0 = _mm256_shuffle_epi8( *(buf+ 0), shuf_bswap64 ); \
|
||||
M1 = _mm256_shuffle_epi8( *(buf+ 1), shuf_bswap64 ); \
|
||||
M2 = _mm256_shuffle_epi8( *(buf+ 2), shuf_bswap64 ); \
|
||||
M3 = _mm256_shuffle_epi8( *(buf+ 3), shuf_bswap64 ); \
|
||||
M4 = _mm256_shuffle_epi8( *(buf+ 4), shuf_bswap64 ); \
|
||||
M5 = _mm256_shuffle_epi8( *(buf+ 5), shuf_bswap64 ); \
|
||||
M6 = _mm256_shuffle_epi8( *(buf+ 6), shuf_bswap64 ); \
|
||||
M7 = _mm256_shuffle_epi8( *(buf+ 7), shuf_bswap64 ); \
|
||||
M8 = _mm256_shuffle_epi8( *(buf+ 8), shuf_bswap64 ); \
|
||||
M9 = _mm256_shuffle_epi8( *(buf+ 9), shuf_bswap64 ); \
|
||||
MA = _mm256_shuffle_epi8( *(buf+10), shuf_bswap64 ); \
|
||||
MB = _mm256_shuffle_epi8( *(buf+11), shuf_bswap64 ); \
|
||||
MC = _mm256_shuffle_epi8( *(buf+12), shuf_bswap64 ); \
|
||||
MD = _mm256_shuffle_epi8( *(buf+13), shuf_bswap64 ); \
|
||||
ME = _mm256_shuffle_epi8( *(buf+14), shuf_bswap64 ); \
|
||||
MF = _mm256_shuffle_epi8( *(buf+15), shuf_bswap64 ); \
|
||||
ROUND_B_4WAY(0); \
|
||||
ROUND_B_4WAY(1); \
|
||||
ROUND_B_4WAY(2); \
|
||||
ROUND_B_4WAY(3); \
|
||||
ROUND_B_4WAY(4); \
|
||||
ROUND_B_4WAY(5); \
|
||||
ROUND_B_4WAY(6); \
|
||||
ROUND_B_4WAY(7); \
|
||||
ROUND_B_4WAY(8); \
|
||||
ROUND_B_4WAY(9); \
|
||||
ROUND_B_4WAY(0); \
|
||||
ROUND_B_4WAY(1); \
|
||||
ROUND_B_4WAY(2); \
|
||||
ROUND_B_4WAY(3); \
|
||||
ROUND_B_4WAY(4); \
|
||||
ROUND_B_4WAY(5); \
|
||||
H0 = mm256_xor4( V8, V0, S0, H0 ); \
|
||||
H1 = mm256_xor4( V9, V1, S1, H1 ); \
|
||||
H2 = mm256_xor4( VA, V2, S2, H2 ); \
|
||||
H3 = mm256_xor4( VB, V3, S3, H3 ); \
|
||||
H4 = mm256_xor4( VC, V4, S0, H4 ); \
|
||||
H5 = mm256_xor4( VD, V5, S1, H5 ); \
|
||||
H6 = mm256_xor4( VE, V6, S2, H6 ); \
|
||||
H7 = mm256_xor4( VF, V7, S3, H7 ); \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
@@ -547,13 +543,23 @@ static void
|
||||
blake64_4way_init( blake_4way_big_context *sc, const sph_u64 *iv,
|
||||
const sph_u64 *salt )
|
||||
{
|
||||
int i;
|
||||
for ( i = 0; i < 8; i++ )
|
||||
sc->H[i] = _mm256_set1_epi64x( iv[i] );
|
||||
for ( i = 0; i < 4; i++ )
|
||||
sc->S[i] = _mm256_set1_epi64x( salt[i] );
|
||||
sc->T0 = sc->T1 = 0;
|
||||
sc->ptr = 0;
|
||||
__m256i zero = m256_zero;
|
||||
casti_m256i( sc->H, 0 ) = _mm256_set1_epi64x( iv[0] );
|
||||
casti_m256i( sc->H, 1 ) = _mm256_set1_epi64x( iv[1] );
|
||||
casti_m256i( sc->H, 2 ) = _mm256_set1_epi64x( iv[2] );
|
||||
casti_m256i( sc->H, 3 ) = _mm256_set1_epi64x( iv[3] );
|
||||
casti_m256i( sc->H, 4 ) = _mm256_set1_epi64x( iv[4] );
|
||||
casti_m256i( sc->H, 5 ) = _mm256_set1_epi64x( iv[5] );
|
||||
casti_m256i( sc->H, 6 ) = _mm256_set1_epi64x( iv[6] );
|
||||
casti_m256i( sc->H, 7 ) = _mm256_set1_epi64x( iv[7] );
|
||||
|
||||
casti_m256i( sc->S, 0 ) = zero;
|
||||
casti_m256i( sc->S, 1 ) = zero;
|
||||
casti_m256i( sc->S, 2 ) = zero;
|
||||
casti_m256i( sc->S, 3 ) = zero;
|
||||
|
||||
sc->T0 = sc->T1 = 0;
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -604,15 +610,11 @@ static void
|
||||
blake64_4way_close( blake_4way_big_context *sc,
|
||||
unsigned ub, unsigned n, void *dst, size_t out_size_w64)
|
||||
{
|
||||
// union {
|
||||
__m256i buf[16];
|
||||
// sph_u64 dummy;
|
||||
// } u;
|
||||
size_t ptr, k;
|
||||
__m256i buf[16];
|
||||
size_t ptr;
|
||||
unsigned bit_len;
|
||||
uint64_t z, zz;
|
||||
sph_u64 th, tl;
|
||||
__m256i *out;
|
||||
|
||||
ptr = sc->ptr;
|
||||
bit_len = ((unsigned)ptr << 3);
|
||||
@@ -665,9 +667,7 @@ blake64_4way_close( blake_4way_big_context *sc,
|
||||
|
||||
blake64_4way( sc, buf, 128 );
|
||||
}
|
||||
out = (__m256i*)dst;
|
||||
for ( k = 0; k < out_size_w64; k++ )
|
||||
out[k] = mm256_bswap_64( sc->H[k] );
|
||||
mm256_block_bswap_64( (__m256i*)dst, sc->H );
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -17,11 +17,11 @@ void blakecoin_4way_hash(void *state, const void *input)
|
||||
blake256r8_4way( &ctx, input + (64<<2), 16 );
|
||||
blake256r8_4way_close( &ctx, vhash );
|
||||
|
||||
mm128_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blakecoin_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
@@ -29,41 +29,34 @@ int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
if ( opt_benchmark )
|
||||
HTarget = 0x7f;
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
blake256r8_4way_init( &blakecoin_4w_ctx );
|
||||
blake256r8_4way( &blakecoin_4w_ctx, vdata, 64 );
|
||||
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
pdata[19] = n;
|
||||
blakecoin_4way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget )
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -81,13 +74,12 @@ void blakecoin_8way_hash( void *state, const void *input )
|
||||
blake256r8_8way( &ctx, input + (64<<3), 16 );
|
||||
blake256r8_8way_close( &ctx, vhash );
|
||||
|
||||
mm256_deinterleave_8x32( state, state+ 32, state+ 64, state+ 96,
|
||||
state+128, state+160, state+192, state+224,
|
||||
vhash, 256 );
|
||||
dintrlv_8x32( state, state+ 32, state+ 64, state+ 96, state+128,
|
||||
state+160, state+192, state+224, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_blakecoin_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
@@ -95,46 +87,34 @@ int scanhash_blakecoin_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
uint32_t _ALIGN(32) edata[20];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
uint32_t *noncep = vdata + 152; // 19*8
|
||||
int num_found = 0;
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
if ( opt_benchmark )
|
||||
HTarget = 0x7f;
|
||||
|
||||
// we need big endian data...
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm256_interleave_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 640 );
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
blake256r8_8way_init( &blakecoin_8w_ctx );
|
||||
blake256r8_8way( &blakecoin_8w_ctx, vdata, 64 );
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep +1, n+1 );
|
||||
be32enc( noncep +2, n+2 );
|
||||
be32enc( noncep +3, n+3 );
|
||||
be32enc( noncep +4, n+4 );
|
||||
be32enc( noncep +5, n+5 );
|
||||
be32enc( noncep +6, n+6 );
|
||||
be32enc( noncep +7, n+7 );
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
n+3, n+2, n+1, n ) );
|
||||
pdata[19] = n;
|
||||
blakecoin_8way_hash( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget )
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 8;
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -13,18 +13,18 @@
|
||||
|
||||
#if defined (BLAKECOIN_8WAY)
|
||||
void blakecoin_8way_hash(void *state, const void *input);
|
||||
int scanhash_blakecoin_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_blakecoin_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
#if defined (BLAKECOIN_4WAY)
|
||||
void blakecoin_4way_hash(void *state, const void *input);
|
||||
int scanhash_blakecoin_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_blakecoin_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
void blakecoinhash( void *state, const void *input );
|
||||
int scanhash_blakecoin( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_blakecoin( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
@@ -39,13 +39,14 @@ void blakecoinhash( void *state, const void *input )
|
||||
memcpy( state, hash, 32 );
|
||||
}
|
||||
|
||||
int scanhash_blakecoin( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_blakecoin( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t HTarget = ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint32_t _ALIGN(32) hash64[8];
|
||||
uint32_t _ALIGN(32) endiandata[20];
|
||||
|
@@ -23,11 +23,11 @@ void decred_hash_4way( void *state, const void *input )
|
||||
memcpy( &ctx, &blake_mid, sizeof(blake_mid) );
|
||||
blake256_4way( &ctx, tail, tail_len );
|
||||
blake256_4way_close( &ctx, vhash );
|
||||
mm128_deinterleave_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
dintrlv_4x32( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
int scanhash_decred_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[48*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
@@ -37,14 +37,13 @@ int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[DECRED_NONCE_INDEX];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t HTarget = opt_benchmark ? 0x7f : ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
// copy to buffer guaranteed to be aligned.
|
||||
memcpy( edata, pdata, 180 );
|
||||
|
||||
// use the old way until new way updated for size.
|
||||
mm128_interleave_4x32x( vdata, edata, edata, edata, edata, 180*8 );
|
||||
mm128_intrlv_4x32x( vdata, edata, edata, edata, edata, 180*8 );
|
||||
|
||||
blake256_4way_init( &blake_mid );
|
||||
blake256_4way( &blake_mid, vdata, DECRED_MIDSTATE_LEN );
|
||||
@@ -59,18 +58,17 @@ int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
decred_hash_4way( hash, vdata );
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= HTarget && fulltest( hash+(i<<3), ptarget ) )
|
||||
if ( (hash+(i<<3))[7] <= HTarget )
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[DECRED_NONCE_INDEX] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce)
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( (n < max_nonce) && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -14,7 +14,7 @@
|
||||
|
||||
#if defined (__AVX2__)
|
||||
//void blakehash_84way(void *state, const void *input);
|
||||
//int scanhash_blake_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
//int scanhash_blake_8way( struct work *work, uint32_t max_nonce,
|
||||
// uint64_t *hashes_done );
|
||||
#endif
|
||||
|
||||
@@ -24,13 +24,13 @@
|
||||
|
||||
#if defined (DECRED_4WAY)
|
||||
void decred_hash_4way(void *state, const void *input);
|
||||
int scanhash_decred_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_decred_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
void decred_hash( void *state, const void *input );
|
||||
int scanhash_decred( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_decred( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -52,12 +52,14 @@ void decred_hash_simple(void *state, const void *input)
|
||||
sph_blake256_close(&ctx, state);
|
||||
}
|
||||
|
||||
int scanhash_decred(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_decred( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) endiandata[48];
|
||||
uint32_t _ALIGN(64) hash32[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
// #define DCR_NONCE_OFT32 35
|
||||
|
||||
|
@@ -10,13 +10,8 @@
|
||||
#include "blake-hash-4way.h"
|
||||
#include "sph_blake.h"
|
||||
|
||||
//#define DEBUG_ALGO
|
||||
|
||||
extern void pentablakehash_4way( void *output, const void *input )
|
||||
{
|
||||
unsigned char _ALIGN(32) hash[128];
|
||||
// // same as uint32_t hashA[16], hashB[16];
|
||||
// #define hashB hash+64
|
||||
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
uint64_t hash1[8] __attribute__ ((aligned (64)));
|
||||
@@ -30,21 +25,6 @@ extern void pentablakehash_4way( void *output, const void *input )
|
||||
blake512_4way( &ctx, input, 80 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
uint64_t sin0[10], sin1[10], sin2[10], sin3[10];
|
||||
mm256_deinterleave_4x64( sin0, sin1, sin2, sin3, input, 640 );
|
||||
sph_blake512_context ctx2_blake;
|
||||
sph_blake512_init(&ctx2_blake);
|
||||
sph_blake512(&ctx2_blake, sin0, 80);
|
||||
sph_blake512_close(&ctx2_blake, (void*) hash);
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
uint64_t* hash64 = (uint64_t*)hash;
|
||||
for( int i = 0; i < 8; i++ )
|
||||
{
|
||||
if ( hash0[i] != hash64[i] )
|
||||
printf("hash mismatch %u\n",i);
|
||||
}
|
||||
|
||||
blake512_4way_init( &ctx );
|
||||
blake512_4way( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
@@ -61,46 +41,14 @@ for( int i = 0; i < 8; i++ )
|
||||
blake512_4way( &ctx, vhash, 64 );
|
||||
blake512_4way_close( &ctx, vhash );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
memcpy( output, hash0, 32 );
|
||||
memcpy( output+32, hash1, 32 );
|
||||
memcpy( output+64, hash2, 32 );
|
||||
memcpy( output+96, hash3, 32 );
|
||||
|
||||
/*
|
||||
uint64_t sin0[10] __attribute__ ((aligned (64)));
|
||||
uint64_t sin1[10] __attribute__ ((aligned (64)));
|
||||
uint64_t sin2[10] __attribute__ ((aligned (64)));
|
||||
uint64_t sin3[10] __attribute__ ((aligned (64)));
|
||||
|
||||
sph_blake512_context ctx_blake;
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, input, 80);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
sph_blake512_init(&ctx_blake);
|
||||
sph_blake512(&ctx_blake, hash, 64);
|
||||
sph_blake512_close(&ctx_blake, hash);
|
||||
|
||||
memcpy(output, hash, 32);
|
||||
*/
|
||||
}
|
||||
|
||||
int scanhash_pentablake_4way( int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done )
|
||||
int scanhash_pentablake_4way( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
@@ -110,9 +58,8 @@ int scanhash_pentablake_4way( int thr_id, struct work *work,
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
// uint32_t _ALIGN(32) hash64[8];
|
||||
// uint32_t _ALIGN(32) endiandata[32];
|
||||
@@ -138,7 +85,7 @@ int scanhash_pentablake_4way( int thr_id, struct work *work,
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
intrlv_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
for ( int m=0; m < 6; m++ )
|
||||
{
|
||||
@@ -155,10 +102,10 @@ int scanhash_pentablake_4way( int thr_id, struct work *work,
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( !( (hash+(i<<3))[7] & mask )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
pdata[19] = n + i;
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
|
||||
|
@@ -10,12 +10,12 @@
|
||||
|
||||
#if defined(PENTABLAKE_4WAY)
|
||||
void pentablakehash_4way( void *state, const void *input );
|
||||
int scanhash_pentablake_4way( int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done );
|
||||
int scanhash_pentablake_4way( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
void pentablakehash( void *state, const void *input );
|
||||
int scanhash_pentablake( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_pentablake( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
|
@@ -40,8 +40,8 @@ extern void pentablakehash(void *output, const void *input)
|
||||
|
||||
}
|
||||
|
||||
int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
int scanhash_pentablake( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
@@ -49,6 +49,7 @@ int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint32_t _ALIGN(32) hash64[8];
|
||||
uint32_t _ALIGN(32) endiandata[32];
|
||||
|
@@ -113,50 +113,27 @@ static const uint32_t IV256[] = {
|
||||
|
||||
|
||||
#define expand1s( qt, M, H, i ) \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( ss1( qt[ (i)-16 ] ), \
|
||||
ss2( qt[ (i)-15 ] ) ), \
|
||||
_mm_add_epi32( ss3( qt[ (i)-14 ] ), \
|
||||
ss0( qt[ (i)-13 ] ) ) ), \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( ss1( qt[ (i)-12 ] ), \
|
||||
ss2( qt[ (i)-11 ] ) ), \
|
||||
_mm_add_epi32( ss3( qt[ (i)-10 ] ), \
|
||||
ss0( qt[ (i)- 9 ] ) ) ) ), \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( ss1( qt[ (i)- 8 ] ), \
|
||||
ss2( qt[ (i)- 7 ] ) ), \
|
||||
_mm_add_epi32( ss3( qt[ (i)- 6 ] ), \
|
||||
ss0( qt[ (i)- 5 ] ) ) ), \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( ss1( qt[ (i)- 4 ] ), \
|
||||
ss2( qt[ (i)- 3 ] ) ), \
|
||||
_mm_add_epi32( ss3( qt[ (i)- 2 ] ), \
|
||||
ss0( qt[ (i)- 1 ] ) ) ) ) ), \
|
||||
_mm_add_epi32( mm128_add4_32( \
|
||||
mm128_add4_32( ss1( qt[ (i)-16 ] ), ss2( qt[ (i)-15 ] ), \
|
||||
ss3( qt[ (i)-14 ] ), ss0( qt[ (i)-13 ] ) ), \
|
||||
mm128_add4_32( ss1( qt[ (i)-12 ] ), ss2( qt[ (i)-11 ] ), \
|
||||
ss3( qt[ (i)-10 ] ), ss0( qt[ (i)- 9 ] ) ), \
|
||||
mm128_add4_32( ss1( qt[ (i)- 8 ] ), ss2( qt[ (i)- 7 ] ), \
|
||||
ss3( qt[ (i)- 6 ] ), ss0( qt[ (i)- 5 ] ) ), \
|
||||
mm128_add4_32( ss1( qt[ (i)- 4 ] ), ss2( qt[ (i)- 3 ] ), \
|
||||
ss3( qt[ (i)- 2 ] ), ss0( qt[ (i)- 1 ] ) ) ), \
|
||||
add_elt_s( M, H, (i)-16 ) )
|
||||
|
||||
#define expand2s( qt, M, H, i) \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( qt[ (i)-16 ], rs1( qt[ (i)-15 ] ) ), \
|
||||
_mm_add_epi32( qt[ (i)-14 ], rs2( qt[ (i)-13 ] ) ) ), \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( qt[ (i)-12 ], rs3( qt[ (i)-11 ] ) ), \
|
||||
_mm_add_epi32( qt[ (i)-10 ], rs4( qt[ (i)- 9 ] ) ) ) ), \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( qt[ (i)- 8 ], rs5( qt[ (i)- 7 ] ) ), \
|
||||
_mm_add_epi32( qt[ (i)- 6 ], rs6( qt[ (i)- 5 ] ) ) ), \
|
||||
_mm_add_epi32( \
|
||||
_mm_add_epi32( qt[ (i)- 4 ], rs7( qt[ (i)- 3 ] ) ), \
|
||||
_mm_add_epi32( ss4( qt[ (i)- 2 ] ), \
|
||||
ss5( qt[ (i)- 1 ] ) ) ) ) ), \
|
||||
_mm_add_epi32( mm128_add4_32( \
|
||||
mm128_add4_32( qt[ (i)-16 ], rs1( qt[ (i)-15 ] ), \
|
||||
qt[ (i)-14 ], rs2( qt[ (i)-13 ] ) ), \
|
||||
mm128_add4_32( qt[ (i)-12 ], rs3( qt[ (i)-11 ] ), \
|
||||
qt[ (i)-10 ], rs4( qt[ (i)- 9 ] ) ), \
|
||||
mm128_add4_32( qt[ (i)- 8 ], rs5( qt[ (i)- 7 ] ), \
|
||||
qt[ (i)- 6 ], rs6( qt[ (i)- 5 ] ) ), \
|
||||
mm128_add4_32( qt[ (i)- 4 ], rs7( qt[ (i)- 3 ] ), \
|
||||
ss4( qt[ (i)- 2 ] ), ss5( qt[ (i)- 1 ] ) ) ), \
|
||||
add_elt_s( M, H, (i)-16 ) )
|
||||
|
||||
#define Ws0 \
|
||||
@@ -357,17 +334,11 @@ void compress_small( const __m128i *M, const __m128i H[16], __m128i dH[16] )
|
||||
qt[30] = expand2s( qt, M, H, 30 );
|
||||
qt[31] = expand2s( qt, M, H, 31 );
|
||||
|
||||
xl = _mm_xor_si128(
|
||||
_mm_xor_si128( _mm_xor_si128( qt[16], qt[17] ),
|
||||
_mm_xor_si128( qt[18], qt[19] ) ),
|
||||
_mm_xor_si128( _mm_xor_si128( qt[20], qt[21] ),
|
||||
_mm_xor_si128( qt[22], qt[23] ) ) );
|
||||
xh = _mm_xor_si128( xl,
|
||||
_mm_xor_si128(
|
||||
_mm_xor_si128( _mm_xor_si128( qt[24], qt[25] ),
|
||||
_mm_xor_si128( qt[26], qt[27] ) ),
|
||||
_mm_xor_si128( _mm_xor_si128( qt[28], qt[29] ),
|
||||
_mm_xor_si128( qt[30], qt[31] ) )));
|
||||
xl = _mm_xor_si128( mm128_xor4( qt[16], qt[17], qt[18], qt[19] ),
|
||||
mm128_xor4( qt[20], qt[21], qt[22], qt[23] ) );
|
||||
xh = _mm_xor_si128( xl, _mm_xor_si128(
|
||||
mm128_xor4( qt[24], qt[25], qt[26], qt[27] ),
|
||||
mm128_xor4( qt[28], qt[29], qt[30], qt[31] ) ) );
|
||||
|
||||
dH[ 0] = _mm_add_epi32(
|
||||
_mm_xor_si128( M[0],
|
||||
@@ -695,22 +666,15 @@ bmw256_4way_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
|
||||
#define expand2s8( qt, M, H, i) \
|
||||
_mm256_add_epi32( \
|
||||
_mm256_add_epi32( \
|
||||
_mm256_add_epi32( \
|
||||
_mm256_add_epi32( \
|
||||
_mm256_add_epi32( qt[ (i)-16 ], r8s1( qt[ (i)-15 ] ) ), \
|
||||
_mm256_add_epi32( qt[ (i)-14 ], r8s2( qt[ (i)-13 ] ) ) ), \
|
||||
_mm256_add_epi32( \
|
||||
_mm256_add_epi32( qt[ (i)-12 ], r8s3( qt[ (i)-11 ] ) ), \
|
||||
_mm256_add_epi32( qt[ (i)-10 ], r8s4( qt[ (i)- 9 ] ) ) ) ), \
|
||||
_mm256_add_epi32( \
|
||||
_mm256_add_epi32( \
|
||||
_mm256_add_epi32( qt[ (i)- 8 ], r8s5( qt[ (i)- 7 ] ) ), \
|
||||
_mm256_add_epi32( qt[ (i)- 6 ], r8s6( qt[ (i)- 5 ] ) ) ), \
|
||||
_mm256_add_epi32( \
|
||||
_mm256_add_epi32( qt[ (i)- 4 ], r8s7( qt[ (i)- 3 ] ) ), \
|
||||
_mm256_add_epi32( s8s4( qt[ (i)- 2 ] ), \
|
||||
s8s5( qt[ (i)- 1 ] ) ) ) ) ), \
|
||||
mm256_add4_32( \
|
||||
mm256_add4_32( qt[ (i)-16 ], r8s1( qt[ (i)-15 ] ), \
|
||||
qt[ (i)-14 ], r8s2( qt[ (i)-13 ] ) ), \
|
||||
mm256_add4_32( qt[ (i)-12 ], r8s3( qt[ (i)-11 ] ), \
|
||||
qt[ (i)-10 ], r8s4( qt[ (i)- 9 ] ) ), \
|
||||
mm256_add4_32( qt[ (i)- 8 ], r8s5( qt[ (i)- 7 ] ), \
|
||||
qt[ (i)- 6 ], r8s6( qt[ (i)- 5 ] ) ), \
|
||||
mm256_add4_32( qt[ (i)- 4 ], r8s7( qt[ (i)- 3 ] ), \
|
||||
s8s4( qt[ (i)- 2 ] ), s8s5( qt[ (i)- 1 ] ) ) ), \
|
||||
add_elt_s8( M, H, (i)-16 ) )
|
||||
|
||||
|
||||
@@ -913,16 +877,11 @@ void compress_small_8way( const __m256i *M, const __m256i H[16],
|
||||
qt[31] = expand2s8( qt, M, H, 31 );
|
||||
|
||||
xl = _mm256_xor_si256(
|
||||
_mm256_xor_si256( _mm256_xor_si256( qt[16], qt[17] ),
|
||||
_mm256_xor_si256( qt[18], qt[19] ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( qt[20], qt[21] ),
|
||||
_mm256_xor_si256( qt[22], qt[23] ) ) );
|
||||
xh = _mm256_xor_si256( xl,
|
||||
_mm256_xor_si256(
|
||||
_mm256_xor_si256( _mm256_xor_si256( qt[24], qt[25] ),
|
||||
_mm256_xor_si256( qt[26], qt[27] ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( qt[28], qt[29] ),
|
||||
_mm256_xor_si256( qt[30], qt[31] ) )));
|
||||
mm256_xor4( qt[16], qt[17], qt[18], qt[19] ),
|
||||
mm256_xor4( qt[20], qt[21], qt[22], qt[23] ) );
|
||||
xh = _mm256_xor_si256( xl, _mm256_xor_si256(
|
||||
mm256_xor4( qt[24], qt[25], qt[26], qt[27] ),
|
||||
mm256_xor4( qt[28], qt[29], qt[30], qt[31] ) ) );
|
||||
|
||||
dH[ 0] = _mm256_add_epi32(
|
||||
_mm256_xor_si256( M[0],
|
||||
|
@@ -19,14 +19,15 @@ void bmwhash(void *output, const void *input)
|
||||
*/
|
||||
}
|
||||
|
||||
int scanhash_bmw(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_bmw( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
uint32_t _ALIGN(64) hash64[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
int thr_id = mythr->id;
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
@@ -569,28 +569,20 @@ void bmw512_2way_close( bmw_2way_big_context *ctx, void *dst )
|
||||
|
||||
|
||||
#define sb0(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( _mm256_srli_epi64( (x), 1), \
|
||||
_mm256_slli_epi64( (x), 3) ), \
|
||||
_mm256_xor_si256( mm256_rol_64( (x), 4), \
|
||||
mm256_rol_64( (x), 37) ) )
|
||||
mm256_xor4( _mm256_srli_epi64( (x), 1), _mm256_slli_epi64( (x), 3), \
|
||||
mm256_rol_64( (x), 4), mm256_rol_64( (x),37) )
|
||||
|
||||
#define sb1(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( _mm256_srli_epi64( (x), 1), \
|
||||
_mm256_slli_epi64( (x), 2) ), \
|
||||
_mm256_xor_si256( mm256_rol_64( (x), 13), \
|
||||
mm256_rol_64( (x), 43) ) )
|
||||
mm256_xor4( _mm256_srli_epi64( (x), 1), _mm256_slli_epi64( (x), 2), \
|
||||
mm256_rol_64( (x),13), mm256_rol_64( (x),43) )
|
||||
|
||||
#define sb2(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( _mm256_srli_epi64( (x), 2), \
|
||||
_mm256_slli_epi64( (x), 1) ), \
|
||||
_mm256_xor_si256( mm256_rol_64( (x), 19), \
|
||||
mm256_rol_64( (x), 53) ) )
|
||||
mm256_xor4( _mm256_srli_epi64( (x), 2), _mm256_slli_epi64( (x), 1), \
|
||||
mm256_rol_64( (x),19), mm256_rol_64( (x),53) )
|
||||
|
||||
#define sb3(x) \
|
||||
_mm256_xor_si256( _mm256_xor_si256( _mm256_srli_epi64( (x), 2), \
|
||||
_mm256_slli_epi64( (x), 2) ), \
|
||||
_mm256_xor_si256( mm256_rol_64( (x), 28), \
|
||||
mm256_rol_64( (x), 59) ) )
|
||||
mm256_xor4( _mm256_srli_epi64( (x), 2), _mm256_slli_epi64( (x), 2), \
|
||||
mm256_rol_64( (x),28), mm256_rol_64( (x),59) )
|
||||
|
||||
#define sb4(x) \
|
||||
_mm256_xor_si256( (x), _mm256_srli_epi64( (x), 1 ) )
|
||||
@@ -618,55 +610,32 @@ void bmw512_2way_close( bmw_2way_big_context *ctx, void *dst )
|
||||
rol_off_64( M, j, 10 ) ), \
|
||||
_mm256_set1_epi64x( ( (j) + 16 ) * 0x0555555555555555ULL ) ), \
|
||||
H[ ( (j)+7 ) & 0xF ] )
|
||||
|
||||
|
||||
|
||||
#define expand1b( qt, M, H, i ) \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_add_epi64( sb1( qt[ (i)-16 ] ), \
|
||||
sb2( qt[ (i)-15 ] ) ), \
|
||||
_mm256_add_epi64( sb3( qt[ (i)-14 ] ), \
|
||||
sb0( qt[ (i)-13 ] ) ) ), \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_add_epi64( sb1( qt[ (i)-12 ] ), \
|
||||
sb2( qt[ (i)-11 ] ) ), \
|
||||
_mm256_add_epi64( sb3( qt[ (i)-10 ] ), \
|
||||
sb0( qt[ (i)- 9 ] ) ) ) ), \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_add_epi64( sb1( qt[ (i)- 8 ] ), \
|
||||
sb2( qt[ (i)- 7 ] ) ), \
|
||||
_mm256_add_epi64( sb3( qt[ (i)- 6 ] ), \
|
||||
sb0( qt[ (i)- 5 ] ) ) ), \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_add_epi64( sb1( qt[ (i)- 4 ] ), \
|
||||
sb2( qt[ (i)- 3 ] ) ), \
|
||||
_mm256_add_epi64( sb3( qt[ (i)- 2 ] ), \
|
||||
sb0( qt[ (i)- 1 ] ) ) ) ) ), \
|
||||
_mm256_add_epi64( mm256_add4_64( \
|
||||
mm256_add4_64( sb1( qt[ (i)-16 ] ), sb2( qt[ (i)-15 ] ), \
|
||||
sb3( qt[ (i)-14 ] ), sb0( qt[ (i)-13 ] )), \
|
||||
mm256_add4_64( sb1( qt[ (i)-12 ] ), sb2( qt[ (i)-11 ] ), \
|
||||
sb3( qt[ (i)-10 ] ), sb0( qt[ (i)- 9 ] )), \
|
||||
mm256_add4_64( sb1( qt[ (i)- 8 ] ), sb2( qt[ (i)- 7 ] ), \
|
||||
sb3( qt[ (i)- 6 ] ), sb0( qt[ (i)- 5 ] )), \
|
||||
mm256_add4_64( sb1( qt[ (i)- 4 ] ), sb2( qt[ (i)- 3 ] ), \
|
||||
sb3( qt[ (i)- 2 ] ), sb0( qt[ (i)- 1 ] ) ) ), \
|
||||
add_elt_b( M, H, (i)-16 ) )
|
||||
|
||||
#define expand2b( qt, M, H, i) \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_add_epi64( qt[ (i)-16 ], rb1( qt[ (i)-15 ] ) ), \
|
||||
_mm256_add_epi64( qt[ (i)-14 ], rb2( qt[ (i)-13 ] ) ) ), \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_add_epi64( qt[ (i)-12 ], rb3( qt[ (i)-11 ] ) ), \
|
||||
_mm256_add_epi64( qt[ (i)-10 ], rb4( qt[ (i)- 9 ] ) ) ) ), \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_add_epi64( qt[ (i)- 8 ], rb5( qt[ (i)- 7 ] ) ), \
|
||||
_mm256_add_epi64( qt[ (i)- 6 ], rb6( qt[ (i)- 5 ] ) ) ), \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_add_epi64( qt[ (i)- 4 ], rb7( qt[ (i)- 3 ] ) ), \
|
||||
_mm256_add_epi64( sb4( qt[ (i)- 2 ] ), \
|
||||
sb5( qt[ (i)- 1 ] ) ) ) ) ), \
|
||||
_mm256_add_epi64( mm256_add4_64( \
|
||||
mm256_add4_64( qt[ (i)-16 ], rb1( qt[ (i)-15 ] ), \
|
||||
qt[ (i)-14 ], rb2( qt[ (i)-13 ] ) ), \
|
||||
mm256_add4_64( qt[ (i)-12 ], rb3( qt[ (i)-11 ] ), \
|
||||
qt[ (i)-10 ], rb4( qt[ (i)- 9 ] ) ), \
|
||||
mm256_add4_64( qt[ (i)- 8 ], rb5( qt[ (i)- 7 ] ), \
|
||||
qt[ (i)- 6 ], rb6( qt[ (i)- 5 ] ) ), \
|
||||
mm256_add4_64( qt[ (i)- 4 ], rb7( qt[ (i)- 3 ] ), \
|
||||
sb4( qt[ (i)- 2 ] ), sb5( qt[ (i)- 1 ] ) ) ), \
|
||||
add_elt_b( M, H, (i)-16 ) )
|
||||
|
||||
|
||||
#define Wb0 \
|
||||
_mm256_add_epi64( \
|
||||
_mm256_add_epi64( \
|
||||
@@ -864,95 +833,90 @@ void compress_big( const __m256i *M, const __m256i H[16], __m256i dH[16] )
|
||||
qt[30] = expand2b( qt, M, H, 30 );
|
||||
qt[31] = expand2b( qt, M, H, 31 );
|
||||
|
||||
xl = _mm256_xor_si256(
|
||||
_mm256_xor_si256( _mm256_xor_si256( qt[16], qt[17] ),
|
||||
_mm256_xor_si256( qt[18], qt[19] ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( qt[20], qt[21] ),
|
||||
_mm256_xor_si256( qt[22], qt[23] ) ) );
|
||||
xh = _mm256_xor_si256( xl,
|
||||
_mm256_xor_si256(
|
||||
_mm256_xor_si256( _mm256_xor_si256( qt[24], qt[25] ),
|
||||
_mm256_xor_si256( qt[26], qt[27] ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( qt[28], qt[29] ),
|
||||
_mm256_xor_si256( qt[30], qt[31] ) )));
|
||||
xl = _mm256_xor_si256(
|
||||
mm256_xor4( qt[16], qt[17], qt[18], qt[19] ),
|
||||
mm256_xor4( qt[20], qt[21], qt[22], qt[23] ) );
|
||||
xh = _mm256_xor_si256( xl, _mm256_xor_si256(
|
||||
mm256_xor4( qt[24], qt[25], qt[26], qt[27] ),
|
||||
mm256_xor4( qt[28], qt[29], qt[30], qt[31] ) ) );
|
||||
|
||||
dH[ 0] = _mm256_add_epi64(
|
||||
_mm256_xor_si256( M[0],
|
||||
_mm256_xor_si256( _mm256_slli_epi64( xh, 5 ),
|
||||
_mm256_srli_epi64( qt[16], 5 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[24] ), qt[ 0] ));
|
||||
_mm256_xor_si256( M[0],
|
||||
_mm256_xor_si256( _mm256_slli_epi64( xh, 5 ),
|
||||
_mm256_srli_epi64( qt[16], 5 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[24] ), qt[ 0] ) );
|
||||
dH[ 1] = _mm256_add_epi64(
|
||||
_mm256_xor_si256( M[1],
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xh, 7 ),
|
||||
_mm256_slli_epi64( qt[17], 8 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[25] ), qt[ 1] ));
|
||||
_mm256_xor_si256( M[1],
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xh, 7 ),
|
||||
_mm256_slli_epi64( qt[17], 8 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[25] ), qt[ 1] ) );
|
||||
dH[ 2] = _mm256_add_epi64(
|
||||
_mm256_xor_si256( M[2],
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xh, 5 ),
|
||||
_mm256_slli_epi64( qt[18], 5 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[26] ), qt[ 2] ));
|
||||
_mm256_xor_si256( M[2],
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xh, 5 ),
|
||||
_mm256_slli_epi64( qt[18], 5 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[26] ), qt[ 2] ) );
|
||||
dH[ 3] = _mm256_add_epi64(
|
||||
_mm256_xor_si256( M[3],
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xh, 1 ),
|
||||
_mm256_slli_epi64( qt[19], 5 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[27] ), qt[ 3] ));
|
||||
_mm256_xor_si256( M[3],
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xh, 1 ),
|
||||
_mm256_slli_epi64( qt[19], 5 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[27] ), qt[ 3] ) );
|
||||
dH[ 4] = _mm256_add_epi64(
|
||||
_mm256_xor_si256( M[4],
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xh, 3 ),
|
||||
_mm256_slli_epi64( qt[20], 0 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[28] ), qt[ 4] ));
|
||||
_mm256_xor_si256( M[4],
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xh, 3 ),
|
||||
_mm256_slli_epi64( qt[20], 0 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[28] ), qt[ 4] ) );
|
||||
dH[ 5] = _mm256_add_epi64(
|
||||
_mm256_xor_si256( M[5],
|
||||
_mm256_xor_si256( _mm256_slli_epi64( xh, 6 ),
|
||||
_mm256_srli_epi64( qt[21], 6 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[29] ), qt[ 5] ));
|
||||
_mm256_xor_si256( M[5],
|
||||
_mm256_xor_si256( _mm256_slli_epi64( xh, 6 ),
|
||||
_mm256_srli_epi64( qt[21], 6 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[29] ), qt[ 5] ) );
|
||||
dH[ 6] = _mm256_add_epi64(
|
||||
_mm256_xor_si256( M[6],
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xh, 4 ),
|
||||
_mm256_slli_epi64( qt[22], 6 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[30] ), qt[ 6] ));
|
||||
_mm256_xor_si256( M[6],
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xh, 4 ),
|
||||
_mm256_slli_epi64( qt[22], 6 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[30] ), qt[ 6] ) );
|
||||
dH[ 7] = _mm256_add_epi64(
|
||||
_mm256_xor_si256( M[7],
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xh, 11 ),
|
||||
_mm256_slli_epi64( qt[23], 2 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[31] ), qt[ 7] ));
|
||||
_mm256_xor_si256( M[7],
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xh, 11 ),
|
||||
_mm256_slli_epi64( qt[23], 2 ) ) ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xl, qt[31] ), qt[ 7] ) );
|
||||
dH[ 8] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rol_64( dH[4], 9 ),
|
||||
mm256_rol_64( dH[4], 9 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[24] ), M[ 8] )),
|
||||
_mm256_xor_si256( _mm256_slli_epi64( xl, 8 ),
|
||||
_mm256_xor_si256( qt[23], qt[ 8] ) ) );
|
||||
dH[ 9] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rol_64( dH[5], 10 ),
|
||||
mm256_rol_64( dH[5], 10 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[25] ), M[ 9] )),
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xl, 6 ),
|
||||
_mm256_xor_si256( qt[16], qt[ 9] ) ) );
|
||||
dH[10] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rol_64( dH[6], 11 ),
|
||||
mm256_rol_64( dH[6], 11 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[26] ), M[10] )),
|
||||
_mm256_xor_si256( _mm256_slli_epi64( xl, 6 ),
|
||||
_mm256_xor_si256( qt[17], qt[10] ) ) );
|
||||
dH[11] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rol_64( dH[7], 12 ),
|
||||
mm256_rol_64( dH[7], 12 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[27] ), M[11] )),
|
||||
_mm256_xor_si256( _mm256_slli_epi64( xl, 4 ),
|
||||
_mm256_xor_si256( qt[18], qt[11] ) ) );
|
||||
dH[12] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rol_64( dH[0], 13 ),
|
||||
mm256_rol_64( dH[0], 13 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[28] ), M[12] )),
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xl, 3 ),
|
||||
_mm256_xor_si256( qt[19], qt[12] ) ) );
|
||||
dH[13] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rol_64( dH[1], 14 ),
|
||||
mm256_rol_64( dH[1], 14 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[29] ), M[13] )),
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xl, 4 ),
|
||||
_mm256_xor_si256( qt[20], qt[13] ) ) );
|
||||
dH[14] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rol_64( dH[2], 15 ),
|
||||
mm256_rol_64( dH[2], 15 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[30] ), M[14] )),
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xl, 7 ),
|
||||
_mm256_xor_si256( qt[21], qt[14] ) ) );
|
||||
dH[15] = _mm256_add_epi64( _mm256_add_epi64(
|
||||
mm256_rol_64( dH[3], 16 ),
|
||||
mm256_rol_64( dH[3], 16 ),
|
||||
_mm256_xor_si256( _mm256_xor_si256( xh, qt[31] ), M[15] )),
|
||||
_mm256_xor_si256( _mm256_srli_epi64( xl, 2 ),
|
||||
_mm256_xor_si256( qt[22], qt[15] ) ) );
|
||||
|
@@ -242,6 +242,8 @@ void cryptolight_hash(void* output, const void* input, int len) {
|
||||
free(ctx);
|
||||
}
|
||||
|
||||
#if defined(__AES__)
|
||||
|
||||
static void cryptolight_hash_ctx_aes_ni(void* output, const void* input,
|
||||
int len, struct cryptonight_ctx* ctx)
|
||||
{
|
||||
@@ -312,8 +314,10 @@ static void cryptolight_hash_ctx_aes_ni(void* output, const void* input,
|
||||
oaes_free((OAES_CTX **) &ctx->aes_ctx);
|
||||
}
|
||||
|
||||
int scanhash_cryptolight(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
#endif
|
||||
|
||||
int scanhash_cryptolight( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
@@ -322,6 +326,7 @@ int scanhash_cryptolight(int thr_id, struct work *work,
|
||||
const uint32_t first_nonce = n + 1;
|
||||
//const uint32_t Htarg = ptarget[7];
|
||||
uint32_t _ALIGN(32) hash[HASH_SIZE / 4];
|
||||
int thr_id = mythr->id;
|
||||
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
|
||||
|
||||
|
@@ -70,11 +70,12 @@ void cryptonight_hash_suw( void *restrict output, const void *input )
|
||||
|
||||
bool cryptonightV7 = false;
|
||||
|
||||
int scanhash_cryptonight( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_cryptonight( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id;
|
||||
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39);
|
||||
uint32_t n = *nonceptr - 1;
|
||||
|
@@ -40,8 +40,8 @@ void cryptonight_hash_ctx(void* output, const void* input, int len);
|
||||
void keccakf(uint64_t st[25], int rounds);
|
||||
extern void (* const extra_hashes[4])(const void *, size_t, char *);
|
||||
|
||||
int scanhash_cryptonight( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_cryptonight( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void cryptonight_hash_aes( void *restrict output, const void *input, int len );
|
||||
|
||||
|
@@ -43,7 +43,7 @@
|
||||
# if !defined( __MINGW32__ ) && !defined( _AIX )
|
||||
# include <endian.h>
|
||||
# if !defined( __BEOS__ )
|
||||
# include <byteswap.h>
|
||||
//# include <byteswap.h>
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
@@ -56,14 +56,15 @@ void groestlhash( void *output, const void *input )
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_groestl( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_groestl( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
|
@@ -54,8 +54,8 @@ void myriad_hash(void *output, const void *input)
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_myriad(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_myriad( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
@@ -63,6 +63,7 @@ int scanhash_myriad(int thr_id, struct work *work,
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
|
@@ -33,7 +33,7 @@ void myriad_4way_hash( void *output, const void *input )
|
||||
myrgr_4way_ctx_holder ctx;
|
||||
memcpy( &ctx, &myrgr_4way_ctx, sizeof(myrgr_4way_ctx) );
|
||||
|
||||
mm128_deinterleave_4x32( hash0, hash1, hash2, hash3, input, 640 );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, input, 640 );
|
||||
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 640 );
|
||||
memcpy( &ctx.groestl, &myrgr_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
@@ -43,66 +43,52 @@ void myriad_4way_hash( void *output, const void *input )
|
||||
memcpy( &ctx.groestl, &myrgr_4way_ctx.groestl, sizeof(hashState_groestl) );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 640 );
|
||||
|
||||
mm128_interleave_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
sha256_4way( &ctx.sha, vhash, 64 );
|
||||
sha256_4way_close( &ctx.sha, vhash );
|
||||
|
||||
mm128_deinterleave_4x32( output, output+32, output+64, output+96,
|
||||
vhash, 256 );
|
||||
sha256_4way_close( &ctx.sha, output );
|
||||
}
|
||||
|
||||
int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_myriad_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t _ALIGN(64) edata[20];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 76; // 19*4
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
/*
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
*/
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
swab32_array( edata, pdata, 20 );
|
||||
mm128_interleave_4x32( vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+1, n+1 );
|
||||
be32enc( noncep+2, n+2 );
|
||||
be32enc( noncep+3, n+3 );
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3,n+2,n+1,n ) );
|
||||
|
||||
myriad_4way_hash( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( (hash+(i<<3))[7] <= Htarg && fulltest( hash+(i<<3), ptarget ) )
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( hash7[ lane ] <= Htarg )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -12,8 +12,8 @@
|
||||
|
||||
void myriad_4way_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_myriad_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_myriad_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_myrgr_4way_ctx();
|
||||
|
||||
@@ -21,8 +21,8 @@ void init_myrgr_4way_ctx();
|
||||
|
||||
void myriad_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_myriad( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_myriad( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void init_myrgr_ctx();
|
||||
|
||||
|
@@ -531,16 +531,17 @@ static const sph_u32 T512[64][16] = {
|
||||
|
||||
#define INPUT_BIG \
|
||||
do { \
|
||||
const __m256i zero = _mm256_setzero_si256(); \
|
||||
__m256i db = *buf; \
|
||||
const sph_u32 *tp = &T512[0][0]; \
|
||||
m0 = m256_zero; \
|
||||
m1 = m256_zero; \
|
||||
m2 = m256_zero; \
|
||||
m3 = m256_zero; \
|
||||
m4 = m256_zero; \
|
||||
m5 = m256_zero; \
|
||||
m6 = m256_zero; \
|
||||
m7 = m256_zero; \
|
||||
m0 = zero; \
|
||||
m1 = zero; \
|
||||
m2 = zero; \
|
||||
m3 = zero; \
|
||||
m4 = zero; \
|
||||
m5 = zero; \
|
||||
m6 = zero; \
|
||||
m7 = zero; \
|
||||
for ( int u = 0; u < 64; u++ ) \
|
||||
{ \
|
||||
__m256i dm = _mm256_and_si256( db, m256_one_64 ) ; \
|
||||
@@ -913,9 +914,7 @@ void hamsi512_4way( hamsi_4way_big_context *sc, const void *data, size_t len )
|
||||
|
||||
void hamsi512_4way_close( hamsi_4way_big_context *sc, void *dst )
|
||||
{
|
||||
__m256i *out = (__m256i*)dst;
|
||||
__m256i pad[1];
|
||||
size_t u;
|
||||
int ch, cl;
|
||||
|
||||
sph_enc32be( &ch, sc->count_high );
|
||||
@@ -925,8 +924,8 @@ void hamsi512_4way_close( hamsi_4way_big_context *sc, void *dst )
|
||||
0UL, 0x80UL, 0UL, 0x80UL );
|
||||
hamsi_big( sc, sc->buf, 1 );
|
||||
hamsi_big_final( sc, pad );
|
||||
for ( u = 0; u < 8; u ++ )
|
||||
out[u] = mm256_bswap_32( sc->h[u] );
|
||||
|
||||
mm256_block_bswap_32( (__m256i*)dst, sc->h );
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@@ -131,12 +131,14 @@ void bastionhash(void *output, const void *input)
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_bastion(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_bastion( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t _ALIGN(64) hash32[8];
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
|
@@ -79,11 +79,12 @@ extern void heavyhash(unsigned char* output, const unsigned char* input, int len
|
||||
|
||||
}
|
||||
|
||||
int scanhash_heavy(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_heavy( uint32_t *pdata, const uint32_t *ptarget,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t hash[8];
|
||||
uint32_t start_nonce = pdata[19];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
do {
|
||||
heavyhash((unsigned char *)hash, (unsigned char *)pdata, 80);
|
||||
|
@@ -83,7 +83,7 @@ void ExpandAESKey256(__m128i *keys, const __m128i *KeyBuf)
|
||||
keys[14] = tmp1;
|
||||
}
|
||||
|
||||
#ifdef __SSE4_2__
|
||||
#if defined(__SSE4_2__)
|
||||
//#ifdef __AVX__
|
||||
|
||||
#define AESENC(i,j) \
|
||||
@@ -151,7 +151,7 @@ void AES256CBC(__m128i** data, const __m128i** next, __m128i ExpandedKey[][16],
|
||||
}
|
||||
}
|
||||
|
||||
#else // NO SSE4.2
|
||||
#else // NO AVX
|
||||
|
||||
static inline __m128i AES256Core(__m128i State, const __m128i *ExpandedKey)
|
||||
{
|
||||
|
@@ -143,13 +143,13 @@ bool hodl_do_this_thread( int thr_id )
|
||||
return ( thr_id == 0 );
|
||||
}
|
||||
|
||||
int hodl_scanhash( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int hodl_scanhash( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
#if defined(__AES__)
|
||||
GenRandomGarbage( (CacheEntry*)hodl_scratchbuf, work->data, thr_id );
|
||||
GenRandomGarbage( (CacheEntry*)hodl_scratchbuf, work->data, mythr->id );
|
||||
pthread_barrier_wait( &hodl_barrier );
|
||||
return scanhash_hodl_wolf( thr_id, work, max_nonce, hashes_done );
|
||||
return scanhash_hodl_wolf( work, max_nonce, hashes_done, thr_info );
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
@@ -166,7 +166,7 @@ bool register_hodl_algo( algo_gate_t* gate )
|
||||
// return false;
|
||||
// }
|
||||
pthread_barrier_init( &hodl_barrier, NULL, opt_n_threads );
|
||||
gate->optimizations = AES_OPT | SSE42_OPT | AVX2_OPT;
|
||||
gate->optimizations = AES_OPT | AVX_OPT | AVX2_OPT;
|
||||
gate->scanhash = (void*)&hodl_scanhash;
|
||||
gate->get_new_work = (void*)&hodl_get_new_work;
|
||||
gate->longpoll_rpc_call = (void*)&hodl_longpoll_rpc_call;
|
||||
|
@@ -17,7 +17,7 @@ void GenerateGarbageCore( CacheEntry *Garbage, int ThreadID, int ThreadCount,
|
||||
const uint32_t StartChunk = ThreadID * Chunk;
|
||||
const uint32_t EndChunk = StartChunk + Chunk;
|
||||
|
||||
#ifdef __SSE4_2__
|
||||
#if defined(__SSE4_2__)
|
||||
//#ifdef __AVX__
|
||||
uint64_t* TempBufs[ SHA512_PARALLEL_N ] ;
|
||||
uint64_t* desination[ SHA512_PARALLEL_N ];
|
||||
@@ -61,13 +61,14 @@ void Rev256(uint32_t *Dest, const uint32_t *Src)
|
||||
}
|
||||
*/
|
||||
|
||||
int scanhash_hodl_wolf( int threadNumber, struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
#ifdef __SSE4_2__
|
||||
#if defined(__SSE4_2__)
|
||||
//#ifdef __AVX__
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
int threadNumber = mythr->id;
|
||||
CacheEntry *Garbage = (CacheEntry*)hodl_scratchbuf;
|
||||
CacheEntry Cache[AES_PARALLEL_N];
|
||||
__m128i* data[AES_PARALLEL_N];
|
||||
@@ -139,7 +140,7 @@ int scanhash_hodl_wolf( int threadNumber, struct work* work, uint32_t max_nonce,
|
||||
return(0);
|
||||
|
||||
|
||||
#else // no SSE4.2
|
||||
#else // no AVX
|
||||
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
@@ -147,6 +148,7 @@ int scanhash_hodl_wolf( int threadNumber, struct work* work, uint32_t max_nonce,
|
||||
CacheEntry *Garbage = (CacheEntry*)hodl_scratchbuf;
|
||||
CacheEntry Cache;
|
||||
uint32_t CollisionCount = 0;
|
||||
int threadNumber = mythr->id;
|
||||
|
||||
swab32_array( BlockHdr, pdata, 20 );
|
||||
// Search for pattern in psuedorandom data
|
||||
@@ -204,7 +206,7 @@ int scanhash_hodl_wolf( int threadNumber, struct work* work, uint32_t max_nonce,
|
||||
*hashes_done = CollisionCount;
|
||||
return(0);
|
||||
|
||||
#endif // SSE4.2 else
|
||||
#endif // AVX else
|
||||
|
||||
}
|
||||
|
||||
|
@@ -19,8 +19,8 @@ typedef union _CacheEntry
|
||||
__m128i dqwords[GARBAGE_SLICE_SIZE >> 4] __attribute__((aligned(16)));
|
||||
} CacheEntry;
|
||||
|
||||
int scanhash_hodl_wolf( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_hodl_wolf( struct work* work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
void GenRandomGarbage( CacheEntry *Garbage, uint32_t *pdata, int thr_id);
|
||||
|
||||
|
@@ -23,6 +23,7 @@ typedef struct
|
||||
__m256i h[8];
|
||||
__m256i w[80];
|
||||
#elif defined(__SSE4_2__)
|
||||
//#elif defined(__AVX__)
|
||||
__m128i h[8];
|
||||
__m128i w[80];
|
||||
#else
|
||||
@@ -32,7 +33,8 @@ typedef struct
|
||||
|
||||
#ifdef __AVX2__
|
||||
#define SHA512_PARALLEL_N 8
|
||||
#elif defined(__SSE$_2__)
|
||||
#elif defined(__SSE4_2__)
|
||||
//#elif defined(__AVX__)
|
||||
#define SHA512_PARALLEL_N 4
|
||||
#else
|
||||
#define SHA512_PARALLEL_N 1 // dummy value
|
||||
|
@@ -1,6 +1,6 @@
|
||||
#ifndef __AVX2__
|
||||
|
||||
#ifdef __SSE4_2__
|
||||
#if defined(__SSE4_2__)
|
||||
//#ifdef __AVX__
|
||||
|
||||
//Dependencies
|
||||
|
@@ -6,7 +6,7 @@
|
||||
|
||||
void ExpandAESKey256(__m128i *keys, const __m128i *KeyBuf);
|
||||
|
||||
#ifdef __SSE4_2__
|
||||
#if defined(__SSE4_2__)
|
||||
//#ifdef __AVX__
|
||||
|
||||
#define AES_PARALLEL_N 8
|
||||
|
@@ -3,7 +3,6 @@
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
//#include "avxdefs.h"
|
||||
|
||||
#if defined(JHA_4WAY)
|
||||
|
||||
@@ -13,9 +12,6 @@
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
|
||||
//static __thread keccak512_4way_context jha_kec_mid
|
||||
// __attribute__ ((aligned (64)));
|
||||
|
||||
void jha_hash_4way( void *out, const void *input )
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
@@ -46,7 +42,7 @@ void jha_hash_4way( void *out, const void *input )
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256(
|
||||
vh[0], _mm256_set1_epi64x( 1 ) ), m256_zero );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash0,
|
||||
(char*)hash0, 512 );
|
||||
@@ -59,7 +55,7 @@ void jha_hash_4way( void *out, const void *input )
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash3,
|
||||
(char*)hash3, 512 );
|
||||
mm256_interleave_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
skein512_4way_init( &ctx_skein );
|
||||
skein512_4way( &ctx_skein, vhash, 64 );
|
||||
@@ -77,26 +73,24 @@ void jha_hash_4way( void *out, const void *input )
|
||||
jh512_4way_close( &ctx_jh, vhashB );
|
||||
|
||||
for ( int i = 0; i < 8; i++ )
|
||||
vh[i] = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
||||
casti_m256i( out, i ) = _mm256_blendv_epi8( vhA[i], vhB[i], vh_mask );
|
||||
}
|
||||
|
||||
mm256_deinterleave_4x64( out, out+32, out+64, out+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
int scanhash_jha_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t *hash7 = &(hash[25]);
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = pdata[19];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
@@ -115,11 +109,7 @@ int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
0
|
||||
};
|
||||
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
for ( int m = 0; m < 6; m++ )
|
||||
{
|
||||
@@ -127,29 +117,27 @@ int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
{
|
||||
uint32_t mask = masks[m];
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
jha_hash_4way( hash, vdata );
|
||||
pdata[19] = n;
|
||||
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( !( (hash+(i<<3))[7] & mask ) == 0 )
|
||||
&& fulltest( hash+(i<<3), ptarget ) )
|
||||
for ( int i = 0; i < 4; i++ ) if ( !( (hash7[i] & mask ) == 0 ) )
|
||||
{
|
||||
pdata[19] = n;
|
||||
nonces[ num_found++ ] = n+i;
|
||||
work_set_target_ratio( work, hash+(i<<3) );
|
||||
extr_lane_4x64( lane_hash, hash, i, 256 );
|
||||
if ( fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_lane_solution( work, lane_hash, mythr, i );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
}
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
@@ -12,14 +12,14 @@
|
||||
#if defined JHA_4WAY
|
||||
void jha_hash_4way( void *state, const void *input );
|
||||
|
||||
int scanhash_jha_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_jha_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
|
||||
void jha_hash( void *state, const void *input );
|
||||
|
||||
int scanhash_jha( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_jha( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -81,7 +81,8 @@ void jha_hash(void *output, const void *input)
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_jha(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_jha( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash32[8];
|
||||
uint32_t _ALIGN(128) endiandata[20];
|
||||
@@ -89,7 +90,8 @@ int scanhash_jha(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *ha
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t n = pdata[19] - 1;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
|
@@ -16,55 +16,44 @@ void keccakhash_4way(void *state, const void *input)
|
||||
keccak256_4way_close( &ctx, state );
|
||||
}
|
||||
|
||||
int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t hash[16*4] __attribute__ ((aligned (32)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[25]); // 3*8+1
|
||||
uint32_t lane_hash[8];
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
// const uint32_t Htarg = ptarget[7];
|
||||
uint32_t endiandata[20];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
|
||||
for ( int i=0; i < 19; i++ )
|
||||
be32enc( &endiandata[i], pdata[i] );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
keccakhash_4way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( ( ( hash7[ lane<<1 ] & 0xFFFFFF00 ) == 0 ) )
|
||||
{
|
||||
mm256_extract_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
nonces[ num_found++ ] = n + lane;
|
||||
work_set_target_ratio( work, lane_hash );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
|
||||
} while ( (num_found == 0) && (n < max_nonce-4)
|
||||
&& !work_restart[thr_id].restart);
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -11,13 +11,13 @@
|
||||
#if defined(KECCAK_4WAY)
|
||||
|
||||
void keccakhash_4way( void *state, const void *input );
|
||||
int scanhash_keccak_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_keccak_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
||||
void keccakhash( void *state, const void *input );
|
||||
int scanhash_keccak( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_keccak( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#endif
|
||||
|
@@ -18,14 +18,15 @@ void keccakhash(void *state, const void *input)
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_keccak(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_keccak( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
//const uint32_t Htarg = ptarget[7];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint32_t _ALIGN(32) hash64[8];
|
||||
uint32_t endiandata[32];
|
||||
|
@@ -77,6 +77,24 @@ static const sph_u32 V_INIT[5][8] = {
|
||||
}
|
||||
};
|
||||
|
||||
#if SPH_LUFFA_PARALLEL
|
||||
|
||||
static const sph_u64 RCW010[8] = {
|
||||
SPH_C64(0xb6de10ed303994a6), SPH_C64(0x70f47aaec0e65299),
|
||||
SPH_C64(0x0707a3d46cc33a12), SPH_C64(0x1c1e8f51dc56983e),
|
||||
SPH_C64(0x707a3d451e00108f), SPH_C64(0xaeb285627800423d),
|
||||
SPH_C64(0xbaca15898f5b7882), SPH_C64(0x40a46f3e96e1db12)
|
||||
};
|
||||
|
||||
static const sph_u64 RCW014[8] = {
|
||||
SPH_C64(0x01685f3de0337818), SPH_C64(0x05a17cf4441ba90d),
|
||||
SPH_C64(0xbd09caca7f34d442), SPH_C64(0xf4272b289389217f),
|
||||
SPH_C64(0x144ae5cce5a8bce6), SPH_C64(0xfaa7ae2b5274baf4),
|
||||
SPH_C64(0x2e48f1c126889ba7), SPH_C64(0xb923c7049a226e9d)
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
static const sph_u32 RC00[8] = {
|
||||
SPH_C32(0x303994a6), SPH_C32(0xc0e65299),
|
||||
SPH_C32(0x6cc33a12), SPH_C32(0xdc56983e),
|
||||
@@ -105,20 +123,18 @@ static const sph_u32 RC14[8] = {
|
||||
SPH_C32(0x2e48f1c1), SPH_C32(0xb923c704)
|
||||
};
|
||||
|
||||
#if SPH_LUFFA_PARALLEL
|
||||
|
||||
static const sph_u64 RCW010[8] = {
|
||||
SPH_C64(0xb6de10ed303994a6), SPH_C64(0x70f47aaec0e65299),
|
||||
SPH_C64(0x0707a3d46cc33a12), SPH_C64(0x1c1e8f51dc56983e),
|
||||
SPH_C64(0x707a3d451e00108f), SPH_C64(0xaeb285627800423d),
|
||||
SPH_C64(0xbaca15898f5b7882), SPH_C64(0x40a46f3e96e1db12)
|
||||
static const sph_u32 RC30[8] = {
|
||||
SPH_C32(0xb213afa5), SPH_C32(0xc84ebe95),
|
||||
SPH_C32(0x4e608a22), SPH_C32(0x56d858fe),
|
||||
SPH_C32(0x343b138f), SPH_C32(0xd0ec4e3d),
|
||||
SPH_C32(0x2ceb4882), SPH_C32(0xb3ad2208)
|
||||
};
|
||||
|
||||
static const sph_u64 RCW014[8] = {
|
||||
SPH_C64(0x01685f3de0337818), SPH_C64(0x05a17cf4441ba90d),
|
||||
SPH_C64(0xbd09caca7f34d442), SPH_C64(0xf4272b289389217f),
|
||||
SPH_C64(0x144ae5cce5a8bce6), SPH_C64(0xfaa7ae2b5274baf4),
|
||||
SPH_C64(0x2e48f1c126889ba7), SPH_C64(0xb923c7049a226e9d)
|
||||
static const sph_u32 RC34[8] = {
|
||||
SPH_C32(0xe028c9bf), SPH_C32(0x44756f91),
|
||||
SPH_C32(0x7e8fce32), SPH_C32(0x956548be),
|
||||
SPH_C32(0xfe191be2), SPH_C32(0x3cb226e5),
|
||||
SPH_C32(0x5944a28e), SPH_C32(0xa1c4c355)
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -137,19 +153,6 @@ static const sph_u32 RC24[8] = {
|
||||
SPH_C32(0x36eda57f), SPH_C32(0x703aace7)
|
||||
};
|
||||
|
||||
static const sph_u32 RC30[8] = {
|
||||
SPH_C32(0xb213afa5), SPH_C32(0xc84ebe95),
|
||||
SPH_C32(0x4e608a22), SPH_C32(0x56d858fe),
|
||||
SPH_C32(0x343b138f), SPH_C32(0xd0ec4e3d),
|
||||
SPH_C32(0x2ceb4882), SPH_C32(0xb3ad2208)
|
||||
};
|
||||
|
||||
static const sph_u32 RC34[8] = {
|
||||
SPH_C32(0xe028c9bf), SPH_C32(0x44756f91),
|
||||
SPH_C32(0x7e8fce32), SPH_C32(0x956548be),
|
||||
SPH_C32(0xfe191be2), SPH_C32(0x3cb226e5),
|
||||
SPH_C32(0x5944a28e), SPH_C32(0xa1c4c355)
|
||||
};
|
||||
|
||||
#if SPH_LUFFA_PARALLEL
|
||||
|
||||
|
@@ -44,11 +44,11 @@ void allium_4way_hash( void *state, const void *input )
|
||||
blake256_4way( &ctx.blake, input + (64<<2), 16 );
|
||||
blake256_4way_close( &ctx.blake, vhash32 );
|
||||
|
||||
mm256_rintrlv_4x32_4x64( vhash64, vhash32, 256 );
|
||||
rintrlv_4x32_4x64( vhash64, vhash32, 256 );
|
||||
keccak256_4way( &ctx.keccak, vhash64, 32 );
|
||||
keccak256_4way_close( &ctx.keccak, vhash64 );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
LYRA2RE( hash0, 32, hash0, 32, hash0, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash1, 32, hash1, 32, hash1, 32, 1, 8, 8 );
|
||||
@@ -68,12 +68,12 @@ void allium_4way_hash( void *state, const void *input )
|
||||
LYRA2RE( hash2, 32, hash2, 32, hash2, 32, 1, 8, 8 );
|
||||
LYRA2RE( hash3, 32, hash3, 32, hash3, 32, 1, 8, 8 );
|
||||
|
||||
mm256_intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
|
||||
intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
|
||||
|
||||
skein256_4way( &ctx.skein, vhash64, 32 );
|
||||
skein256_4way_close( &ctx.skein, vhash64 );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
update_and_final_groestl256( &ctx.groestl, state, hash0, 256 );
|
||||
memcpy( &ctx.groestl, &allium_4way_ctx.groestl,
|
||||
@@ -87,7 +87,7 @@ void allium_4way_hash( void *state, const void *input )
|
||||
update_and_final_groestl256( &ctx.groestl, state+96, hash3, 256 );
|
||||
}
|
||||
|
||||
int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_allium_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
@@ -98,12 +98,12 @@ int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
mm128_bswap_intrlv80_4x32( vdata, pdata );
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
blake256_4way_init( &allium_4way_ctx.blake );
|
||||
blake256_4way( &allium_4way_ctx.blake, vdata, 64 );
|
||||
|
||||
@@ -118,7 +118,7 @@ int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( hash+(lane<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_solution( work, hash+(lane<<3), mythr, lane );
|
||||
submit_lane_solution( work, hash+(lane<<3), mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
|
@@ -69,7 +69,7 @@ void allium_hash(void *state, const void *input)
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_allium( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[8];
|
||||
@@ -80,7 +80,7 @@ int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x3ffff;
|
||||
@@ -94,18 +94,14 @@ int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
do {
|
||||
be32enc( &endiandata[19], nonce );
|
||||
allium_hash( hash, endiandata );
|
||||
|
||||
if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
|
||||
if ( hash[7] <= Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
work_set_target_ratio( work, hash );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
|
@@ -19,21 +19,21 @@ bool register_lyra2rev3_algo( algo_gate_t* gate );
|
||||
#if defined(LYRA2REV3_8WAY)
|
||||
|
||||
void lyra2rev3_8way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2rev3_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2rev3_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_lyra2rev3_8way_ctx();
|
||||
|
||||
#elif defined(LYRA2REV3_4WAY)
|
||||
|
||||
void lyra2rev3_4way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2rev3_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_lyra2rev3_4way_ctx();
|
||||
|
||||
#else
|
||||
|
||||
void lyra2rev3_hash( void *state, const void *input );
|
||||
int scanhash_lyra2rev3( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2rev3( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_lyra2rev3_ctx();
|
||||
|
||||
@@ -52,14 +52,14 @@ bool register_lyra2rev2_algo( algo_gate_t* gate );
|
||||
#if defined(LYRA2REV2_4WAY)
|
||||
|
||||
void lyra2rev2_4way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_lyra2rev2_4way_ctx();
|
||||
|
||||
#else
|
||||
|
||||
void lyra2rev2_hash( void *state, const void *input );
|
||||
int scanhash_lyra2rev2( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2rev2( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_lyra2rev2_ctx();
|
||||
|
||||
@@ -80,21 +80,21 @@ bool init_lyra2rev2_ctx();
|
||||
#if defined(LYRA2Z_8WAY)
|
||||
|
||||
void lyra2z_8way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool lyra2z_8way_thread_init();
|
||||
|
||||
#elif defined(LYRA2Z_4WAY)
|
||||
|
||||
void lyra2z_4way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool lyra2z_4way_thread_init();
|
||||
|
||||
#else
|
||||
|
||||
void lyra2z_hash( void *state, const void *input );
|
||||
int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool lyra2z_thread_init();
|
||||
|
||||
@@ -111,14 +111,14 @@ bool lyra2z_thread_init();
|
||||
#if defined(LYRA2H_4WAY)
|
||||
|
||||
void lyra2h_4way_hash( void *state, const void *input );
|
||||
int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2h_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool lyra2h_4way_thread_init();
|
||||
|
||||
#else
|
||||
|
||||
void lyra2h_hash( void *state, const void *input );
|
||||
int scanhash_lyra2h( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2h( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool lyra2h_thread_init();
|
||||
|
||||
@@ -135,14 +135,14 @@ bool register_allium_algo( algo_gate_t* gate );
|
||||
#if defined(ALLIUM_4WAY)
|
||||
|
||||
void allium_4way_hash( void *state, const void *input );
|
||||
int scanhash_allium_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_allium_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_allium_4way_ctx();
|
||||
|
||||
#else
|
||||
|
||||
void allium_hash( void *state, const void *input );
|
||||
int scanhash_allium( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_allium( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
bool init_allium_ctx();
|
||||
|
||||
@@ -160,14 +160,14 @@ bool register_phi2_algo( algo_gate_t* gate );
|
||||
#if defined(PHI2_4WAY)
|
||||
|
||||
void phi2_hash_4way( void *state, const void *input );
|
||||
int scanhash_phi2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_phi2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
//void init_phi2_ctx();
|
||||
|
||||
#else
|
||||
|
||||
void phi2_hash( void *state, const void *input );
|
||||
int scanhash_phi2( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_phi2( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void init_phi2_ctx();
|
||||
|
||||
|
@@ -5,7 +5,7 @@
|
||||
#include <memory.h>
|
||||
#include <mm_malloc.h>
|
||||
#include "lyra2.h"
|
||||
#include "algo/blake/sph_blake.h"
|
||||
//#include "algo/blake/sph_blake.h"
|
||||
#include "algo/blake/blake-hash-4way.h"
|
||||
|
||||
__thread uint64_t* lyra2h_4way_matrix;
|
||||
@@ -36,7 +36,7 @@ void lyra2h_4way_hash( void *state, const void *input )
|
||||
blake256_4way( &ctx_blake, input + (64*4), 16 );
|
||||
blake256_4way_close( &ctx_blake, vhash );
|
||||
|
||||
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
|
||||
LYRA2Z( lyra2h_4way_matrix, state, 32, hash0, 32, hash0, 32,
|
||||
16, 16, 16 );
|
||||
@@ -48,7 +48,7 @@ void lyra2h_4way_hash( void *state, const void *input )
|
||||
32, 16, 16, 16 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2h_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
@@ -59,12 +59,12 @@ int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
mm128_bswap_intrlv80_4x32( vdata, pdata );
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
lyra2h_4way_midstate( vdata );
|
||||
|
||||
do {
|
||||
@@ -76,7 +76,7 @@ int scanhash_lyra2h_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_solution( work, hash+(i<<3), mythr, i );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
|
@@ -35,7 +35,7 @@ void lyra2h_hash( void *state, const void *input )
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_lyra2h( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2h( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
@@ -45,7 +45,7 @@ int scanhash_lyra2h( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x0000ff;
|
||||
@@ -54,22 +54,19 @@ int scanhash_lyra2h( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
}
|
||||
|
||||
lyra2h_midstate( endiandata );
|
||||
|
||||
lyra2h_midstate( endiandata );
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
lyra2h_hash( hash, endiandata );
|
||||
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
|
||||
work_set_target_ratio(work, hash);
|
||||
if ( hash[7] <= Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
|
@@ -81,7 +81,7 @@ void lyra2re_hash(void *state, const void *input)
|
||||
memcpy(state, hashA, 32);
|
||||
}
|
||||
|
||||
int scanhash_lyra2re( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2re( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
@@ -91,7 +91,7 @@ int scanhash_lyra2re( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
@@ -100,20 +100,14 @@ int scanhash_lyra2re( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
lyra2re_hash(hash, endiandata);
|
||||
if (hash[7] <= Htarg )
|
||||
{
|
||||
if ( fulltest(hash, ptarget) )
|
||||
{
|
||||
if ( hash[7] <= Htarg )
|
||||
if ( fulltest(hash, ptarget) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
work_set_target_ratio( work, hash );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
|
@@ -42,12 +42,12 @@ void lyra2rev2_4way_hash( void *state, const void *input )
|
||||
blake256_4way( &ctx.blake, input + (64<<2), 16 );
|
||||
blake256_4way_close( &ctx.blake, vhash );
|
||||
|
||||
mm256_rintrlv_4x32_4x64( vhash64, vhash, 256 );
|
||||
rintrlv_4x32_4x64( vhash64, vhash, 256 );
|
||||
|
||||
keccak256_4way( &ctx.keccak, vhash64, 32 );
|
||||
keccak256_4way_close( &ctx.keccak, vhash64 );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
|
||||
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||
@@ -62,12 +62,12 @@ void lyra2rev2_4way_hash( void *state, const void *input )
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash2, 32, hash2, 32, hash2, 32, 1, 4, 4 );
|
||||
LYRA2REV2( l2v2_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
|
||||
|
||||
mm256_intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
|
||||
intrlv_4x64( vhash64, hash0, hash1, hash2, hash3, 256 );
|
||||
|
||||
skein256_4way( &ctx.skein, vhash64, 32 );
|
||||
skein256_4way_close( &ctx.skein, vhash64 );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash64, 256 );
|
||||
|
||||
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash0, (const byte*) hash0, 32 );
|
||||
@@ -78,31 +78,31 @@ void lyra2rev2_4way_hash( void *state, const void *input )
|
||||
cubehashInit( &ctx.cube, 256, 16, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*) hash3, (const byte*) hash3, 32 );
|
||||
|
||||
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||
|
||||
bmw256_4way( &ctx.bmw, vhash, 32 );
|
||||
bmw256_4way_close( &ctx.bmw, state );
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2rev2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t lane_hash[8];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
mm128_bswap_intrlv80_4x32( vdata, pdata );
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
|
||||
blake256_4way_init( &l2v2_4way_ctx.blake );
|
||||
blake256_4way( &l2v2_4way_ctx.blake, vdata, 64 );
|
||||
@@ -116,11 +116,11 @@ int scanhash_lyra2rev2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
|
||||
{
|
||||
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_solution( work, lane_hash, mythr, lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
|
@@ -40,31 +40,31 @@ void l2v2_blake256_midstate( const void* input )
|
||||
|
||||
void lyra2rev2_hash( void *state, const void *input )
|
||||
{
|
||||
lyra2v2_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &lyra2v2_ctx, sizeof(lyra2v2_ctx) );
|
||||
uint8_t hash[128] __attribute__ ((aligned (64)));
|
||||
#define hashA hash
|
||||
#define hashB hash+64
|
||||
const int midlen = 64; // bytes
|
||||
const int tail = 80 - midlen; // 16
|
||||
lyra2v2_ctx_holder ctx __attribute__ ((aligned (64)));
|
||||
memcpy( &ctx, &lyra2v2_ctx, sizeof(lyra2v2_ctx) );
|
||||
uint8_t hash[128] __attribute__ ((aligned (64)));
|
||||
#define hashA hash
|
||||
#define hashB hash+64
|
||||
const int midlen = 64; // bytes
|
||||
const int tail = 80 - midlen; // 16
|
||||
|
||||
memcpy( &ctx.blake, &l2v2_blake_mid, sizeof l2v2_blake_mid );
|
||||
memcpy( &ctx.blake, &l2v2_blake_mid, sizeof l2v2_blake_mid );
|
||||
sph_blake256( &ctx.blake, (uint8_t*)input + midlen, tail );
|
||||
sph_blake256_close( &ctx.blake, hashA );
|
||||
|
||||
sph_keccak256( &ctx.keccak, hashA, 32 );
|
||||
sph_keccak256_close(&ctx.keccak, hashB);
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube1, (byte*) hashA,
|
||||
(const byte*) hashB, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube1, (byte*) hashA,
|
||||
(const byte*) hashB, 32 );
|
||||
|
||||
LYRA2REV2( l2v2_wholeMatrix, hashA, 32, hashA, 32, hashA, 32, 1, 4, 4 );
|
||||
|
||||
sph_skein256( &ctx.skein, hashA, 32 );
|
||||
sph_skein256_close( &ctx.skein, hashB );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube2, (byte*) hashA,
|
||||
(const byte*) hashB, 32 );
|
||||
cubehashUpdateDigest( &ctx.cube2, (byte*) hashA,
|
||||
(const byte*) hashB, 32 );
|
||||
|
||||
sph_bmw256( &ctx.bmw, hashA, 32 );
|
||||
sph_bmw256_close( &ctx.bmw, hashB );
|
||||
@@ -72,43 +72,37 @@ void lyra2rev2_hash( void *state, const void *input )
|
||||
memcpy( state, hashB, 32 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev2(int thr_id, struct work *work,
|
||||
int scanhash_lyra2rev2( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t endiandata[20] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
l2v2_blake256_midstate( endiandata );
|
||||
l2v2_blake256_midstate( endiandata );
|
||||
|
||||
do {
|
||||
be32enc(&endiandata[19], nonce);
|
||||
lyra2rev2_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg )
|
||||
{
|
||||
if( fulltest(hash, ptarget) )
|
||||
{
|
||||
if( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
work_set_target_ratio( work, hash );
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
|
@@ -41,7 +41,7 @@ void lyra2rev3_8way_hash( void *state, const void *input )
|
||||
blake256_8way( &ctx.blake, input, 80 );
|
||||
blake256_8way_close( &ctx.blake, vhash );
|
||||
|
||||
mm256_dintrlv_8x32( hash0, hash1, hash2, hash3,
|
||||
dintrlv_8x32( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
LYRA2REV3( l2v3_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
|
||||
@@ -78,7 +78,7 @@ void lyra2rev3_8way_hash( void *state, const void *input )
|
||||
LYRA2REV3( l2v3_wholeMatrix, hash6, 32, hash6, 32, hash6, 32, 1, 4, 4 );
|
||||
LYRA2REV3( l2v3_wholeMatrix, hash7, 32, hash7, 32, hash7, 32, 1, 4, 4 );
|
||||
|
||||
mm256_intrlv_8x32( vhash, hash0, hash1, hash2, hash3,
|
||||
intrlv_8x32( vhash, hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, 256 );
|
||||
|
||||
bmw256_8way( &ctx.bmw, vhash, 32 );
|
||||
@@ -86,25 +86,25 @@ void lyra2rev3_8way_hash( void *state, const void *input )
|
||||
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev3_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2rev3_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<3]);
|
||||
uint32_t lane_hash[8];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
mm256_bswap_intrlv80_8x32( vdata, pdata );
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_bswap_32( _mm256_set_epi32( n+7, n+6, n+5, n+4,
|
||||
@@ -115,11 +115,11 @@ int scanhash_lyra2rev3_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
for ( int lane = 0; lane < 8; lane++ ) if ( hash7[lane] <= Htarg )
|
||||
{
|
||||
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_solution( work, lane_hash, mythr, lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
@@ -161,7 +161,7 @@ void lyra2rev3_4way_hash( void *state, const void *input )
|
||||
|
||||
blake256_4way( &ctx.blake, input, 80 );
|
||||
blake256_4way_close( &ctx.blake, vhash );
|
||||
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
|
||||
LYRA2REV3( l2v3_wholeMatrix, hash0, 32, hash0, 32, hash0, 32, 1, 4, 4 );
|
||||
LYRA2REV3( l2v3_wholeMatrix, hash1, 32, hash1, 32, hash1, 32, 1, 4, 4 );
|
||||
@@ -181,30 +181,30 @@ void lyra2rev3_4way_hash( void *state, const void *input )
|
||||
LYRA2REV3( l2v3_wholeMatrix, hash2, 32, hash2, 32, hash2, 32, 1, 4, 4 );
|
||||
LYRA2REV3( l2v3_wholeMatrix, hash3, 32, hash3, 32, hash3, 32, 1, 4, 4 );
|
||||
|
||||
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 256 );
|
||||
bmw256_4way( &ctx.bmw, vhash, 32 );
|
||||
bmw256_4way_close( &ctx.bmw, state );
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2rev3_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t lane_hash[8];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
( (uint32_t*)ptarget )[7] = 0x0000ff;
|
||||
|
||||
mm128_bswap_intrlv80_4x32( vdata, pdata );
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
do
|
||||
{
|
||||
*noncev = mm128_bswap_32( _mm_set_epi32( n+3, n+2, n+1, n ) );
|
||||
@@ -214,11 +214,11 @@ int scanhash_lyra2rev3_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[lane] <= Htarg )
|
||||
{
|
||||
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_solution( work, lane_hash, mythr, lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
|
@@ -57,7 +57,7 @@ void lyra2rev3_hash( void *state, const void *input )
|
||||
memcpy( state, hash, 32 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2rev3( int thr_id, struct work *work,
|
||||
int scanhash_lyra2rev3( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
@@ -67,7 +67,7 @@ int scanhash_lyra2rev3( int thr_id, struct work *work,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if (opt_benchmark)
|
||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||
@@ -78,28 +78,20 @@ int scanhash_lyra2rev3( int thr_id, struct work *work,
|
||||
casti_m128i( endiandata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( endiandata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
l2v3_blake256_midstate( endiandata );
|
||||
|
||||
do
|
||||
{
|
||||
be32enc(&endiandata[19], nonce);
|
||||
lyra2rev3_hash(hash, endiandata);
|
||||
|
||||
if (hash[7] <= Htarg )
|
||||
{
|
||||
if( fulltest(hash, ptarget) )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
work_set_target_ratio( work, hash );
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
nonce++;
|
||||
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
if (hash[7] <= Htarg )
|
||||
if( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
|
@@ -36,7 +36,7 @@ void lyra2z_4way_hash( void *state, const void *input )
|
||||
blake256_4way( &ctx_blake, input + (64*4), 16 );
|
||||
blake256_4way_close( &ctx_blake, vhash );
|
||||
|
||||
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 256 );
|
||||
|
||||
LYRA2Z( lyra2z_4way_matrix, state , 32, hash0, 32, hash0, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_4way_matrix, state+32, 32, hash1, 32, hash1, 32, 8, 8, 8 );
|
||||
@@ -44,7 +44,7 @@ void lyra2z_4way_hash( void *state, const void *input )
|
||||
LYRA2Z( lyra2z_4way_matrix, state+96, 32, hash3, 32, hash3, 32, 8, 8, 8 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2z_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (64)));
|
||||
@@ -55,12 +55,12 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
mm128_bswap_intrlv80_4x32( vdata, pdata );
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
lyra2z_4way_midstate( vdata );
|
||||
|
||||
do {
|
||||
@@ -74,7 +74,7 @@ int scanhash_lyra2z_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_solution( work, hash+(i<<3), mythr, i );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( (n < max_nonce-4) && !work_restart[thr_id].restart);
|
||||
@@ -119,8 +119,8 @@ void lyra2z_8way_hash( void *state, const void *input )
|
||||
blake256_8way( &ctx_blake, input + (64*8), 16 );
|
||||
blake256_8way_close( &ctx_blake, vhash );
|
||||
|
||||
mm256_dintrlv_8x32( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
dintrlv_8x32( hash0, hash1, hash2, hash3,
|
||||
hash4, hash5, hash6, hash7, vhash, 256 );
|
||||
|
||||
LYRA2Z( lyra2z_8way_matrix, hash0, 32, hash0, 32, hash0, 32, 8, 8, 8 );
|
||||
LYRA2Z( lyra2z_8way_matrix, hash1, 32, hash1, 32, hash1, 32, 8, 8, 8 );
|
||||
@@ -141,7 +141,7 @@ void lyra2z_8way_hash( void *state, const void *input )
|
||||
memcpy( state+224, hash7, 32 );
|
||||
}
|
||||
|
||||
int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2z_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
@@ -152,12 +152,12 @@ int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if ( opt_benchmark )
|
||||
ptarget[7] = 0x0000ff;
|
||||
|
||||
mm256_bswap_intrlv80_8x32( vdata, pdata );
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
lyra2z_8way_midstate( vdata );
|
||||
|
||||
do {
|
||||
@@ -171,7 +171,7 @@ int scanhash_lyra2z_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
&& !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_solution( work, hash+(i<<3), mythr, i );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 8;
|
||||
} while ( (n < max_nonce-8) && !work_restart[thr_id].restart);
|
||||
|
@@ -43,7 +43,7 @@ void lyra2z_hash( void *state, const void *input )
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2z( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
@@ -53,7 +53,7 @@ int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x0000ff;
|
||||
@@ -68,16 +68,14 @@ int scanhash_lyra2z( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[19], nonce);
|
||||
lyra2z_hash( hash, endiandata );
|
||||
|
||||
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
|
||||
work_set_target_ratio(work, hash);
|
||||
if ( hash[7] <= Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce;
|
||||
return 1;
|
||||
}
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
|
@@ -15,7 +15,7 @@ void lyra2z330_hash(void *state, const void *input, uint32_t height)
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_lyra2z330( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lyra2z330( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8] __attribute__ ((aligned (64)));
|
||||
@@ -25,7 +25,7 @@ int scanhash_lyra2z330( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x0000ff;
|
||||
@@ -38,21 +38,16 @@ int scanhash_lyra2z330( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
do
|
||||
{
|
||||
be32enc(&endiandata[19], nonce);
|
||||
be32enc( &endiandata[19], nonce );
|
||||
lyra2z330_hash( hash, endiandata, work->height );
|
||||
if ( hash[7] <= Htarg && fulltest(hash, ptarget) && !opt_benchmark )
|
||||
if ( hash[7] <= Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
work_set_target_ratio(work, hash);
|
||||
pdata[19] = nonce;
|
||||
if ( submit_work( mythr, work ) )
|
||||
applog( LOG_NOTICE, "Share %d submitted by thread %d",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
mythr->id );
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
nonce++;
|
||||
} while (nonce < max_nonce && !work_restart[thr_id].restart);
|
||||
} while ( nonce < max_nonce && !work_restart[thr_id].restart );
|
||||
pdata[19] = nonce;
|
||||
*hashes_done = pdata[19] - first_nonce + 1;
|
||||
return 0;
|
||||
|
@@ -69,13 +69,13 @@ void phi2_hash_4way( void *state, const void *input )
|
||||
LYRA2RE( &hashA[3][0], 32, &hashB[3][0], 32, &hashB[3][0], 32, 1, 8, 8 );
|
||||
LYRA2RE( &hashA[3][8], 32, &hashB[3][8], 32, &hashB[3][8], 32, 1, 8, 8 );
|
||||
|
||||
mm256_intrlv_4x64( vhash, hashA[0], hashA[1], hashA[2], hashA[3], 512 );
|
||||
intrlv_4x64( vhash, hashA[0], hashA[1], hashA[2], hashA[3], 512 );
|
||||
|
||||
jh512_4way_init( &ctx.jh );
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
mm256_dintrlv_4x64( hash[0], hash[1], hash[2], hash[3], vhash, 512 );
|
||||
dintrlv_4x64( hash[0], hash[1], hash[2], hash[3], vhash, 512 );
|
||||
|
||||
if ( hash[0][0] & 1 )
|
||||
{
|
||||
@@ -141,7 +141,7 @@ void phi2_hash_4way( void *state, const void *input )
|
||||
(const BitSequence *)hash[3], 512 );
|
||||
}
|
||||
|
||||
mm256_intrlv_4x64( vhash, hash[0], hash[1], hash[2], hash[3], 512 );
|
||||
intrlv_4x64( vhash, hash[0], hash[1], hash[2], hash[3], 512 );
|
||||
|
||||
skein512_4way_init( &ctx.skein );
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
@@ -161,20 +161,20 @@ void phi2_hash_4way( void *state, const void *input )
|
||||
memcpy( state, vhash, 128 );
|
||||
}
|
||||
|
||||
int scanhash_phi2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_phi2_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[8];
|
||||
uint32_t _ALIGN(128) edata[36];
|
||||
uint32_t vdata[4][36] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[25]);
|
||||
uint32_t lane_hash[8];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if(opt_benchmark){
|
||||
ptarget[7] = 0x00ff;
|
||||
@@ -217,11 +217,11 @@ int scanhash_phi2_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ ) if ( hash7[ lane<<1 ] < Htarg )
|
||||
{
|
||||
mm256_extract_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_solution( work, lane_hash, mythr, lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
|
@@ -92,7 +92,7 @@ void phi2_hash(void *state, const void *input)
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_phi2( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_phi2( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t _ALIGN(128) hash[8];
|
||||
@@ -102,7 +102,7 @@ int scanhash_phi2( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if(opt_benchmark){
|
||||
ptarget[7] = 0x00ff;
|
||||
@@ -111,30 +111,21 @@ int scanhash_phi2( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
phi2_has_roots = false;
|
||||
for ( int i=0; i < 36; i++ )
|
||||
{
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
if (i >= 20 && pdata[i]) phi2_has_roots = true;
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
if ( i >= 20 && pdata[i] ) phi2_has_roots = true;
|
||||
}
|
||||
|
||||
do {
|
||||
be32enc( &endiandata[19], n );
|
||||
phi2_hash( hash, endiandata );
|
||||
|
||||
if ( hash[7] < Htarg && fulltest( hash, ptarget ) )
|
||||
{
|
||||
pdata[19] = n;
|
||||
work_set_target_ratio( work, hash );
|
||||
if ( submit_work( mythr, work ) )
|
||||
applog( LOG_NOTICE, "Share %d submitted by thread %d.",
|
||||
accepted_share_count + rejected_share_count + 1,
|
||||
thr_id );
|
||||
else
|
||||
applog( LOG_WARNING, "Failed to submit share." );
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
}
|
||||
if ( hash[7] < Htarg )
|
||||
if ( fulltest( hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n;
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
n++;
|
||||
|
||||
} while ( n < max_nonce && !work_restart[thr_id].restart );
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
|
63
algo/m7m.c
63
algo/m7m.c
@@ -144,7 +144,7 @@ void init_m7m_ctx()
|
||||
#define NM7M 5
|
||||
#define SW_DIVS 5
|
||||
#define M7_MIDSTATE_LEN 76
|
||||
int scanhash_m7m_hash( int thr_id, struct work* work, uint64_t max_nonce,
|
||||
int scanhash_m7m_hash( struct work* work, uint64_t max_nonce,
|
||||
unsigned long *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
@@ -154,7 +154,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work, uint64_t max_nonce,
|
||||
uint32_t hash[8] __attribute__((aligned(64)));
|
||||
uint8_t bhash[7][64] __attribute__((aligned(64)));
|
||||
uint32_t n = pdata[19] - 1;
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
uint32_t usw_, mpzscale;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
char data_str[161], hash_str[65], target_str[65];
|
||||
@@ -207,6 +207,7 @@ int scanhash_m7m_hash( int thr_id, struct work* work, uint64_t max_nonce,
|
||||
|
||||
SHA512_Update( &ctx2.sha512, data_p64, 80 - M7_MIDSTATE_LEN );
|
||||
SHA512_Final( (unsigned char*) (bhash[1]), &ctx2.sha512 );
|
||||
|
||||
sph_keccak512( &ctx2.keccak, data_p64, 80 - M7_MIDSTATE_LEN );
|
||||
sph_keccak512_close( &ctx2.keccak, (void*)(bhash[2]) );
|
||||
|
||||
@@ -222,18 +223,18 @@ int scanhash_m7m_hash( int thr_id, struct work* work, uint64_t max_nonce,
|
||||
sph_ripemd160( &ctx2.ripemd, data_p64, 80 - M7_MIDSTATE_LEN );
|
||||
sph_ripemd160_close( &ctx2.ripemd, (void*)(bhash[6]) );
|
||||
|
||||
mpz_import(bns0, a, -1, p, -1, 0, bhash[0]);
|
||||
mpz_import(bns0, a, -1, p, -1, 0, bhash[0]);
|
||||
mpz_set(bns1, bns0);
|
||||
mpz_set(product, bns0);
|
||||
for ( i=1; i < 7; i++ )
|
||||
mpz_set(product, bns0);
|
||||
for ( i=1; i < 7; i++ )
|
||||
{
|
||||
mpz_import(bns0, a, -1, p, -1, 0, bhash[i]);
|
||||
mpz_add(bns1, bns1, bns0);
|
||||
mpz_mul(product, product, bns0);
|
||||
mpz_import(bns0, a, -1, p, -1, 0, bhash[i]);
|
||||
mpz_add(bns1, bns1, bns0);
|
||||
mpz_mul(product, product, bns0);
|
||||
}
|
||||
mpz_mul(product, product, bns1);
|
||||
|
||||
mpz_mul(product, product, product);
|
||||
mpz_mul(product, product, product);
|
||||
bytes = mpz_sizeinbase(product, 256);
|
||||
mpz_export((void *)bdata, NULL, -1, 1, 0, 0, product);
|
||||
|
||||
@@ -243,27 +244,27 @@ int scanhash_m7m_hash( int thr_id, struct work* work, uint64_t max_nonce,
|
||||
|
||||
digits=(int)((sqrt((double)(n/2))*(1.+EPS))/9000+75);
|
||||
mp_bitcnt_t prec = (long int)(digits*BITS_PER_DIGIT+16);
|
||||
mpf_set_prec_raw(magifpi, prec);
|
||||
mpf_set_prec_raw(mptmp, prec);
|
||||
mpf_set_prec_raw(mpt1, prec);
|
||||
mpf_set_prec_raw(mpt2, prec);
|
||||
mpf_set_prec_raw(magifpi, prec);
|
||||
mpf_set_prec_raw(mptmp, prec);
|
||||
mpf_set_prec_raw(mpt1, prec);
|
||||
mpf_set_prec_raw(mpt2, prec);
|
||||
|
||||
usw_ = sw2_(n/2);
|
||||
mpzscale = 1;
|
||||
mpzscale = 1;
|
||||
mpz_set_ui(magisw, usw_);
|
||||
|
||||
for ( i = 0; i < 5; i++ )
|
||||
{
|
||||
mpf_set_d(mpt1, 0.25*mpzscale);
|
||||
mpf_sub(mpt1, mpt1, mpt2);
|
||||
mpf_sub(mpt1, mpt1, mpt2);
|
||||
mpf_abs(mpt1, mpt1);
|
||||
mpf_div(magifpi, magifpi0, mpt1);
|
||||
mpf_pow_ui(mptmp, mpten, digits >> 1);
|
||||
mpf_mul(magifpi, magifpi, mptmp);
|
||||
mpz_set_f(magipi, magifpi);
|
||||
mpz_set_f(magipi, magifpi);
|
||||
mpz_add(magipi,magipi,magisw);
|
||||
mpz_add(product,product,magipi);
|
||||
mpz_import(bns0, b, -1, p, -1, 0, (void*)(hash));
|
||||
mpz_import(bns0, b, -1, p, -1, 0, (void*)(hash));
|
||||
mpz_add(bns1, bns1, bns0);
|
||||
mpz_mul(product,product,bns1);
|
||||
mpz_cdiv_q (product, product, bns0);
|
||||
@@ -275,18 +276,18 @@ int scanhash_m7m_hash( int thr_id, struct work* work, uint64_t max_nonce,
|
||||
SHA256_Init( &ctxf_sha256 );
|
||||
SHA256_Update( &ctxf_sha256, bdata, bytes );
|
||||
SHA256_Final( (unsigned char*) hash, &ctxf_sha256 );
|
||||
}
|
||||
}
|
||||
|
||||
const unsigned char *hash_ = (const unsigned char *)hash;
|
||||
const unsigned char *target_ = (const unsigned char *)ptarget;
|
||||
for ( i = 31; i >= 0; i-- )
|
||||
const unsigned char *hash_ = (const unsigned char *)hash;
|
||||
const unsigned char *target_ = (const unsigned char *)ptarget;
|
||||
for ( i = 31; i >= 0; i-- )
|
||||
{
|
||||
if ( hash_[i] != target_[i] )
|
||||
{
|
||||
rc = hash_[i] < target_[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( hash_[i] != target_[i] )
|
||||
{
|
||||
rc = hash_[i] < target_[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( unlikely(rc) )
|
||||
{
|
||||
if ( opt_debug )
|
||||
@@ -299,15 +300,15 @@ int scanhash_m7m_hash( int thr_id, struct work* work, uint64_t max_nonce,
|
||||
hash_str,
|
||||
target_str);
|
||||
}
|
||||
work_set_target_ratio( work, hash );
|
||||
pdata[19] = data[19];
|
||||
goto out;
|
||||
}
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
pdata[19] = n;
|
||||
|
||||
out:
|
||||
// can this be skipped after finding a share? Seems to work ok.
|
||||
//out:
|
||||
mpf_set_prec_raw(magifpi, prec0);
|
||||
mpf_set_prec_raw(magifpi0, prec0);
|
||||
mpf_set_prec_raw(mptmp, prec0);
|
||||
|
@@ -12,9 +12,6 @@
|
||||
#include "algo/keccak/keccak-hash-4way.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
|
||||
// no improvement with midstate
|
||||
//static __thread blake512_4way_context ctx_mid;
|
||||
|
||||
void nist5hash_4way( void *out, const void *input )
|
||||
{
|
||||
uint64_t hash0[8] __attribute__ ((aligned (64)));
|
||||
@@ -28,14 +25,11 @@ void nist5hash_4way( void *out, const void *input )
|
||||
skein512_4way_context ctx_skein;
|
||||
keccak512_4way_context ctx_keccak;
|
||||
|
||||
// memcpy( &ctx_blake, &ctx_mid, sizeof(ctx_mid) );
|
||||
// blake512_4way( &ctx_blake, input + (64<<2), 16 );
|
||||
|
||||
blake512_4way_init( &ctx_blake );
|
||||
blake512_4way( &ctx_blake, input, 80 );
|
||||
blake512_4way_close( &ctx_blake, vhash );
|
||||
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
init_groestl( &ctx_groestl, 64 );
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash0,
|
||||
@@ -50,7 +44,7 @@ void nist5hash_4way( void *out, const void *input )
|
||||
update_and_final_groestl( &ctx_groestl, (char*)hash3,
|
||||
(const char*)hash3, 512 );
|
||||
|
||||
mm256_interleave_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
jh512_4way_init( &ctx_jh );
|
||||
jh512_4way( &ctx_jh, vhash, 64 );
|
||||
@@ -65,22 +59,20 @@ void nist5hash_4way( void *out, const void *input )
|
||||
skein512_4way_close( &ctx_skein, out );
|
||||
}
|
||||
|
||||
int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
int scanhash_nist5_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*16] __attribute__ ((aligned (64)));
|
||||
uint32_t *hash7 = &(hash[25]);
|
||||
uint32_t lane_hash[8];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t *nonces = work->nonces;
|
||||
int num_found = 0;
|
||||
uint32_t *noncep = vdata + 73; // 9*8 + 1
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
@@ -96,15 +88,7 @@ int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
0xFFFF0000,
|
||||
0 };
|
||||
|
||||
// we need bigendian data...
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
// precalc midstate
|
||||
// blake512_4way_init( &ctx_mid );
|
||||
// blake512_4way( &ctx_mid, vdata, 64 );
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
for ( int m=0; m < 6; m++ )
|
||||
{
|
||||
@@ -113,33 +97,28 @@ int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t mask = masks[m];
|
||||
|
||||
do {
|
||||
be32enc( noncep, n );
|
||||
be32enc( noncep+2, n+1 );
|
||||
be32enc( noncep+4, n+2 );
|
||||
be32enc( noncep+6, n+3 );
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
_mm256_set_epi32( n+3, 0, n+2, 0, n+1, 0, n, 0 ) ), *noncev );
|
||||
|
||||
nist5hash_4way( hash, vdata );
|
||||
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( ( hash7[ lane ] & mask ) == 0 )
|
||||
{
|
||||
mm256_extract_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) )
|
||||
extr_lane_4x64( lane_hash, hash, lane, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
nonces[ num_found++ ] = n + lane;
|
||||
work_set_target_ratio( work, lane_hash );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( num_found == 0 ) && ( n < max_nonce )
|
||||
&& !work_restart[thr_id].restart );
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return num_found;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -12,15 +12,15 @@
|
||||
|
||||
void nist5hash_4way( void *state, const void *input );
|
||||
|
||||
int scanhash_nist5_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_nist5_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#else
|
||||
|
||||
void nist5hash( void *state, const void *input );
|
||||
|
||||
int scanhash_nist5( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
int scanhash_nist5( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void init_nist5_ctx();
|
||||
#endif
|
||||
|
||||
|
@@ -81,8 +81,8 @@ void nist5hash(void *output, const void *input)
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_nist5(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_nist5( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
uint32_t hash64[8] __attribute__((aligned(32)));
|
||||
@@ -90,6 +90,7 @@ int scanhash_nist5(int thr_id, struct work *work,
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
uint64_t htmax[] = {
|
||||
|
@@ -144,8 +144,8 @@ static const int arrOrder[][4] =
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_zr5( int thr_id, struct work *work,
|
||||
uint32_t max_nonce, unsigned long *hashes_done)
|
||||
int scanhash_zr5( struct work *work, uint32_t max_nonce,
|
||||
unsigned long *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
@@ -154,6 +154,7 @@ int scanhash_zr5( int thr_id, struct work *work,
|
||||
const uint32_t version = pdata[0] & (~POK_DATA_MASK);
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
memcpy(tmpdata, pdata, 80);
|
||||
|
||||
|
@@ -50,6 +50,7 @@ void anime_4way_hash( void *state, const void *input )
|
||||
__m256i vh_mask;
|
||||
const uint32_t mask = 8;
|
||||
const __m256i bit3_mask = _mm256_set1_epi64x( 8 );
|
||||
const __m256i zero = _mm256_setzero_si256();
|
||||
anime_4way_ctx_holder ctx;
|
||||
memcpy( &ctx, &anime_4way_ctx, sizeof(anime_4way_ctx) );
|
||||
|
||||
@@ -59,10 +60,9 @@ void anime_4way_hash( void *state, const void *input )
|
||||
blake512_4way( &ctx.blake, vhash, 64 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
||||
m256_zero );
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
if ( hash0[0] & mask )
|
||||
{
|
||||
@@ -88,7 +88,7 @@ void anime_4way_hash( void *state, const void *input )
|
||||
(char*)hash3, 512 );
|
||||
}
|
||||
|
||||
mm256_intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
{
|
||||
@@ -98,7 +98,7 @@ void anime_4way_hash( void *state, const void *input )
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -109,13 +109,12 @@ void anime_4way_hash( void *state, const void *input )
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
||||
m256_zero );
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
{
|
||||
@@ -139,8 +138,7 @@ void anime_4way_hash( void *state, const void *input )
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
||||
m256_zero );
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
{
|
||||
@@ -157,10 +155,10 @@ void anime_4way_hash( void *state, const void *input )
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
mm256_dintrlv_4x64( state, state+32, state+64, state+96, vhash, 256 );
|
||||
dintrlv_4x64( state, state+32, state+64, state+96, vhash, 256 );
|
||||
}
|
||||
|
||||
int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
@@ -170,7 +168,7 @@ int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
@@ -189,7 +187,7 @@ int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
0
|
||||
};
|
||||
|
||||
mm256_bswap_intrlv80_4x64( vdata, pdata );
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
|
||||
for (int m=0; m < 6; m++)
|
||||
if (Htarg <= htmax[m])
|
||||
@@ -209,7 +207,7 @@ int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
&& fulltest( hash+(i<<3), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_solution( work, hash+(i<<3), mythr, i );
|
||||
submit_lane_solution( work, hash+(i<<3), mythr, i );
|
||||
}
|
||||
n += 4;
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
|
@@ -13,14 +13,14 @@ bool register_anime_algo( algo_gate_t* gate );
|
||||
#if defined(ANIME_4WAY)
|
||||
|
||||
void anime_4way_hash( void *state, const void *input );
|
||||
int scanhash_anime_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_anime_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void init_anime_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
void anime_hash( void *state, const void *input );
|
||||
int scanhash_anime( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_anime( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void init_anime_ctx();
|
||||
|
||||
|
@@ -119,7 +119,7 @@ void anime_hash( void *state, const void *input )
|
||||
memcpy( state, hash, 32 );
|
||||
}
|
||||
|
||||
int scanhash_anime( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_anime( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t hash[8] __attribute__ ((aligned (64)));
|
||||
@@ -128,7 +128,7 @@ int scanhash_anime( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = {
|
||||
0,
|
||||
|
@@ -47,7 +47,7 @@ typedef union _hmq1725_4way_context_overlay hmq1725_4way_context_overlay;
|
||||
|
||||
extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
{
|
||||
// why so big? only really need 8, haval thing uses 16.
|
||||
// why so big? only really need 16.
|
||||
uint32_t hash0 [32] __attribute__ ((aligned (64)));
|
||||
uint32_t hash1 [32] __attribute__ ((aligned (64)));
|
||||
uint32_t hash2 [32] __attribute__ ((aligned (64)));
|
||||
@@ -67,7 +67,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
bmw512_4way( &ctx.bmw, input, 80 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||
@@ -84,7 +84,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
|
||||
// first fork, A is groestl serial, B is skein parallel.
|
||||
|
||||
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
|
||||
m256_zero );
|
||||
@@ -116,7 +116,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
(char*)hash3, 512 );
|
||||
// }
|
||||
|
||||
mm256_intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// B
|
||||
|
||||
@@ -158,7 +158,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
init_luffa( &ctx.luffa, 512 );
|
||||
update_and_final_luffa( &ctx.luffa, (BitSequence*)hash0,
|
||||
@@ -186,7 +186,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
cubehashUpdateDigest( &ctx.cube, (BitSequence *)hash3,
|
||||
(const BitSequence *)hash3, 64 );
|
||||
|
||||
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// A= keccak parallel, B= jh parallel
|
||||
|
||||
@@ -209,7 +209,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
sph_shavite512_init( &ctx.shavite );
|
||||
sph_shavite512 ( &ctx.shavite, hash0, 64 );
|
||||
@@ -240,7 +240,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
// A is whirlpool serial, B is haval parallel.
|
||||
|
||||
|
||||
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
|
||||
m256_zero );
|
||||
@@ -271,7 +271,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
// }
|
||||
|
||||
mm256_intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// B
|
||||
|
||||
@@ -285,7 +285,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
init_echo( &ctx.echo, 512 );
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
@@ -300,13 +300,13 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash3,
|
||||
(const BitSequence *)hash3, 512 );
|
||||
|
||||
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
blake512_4way_init( &ctx.blake );
|
||||
blake512_4way( &ctx.blake, vhash, 64 );
|
||||
blake512_4way_close( &ctx.blake, vhash );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// shavite & luffa, both serial, select individually.
|
||||
|
||||
@@ -362,13 +362,13 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
(const BitSequence *)hash3, 64 );
|
||||
}
|
||||
|
||||
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
hamsi512_4way_init( &ctx.hamsi );
|
||||
hamsi512_4way( &ctx.hamsi, vhash, 64 );
|
||||
hamsi512_4way_close( &ctx.hamsi, vhash );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
sph_fugue512_init( &ctx.fugue );
|
||||
sph_fugue512( &ctx.fugue, hash0, 64 );
|
||||
@@ -438,13 +438,13 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
(const BitSequence *)hash3, 512 );
|
||||
}
|
||||
|
||||
mm128_intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
shabal512_4way_init( &ctx.shabal );
|
||||
shabal512_4way( &ctx.shabal, vhash, 64 );
|
||||
shabal512_4way_close( &ctx.shabal, vhash );
|
||||
|
||||
mm128_dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x32( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
sph_whirlpool_init( &ctx.whirlpool );
|
||||
sph_whirlpool( &ctx.whirlpool, hash0, 64 );
|
||||
@@ -461,7 +461,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
|
||||
// A = fugue serial, B = sha512 prarallel
|
||||
|
||||
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
|
||||
m256_zero );
|
||||
@@ -491,7 +491,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
sph_fugue512_close( &ctx.fugue, hash3 );
|
||||
// }
|
||||
|
||||
mm256_intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
// if ( mm256_any_clr_256( vh_mask ) )
|
||||
// {
|
||||
@@ -502,7 +502,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -513,7 +513,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
init_groestl( &ctx.groestl, 64 );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
sha512_4way_init( &ctx.sha512 );
|
||||
sha512_4way( &ctx.sha512, vhash, 64 );
|
||||
@@ -524,7 +524,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], vmask ),
|
||||
m256_zero );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
// if ( mm256_any_set_256( vh_mask ) ) //4
|
||||
// {
|
||||
@@ -559,7 +559,7 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
sph_whirlpool_close( &ctx.whirlpool, hash3 );
|
||||
// }
|
||||
|
||||
mm256_intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhashB, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
@@ -570,26 +570,26 @@ extern void hmq1725_4way_hash(void *state, const void *input)
|
||||
memcpy(state, vhash, 32<<2 );
|
||||
}
|
||||
|
||||
int scanhash_hmq1725_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_hmq1725_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
// uint32_t *hash7 = &(hash[7<<2]);
|
||||
// uint32_t lane_hash[8];
|
||||
// uint32_t *hash7 = &(hash[25]);
|
||||
// uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t vdata[24*4] __attribute__ ((aligned (64)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
uint32_t masks[] = { 0xFFFFFFFF, 0xFFFFFFF0, 0xFFFFFF00,
|
||||
0xFFFFF000, 0xFFFF0000, 0 };
|
||||
|
||||
mm256_bswap_intrlv80_4x64( vdata, pdata );
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
for ( int m = 0; m < 6; m++ ) if ( Htarg <= htmax[m] )
|
||||
{
|
||||
uint32_t mask = masks[ m ];
|
||||
@@ -604,7 +604,7 @@ int scanhash_hmq1725_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( fulltest( (hash+(i<<3)), ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + i;
|
||||
submit_solution( work, (hash+(i<<3)), mythr, i );
|
||||
submit_lane_solution( work, (hash+(i<<3)), mythr, i );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
|
@@ -13,13 +13,13 @@ bool register_hmq1725_algo( algo_gate_t* gate );
|
||||
#if defined(HMQ1725_4WAY)
|
||||
|
||||
void hmq1725_4way_hash( void *state, const void *input );
|
||||
int scanhash_hmq1725_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_hmq1725_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
|
||||
#else
|
||||
|
||||
void hmq1725hash( void *state, const void *input );
|
||||
int scanhash_hmq1725( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void init_hmq1725_ctx();
|
||||
|
||||
|
@@ -298,7 +298,7 @@ extern void hmq1725hash(void *state, const void *input)
|
||||
memcpy(state, hashA, 32);
|
||||
}
|
||||
|
||||
int scanhash_hmq1725( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_hmq1725( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
// uint32_t endiandata[32] __attribute__((aligned(64)));
|
||||
@@ -308,7 +308,7 @@ int scanhash_hmq1725( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
//const uint32_t Htarg = ptarget[7];
|
||||
|
||||
//we need bigendian data...
|
||||
|
@@ -51,6 +51,7 @@ void quark_4way_hash( void *state, const void *input )
|
||||
quark_4way_ctx_holder ctx;
|
||||
const __m256i bit3_mask = _mm256_set1_epi64x( 8 );
|
||||
const uint32_t mask = 8;
|
||||
const __m256i zero = _mm256_setzero_si256();
|
||||
|
||||
memcpy( &ctx, &quark_4way_ctx, sizeof(quark_4way_ctx) );
|
||||
|
||||
@@ -60,10 +61,9 @@ void quark_4way_hash( void *state, const void *input )
|
||||
bmw512_4way( &ctx.bmw, vhash, 64 );
|
||||
bmw512_4way_close( &ctx.bmw, vhash );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
||||
m256_zero );
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
if ( hash0[0] & mask )
|
||||
{
|
||||
@@ -89,7 +89,7 @@ void quark_4way_hash( void *state, const void *input )
|
||||
(char*)hash3, 512 );
|
||||
}
|
||||
|
||||
mm256_intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
if ( mm256_anybits0( vh_mask ) )
|
||||
{
|
||||
@@ -99,7 +99,7 @@ void quark_4way_hash( void *state, const void *input )
|
||||
|
||||
mm256_blend_hash_4x64( vh, vhA, vhB, vh_mask );
|
||||
|
||||
mm256_dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
|
||||
@@ -110,13 +110,12 @@ void quark_4way_hash( void *state, const void *input )
|
||||
reinit_groestl( &ctx.groestl );
|
||||
update_and_final_groestl( &ctx.groestl, (char*)hash3, (char*)hash3, 512 );
|
||||
|
||||
mm256_intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
intrlv_4x64( vhash, hash0, hash1, hash2, hash3, 512 );
|
||||
|
||||
jh512_4way( &ctx.jh, vhash, 64 );
|
||||
jh512_4way_close( &ctx.jh, vhash );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
||||
m256_zero );
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
{
|
||||
@@ -141,8 +140,7 @@ void quark_4way_hash( void *state, const void *input )
|
||||
skein512_4way( &ctx.skein, vhash, 64 );
|
||||
skein512_4way_close( &ctx.skein, vhash );
|
||||
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ),
|
||||
m256_zero );
|
||||
vh_mask = _mm256_cmpeq_epi64( _mm256_and_si256( vh[0], bit3_mask ), zero );
|
||||
|
||||
if ( mm256_anybits1( vh_mask ) )
|
||||
{
|
||||
@@ -165,7 +163,7 @@ void quark_4way_hash( void *state, const void *input )
|
||||
casti_m256i( state, 3 ) = _mm256_blendv_epi8( vhA[3], vhB[3], vh_mask );
|
||||
}
|
||||
|
||||
int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_quark_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
@@ -177,9 +175,9 @@ int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
__m256i *noncev = (__m256i*)vdata + 9; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
mm256_bswap_intrlv80_4x64( vdata, pdata );
|
||||
mm256_bswap32_intrlv80_4x64( vdata, pdata );
|
||||
do
|
||||
{
|
||||
*noncev = mm256_intrlv_blend_32( mm256_bswap_32(
|
||||
@@ -191,11 +189,11 @@ int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
if ( ( hash7[ i<<1 ] & 0xFFFFFF00 ) == 0 )
|
||||
{
|
||||
mm256_extract_lane_4x64( lane_hash, hash, i, 256 );
|
||||
extr_lane_4x64( lane_hash, hash, i, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+i;
|
||||
submit_solution( work, lane_hash, mythr, i );
|
||||
submit_lane_solution( work, lane_hash, mythr, i );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
|
@@ -13,14 +13,14 @@ bool register_quark_algo( algo_gate_t* gate );
|
||||
#if defined(QUARK_4WAY)
|
||||
|
||||
void quark_4way_hash( void *state, const void *input );
|
||||
int scanhash_quark_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_quark_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void init_quark_4way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
void quark_hash( void *state, const void *input );
|
||||
int scanhash_quark( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_quark( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void init_quark_ctx();
|
||||
|
||||
|
@@ -172,7 +172,7 @@ void quark_hash(void *state, const void *input)
|
||||
memcpy(state, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_quark( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_quark( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
@@ -181,7 +181,7 @@ int scanhash_quark( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
swab32_array( endiandata, pdata, 20 );
|
||||
|
||||
|
@@ -39,7 +39,7 @@ void deep_2way_hash( void *output, const void *input )
|
||||
memcpy( &ctx, &deep_2way_ctx, sizeof(deep_2way_ctx) );
|
||||
luffa_2way_update( &ctx.luffa, input + (64<<1), 16 );
|
||||
luffa_2way_close( &ctx.luffa, vhash );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
|
||||
(const byte*) hash0, 64 );
|
||||
@@ -63,7 +63,7 @@ void deep_2way_hash( void *output, const void *input )
|
||||
memcpy( output+32, hash1, 32 );
|
||||
}
|
||||
|
||||
int scanhash_deep_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
||||
int scanhash_deep_2way( struct work *work,uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
@@ -74,7 +74,7 @@ int scanhash_deep_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *noncep = vdata + 32+3; // 4*8 + 3
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
@@ -86,7 +86,7 @@ int scanhash_deep_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_2x128( (uint64_t*)vdata, edata, edata, 640 );
|
||||
intrlv_2x128( (uint64_t*)vdata, edata, edata, 640 );
|
||||
|
||||
luffa_2way_init( &deep_2way_ctx.luffa, 512 );
|
||||
luffa_2way_update( &deep_2way_ctx.luffa, vdata, 64 );
|
||||
@@ -106,13 +106,13 @@ int scanhash_deep_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
||||
if ( fulltest( hash, ptarget) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n;
|
||||
submit_solution( work, hash, mythr, 0 );
|
||||
submit_lane_solution( work, hash, mythr, 0 );
|
||||
}
|
||||
if ( !( (hash+8)[7] & mask ) )
|
||||
if ( fulltest( hash+8, ptarget) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+1;
|
||||
submit_solution( work, hash+8, mythr, 1 );
|
||||
submit_lane_solution( work, hash+8, mythr, 1 );
|
||||
}
|
||||
n += 2;
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
|
@@ -13,14 +13,14 @@ bool register_deep_algo( algo_gate_t* gate );
|
||||
#if defined(DEEP_2WAY)
|
||||
|
||||
void deep_2way_hash( void *state, const void *input );
|
||||
int scanhash_deep_2way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_deep_2way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void init_deep_2way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
void deep_hash( void *state, const void *input );
|
||||
int scanhash_deep( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_deep( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void init_deep_ctx();
|
||||
|
||||
|
@@ -71,7 +71,7 @@ void deep_hash(void *output, const void *input)
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_deep( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_deep( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
@@ -80,7 +80,7 @@ int scanhash_deep( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 };
|
||||
|
@@ -41,7 +41,7 @@ void qubit_2way_hash( void *output, const void *input )
|
||||
memcpy( &ctx, &qubit_2way_ctx, sizeof(qubit_2way_ctx) );
|
||||
luffa_2way_update( &ctx.luffa, input + (64<<1), 16 );
|
||||
luffa_2way_close( &ctx.luffa, vhash );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
|
||||
cubehashUpdateDigest( &ctx.cube, (byte*)hash0,
|
||||
(const byte*) hash0, 64 );
|
||||
@@ -55,9 +55,9 @@ void qubit_2way_hash( void *output, const void *input )
|
||||
sph_shavite512( &ctx.shavite, hash1, 64 );
|
||||
sph_shavite512_close( &ctx.shavite, hash1 );
|
||||
|
||||
mm256_interleave_2x128( vhash, hash0, hash1, 512 );
|
||||
intrlv_2x128( vhash, hash0, hash1, 512 );
|
||||
simd_2way_update_close( &ctx.simd, vhash, vhash, 512 );
|
||||
mm256_deinterleave_2x128( hash0, hash1, vhash, 512 );
|
||||
dintrlv_2x128( hash0, hash1, vhash, 512 );
|
||||
|
||||
update_final_echo( &ctx.echo, (BitSequence *)hash0,
|
||||
(const BitSequence *) hash0, 512 );
|
||||
@@ -69,7 +69,7 @@ void qubit_2way_hash( void *output, const void *input )
|
||||
memcpy( output+32, hash1, 32 );
|
||||
}
|
||||
|
||||
int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
||||
int scanhash_qubit_2way( struct work *work,uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[4*8] __attribute__ ((aligned (64)));
|
||||
@@ -80,7 +80,7 @@ int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
||||
uint32_t n = pdata[19];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t *noncep = vdata + 32+3; // 4*8 + 3
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
@@ -92,7 +92,7 @@ int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
||||
casti_m128i( endiandata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
mm256_interleave_2x128( (uint64_t*)vdata, edata, edata, 640 );
|
||||
intrlv_2x128( (uint64_t*)vdata, edata, edata, 640 );
|
||||
|
||||
luffa_2way_init( &qubit_2way_ctx.luffa, 512 );
|
||||
luffa_2way_update( &qubit_2way_ctx.luffa, vdata, 64 );
|
||||
@@ -111,13 +111,13 @@ int scanhash_qubit_2way( int thr_id, struct work *work,uint32_t max_nonce,
|
||||
if ( fulltest( hash, ptarget) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n;
|
||||
submit_solution( work, hash, mythr, 0 );
|
||||
submit_lane_solution( work, hash, mythr, 0 );
|
||||
}
|
||||
if ( !( (hash+8)[7] & mask ) )
|
||||
if ( fulltest( hash+8, ptarget) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n+1;
|
||||
submit_solution( work, hash+8, mythr, 1 );
|
||||
submit_lane_solution( work, hash+8, mythr, 1 );
|
||||
}
|
||||
n += 2;
|
||||
} while ( ( n < max_nonce ) && !work_restart[thr_id].restart );
|
||||
|
@@ -13,14 +13,14 @@ bool register_qubit_algo( algo_gate_t* gate );
|
||||
#if defined(QUBIT_2WAY)
|
||||
|
||||
void qubit_2way_hash( void *state, const void *input );
|
||||
int scanhash_qubit_2way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_qubit_2way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void init_qubit_2way_ctx();
|
||||
|
||||
#endif
|
||||
|
||||
void qubit_hash( void *state, const void *input );
|
||||
int scanhash_qubit( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_qubit( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
void init_qubit_ctx();
|
||||
|
||||
|
@@ -83,7 +83,7 @@ void qubit_hash(void *output, const void *input)
|
||||
memcpy(output, hash, 32);
|
||||
}
|
||||
|
||||
int scanhash_qubit( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_qubit( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t endiandata[20] __attribute__((aligned(64)));
|
||||
@@ -92,7 +92,7 @@ int scanhash_qubit( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t *ptarget = work->target;
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF, 0xFFF, 0xFFFF, 0x10000000 };
|
||||
|
@@ -40,9 +40,9 @@ void lbry_8way_hash( void* output, const void* input )
|
||||
sha256_8way_close( &ctx_sha256, vhashA );
|
||||
|
||||
// reinterleave to do sha512 4-way 64 bit twice.
|
||||
mm256_dintrlv_8x32( h0, h1, h2, h3, h4, h5, h6, h7, vhashA, 256 );
|
||||
mm256_intrlv_4x64( vhashA, h0, h1, h2, h3, 256 );
|
||||
mm256_intrlv_4x64( vhashB, h4, h5, h6, h7, 256 );
|
||||
dintrlv_8x32( h0, h1, h2, h3, h4, h5, h6, h7, vhashA, 256 );
|
||||
intrlv_4x64( vhashA, h0, h1, h2, h3, 256 );
|
||||
intrlv_4x64( vhashB, h4, h5, h6, h7, 256 );
|
||||
|
||||
sha512_4way_init( &ctx_sha512 );
|
||||
sha512_4way( &ctx_sha512, vhashA, 32 );
|
||||
@@ -53,9 +53,9 @@ void lbry_8way_hash( void* output, const void* input )
|
||||
sha512_4way_close( &ctx_sha512, vhashB );
|
||||
|
||||
// back to 8-way 32 bit
|
||||
mm256_dintrlv_4x64( h0, h1, h2, h3, vhashA, 512 );
|
||||
mm256_dintrlv_4x64( h4, h5, h6, h7, vhashB, 512 );
|
||||
mm256_intrlv_8x32( vhashA, h0, h1, h2, h3, h4, h5, h6, h7, 512 );
|
||||
dintrlv_4x64( h0, h1, h2, h3, vhashA, 512 );
|
||||
dintrlv_4x64( h4, h5, h6, h7, vhashB, 512 );
|
||||
intrlv_8x32( vhashA, h0, h1, h2, h3, h4, h5, h6, h7, 512 );
|
||||
|
||||
ripemd160_8way_init( &ctx_ripemd );
|
||||
ripemd160_8way( &ctx_ripemd, vhashA, 32 );
|
||||
@@ -75,7 +75,7 @@ void lbry_8way_hash( void* output, const void* input )
|
||||
sha256_8way_close( &ctx_sha256, output );
|
||||
}
|
||||
|
||||
int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (64)));
|
||||
@@ -89,7 +89,7 @@ int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
uint32_t edata[32] __attribute__ ((aligned (64)));
|
||||
__m256i *noncev = (__m256i*)vdata + 27; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint64_t htmax[] = { 0, 0xF, 0xFF,
|
||||
0xFFF, 0xFFFF, 0x10000000 };
|
||||
@@ -97,11 +97,15 @@ int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
0xFFFFF000, 0xFFFF0000, 0 };
|
||||
|
||||
// we need bigendian data...
|
||||
casti_m256i( edata, 0 ) = mm256_bswap_32( casti_m256i( pdata, 0 ) );
|
||||
casti_m256i( edata, 1 ) = mm256_bswap_32( casti_m256i( pdata, 1 ) );
|
||||
casti_m256i( edata, 2 ) = mm256_bswap_32( casti_m256i( pdata, 2 ) );
|
||||
casti_m256i( edata, 3 ) = mm256_bswap_32( casti_m256i( pdata, 3 ) );
|
||||
mm256_intrlv_8x32( vdata, edata, edata, edata, edata,
|
||||
casti_m128i( edata, 0 ) = mm128_bswap_32( casti_m128i( pdata, 0 ) );
|
||||
casti_m128i( edata, 1 ) = mm128_bswap_32( casti_m128i( pdata, 1 ) );
|
||||
casti_m128i( edata, 2 ) = mm128_bswap_32( casti_m128i( pdata, 2 ) );
|
||||
casti_m128i( edata, 3 ) = mm128_bswap_32( casti_m128i( pdata, 3 ) );
|
||||
casti_m128i( edata, 4 ) = mm128_bswap_32( casti_m128i( pdata, 4 ) );
|
||||
casti_m128i( edata, 5 ) = mm128_bswap_32( casti_m128i( pdata, 5 ) );
|
||||
casti_m128i( edata, 6 ) = mm128_bswap_32( casti_m128i( pdata, 6 ) );
|
||||
casti_m128i( edata, 7 ) = mm128_bswap_32( casti_m128i( pdata, 7 ) );
|
||||
intrlv_8x32( vdata, edata, edata, edata, edata,
|
||||
edata, edata, edata, edata, 1024 );
|
||||
sha256_8way_init( &sha256_8w_mid );
|
||||
sha256_8way( &sha256_8w_mid, vdata, LBRY_MIDSTATE );
|
||||
@@ -118,11 +122,11 @@ int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
for ( int i = 0; i < 8; i++ ) if ( !( hash7[ i ] & mask ) )
|
||||
{
|
||||
// deinterleave hash for lane
|
||||
mm256_extract_lane_8x32( lane_hash, hash, i, 256 );
|
||||
extr_lane_8x32( lane_hash, hash, i, 256 );
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[27] = n + i;
|
||||
submit_solution( work, lane_hash, mythr, i );
|
||||
submit_lane_solution( work, lane_hash, mythr, i );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
|
@@ -21,19 +21,19 @@ bool register_lbry_algo( algo_gate_t* gate );
|
||||
#if defined(LBRY_8WAY)
|
||||
|
||||
void lbry_8way_hash( void *state, const void *input );
|
||||
int scanhash_lbry_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lbry_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
/*
|
||||
#elif defined(LBRY_4WAY)
|
||||
|
||||
void lbry_4way_hash( void *state, const void *input );
|
||||
int scanhash_lbry_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lbry_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done );
|
||||
*/
|
||||
#else
|
||||
|
||||
void lbry_hash( void *state, const void *input );
|
||||
int scanhash_lbry( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lbry( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr );
|
||||
#endif
|
||||
#endif
|
||||
|
@@ -47,7 +47,7 @@ void lbry_hash(void* output, const void* input)
|
||||
memcpy( output, hashA, 32 );
|
||||
}
|
||||
|
||||
int scanhash_lbry( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_lbry( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr)
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
@@ -55,7 +55,7 @@ int scanhash_lbry( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t n = pdata[27] - 1;
|
||||
const uint32_t first_nonce = pdata[27];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
uint32_t hash64[8] __attribute__((aligned(64)));
|
||||
uint32_t endiandata[32] __attribute__ ((aligned (64)));
|
||||
|
@@ -1042,8 +1042,8 @@ static bool fulltest_le(const uint *hash, const uint *target)
|
||||
return(rc);
|
||||
}
|
||||
|
||||
int scanhash_neoscrypt( int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done )
|
||||
int scanhash_neoscrypt( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
@@ -1051,6 +1051,7 @@ int scanhash_neoscrypt( int thr_id, struct work *work,
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
while (pdata[19] < max_nonce && !work_restart[thr_id].restart)
|
||||
{
|
@@ -444,7 +444,7 @@ void pluck_hash(uint32_t *hash, const uint32_t *data, uchar *hashbuffer, const i
|
||||
memcpy(hash, hashbuffer, 32);
|
||||
}
|
||||
|
||||
int scanhash_pluck(int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_pluck( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
@@ -452,9 +452,9 @@ int scanhash_pluck(int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
uint32_t _ALIGN(64) hash[8];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
volatile uint8_t *restart = &(work_restart[thr_id].restart);
|
||||
uint32_t n = first_nonce;
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
|
||||
if (opt_benchmark)
|
@@ -695,7 +695,7 @@ static void scrypt_1024_1_1_256_24way(const uint32_t *input,
|
||||
}
|
||||
#endif /* HAVE_SCRYPT_6WAY */
|
||||
|
||||
extern int scanhash_scrypt( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
extern int scanhash_scrypt( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
@@ -704,7 +704,7 @@ extern int scanhash_scrypt( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t midstate[8];
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int throughput = scrypt_best_throughput();
|
||||
int i;
|
||||
|
@@ -135,8 +135,8 @@ unsigned char GetNfactor(unsigned int nTimestamp, unsigned int ntime) {
|
||||
}
|
||||
|
||||
|
||||
int scanhash_scryptjane( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
int scanhash_scryptjane( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
scrypt_aligned_alloc YX, V;
|
||||
uint8_t *X, *Y;
|
||||
@@ -150,6 +150,7 @@ int scanhash_scryptjane( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint32_t _ALIGN(64) endiandata[20];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t nonce = first_nonce;
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x00ff;
|
||||
|
@@ -196,9 +196,9 @@ SPH_XCAT( HASH, _addbits_and_close )(void *cc, unsigned ub, unsigned n,
|
||||
ptr = (unsigned)sc->count & (SPH_BLEN - 1U);
|
||||
|
||||
#ifdef PW01
|
||||
sc->buf[ptr>>3] = _mm256_set1_epi64x( 0x100 >> 8 );
|
||||
sc->buf[ptr>>3] = m256_const1_64( 0x100 >> 8 );
|
||||
#else
|
||||
sc->buf[ptr>>3] = _mm256_set1_epi64x( 0x80 );
|
||||
sc->buf[ptr>>3] = m256_const1_64( 0x80 );
|
||||
#endif
|
||||
ptr += 8;
|
||||
|
||||
|
@@ -86,8 +86,7 @@ static const sph_u32 K256[64] = {
|
||||
// SHA-256 4 way
|
||||
|
||||
#define SHA2s_MEXP( a, b, c, d ) \
|
||||
_mm_add_epi32( _mm_add_epi32( _mm_add_epi32( \
|
||||
SSG2_1( W[a] ), W[b] ), SSG2_0( W[c] ) ), W[d] );
|
||||
mm128_add4_32( SSG2_1( W[a] ), W[b], SSG2_0( W[c] ), W[d] );
|
||||
|
||||
#define CHs(X, Y, Z) \
|
||||
_mm_xor_si128( _mm_and_si128( _mm_xor_si128( Y, Z ), X ), Z )
|
||||
@@ -115,9 +114,8 @@ static const sph_u32 K256[64] = {
|
||||
#define SHA2s_4WAY_STEP(A, B, C, D, E, F, G, H, i, j) \
|
||||
do { \
|
||||
register __m128i T1, T2; \
|
||||
T1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( \
|
||||
_mm_add_epi32( H, BSG2_1(E) ), CHs(E, F, G) ), \
|
||||
_mm_set1_epi32( K256[( (j)+(i) )] ) ), W[i] ); \
|
||||
T1 = _mm_add_epi32( H, mm128_add4_32( BSG2_1(E), CHs(E, F, G), \
|
||||
_mm_set1_epi32( K256[( (j)+(i) )] ), W[i] ) ); \
|
||||
T2 = _mm_add_epi32( BSG2_0(A), MAJs(A, B, C) ); \
|
||||
D = _mm_add_epi32( D, T1 ); \
|
||||
H = _mm_add_epi32( T1, T2 ); \
|
||||
@@ -129,22 +127,8 @@ sha256_4way_round( __m128i *in, __m128i r[8] )
|
||||
register __m128i A, B, C, D, E, F, G, H;
|
||||
__m128i W[16];
|
||||
|
||||
W[ 0] = mm128_bswap_32( in[ 0] );
|
||||
W[ 1] = mm128_bswap_32( in[ 1] );
|
||||
W[ 2] = mm128_bswap_32( in[ 2] );
|
||||
W[ 3] = mm128_bswap_32( in[ 3] );
|
||||
W[ 4] = mm128_bswap_32( in[ 4] );
|
||||
W[ 5] = mm128_bswap_32( in[ 5] );
|
||||
W[ 6] = mm128_bswap_32( in[ 6] );
|
||||
W[ 7] = mm128_bswap_32( in[ 7] );
|
||||
W[ 8] = mm128_bswap_32( in[ 8] );
|
||||
W[ 9] = mm128_bswap_32( in[ 9] );
|
||||
W[10] = mm128_bswap_32( in[10] );
|
||||
W[11] = mm128_bswap_32( in[11] );
|
||||
W[12] = mm128_bswap_32( in[12] );
|
||||
W[13] = mm128_bswap_32( in[13] );
|
||||
W[14] = mm128_bswap_32( in[14] );
|
||||
W[15] = mm128_bswap_32( in[15] );
|
||||
mm128_block_bswap_32( W, in );
|
||||
mm128_block_bswap_32( W+8, in+8 );
|
||||
|
||||
A = r[0];
|
||||
B = r[1];
|
||||
@@ -266,7 +250,7 @@ void sha256_4way( sha256_4way_context *sc, const void *data, size_t len )
|
||||
|
||||
void sha256_4way_close( sha256_4way_context *sc, void *dst )
|
||||
{
|
||||
unsigned ptr, u;
|
||||
unsigned ptr;
|
||||
uint32_t low, high;
|
||||
const int buf_size = 64;
|
||||
const int pad = buf_size - 8;
|
||||
@@ -294,8 +278,7 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
|
||||
mm128_bswap_32( _mm_set1_epi32( low ) );
|
||||
sha256_4way_round( sc->buf, sc->val );
|
||||
|
||||
for ( u = 0; u < 8; u ++ )
|
||||
((__m128i*)dst)[u] = mm128_bswap_32( sc->val[u] );
|
||||
mm128_block_bswap_32( dst, sc->val );
|
||||
}
|
||||
|
||||
#if defined(__AVX2__)
|
||||
@@ -326,15 +309,13 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
|
||||
mm256_ror_32(x, 17), mm256_ror_32(x, 19) ), _mm256_srli_epi32(x, 10) )
|
||||
|
||||
#define SHA2x_MEXP( a, b, c, d ) \
|
||||
_mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32( \
|
||||
SSG2_1x( W[a] ), W[b] ), SSG2_0x( W[c] ) ), W[d] );
|
||||
mm256_add4_32( SSG2_1x( W[a] ), W[b], SSG2_0x( W[c] ), W[d] );
|
||||
|
||||
#define SHA2s_8WAY_STEP(A, B, C, D, E, F, G, H, i, j) \
|
||||
do { \
|
||||
register __m256i T1, T2; \
|
||||
T1 = _mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32( \
|
||||
_mm256_add_epi32( H, BSG2_1x(E) ), CHx(E, F, G) ), \
|
||||
_mm256_set1_epi32( K256[( (j)+(i) )] ) ), W[i] ); \
|
||||
T1 = _mm256_add_epi32( H, mm256_add4_32( BSG2_1x(E), CHx(E, F, G), \
|
||||
_mm256_set1_epi32( K256[( (j)+(i) )] ), W[i] ) ); \
|
||||
T2 = _mm256_add_epi32( BSG2_0x(A), MAJx(A, B, C) ); \
|
||||
D = _mm256_add_epi32( D, T1 ); \
|
||||
H = _mm256_add_epi32( T1, T2 ); \
|
||||
@@ -346,22 +327,8 @@ sha256_8way_round( __m256i *in, __m256i r[8] )
|
||||
register __m256i A, B, C, D, E, F, G, H;
|
||||
__m256i W[16];
|
||||
|
||||
W[ 0] = mm256_bswap_32( in[ 0] );
|
||||
W[ 1] = mm256_bswap_32( in[ 1] );
|
||||
W[ 2] = mm256_bswap_32( in[ 2] );
|
||||
W[ 3] = mm256_bswap_32( in[ 3] );
|
||||
W[ 4] = mm256_bswap_32( in[ 4] );
|
||||
W[ 5] = mm256_bswap_32( in[ 5] );
|
||||
W[ 6] = mm256_bswap_32( in[ 6] );
|
||||
W[ 7] = mm256_bswap_32( in[ 7] );
|
||||
W[ 8] = mm256_bswap_32( in[ 8] );
|
||||
W[ 9] = mm256_bswap_32( in[ 9] );
|
||||
W[10] = mm256_bswap_32( in[10] );
|
||||
W[11] = mm256_bswap_32( in[11] );
|
||||
W[12] = mm256_bswap_32( in[12] );
|
||||
W[13] = mm256_bswap_32( in[13] );
|
||||
W[14] = mm256_bswap_32( in[14] );
|
||||
W[15] = mm256_bswap_32( in[15] );
|
||||
mm256_block_bswap_32( W , in );
|
||||
mm256_block_bswap_32( W+8, in+8 );
|
||||
|
||||
A = r[0];
|
||||
B = r[1];
|
||||
@@ -484,7 +451,7 @@ void sha256_8way( sha256_8way_context *sc, const void *data, size_t len )
|
||||
|
||||
void sha256_8way_close( sha256_8way_context *sc, void *dst )
|
||||
{
|
||||
unsigned ptr, u;
|
||||
unsigned ptr;
|
||||
uint32_t low, high;
|
||||
const int buf_size = 64;
|
||||
const int pad = buf_size - 8;
|
||||
@@ -513,8 +480,7 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst )
|
||||
|
||||
sha256_8way_round( sc->buf, sc->val );
|
||||
|
||||
for ( u = 0; u < 8; u ++ )
|
||||
((__m256i*)dst)[u] = mm256_bswap_32( sc->val[u] );
|
||||
mm256_block_bswap_32( dst, sc->val );
|
||||
}
|
||||
|
||||
|
||||
@@ -596,9 +562,8 @@ static const sph_u64 K512[80] = {
|
||||
#define SHA3_4WAY_STEP(A, B, C, D, E, F, G, H, i) \
|
||||
do { \
|
||||
register __m256i T1, T2; \
|
||||
T1 = _mm256_add_epi64( _mm256_add_epi64( _mm256_add_epi64( \
|
||||
_mm256_add_epi64( H, BSG5_1(E) ), CH(E, F, G) ), \
|
||||
_mm256_set1_epi64x( K512[i] ) ), W[i] ); \
|
||||
T1 = _mm256_add_epi64( H, mm256_add4_64( BSG5_1(E), CH(E, F, G), \
|
||||
_mm256_set1_epi64x( K512[i] ), W[i] ) ); \
|
||||
T2 = _mm256_add_epi64( BSG5_0(A), MAJ(A, B, C) ); \
|
||||
D = _mm256_add_epi64( D, T1 ); \
|
||||
H = _mm256_add_epi64( T1, T2 ); \
|
||||
@@ -611,11 +576,12 @@ sha512_4way_round( __m256i *in, __m256i r[8] )
|
||||
register __m256i A, B, C, D, E, F, G, H;
|
||||
__m256i W[80];
|
||||
|
||||
for ( i = 0; i < 16; i++ )
|
||||
W[i] = mm256_bswap_64( in[i] );
|
||||
mm256_block_bswap_64( W , in );
|
||||
mm256_block_bswap_64( W+8, in+8 );
|
||||
|
||||
for ( i = 16; i < 80; i++ )
|
||||
W[i] = _mm256_add_epi64( _mm256_add_epi64( _mm256_add_epi64(
|
||||
SSG5_1( W[ i-2 ] ), W[ i-7 ] ), SSG5_0( W[ i-15 ] ) ), W[ i-16 ] );
|
||||
W[i] = mm256_add4_64( SSG5_1( W[ i- 2 ] ), W[ i- 7 ],
|
||||
SSG5_0( W[ i-15 ] ), W[ i-16 ] );
|
||||
|
||||
A = r[0];
|
||||
B = r[1];
|
||||
@@ -689,12 +655,12 @@ void sha512_4way( sha512_4way_context *sc, const void *data, size_t len )
|
||||
|
||||
void sha512_4way_close( sha512_4way_context *sc, void *dst )
|
||||
{
|
||||
unsigned ptr, u;
|
||||
unsigned ptr;
|
||||
const int buf_size = 128;
|
||||
const int pad = buf_size - 16;
|
||||
|
||||
ptr = (unsigned)sc->count & (buf_size - 1U);
|
||||
sc->buf[ ptr>>3 ] = _mm256_set1_epi64x( 0x80 );
|
||||
sc->buf[ ptr>>3 ] = m256_const1_64( 0x80 );
|
||||
ptr += 8;
|
||||
if ( ptr > pad )
|
||||
{
|
||||
@@ -711,8 +677,7 @@ void sha512_4way_close( sha512_4way_context *sc, void *dst )
|
||||
mm256_bswap_64( _mm256_set1_epi64x( sc->count << 3 ) );
|
||||
sha512_4way_round( sc->buf, sc->val );
|
||||
|
||||
for ( u = 0; u < 8; u ++ )
|
||||
((__m256i*)dst)[u] = mm256_bswap_64( sc->val[u] );
|
||||
mm256_block_bswap_64( dst, sc->val );
|
||||
}
|
||||
|
||||
#endif // __AVX2__
|
||||
|
@@ -586,8 +586,8 @@ static inline int scanhash_sha256d_8way(int thr_id, struct work *work,
|
||||
|
||||
#endif /* HAVE_SHA256_8WAY */
|
||||
|
||||
int scanhash_sha256d(int thr_id, struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done)
|
||||
int scanhash_sha256d( struct work *work,
|
||||
uint32_t max_nonce, uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
@@ -598,7 +598,8 @@ int scanhash_sha256d(int thr_id, struct work *work,
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
#ifdef HAVE_SHA256_8WAY
|
||||
if (sha256_use_8way())
|
||||
return scanhash_sha256d_8way(thr_id, work,
|
||||
@@ -621,16 +622,14 @@ int scanhash_sha256d(int thr_id, struct work *work,
|
||||
do {
|
||||
data[3] = ++n;
|
||||
sha256d_ms(hash, data, midstate, prehash);
|
||||
if (unlikely(swab32(hash[7]) <= Htarg)) {
|
||||
if (unlikely(swab32(hash[7]) <= Htarg))
|
||||
{
|
||||
pdata[19] = data[3];
|
||||
sha256d_80_swap(hash, pdata);
|
||||
if (fulltest(hash, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
if ( fulltest(hash, ptarget) && !opt_benchmark )
|
||||
submit_solution( work, hash, mythr );
|
||||
}
|
||||
} while (likely(n < max_nonce && !work_restart[thr_id].restart));
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
|
@@ -31,18 +31,19 @@ void sha256q_8way_hash( void* output, const void* input )
|
||||
sha256_8way_close( &ctx, output );
|
||||
}
|
||||
|
||||
int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_sha256q_8way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*8] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*8] __attribute__ ((aligned (32)));
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
__m256i *noncev = (__m256i*)vdata + 19; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
@@ -58,7 +59,7 @@ int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
0 };
|
||||
|
||||
// Need big endian data
|
||||
mm256_bswap_intrlv80_8x32( vdata, pdata );
|
||||
mm256_bswap32_intrlv80_8x32( vdata, pdata );
|
||||
sha256_8way_init( &sha256_ctx8 );
|
||||
sha256_8way( &sha256_ctx8, vdata, 64 );
|
||||
|
||||
@@ -79,13 +80,12 @@ int scanhash_sha256q_8way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
if ( !( hash7[ lane ] & mask ) )
|
||||
{
|
||||
// deinterleave hash for lane
|
||||
uint32_t lane_hash[8];
|
||||
mm256_extract_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
extr_lane_8x32( lane_hash, hash, lane, 256 );
|
||||
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_solution( work, lane_hash, mythr, lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 8;
|
||||
@@ -124,20 +124,20 @@ void sha256q_4way_hash( void* output, const void* input )
|
||||
sha256_4way_close( &ctx, output );
|
||||
}
|
||||
|
||||
int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
int scanhash_sha256q_4way( struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done, struct thr_info *mythr )
|
||||
{
|
||||
uint32_t vdata[20*4] __attribute__ ((aligned (64)));
|
||||
uint32_t hash[8*4] __attribute__ ((aligned (32)));
|
||||
uint32_t *hash7 = &(hash[7<<2]);
|
||||
uint32_t lane_hash[8];
|
||||
uint32_t lane_hash[8] __attribute__ ((aligned (32)));
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
uint32_t n = first_nonce;
|
||||
__m128i *noncev = (__m128i*)vdata + 19; // aligned
|
||||
/* int */ thr_id = mythr->id; // thr_id arg is deprecated
|
||||
int thr_id = mythr->id; // thr_id arg is deprecated
|
||||
|
||||
const uint64_t htmax[] = { 0,
|
||||
0xF,
|
||||
@@ -152,7 +152,7 @@ int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
0xFFFF0000,
|
||||
0 };
|
||||
|
||||
mm128_bswap_intrlv80_4x32( vdata, pdata );
|
||||
mm128_bswap32_intrlv80_4x32( vdata, pdata );
|
||||
sha256_4way_init( &sha256_ctx4 );
|
||||
sha256_4way( &sha256_ctx4, vdata, 64 );
|
||||
|
||||
@@ -168,12 +168,12 @@ int scanhash_sha256q_4way( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
for ( int lane = 0; lane < 4; lane++ )
|
||||
if ( !( hash7[ lane ] & mask ) )
|
||||
{
|
||||
mm128_extract_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
extr_lane_4x32( lane_hash, hash, lane, 256 );
|
||||
|
||||
if ( fulltest( lane_hash, ptarget ) && !opt_benchmark )
|
||||
{
|
||||
pdata[19] = n + lane;
|
||||
submit_solution( work, lane_hash, mythr, lane );
|
||||
submit_lane_solution( work, lane_hash, mythr, lane );
|
||||
}
|
||||
}
|
||||
n += 4;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user