mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v24.2
This commit is contained in:
@@ -745,7 +745,7 @@ do{ \
|
||||
SALSA_2ROUNDS; SALSA_2ROUNDS; SALSA_2ROUNDS; SALSA_2ROUNDS;
|
||||
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#if defined(SIMD512)
|
||||
|
||||
// Tested OK but very slow
|
||||
// 16 way parallel, requires 16x32 interleaving
|
||||
@@ -2487,7 +2487,7 @@ static void salsa8_simd128_2buf( uint32_t * const ba, uint32_t * const bb,
|
||||
XA3 = BA[3] = v128_xor( BA[3], CA[3] );
|
||||
XB3 = BB[3] = v128_xor( BB[3], CB[3] );
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#if defined(SIMD512)
|
||||
|
||||
SALSA_8ROUNDS_SIMD128_2BUF;
|
||||
|
||||
@@ -2886,7 +2886,7 @@ static void salsa8_simd128_3buf( uint32_t *ba, uint32_t *bb, uint32_t *bc,
|
||||
XB3 = BB[3] = v128_xor( BB[3], CB[3] );
|
||||
XC3 = BC[3] = v128_xor( BC[3], CC[3] );
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#if defined(SIMD512)
|
||||
|
||||
SALSA_8ROUNDS_SIMD128_3BUF;
|
||||
|
||||
|
@@ -5,7 +5,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#if defined(SIMD512)
|
||||
|
||||
void scrypt_core_16way( __m512i *X, __m512i *V, const uint32_t N );
|
||||
|
||||
|
@@ -35,7 +35,7 @@
|
||||
//#include <mm_malloc.h>
|
||||
#include "malloc-huge.h"
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#if defined(SIMD512)
|
||||
#define SCRYPT_THROUGHPUT 16
|
||||
#elif defined(__SHA__) || defined(__ARM_FEATURE_SHA2)
|
||||
#define SCRYPT_THROUGHPUT 2
|
||||
@@ -592,7 +592,7 @@ static inline void PBKDF2_SHA256_128_32_8way( uint32_t *tstate,
|
||||
|
||||
#endif /* HAVE_SHA256_8WAY */
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#if defined(SIMD512)
|
||||
|
||||
static inline void sha256_16way_init_state( void *state )
|
||||
{
|
||||
@@ -1494,7 +1494,7 @@ bool register_scrypt_algo( algo_gate_t* gate )
|
||||
// scrypt_throughput defined at compile time and used to replace
|
||||
// MAX_WAYS to reduce memory usage.
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
#if defined(SIMD512)
|
||||
// scrypt_throughput = 16;
|
||||
if ( opt_param_n > 0x4000 )
|
||||
scratchbuf_size = opt_param_n * 3 * 128; // 3 buf
|
||||
|
Reference in New Issue
Block a user