mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v24.1
This commit is contained in:
@@ -21,7 +21,7 @@
|
||||
#define EPS1 DBL_EPSILON
|
||||
#define EPS2 3.0e-11
|
||||
|
||||
inline double exp_n( double xt )
|
||||
static inline double exp_n( double xt )
|
||||
{
|
||||
if ( xt < -700.0 )
|
||||
return 0;
|
||||
@@ -33,7 +33,7 @@ inline double exp_n( double xt )
|
||||
return exp( xt );
|
||||
}
|
||||
|
||||
inline double exp_n2( double x1, double x2 )
|
||||
static inline double exp_n2( double x1, double x2 )
|
||||
{
|
||||
double p1 = -700., p2 = -37., p3 = -0.8e-8, p4 = 0.8e-8,
|
||||
p5 = 37., p6 = 700.;
|
||||
|
@@ -1,75 +0,0 @@
|
||||
// Copyright (c) 2014 The Magi developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
#include <iostream>
|
||||
#include <cfloat>
|
||||
#include <limits>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "magimath.h"
|
||||
|
||||
#define EPS1 (std::numeric_limits<double>::epsilon())
|
||||
#define EPS2 3.0e-11
|
||||
|
||||
static void gauleg(double x1, double x2, double x[], double w[], const int n)
|
||||
{
|
||||
int m,j,i;
|
||||
double z1, z, xm, xl, pp, p3, p2, p1;
|
||||
m=(n+1)/2;
|
||||
xm=0.5*(x2+x1);
|
||||
xl=0.5*(x2-x1);
|
||||
for (i=1;i<=m;i++) {
|
||||
z=cos(3.141592654*(i-0.25)/(n+0.5));
|
||||
do {
|
||||
p1=1.0;
|
||||
p2=0.0;
|
||||
for (j=1;j<=n;j++) {
|
||||
p3=p2;
|
||||
p2=p1;
|
||||
p1=((2.0*j-1.0)*z*p2-(j-1.0)*p3)/j;
|
||||
}
|
||||
pp=n*(z*p1-p2)/(z*z-1.0);
|
||||
z1=z;
|
||||
z=z1-p1/pp;
|
||||
} while (fabs(z-z1) > EPS2);
|
||||
x[i]=xm-xl*z;
|
||||
x[n+1-i]=xm+xl*z;
|
||||
w[i]=2.0*xl/((1.0-z*z)*pp*pp);
|
||||
w[n+1-i]=w[i];
|
||||
}
|
||||
}
|
||||
|
||||
static double GaussianQuad_N(double func(const double), const double a2, const double b2, const int NptGQ)
|
||||
{
|
||||
double s=0.0;
|
||||
#ifdef _MSC_VER
|
||||
#define SW_DIVS 23
|
||||
double x[SW_DIVS+1], w[SW_DIVS+1];
|
||||
#else
|
||||
double x[NptGQ+1], w[NptGQ+1];
|
||||
#endif
|
||||
|
||||
gauleg(a2, b2, x, w, NptGQ);
|
||||
|
||||
for (int j=1; j<=NptGQ; j++) {
|
||||
s += w[j]*func(x[j]);
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static double swit_(double wvnmb)
|
||||
{
|
||||
return pow( (5.55243*(exp_n(-0.3*wvnmb/15.762) - exp_n(-0.6*wvnmb/15.762)))*wvnmb, 0.5)
|
||||
/ 1034.66 * pow(sin(wvnmb/65.), 2.);
|
||||
}
|
||||
|
||||
uint32_t sw_(int nnounce, int divs)
|
||||
{
|
||||
double wmax = ((sqrt((double)(nnounce))*(1.+EPS1))/450+100);
|
||||
return ((uint32_t)(GaussianQuad_N(swit_, 0., wmax, divs)*(1.+EPS1)*1.e6));
|
||||
}
|
@@ -1,54 +0,0 @@
|
||||
// Copyright (c) 2014 The Magi developers
|
||||
// Distributed under the MIT/X11 software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
#ifndef MAGI_MATH_H
|
||||
#define MAGI_MATH_H
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
uint32_t sw_(int nnounce, int divs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
inline double exp_n(double xt)
|
||||
{
|
||||
double p1 = -700.0, p3 = -0.8e-8, p4 = 0.8e-8, p6 = 700.0;
|
||||
if(xt < p1)
|
||||
return 0;
|
||||
else if(xt > p6)
|
||||
return 1e200;
|
||||
else if(xt > p3 && xt < p4)
|
||||
return (1.0 + xt);
|
||||
else
|
||||
return exp(xt);
|
||||
}
|
||||
|
||||
// 1 / (1 + exp(x1-x2))
|
||||
inline double exp_n2(double x1, double x2)
|
||||
{
|
||||
double p1 = -700., p2 = -37., p3 = -0.8e-8, p4 = 0.8e-8, p5 = 37., p6 = 700.;
|
||||
double xt = x1 - x2;
|
||||
if (xt < p1+1.e-200)
|
||||
return 1.;
|
||||
else if (xt > p1 && xt < p2 + 1.e-200)
|
||||
return ( 1. - exp(xt) );
|
||||
else if (xt > p2 && xt < p3 + 1.e-200)
|
||||
return ( 1. / (1. + exp(xt)) );
|
||||
else if (xt > p3 && xt < p4)
|
||||
return ( 1. / (2. + xt) );
|
||||
else if (xt > p4 - 1.e-200 && xt < p5)
|
||||
return ( exp(-xt) / (1. + exp(-xt)) );
|
||||
else if (xt > p5 - 1.e-200 && xt < p6)
|
||||
return ( exp(-xt) );
|
||||
else //if (xt > p6 - 1.e-200)
|
||||
return 0.;
|
||||
}
|
||||
|
||||
#endif
|
@@ -205,7 +205,7 @@ void sha1_x86_sha_transform_be( uint32_t *state_out, const void *input,
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) && defined(__ARM_FEATURE_SHA2)
|
||||
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_SHA2)
|
||||
|
||||
#define sha1_neon_rounds( state_out, data, state_in ) \
|
||||
{ \
|
||||
|
@@ -1,6 +1,6 @@
|
||||
#include "sha256-hash.h"
|
||||
|
||||
#if ( defined(__x86_64__) && defined(__SHA__) ) || defined(__ARM_NEON) && defined(__ARM_FEATURE_SHA2)
|
||||
#if ( defined(__x86_64__) && defined(__SHA__) ) || ( defined(__ARM_NEON) && defined(__ARM_FEATURE_SHA2) )
|
||||
|
||||
static const uint32_t SHA256_IV[8] =
|
||||
{
|
||||
@@ -189,7 +189,7 @@ static const uint32_t SHA256_IV[8] =
|
||||
_mm_store_si128( (__m128i*) &state_out[4], STATE1 ); \
|
||||
}
|
||||
|
||||
void sha256_opt_transform_le( uint32_t *state_out, const void *input,
|
||||
void sha256_x86_sha_transform_le( uint32_t *state_out, const void *input,
|
||||
const uint32_t *state_in )
|
||||
{
|
||||
#define load_msg( m, i ) casti_v128( m, i )
|
||||
@@ -197,7 +197,7 @@ void sha256_opt_transform_le( uint32_t *state_out, const void *input,
|
||||
#undef load_msg
|
||||
}
|
||||
|
||||
void sha256_opt_transform_be( uint32_t *state_out, const void *input,
|
||||
void sha256_x86_sha_transform_be( uint32_t *state_out, const void *input,
|
||||
const uint32_t *state_in )
|
||||
{
|
||||
#define load_msg( m, i ) v128_bswap32( casti_v128( m, i ) )
|
||||
@@ -517,7 +517,7 @@ void sha256_opt_transform_be( uint32_t *state_out, const void *input,
|
||||
_mm_store_si128( (__m128i*) &out_Y[4], STATE1_Y ); \
|
||||
}
|
||||
|
||||
void sha256_ni2x_transform_le( uint32_t *out_X, uint32_t*out_Y,
|
||||
void sha256_x86_x2sha_transform_le( uint32_t *out_X, uint32_t*out_Y,
|
||||
const void *msg_X, const void *msg_Y,
|
||||
const uint32_t *in_X, const uint32_t *in_Y )
|
||||
{
|
||||
@@ -526,7 +526,7 @@ void sha256_ni2x_transform_le( uint32_t *out_X, uint32_t*out_Y,
|
||||
#undef load_msg
|
||||
}
|
||||
|
||||
void sha256_ni2x_transform_be( uint32_t *out_X, uint32_t*out_Y,
|
||||
void sha256_x86_x2sha_transform_be( uint32_t *out_X, uint32_t*out_Y,
|
||||
const void *msg_X, const void *msg_Y,
|
||||
const uint32_t *in_X, const uint32_t *in_Y )
|
||||
{
|
||||
@@ -541,7 +541,7 @@ void sha256_ni2x_transform_be( uint32_t *out_X, uint32_t*out_Y,
|
||||
// The goal is to avoid any redundant processing in final. Prehash is almost
|
||||
// 4 rounds total, only missing the final addition of the nonce.
|
||||
// Nonce must be set to zero for prehash.
|
||||
void sha256_ni_prehash_3rounds( uint32_t *ostate, const void *msg,
|
||||
void sha256_x86_sha_prehash_3rounds( uint32_t *ostate, const void *msg,
|
||||
uint32_t *sstate, const uint32_t *istate )
|
||||
{
|
||||
__m128i STATE0, STATE1, MSG, TMP;
|
||||
@@ -569,7 +569,7 @@ void sha256_ni_prehash_3rounds( uint32_t *ostate, const void *msg,
|
||||
casti_m128i( ostate, 1 ) = STATE1;
|
||||
}
|
||||
|
||||
void sha256_ni2x_final_rounds( uint32_t *out_X, uint32_t *out_Y,
|
||||
void sha256_x86_x2sha_final_rounds( uint32_t *out_X, uint32_t *out_Y,
|
||||
const void *msg_X, const void *msg_Y,
|
||||
const uint32_t *state_mid_X, const uint32_t *state_mid_Y,
|
||||
const uint32_t *state_save_X, const uint32_t *state_save_Y )
|
||||
|
@@ -5,27 +5,21 @@
|
||||
#include "simd-utils.h"
|
||||
#include "cpuminer-config.h"
|
||||
|
||||
// generic interface
|
||||
static const uint32_t SHA256_IV[8];
|
||||
|
||||
#if defined(__x86_64__) && defined(__SHA__)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
unsigned char buf[64];
|
||||
uint32_t state[8];
|
||||
uint64_t count;
|
||||
} sha256_context __attribute__((aligned(64)));
|
||||
|
||||
static const uint32_t SHA256_IV[8];
|
||||
|
||||
void sha256_full( void *hash, const void *data, size_t len );
|
||||
void sha256_update( sha256_context *ctx, const void *data, size_t len );
|
||||
void sha256_final( sha256_context *ctx, void *hash );
|
||||
void sha256_ctx_init( sha256_context *ctx );
|
||||
void sha256_transform_le( uint32_t *state_out, const uint32_t *data,
|
||||
const uint32_t *state_in );
|
||||
void sha256_transform_be( uint32_t *state_out, const uint32_t *data,
|
||||
const uint32_t *state_in );
|
||||
|
||||
#if defined(__x86_64__) && defined(__SHA__)
|
||||
|
||||
void sha256_x86_sha_transform_le( uint32_t *state_out, const void *input,
|
||||
const uint32_t *state_in );
|
||||
@@ -50,14 +44,6 @@ void sha256_x86_x2sha_final_rounds( uint32_t *state_out_X, uint32_t *state_out_Y
|
||||
const uint32_t *state_mid_X, const uint32_t *state_mid_Y,
|
||||
const uint32_t *state_save_X, const uint32_t *state_save_Y );
|
||||
|
||||
// Temporary during name transition
|
||||
#define sha256_opt_transform_le sha256_x86_sha_transform_le
|
||||
#define sha256_opt_transform_be sha256_x86_sha_transform_be
|
||||
#define sha256_ni2x_transform_le sha256_x86_x2sha_transform_le
|
||||
#define sha256_ni2x_transform_be sha256_x86_x2sha_transform_be
|
||||
#define sha256_ni_prehash_3rounds sha256_x86_sha_prehash_3rounds
|
||||
#define sha256_ni2x_final_rounds sha256_x86_x2sha_final_rounds
|
||||
|
||||
// generic API
|
||||
#define sha256_transform_le sha256_x86_sha_transform_le
|
||||
#define sha256_transform_be sha256_x86_sha_transform_be
|
||||
@@ -68,6 +54,20 @@ void sha256_x86_x2sha_final_rounds( uint32_t *state_out_X, uint32_t *state_out_Y
|
||||
|
||||
#elif defined(__ARM_NEON) && defined(__ARM_FEATURE_SHA2)
|
||||
|
||||
// SHA-256 AArch64 with NEON & SHA2
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned char buf[64];
|
||||
uint32_t state[8];
|
||||
uint64_t count;
|
||||
} sha256_context __attribute__((aligned(64)));
|
||||
|
||||
void sha256_full( void *hash, const void *data, size_t len );
|
||||
void sha256_update( sha256_context *ctx, const void *data, size_t len );
|
||||
void sha256_final( sha256_context *ctx, void *hash );
|
||||
void sha256_ctx_init( sha256_context *ctx );
|
||||
|
||||
void sha256_neon_sha_transform_be( uint32_t *state_out, const void *input,
|
||||
const uint32_t *state_in );
|
||||
void sha256_neon_sha_transform_le( uint32_t *state_out, const void *input,
|
||||
@@ -89,14 +89,6 @@ void sha256_neon_x2sha_final_rounds( uint32_t *state_out_X,
|
||||
const uint32_t *state_mid_X, const uint32_t *state_mid_Y,
|
||||
const uint32_t *state_save_X, const uint32_t *state_save_Y );
|
||||
|
||||
// Temporary during name transition
|
||||
#define sha256_transform_le sha256_neon_sha_transform_le
|
||||
#define sha256_transform_be sha256_neon_sha_transform_be
|
||||
#define sha256_2x_transform_le sha256_neon_x2sha_transform_le
|
||||
#define sha256_2x_transform_be sha256_neon_x2sha_transform_be
|
||||
#define sha256_prehash_3rounds sha256_neon_sha_prehash_3rounds
|
||||
#define sha256_2x_final_rounds sha256_neon_x2sha_final_rounds
|
||||
|
||||
// generic API
|
||||
#define sha256_transform_le sha256_neon_sha_transform_le
|
||||
#define sha256_transform_be sha256_neon_sha_transform_be
|
||||
@@ -106,9 +98,11 @@ void sha256_neon_x2sha_final_rounds( uint32_t *state_out_X,
|
||||
#define sha256_2x_final_rounds sha256_neon_x2sha_final_rounds
|
||||
|
||||
#else
|
||||
|
||||
// without HW acceleration...
|
||||
#include "sph_sha2.h"
|
||||
|
||||
#define sha256_context sph_sha256_context
|
||||
#define sha256_full sph_sha256_full
|
||||
#define sha256_ctx_init sph_sha256_init
|
||||
#define sha256_update sph_sha256
|
||||
@@ -117,12 +111,11 @@ void sha256_neon_x2sha_final_rounds( uint32_t *state_out_X,
|
||||
#define sha256_transform_be sph_sha256_transform_be
|
||||
#define sha256_prehash_3rounds sph_sha256_prehash_3rounds
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
|
||||
|
||||
// SHA-256 16 way
|
||||
// SHA-256 16 way x86_64
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@@ -147,7 +140,7 @@ void sha256_16x32_final_rounds( __m512i *state_out, const __m512i *data,
|
||||
int sha256_16x32_transform_le_short( __m512i *state_out, const __m512i *data,
|
||||
const __m512i *state_in, const uint32_t *target );
|
||||
|
||||
#define sha256_16way_context sha256_16x32_context
|
||||
#define sha256_16way_context sha256_16x32_context
|
||||
#define sha256_16way_init sha256_16x32_init
|
||||
#define sha256_16way_update sha256_16x32_update
|
||||
#define sha256_16way_close sha256_16x32_close
|
||||
@@ -162,7 +155,7 @@ int sha256_16x32_transform_le_short( __m512i *state_out, const __m512i *data,
|
||||
|
||||
#if defined (__AVX2__)
|
||||
|
||||
// SHA-256 8 way
|
||||
// SHA-256 8 way x86_64
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@@ -201,7 +194,7 @@ int sha256_8x32_transform_le_short( __m256i *state_out, const __m256i *data,
|
||||
|
||||
#endif // AVX2
|
||||
|
||||
// SHA-256 4 way
|
||||
// SHA-256 4 way x86_64 with SSE2 or AArch64 with NEON
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
@@ -16,14 +16,14 @@ bool register_timetravel_algo( algo_gate_t* gate )
|
||||
return true;
|
||||
};
|
||||
|
||||
/*
 * Exchange the two ints that a and b point to.
 *
 * static inline: a plain `inline` definition in a C source file provides no
 * external definition, so a call that is not inlined would fail to link.
 */
static inline void tt_swap( int *a, int *b )
{
   const int held = *a;
   *a = *b;
   *b = held;
}
|
||||
|
||||
inline void reverse( int *pbegin, int *pend )
|
||||
static inline void reverse( int *pbegin, int *pend )
|
||||
{
|
||||
while ( (pbegin != pend) && (pbegin != --pend) )
|
||||
{
|
||||
|
@@ -16,14 +16,14 @@ bool register_timetravel10_algo( algo_gate_t* gate )
|
||||
return true;
|
||||
};
|
||||
|
||||
/*
 * Exchange the two ints that a and b point to.
 *
 * static inline: a plain `inline` definition in a C source file provides no
 * external definition, so a call that is not inlined would fail to link.
 */
static inline void tt10_swap( int *a, int *b )
{
   const int held = *a;
   *a = *b;
   *b = held;
}
|
||||
|
||||
inline void reverse( int *pbegin, int *pend )
|
||||
static inline void reverse( int *pbegin, int *pend )
|
||||
{
|
||||
while ( (pbegin != pend) && (pbegin != --pend) )
|
||||
{
|
||||
|
@@ -730,7 +730,7 @@ typedef union _x16rv2_4way_context_overlay x16rv2_4way_context_overlay;
|
||||
static __thread x16rv2_4way_context_overlay x16rv2_ctx;
|
||||
|
||||
// Pad the 24 byte tiger hash to 64 bytes: the first 6 32-bit words are left
// untouched and words 6..15 are cleared. hash must hold at least 16 words.
// static inline so the definition has internal linkage; a plain `inline`
// definition in C provides no external definition and can fail to link.
static inline void padtiger512( uint32_t* hash )
{
   for ( int i = 6; i < 16; i++ ) hash[i] = 0;
}
|
||||
|
@@ -33,7 +33,7 @@ union _x16rv2_context_overlay
|
||||
typedef union _x16rv2_context_overlay x16rv2_context_overlay;
|
||||
|
||||
// Pad the 24 byte tiger hash to 64 bytes: words 24/4 .. 64/4 - 1 of hash are
// cleared, the leading 24 bytes are left as they are. hash must hold at
// least 64 bytes.
// static inline so the definition has internal linkage; a plain `inline`
// definition in C provides no external definition and can fail to link.
static inline void padtiger512(uint32_t* hash) {
  for (int i = (24/4); i < (64/4); i++) hash[i] = 0;
}
|
||||
|
||||
|
Reference in New Issue
Block a user