mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.7.5
This commit is contained in:
@@ -140,7 +140,7 @@ HASH ( void *cc, const void *data, size_t len )
|
||||
clen = SPH_BLEN - ptr;
|
||||
if ( clen > len )
|
||||
clen = len;
|
||||
memcpy_m256i( sc->buf + (ptr>>3), vdata, clen>>3 );
|
||||
memcpy_256( sc->buf + (ptr>>3), vdata, clen>>3 );
|
||||
vdata = vdata + (clen>>3);
|
||||
ptr += clen;
|
||||
len -= clen;
|
||||
@@ -195,19 +195,19 @@ SPH_XCAT( HASH, _addbits_and_close )(void *cc, unsigned ub, unsigned n,
|
||||
sc = cc;
|
||||
ptr = (unsigned)sc->count & (SPH_BLEN - 1U);
|
||||
|
||||
uint64_t *b= (uint64_t*)sc->buf;
|
||||
uint64_t *s= (uint64_t*)sc->state;
|
||||
//uint64_t *b= (uint64_t*)sc->buf;
|
||||
//uint64_t *s= (uint64_t*)sc->state;
|
||||
//printf("Vptr 1= %u\n", ptr);
|
||||
//printf("VBuf %016llx %016llx %016llx %016llx\n", b[0], b[4], b[8], b[12] );
|
||||
//printf("VBuf %016llx %016llx %016llx %016llx\n", b[16], b[20], b[24], b[28] );
|
||||
|
||||
#ifdef PW01
|
||||
sc->buf[ptr>>3] = mm256_vec_epi64( 0x100 >> 8 );
|
||||
sc->buf[ptr>>3] = _mm256_set1_epi64x( 0x100 >> 8 );
|
||||
// sc->buf[ptr++] = 0x100 >> 8;
|
||||
#else
|
||||
// need to overwrite exactly one byte
|
||||
// sc->buf[ptr>>3] = _mm256_set_epi64x( 0, 0, 0, 0x80 );
|
||||
sc->buf[ptr>>3] = mm256_vec_epi64( 0x80 );
|
||||
sc->buf[ptr>>3] = _mm256_set1_epi64x( 0x80 );
|
||||
// ptr++;
|
||||
#endif
|
||||
ptr += 8;
|
||||
@@ -218,43 +218,43 @@ uint64_t *s= (uint64_t*)sc->state;
|
||||
|
||||
if ( ptr > SPH_MAXPAD )
|
||||
{
|
||||
memset_zero_m256i( sc->buf + (ptr>>3), (SPH_BLEN - ptr) >> 3 );
|
||||
memset_zero_256( sc->buf + (ptr>>3), (SPH_BLEN - ptr) >> 3 );
|
||||
RFUN( sc->buf, SPH_VAL );
|
||||
memset_zero_m256i( sc->buf, SPH_MAXPAD >> 3 );
|
||||
memset_zero_256( sc->buf, SPH_MAXPAD >> 3 );
|
||||
}
|
||||
else
|
||||
{
|
||||
memset_zero_m256i( sc->buf + (ptr>>3), (SPH_MAXPAD - ptr) >> 3 );
|
||||
memset_zero_256( sc->buf + (ptr>>3), (SPH_MAXPAD - ptr) >> 3 );
|
||||
}
|
||||
#if defined BE64
|
||||
#if defined PLW1
|
||||
sc->buf[ SPH_MAXPAD>>3 ] =
|
||||
mm256_byteswap_epi64( mm256_vec_epi64( sc->count << 3 ) );
|
||||
mm256_byteswap_64( _mm256_set1_epi64x( sc->count << 3 ) );
|
||||
#elif defined PLW4
|
||||
memset_zero_m256i( sc->buf + (SPH_MAXPAD>>3), ( 2 * SPH_WLEN ) >> 3 );
|
||||
memset_zero_256( sc->buf + (SPH_MAXPAD>>3), ( 2 * SPH_WLEN ) >> 3 );
|
||||
sc->buf[ (SPH_MAXPAD + 2 * SPH_WLEN ) >> 3 ] =
|
||||
mm256_byteswap_epi64( mm256_vec_epi64( sc->count >> 61 ) );
|
||||
mm256_byteswap_64( _mm256_set1_epi64x( sc->count >> 61 ) );
|
||||
sc->buf[ (SPH_MAXPAD + 3 * SPH_WLEN ) >> 3 ] =
|
||||
mm256_byteswap_epi64( mm256_vec_epi64( sc->count << 3 ) );
|
||||
mm256_byteswap_64( _mm256_set1_epi64x( sc->count << 3 ) );
|
||||
#else
|
||||
sc->buf[ ( SPH_MAXPAD + 2 * SPH_WLEN ) >> 3 ] =
|
||||
mm256_byteswap_epi64( mm256_vec_epi64( sc->count >> 61 ) );
|
||||
mm256_byteswap_64( _mm256_set1_epi64x( sc->count >> 61 ) );
|
||||
sc->buf[ ( SPH_MAXPAD + 3 * SPH_WLEN ) >> 3 ] =
|
||||
mm256_byteswap_epi64( mm256_vec_epi64( sc->count << 3 ) );
|
||||
mm256_byteswap_64( _mm256_set1_epi64x( sc->count << 3 ) );
|
||||
#endif // PLW
|
||||
#else // LE64
|
||||
#if defined PLW1
|
||||
sc->buf[ SPH_MAXPAD >> 3 ] = mm256_vec_epi64( sc->count << 3 );
|
||||
sc->buf[ SPH_MAXPAD >> 3 ] = _mm256_set1_epi64x( sc->count << 3 );
|
||||
#elif defined PLW4
|
||||
sc->buf[ SPH_MAXPAD >> 3 ] = _mm256_vec_epi64( sc->count << 3 );
|
||||
sc->buf[ SPH_MAXPAD >> 3 ] = _mm256_set1_epi64x( sc->count << 3 );
|
||||
sc->buf[ ( SPH_MAXPAD + SPH_WLEN ) >> 3 ] =
|
||||
mm256_vec_epi64( c->count >> 61 );
|
||||
memset_zero_m256i( sc->buf + ( ( SPH_MAXPAD + 2 * SPH_WLEN ) >> 3 ),
|
||||
_mm256_set1_epi64x( c->count >> 61 );
|
||||
memset_zero_256( sc->buf + ( ( SPH_MAXPAD + 2 * SPH_WLEN ) >> 3 ),
|
||||
2 * SPH_WLEN );
|
||||
#else
|
||||
sc->buf[ SPH_MAXPAD >> 3 ] = mm256_vec_epi64( sc->count << 3 );
|
||||
sc->buf[ SPH_MAXPAD >> 3 ] = _mm256_set1_epi64x( sc->count << 3 );
|
||||
sc->buf[ ( SPH_MAXPAD + SPH_WLEN ) >> 3 ] =
|
||||
mm256_vec_epi64( sc->count >> 61 );
|
||||
_mm256_set1_epi64x( sc->count >> 61 );
|
||||
#endif // PLW
|
||||
|
||||
#endif // LE64
|
||||
@@ -276,7 +276,7 @@ uint64_t *s= (uint64_t*)sc->state;
|
||||
for ( u = 0; u < rnum; u ++ )
|
||||
{
|
||||
#if defined BE64
|
||||
((__m256i*)dst)[u] = mm256_byteswap_epi64( sc->val[u] );
|
||||
((__m256i*)dst)[u] = mm256_byteswap_64( sc->val[u] );
|
||||
#else // LE64
|
||||
((__m256i*)dst)[u] = sc->val[u];
|
||||
#endif
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,209 +0,0 @@
|
||||
/* $Id: sph_whirlpool.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* WHIRLPOOL interface.
|
||||
*
|
||||
* WHIRLPOOL knows three variants, dubbed "WHIRLPOOL-0" (original
|
||||
* version, published in 2000, studied by NESSIE), "WHIRLPOOL-1"
|
||||
* (first revision, 2001, with a new S-box) and "WHIRLPOOL" (current
|
||||
* version, 2003, with a new diffusion matrix, also described as "plain
|
||||
* WHIRLPOOL"). All three variants are implemented here.
|
||||
*
|
||||
* The original WHIRLPOOL (i.e. WHIRLPOOL-0) was published in: P. S. L.
|
||||
* M. Barreto, V. Rijmen, "The Whirlpool Hashing Function", First open
|
||||
* NESSIE Workshop, Leuven, Belgium, November 13--14, 2000.
|
||||
*
|
||||
* The current WHIRLPOOL specification and a reference implementation
|
||||
* can be found on the WHIRLPOOL web page:
|
||||
* http://paginas.terra.com.br/informatica/paulobarreto/WhirlpoolPage.html
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_whirlpool.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_WHIRLPOOL_H__
|
||||
#define SPH_WHIRLPOOL_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include "algo/sha/sph_types.h"
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Output size (in bits) for WHIRLPOOL.
|
||||
*/
|
||||
#define SPH_SIZE_whirlpool 512
|
||||
|
||||
/**
|
||||
* Output size (in bits) for WHIRLPOOL-0.
|
||||
*/
|
||||
#define SPH_SIZE_whirlpool0 512
|
||||
|
||||
/**
|
||||
* Output size (in bits) for WHIRLPOOL-1.
|
||||
*/
|
||||
#define SPH_SIZE_whirlpool1 512
|
||||
|
||||
/**
|
||||
* This structure is a context for WHIRLPOOL computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a WHIRLPOOL computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running WHIRLPOOL computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
sph_u64 state[8];
|
||||
#if SPH_64
|
||||
sph_u64 count;
|
||||
#else
|
||||
sph_u32 count_high, count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_whirlpool_context;
|
||||
|
||||
/**
|
||||
* Initialize a WHIRLPOOL context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context (pointer to a
|
||||
* <code>sph_whirlpool_context</code>)
|
||||
*/
|
||||
void sph_whirlpool_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing). This function applies the
|
||||
* plain WHIRLPOOL algorithm.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_whirlpool(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current WHIRLPOOL computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_whirlpool_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* WHIRLPOOL-0 uses the same structure than plain WHIRLPOOL.
|
||||
*/
|
||||
typedef sph_whirlpool_context sph_whirlpool0_context;
|
||||
|
||||
#ifdef DOXYGEN_IGNORE
|
||||
/**
|
||||
* Initialize a WHIRLPOOL-0 context. This function is identical to
|
||||
* <code>sph_whirlpool_init()</code>.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context (pointer to a
|
||||
* <code>sph_whirlpool0_context</code>)
|
||||
*/
|
||||
void sph_whirlpool0_init(void *cc);
|
||||
#endif
|
||||
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
#define sph_whirlpool0_init sph_whirlpool_init
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing). This function applies the
|
||||
* WHIRLPOOL-0 algorithm.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_whirlpool0(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current WHIRLPOOL-0 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the WHIRLPOOL-0 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_whirlpool0_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* WHIRLPOOL-1 uses the same structure than plain WHIRLPOOL.
|
||||
*/
|
||||
typedef sph_whirlpool_context sph_whirlpool1_context;
|
||||
|
||||
#ifdef DOXYGEN_IGNORE
|
||||
/**
|
||||
* Initialize a WHIRLPOOL-1 context. This function is identical to
|
||||
* <code>sph_whirlpool_init()</code>.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context (pointer to a
|
||||
* <code>sph_whirlpool1_context</code>)
|
||||
*/
|
||||
void sph_whirlpool1_init(void *cc);
|
||||
#endif
|
||||
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
#define sph_whirlpool1_init sph_whirlpool_init
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing). This function applies the
|
||||
* WHIRLPOOL-1 algorithm.
|
||||
*
|
||||
* @param cc the WHIRLPOOL context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_whirlpool1(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current WHIRLPOOL-1 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accomodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the WHIRLPOOL-1 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_whirlpool1_close(void *cc, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -41,7 +41,7 @@ void whirlpool_hash_4way( void *state, const void *input )
|
||||
whirlpool1_4way( &ctx, vhash, 64 );
|
||||
whirlpool1_4way_close( &ctx, vhash);
|
||||
|
||||
m256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
|
||||
|
||||
memcpy( state , hash0, 32 );
|
||||
memcpy( state+32, hash1, 32 );
|
||||
@@ -74,7 +74,7 @@ int scanhash_whirlpool_4way( int thr_id, struct work* work, uint32_t max_nonce,
|
||||
be32enc(&endiandata[i], pdata[i]);
|
||||
|
||||
uint64_t *edata = (uint64_t*)endiandata;
|
||||
m256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );
|
||||
|
||||
// midstate
|
||||
whirlpool1_4way_init( &whirl_mid );
|
||||
|
||||
@@ -3346,7 +3346,7 @@ do { \
|
||||
#define ROUND0 MUL8(ROUND0_W)
|
||||
#define UPDATE_STATE MUL8(UPDATE_STATE_W)
|
||||
#define BYTE(x, n) \
|
||||
_mm256_and_si256( _mm256_srli_epi64( x, n<<3 ), mm256_vec_epi64( 0xFF ) )
|
||||
_mm256_and_si256( _mm256_srli_epi64( x, n<<3 ), _mm256_set1_epi64x( 0xFF ) )
|
||||
|
||||
// A very complex, but structured, expression with a mix of scalar
|
||||
// and vector operations to retrieve specific 64 bit constants from
|
||||
@@ -3359,7 +3359,7 @@ do { \
|
||||
// Pack the data in a vector and return it.
|
||||
#define t_row( inv, row ) \
|
||||
_mm256_and_si256( \
|
||||
_mm256_srli_epi64( inv, row << 3 ), mm256_vec_epi64( 0xFF ) )
|
||||
_mm256_srli_epi64( inv, row << 3 ), _mm256_set1_epi64x( 0xFF ) )
|
||||
|
||||
// Extract vector element from "lane" of vector "in[row]" and use it to index
|
||||
// scalar array of constants "table" and return referenced 64 bit entry.
|
||||
@@ -3454,7 +3454,7 @@ void
|
||||
whirlpool_4way_init(void *cc)
|
||||
{
|
||||
whirlpool_4way_context *sc = cc;;
|
||||
memset_zero_m256i( sc->state, 8 );
|
||||
memset_zero_256( sc->state, 8 );
|
||||
sc->count = 0;
|
||||
}
|
||||
|
||||
@@ -3470,7 +3470,7 @@ name ## _round( const void *src, __m256i *state ) \
|
||||
ROUND0; \
|
||||
for (r = 0; r < 10; r ++) { \
|
||||
DECL8(tmp); \
|
||||
ROUND_KSCHED( type ## _T, h, tmp, mm256_vec_epi64( type ## _RC[r] ) ); \
|
||||
ROUND_KSCHED( type ## _T, h, tmp, _mm256_set1_epi64x( type ## _RC[r] ) ); \
|
||||
TRANSFER( h, tmp ); \
|
||||
ROUND_WENC( type ## _T, n, h, tmp ); \
|
||||
TRANSFER( n, tmp ); \
|
||||
|
||||
Reference in New Issue
Block a user