diff --git a/Makefile.am b/Makefile.am
index 736297a..7ae1d4a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -163,6 +163,7 @@ cpuminer_SOURCES = \
   algo/sha/sha256-hash-4way.c \
   algo/sha/sha512-hash-4way.c \
   algo/sha/hmac-sha256-hash.c \
+  algo/sha/hmac-sha256-hash-4way.c \
   algo/sha/sha2.c \
   algo/sha/sha256t-gate.c \
   algo/sha/sha256t-4way.c \
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 6d0e1e1..505d88e 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -65,6 +65,13 @@ If not what makes it happen or not happen?
 Change Log
 ----------
 
+v3.12.7
+
+Issue #257: fixed a file descriptor leak which caused the CPU temperature
+and frequency query to report zeros after mining for a couple of hours.
+
+Issue #253: stale share reduction for yescrypt, sonoa.
+
 v3.12.6.1
 
 Issue #252: Fixed SSL mining (stratum+tcps://)
@@ -106,7 +113,7 @@ a specific algo name.
 v3.12.4.6
 
 Issue #246: fixed getwork repeated new block logs with same height. New work
-for the same block is now reported as "New work" instead of New block".
+for the same block is now reported as "New work" instead of "New block".
 Also added a check that work is new before generating "New work" log.
 Added target diff to getwork new block log.
 
diff --git a/algo-gate-api.c b/algo-gate-api.c
index b934529..5f75f1e 100644
--- a/algo-gate-api.c
+++ b/algo-gate-api.c
@@ -232,11 +232,6 @@ bool register_algo_gate( int algo, algo_gate_t *gate )
      case ALGO_X22I:         register_x22i_algo         ( gate ); break;
      case ALGO_X25X:         register_x25x_algo         ( gate ); break;
      case ALGO_XEVAN:        register_xevan_algo        ( gate ); break;
-/*   case ALGO_YESCRYPT:     register_yescrypt_05_algo    ( gate ); break;
-     case ALGO_YESCRYPTR8:   register_yescryptr8_05_algo  ( gate ); break;
-     case ALGO_YESCRYPTR16:  register_yescryptr16_05_algo ( gate ); break;
-     case ALGO_YESCRYPTR32:  register_yescryptr32_05_algo ( gate ); break;
-*/
      case ALGO_YESCRYPT:     register_yescrypt_algo     ( gate ); break;
      case ALGO_YESCRYPTR8:   register_yescryptr8_algo   ( gate ); break;
      case ALGO_YESCRYPTR8G:  register_yescryptr8g_algo  ( gate ); break;
diff --git a/algo/sha/hmac-sha256-hash-4way.c b/algo/sha/hmac-sha256-hash-4way.c
new file mode 100644
index 0000000..43fa272
--- /dev/null
+++ b/algo/sha/hmac-sha256-hash-4way.c
@@ -0,0 +1,440 @@
+/*-
+ * Copyright 2005,2007,2009 Colin Percival
+ * Copyright 2020 JayDDee246@gmail.com
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <string.h>
+#include "hmac-sha256-hash-4way.h"
+#include "compat.h"
+
+// HMAC 4-way SSE2
+
+/**
+ * HMAC_SHA256_Buf(K, Klen, in, len, digest):
+ * Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of
+ * length ${Klen}, and write the result to ${digest}.
+ */
+void
+hmac_sha256_4way_full( void *digest, const void *K, size_t Klen,
+                       const void *in, size_t len )
+{
+   hmac_sha256_4way_context ctx;
+   hmac_sha256_4way_init( &ctx, K, Klen );
+   hmac_sha256_4way_update( &ctx, in, len );
+   hmac_sha256_4way_close( &ctx, digest );
+}
+
+/* Initialize an HMAC-SHA256 operation with the given key. */
+void
+hmac_sha256_4way_init( hmac_sha256_4way_context *ctx, const void *_K,
+                       size_t Klen )
+{
+   unsigned char pad[64*4] __attribute__ ((aligned (64)));
+   unsigned char khash[32*4] __attribute__ ((aligned (64)));
+   const unsigned char * K = _K;
+   size_t i;
+
+   /* If Klen > 64, the key is really SHA256(K). */
+   if ( Klen > 64 )
+   {
+      sha256_4way_init( &ctx->ictx );
+      sha256_4way_update( &ctx->ictx, K, Klen );
+      sha256_4way_close( &ctx->ictx, khash );
+      K = khash;
+      Klen = 32;
+   }
+
+   /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
+   sha256_4way_init( &ctx->ictx );
+   memset( pad, 0x36, 64*4 );
+
+   for ( i = 0; i < Klen/4; i++ )
+      casti_m128i( pad, i ) = _mm_xor_si128( casti_m128i( pad, i ),
+                                             casti_m128i( K, i ) );
+
+   sha256_4way_update( &ctx->ictx, pad, 64 );
+
+   /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
+   sha256_4way_init( &ctx->octx );
+   memset( pad, 0x5c, 64*4 );
+   for ( i = 0; i < Klen/4; i++ )
+      casti_m128i( pad, i ) = _mm_xor_si128( casti_m128i( pad, i ),
+                                             casti_m128i( K, i ) );
+   sha256_4way_update( &ctx->octx, pad, 64 );
+}
+
+/* Add bytes to the HMAC-SHA256 operation. */
+void
+hmac_sha256_4way_update( hmac_sha256_4way_context *ctx, const void *in,
+                         size_t len )
+{
+   /* Feed data to the inner SHA256 operation. */
+   sha256_4way_update( &ctx->ictx, in, len );
+}
+
+/* Finish an HMAC-SHA256 operation. */
+void
+hmac_sha256_4way_close( hmac_sha256_4way_context *ctx, void *digest )
+{
+   unsigned char ihash[32*4] __attribute__ ((aligned (64)));
+
+   /* Finish the inner SHA256 operation. */
+   sha256_4way_close( &ctx->ictx, ihash );
+
+   /* Feed the inner hash to the outer SHA256 operation. */
+   sha256_4way_update( &ctx->octx, ihash, 32 );
+
+   /* Finish the outer SHA256 operation. */
+   sha256_4way_close( &ctx->octx, digest );
+}
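The 4-way routines compute four independent HMACs in lock step: key and message buffers are interleaved at 32-bit granularity, so word w of lane l sits at uint32_t index 4*w + l, and every length argument counts bytes per lane. A minimal sketch of feeding four independent messages through a single call; the helper interleave_4x32 and the example function are illustrative, not part of this patch:

#include <stdint.h>
#include <stddef.h>
#include "algo/sha/hmac-sha256-hash-4way.h"

// Interleave four equal-length byte streams word by word (hypothetical
// helper for illustration). len is the per-lane length in bytes and
// must be a multiple of 4.
static void interleave_4x32( uint32_t *dst, const uint32_t *src[4],
                             size_t len )
{
   for ( size_t w = 0; w < len/4; w++ )
      for ( int l = 0; l < 4; l++ )
         dst[ 4*w + l ] = src[l][w];
}

// Four 32-byte keys and four 64-byte messages give four 32-byte MACs,
// all computed by one 4-way call; mac4 is interleaved the same way.
static void hmac4_example( const uint32_t *key[4], const uint32_t *msg[4],
                           void *mac4 )
{
   uint32_t K[ 8*4 ]  __attribute__ ((aligned (64)));   // 32 bytes x 4 lanes
   uint32_t M[ 16*4 ] __attribute__ ((aligned (64)));   // 64 bytes x 4 lanes
   interleave_4x32( K, key, 32 );
   interleave_4x32( M, msg, 64 );
   hmac_sha256_4way_full( mac4, K, 32, M, 64 );         // per-lane lengths
}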
+/**
+ * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
+ * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
+ * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
+ */
+void
+pbkdf2_sha256_4way( uint8_t *buf, size_t dkLen,
+                    const uint8_t *passwd, size_t passwdlen,
+                    const uint8_t *salt, size_t saltlen, uint64_t c )
+{
+   hmac_sha256_4way_context PShctx, hctx;
+   uint8_t _ALIGN(128) T[32*4];
+   uint8_t _ALIGN(128) U[32*4];
+   __m128i ivec;
+   size_t i, clen;
+   uint64_t j;
+   int k;
+
+   /* Compute HMAC state after processing P and S. */
+   hmac_sha256_4way_init( &PShctx, passwd, passwdlen );
+   hmac_sha256_4way_update( &PShctx, salt, saltlen );
+
+   /* Iterate through the blocks. */
+   for ( i = 0; i * 32 < dkLen; i++ )
+   {
+      /* Generate INT(i + 1). */
+      ivec = _mm_set1_epi32( bswap_32( i+1 ) );
+
+      /* Compute U_1 = PRF(P, S || INT(i)). */
+      memcpy( &hctx, &PShctx, sizeof(hmac_sha256_4way_context) );
+      hmac_sha256_4way_update( &hctx, &ivec, 4 );
+      hmac_sha256_4way_close( &hctx, U );
+
+      /* T_i = U_1 ... */
+      memcpy( T, U, 32*4 );
+
+      for ( j = 2; j <= c; j++ )
+      {
+         /* Compute U_j. */
+         hmac_sha256_4way_init( &hctx, passwd, passwdlen );
+         hmac_sha256_4way_update( &hctx, U, 32 );
+         hmac_sha256_4way_close( &hctx, U );
+
+         /* ... xor U_j ... */
+         for ( k = 0; k < 8; k++ )
+            casti_m128i( T, k ) = _mm_xor_si128( casti_m128i( T, k ),
+                                                 casti_m128i( U, k ) );
+      }
+
+      /* Copy as many bytes as necessary into buf. */
+      clen = dkLen - i * 32;
+      if ( clen > 32 )
+         clen = 32;
+      memcpy( &buf[ i*32*4 ], T, clen*4 );
+   }
+}
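pbkdf2_sha256_4way writes its output interleaved as well: each 32-byte block T is stored as 128 bytes at buf[i*32*4], still one 32-bit word per lane. A sketch of pulling one lane's derived key back out of the vector buffer; the helper name is illustrative and dkLen is assumed to be a multiple of 4:

#include <stdint.h>
#include <stddef.h>

// Extract lane 'lane' (0..3) from the interleaved output of
// pbkdf2_sha256_4way: 32-byte blocks, 8 words per lane per block.
static void extract_lane_4x32( uint8_t *dst, const uint8_t *buf4,
                               int lane, size_t dkLen )
{
   const uint32_t *s = (const uint32_t*)buf4;
   uint32_t *d = (uint32_t*)dst;
   for ( size_t w = 0; w < dkLen/4; w++ )        // per-lane 32-bit words
      d[w] = s[ (w/8)*32 + 4*(w % 8) + lane ];   // block, word, lane
}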
+#if defined(__AVX2__)
+
+// HMAC 8-way AVX2
+
+void
+hmac_sha256_8way_full( void *digest, const void *K, size_t Klen,
+                       const void *in, size_t len )
+{
+   hmac_sha256_8way_context ctx;
+   hmac_sha256_8way_init( &ctx, K, Klen );
+   hmac_sha256_8way_update( &ctx, in, len );
+   hmac_sha256_8way_close( &ctx, digest );
+}
+
+/* Initialize an HMAC-SHA256 operation with the given key. */
+void
+hmac_sha256_8way_init( hmac_sha256_8way_context *ctx, const void *_K,
+                       size_t Klen )
+{
+   unsigned char pad[64*8] __attribute__ ((aligned (128)));
+   unsigned char khash[32*8] __attribute__ ((aligned (128)));
+   const unsigned char * K = _K;
+   size_t i;
+
+   /* If Klen > 64, the key is really SHA256(K). */
+   if ( Klen > 64 )
+   {
+      sha256_8way_init( &ctx->ictx );
+      sha256_8way_update( &ctx->ictx, K, Klen );
+      sha256_8way_close( &ctx->ictx, khash );
+      K = khash;
+      Klen = 32;
+   }
+
+   /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
+   sha256_8way_init( &ctx->ictx );
+   memset( pad, 0x36, 64*8 );
+
+   for ( i = 0; i < Klen/4; i++ )
+      casti_m256i( pad, i ) = _mm256_xor_si256( casti_m256i( pad, i ),
+                                                casti_m256i( K, i ) );
+
+   sha256_8way_update( &ctx->ictx, pad, 64 );
+
+   /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
+   sha256_8way_init( &ctx->octx );
+   memset( pad, 0x5c, 64*8 );
+   for ( i = 0; i < Klen/4; i++ )
+      casti_m256i( pad, i ) = _mm256_xor_si256( casti_m256i( pad, i ),
+                                                casti_m256i( K, i ) );
+   sha256_8way_update( &ctx->octx, pad, 64 );
+}
+
+void
+hmac_sha256_8way_update( hmac_sha256_8way_context *ctx, const void *in,
+                         size_t len )
+{
+   /* Feed data to the inner SHA256 operation. */
+   sha256_8way_update( &ctx->ictx, in, len );
+}
+
+/* Finish an HMAC-SHA256 operation. */
+void
+hmac_sha256_8way_close( hmac_sha256_8way_context *ctx, void *digest )
+{
+   unsigned char ihash[32*8] __attribute__ ((aligned (128)));
+
+   /* Finish the inner SHA256 operation. */
+   sha256_8way_close( &ctx->ictx, ihash );
+
+   /* Feed the inner hash to the outer SHA256 operation. */
+   sha256_8way_update( &ctx->octx, ihash, 32 );
+
+   /* Finish the outer SHA256 operation. */
+   sha256_8way_close( &ctx->octx, digest );
+}
+
+/**
+ * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
+ * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
+ * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
+ */
+void
+pbkdf2_sha256_8way( uint8_t *buf, size_t dkLen, const uint8_t *passwd,
+                    size_t passwdlen, const uint8_t *salt, size_t saltlen,
+                    uint64_t c )
+{
+   hmac_sha256_8way_context PShctx, hctx;
+   uint8_t _ALIGN(128) T[32*8];
+   uint8_t _ALIGN(128) U[32*8];
+   size_t i, clen;
+   uint64_t j;
+   int k;
+
+   /* Compute HMAC state after processing P and S. */
+   hmac_sha256_8way_init( &PShctx, passwd, passwdlen );
+
+// saltlen can be an odd number of bytes
+   hmac_sha256_8way_update( &PShctx, salt, saltlen );
+
+   /* Iterate through the blocks. */
+   for ( i = 0; i * 32 < dkLen; i++ )
+   {
+      __m256i ivec = _mm256_set1_epi32( bswap_32( i+1 ) );
+
+      /* Compute U_1 = PRF(P, S || INT(i)). */
+      memcpy( &hctx, &PShctx, sizeof(hmac_sha256_8way_context) );
+      hmac_sha256_8way_update( &hctx, &ivec, 4 );
+      hmac_sha256_8way_close( &hctx, U );
+
+      /* T_i = U_1 ... */
+      memcpy( T, U, 32*8 );
+
+      for ( j = 2; j <= c; j++ )
+      {
+         /* Compute U_j. */
+         hmac_sha256_8way_init( &hctx, passwd, passwdlen );
+         hmac_sha256_8way_update( &hctx, U, 32 );
+         hmac_sha256_8way_close( &hctx, U );
+
+         /* ... xor U_j ... */
+         for ( k = 0; k < 8; k++ )
+            casti_m256i( T, k ) = _mm256_xor_si256( casti_m256i( T, k ),
+                                                    casti_m256i( U, k ) );
+      }
+
+      /* Copy as many bytes as necessary into buf. */
+      clen = dkLen - i * 32;
+      if ( clen > 32 )
+         clen = 32;
+      memcpy( &buf[ i*32*8 ], T, clen*8 );
+   }
+}
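Both vector PBKDF2 loops above parallelize the standard per-block function F(P, S, c, i) = U_1 xor ... xor U_c, where U_1 = PRF(P, S || INT(i)) and INT(i) is the block index as a big-endian 32-bit integer, which is exactly what the 4-byte ivec update appends to each lane. For reference, one block of the scalar computation; this assumes the usual Init/Update/Final trio declared in hmac-sha256-hash.h:

// Reference sketch of PBKDF2 block i (RFC 2898 F function); the 8-way
// code above runs eight of these in lock step, one per lane.
static void pbkdf2_block_ref( uint8_t T[32],
            const uint8_t *passwd, size_t passwdlen,
            const uint8_t *salt, size_t saltlen, uint64_t c, uint32_t i )
{
   HMAC_SHA256_CTX hctx;
   uint8_t U[32];
   uint8_t ivec[4] = { (uint8_t)(i >> 24), (uint8_t)(i >> 16),
                       (uint8_t)(i >> 8),  (uint8_t)i };    // INT(i)

   HMAC_SHA256_Init( &hctx, passwd, passwdlen );
   HMAC_SHA256_Update( &hctx, salt, saltlen );
   HMAC_SHA256_Update( &hctx, ivec, 4 );
   HMAC_SHA256_Final( U, &hctx );                           // U_1
   memcpy( T, U, 32 );
   for ( uint64_t j = 2; j <= c; j++ )
   {
      HMAC_SHA256_Init( &hctx, passwd, passwdlen );
      HMAC_SHA256_Update( &hctx, U, 32 );
      HMAC_SHA256_Final( U, &hctx );                        // U_j
      for ( int k = 0; k < 32; k++ ) T[k] ^= U[k];          // T_i ^= U_j
   }
}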
+#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
+
+// HMAC 16-way AVX512
+
+void
+hmac_sha256_16way_full( void *digest, const void *K, size_t Klen,
+                        const void *in, size_t len )
+{
+   hmac_sha256_16way_context ctx;
+   hmac_sha256_16way_init( &ctx, K, Klen );
+   hmac_sha256_16way_update( &ctx, in, len );
+   hmac_sha256_16way_close( &ctx, digest );
+}
+
+void
+hmac_sha256_16way_init( hmac_sha256_16way_context *ctx, const void *_K,
+                        size_t Klen )
+{
+   unsigned char pad[64*16] __attribute__ ((aligned (128)));
+   unsigned char khash[32*16] __attribute__ ((aligned (128)));
+   const unsigned char * K = _K;
+   size_t i;
+
+   /* If Klen > 64, the key is really SHA256(K). */
+   if ( Klen > 64 )
+   {
+      sha256_16way_init( &ctx->ictx );
+      sha256_16way_update( &ctx->ictx, K, Klen );
+      sha256_16way_close( &ctx->ictx, khash );
+      K = khash;
+      Klen = 32;
+   }
+
+   /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
+   sha256_16way_init( &ctx->ictx );
+   memset( pad, 0x36, 64*16 );
+
+   for ( i = 0; i < Klen/4; i++ )
+      casti_m512i( pad, i ) = _mm512_xor_si512( casti_m512i( pad, i ),
+                                                casti_m512i( K, i ) );
+   sha256_16way_update( &ctx->ictx, pad, 64 );
+
+   /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
+   sha256_16way_init( &ctx->octx );
+   memset( pad, 0x5c, 64*16 );
+   for ( i = 0; i < Klen/4; i++ )
+      casti_m512i( pad, i ) = _mm512_xor_si512( casti_m512i( pad, i ),
+                                                casti_m512i( K, i ) );
+   sha256_16way_update( &ctx->octx, pad, 64 );
+}
+
+void
+hmac_sha256_16way_update( hmac_sha256_16way_context *ctx, const void *in,
+                          size_t len )
+{
+   /* Feed data to the inner SHA256 operation. */
+   sha256_16way_update( &ctx->ictx, in, len );
+}
+
+/* Finish an HMAC-SHA256 operation. */
+void
+hmac_sha256_16way_close( hmac_sha256_16way_context *ctx, void *digest )
+{
+   unsigned char ihash[32*16] __attribute__ ((aligned (128)));
+
+   /* Finish the inner SHA256 operation. */
+   sha256_16way_close( &ctx->ictx, ihash );
+
+   /* Feed the inner hash to the outer SHA256 operation. */
+   sha256_16way_update( &ctx->octx, ihash, 32 );
+
+   /* Finish the outer SHA256 operation. */
+   sha256_16way_close( &ctx->octx, digest );
+}
+
+/**
+ * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
+ * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
+ * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
+ */
+void
+pbkdf2_sha256_16way( uint8_t *buf, size_t dkLen,
+                     const uint8_t *passwd, size_t passwdlen,
+                     const uint8_t *salt, size_t saltlen, uint64_t c )
+{
+   hmac_sha256_16way_context PShctx, hctx;
+   uint8_t _ALIGN(128) T[32*16];
+   uint8_t _ALIGN(128) U[32*16];
+   __m512i ivec;
+   size_t i, clen;
+   uint64_t j;
+   int k;
+
+   /* Compute HMAC state after processing P and S. */
+   hmac_sha256_16way_init( &PShctx, passwd, passwdlen );
+   hmac_sha256_16way_update( &PShctx, salt, saltlen );
+
+   /* Iterate through the blocks. */
+   for ( i = 0; i * 32 < dkLen; i++ )
+   {
+      /* Generate INT(i + 1). */
+      ivec = _mm512_set1_epi32( bswap_32( i+1 ) );
+
+      /* Compute U_1 = PRF(P, S || INT(i)). */
+      memcpy( &hctx, &PShctx, sizeof(hmac_sha256_16way_context) );
+      hmac_sha256_16way_update( &hctx, &ivec, 4 );
+      hmac_sha256_16way_close( &hctx, U );
+
+      /* T_i = U_1 ... */
+      memcpy( T, U, 32*16 );
+
+      for ( j = 2; j <= c; j++ )
+      {
+         /* Compute U_j. */
+         hmac_sha256_16way_init( &hctx, passwd, passwdlen );
+         hmac_sha256_16way_update( &hctx, U, 32 );
+         hmac_sha256_16way_close( &hctx, U );
+
+         /* ... xor U_j ... */
+         for ( k = 0; k < 8; k++ )
+            casti_m512i( T, k ) = _mm512_xor_si512( casti_m512i( T, k ),
+                                                    casti_m512i( U, k ) );
+      }
+
+      /* Copy as many bytes as necessary into buf. */
+      clen = dkLen - i * 32;
+      if ( clen > 32 )
+         clen = 32;
+      memcpy( &buf[ i*32*16 ], T, clen*16 );
+   }
+}
+
+#endif // AVX512
+#endif // AVX2
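Each lane width in the header that follows sits behind the same feature-test macros as its implementation, so a caller can resolve the widest available variant once at compile time. An illustrative selection, mirroring the guards used below:

// Illustrative compile-time selection of the widest available PBKDF2
// variant; the macro names mirror the guards in hmac-sha256-hash-4way.h.
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
  #define PBKDF2_SHA256_NWAY  pbkdf2_sha256_16way
  #define PBKDF2_LANES        16
#elif defined(__AVX2__)
  #define PBKDF2_SHA256_NWAY  pbkdf2_sha256_8way
  #define PBKDF2_LANES        8
#else   // SSE2 baseline
  #define PBKDF2_SHA256_NWAY  pbkdf2_sha256_4way
  #define PBKDF2_LANES        4
#endif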
diff --git a/algo/sha/hmac-sha256-hash-4way.h b/algo/sha/hmac-sha256-hash-4way.h
new file mode 100644
index 0000000..f33fa23
--- /dev/null
+++ b/algo/sha/hmac-sha256-hash-4way.h
@@ -0,0 +1,107 @@
+/*-
+ * Copyright 2005,2007,2009 Colin Percival
+ * Copyright 2020 JayDDee@gmail.com
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/lib/libmd/sha256_Y.h,v 1.2 2006/01/17 15:35:56 phk Exp $
+ */
+
+#ifndef HMAC_SHA256_4WAY_H__
+#define HMAC_SHA256_4WAY_H__
+
+// Tested only 8-way with null pers
+
+#include <sys/types.h>
+#include <stdint.h>
+#include "simd-utils.h"
+#include "sha-hash-4way.h"
+
+typedef struct _hmac_sha256_4way_context
+{
+   sha256_4way_context ictx;
+   sha256_4way_context octx;
+} hmac_sha256_4way_context;
+
+//void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
+void hmac_sha256_4way_init( hmac_sha256_4way_context *, const void *, size_t );
+void hmac_sha256_4way_update( hmac_sha256_4way_context *, const void *,
+                              size_t );
+void hmac_sha256_4way_close( hmac_sha256_4way_context *, void* );
+void hmac_sha256_4way_full( void*, const void *, size_t Klen, const void *,
+                            size_t len );
+
+/**
+ * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
+ * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
+ * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1).
+ */
+void pbkdf2_sha256_4way( uint8_t *, size_t, const uint8_t *, size_t,
+                         const uint8_t *, size_t, uint64_t );
+
+#if defined(__AVX2__)
+
+typedef struct _hmac_sha256_8way_context
+{
+   sha256_8way_context ictx;
+   sha256_8way_context octx;
+} hmac_sha256_8way_context;
+
+//void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
+void hmac_sha256_8way_init( hmac_sha256_8way_context *, const void *, size_t );
+void hmac_sha256_8way_update( hmac_sha256_8way_context *, const void *,
+                              size_t );
+void hmac_sha256_8way_close( hmac_sha256_8way_context *, void* );
+void hmac_sha256_8way_full( void*, const void *, size_t Klen, const void *,
+                            size_t len );
+
+void pbkdf2_sha256_8way( uint8_t *, size_t, const uint8_t *, size_t,
+                         const uint8_t *, size_t, uint64_t );
+
+#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
+
+typedef struct _hmac_sha256_16way_context
+{
+   sha256_16way_context ictx;
+   sha256_16way_context octx;
+} hmac_sha256_16way_context;
+
+//void SHA256_Buf( const void *, size_t len, uint8_t digest[32] );
+void hmac_sha256_16way_init( hmac_sha256_16way_context *,
+                             const void *, size_t );
+void hmac_sha256_16way_update( hmac_sha256_16way_context *, const void *,
+                               size_t );
+void hmac_sha256_16way_close( hmac_sha256_16way_context *, void* );
+void hmac_sha256_16way_full( void*, const void *, size_t Klen, const void *,
+                             size_t len );
+
+void pbkdf2_sha256_16way( uint8_t *, size_t, const uint8_t *, size_t,
+                          const uint8_t *, size_t, uint64_t );
+
+#endif // AVX512
+#endif // AVX2
+
+#endif // HMAC_SHA256_4WAY_H__
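The first change to the scalar file below rebuilds each HMAC pad in a single pass: xor the key into the first Klen bytes, then fill the remaining 64 - Klen bytes with the pad constant, instead of writing all 64 bytes and xoring over them in a second pass. The two constructions are byte-for-byte identical for any Klen <= 64; a self-contained sketch of the equivalence:

#include <stdint.h>
#include <string.h>
#include <assert.h>

// Both constructions yield the same pad; the second, used in the
// rewrite below, touches each byte exactly once.
static void pad_check( const uint8_t *K, size_t Klen )
{
   uint8_t a[64], b[64];

   memset( a, 0x36, 64 );                                   // old: fill...
   for ( size_t i = 0; i < Klen; i++ ) a[i] ^= K[i];        // ...then xor

   for ( size_t i = 0; i < Klen; i++ ) b[i] = K[i] ^ 0x36;  // new: one pass
   memset( b + Klen, 0x36, 64 - Klen );

   assert( memcmp( a, b, 64 ) == 0 );
}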
diff --git a/algo/sha/hmac-sha256-hash.c b/algo/sha/hmac-sha256-hash.c
index 291b122..99b68d8 100644
--- a/algo/sha/hmac-sha256-hash.c
+++ b/algo/sha/hmac-sha256-hash.c
@@ -81,16 +81,17 @@ HMAC_SHA256_Init( HMAC_SHA256_CTX *ctx, const void *_K, size_t Klen )
 
    /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
    SHA256_Init( &ctx->ictx );
-   memset( pad, 0x36, 64 );
-   for ( i = 0; i < Klen; i++ )
-      pad[i] ^= K[i];
+
+
+   for ( i = 0; i < Klen; i++ )  pad[i] = K[i] ^ 0x36;
+   memset( pad + Klen, 0x36, 64 - Klen );
    SHA256_Update( &ctx->ictx, pad, 64 );
 
    /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
    SHA256_Init( &ctx->octx );
-   memset(pad, 0x5c, 64);
-   for ( i = 0; i < Klen; i++ )
-      pad[i] ^= K[i];
+
+   for ( i = 0; i < Klen; i++ )  pad[i] = K[i] ^ 0x5c;
+   memset( pad + Klen, 0x5c, 64 - Klen );
    SHA256_Update( &ctx->octx, pad, 64 );
 }
 
@@ -161,7 +162,13 @@ PBKDF2_SHA256( const uint8_t *passwd, size_t passwdlen, const uint8_t *salt,
          HMAC_SHA256_Final( U, &hctx );
 
          /* ... xor U_j ... */
-         for ( k = 0; k < 32; k++ )
+//       _mm256_xor_si256( *(__m256i*)T, *(__m256i*)U );
+//       _mm_xor_si128( ((__m128i*)T)[0], ((__m128i*)U)[0] );
+//       _mm_xor_si128( ((__m128i*)T)[1], ((__m128i*)U)[1] );
+
+//       for ( k = 0; k < 4; k++ )  T[k] ^= U[k];
+
+         for ( k = 0; k < 32; k++ )
             T[k] ^= U[k];
       }
 
diff --git a/algo/sha/sha-hash-4way.h b/algo/sha/sha-hash-4way.h
index 3635dd9..3a0c61b 100644
--- a/algo/sha/sha-hash-4way.h
+++ b/algo/sha/sha-hash-4way.h
@@ -58,6 +58,7 @@ void sha256_4way_init( sha256_4way_context *sc );
 void sha256_4way_update( sha256_4way_context *sc, const void *data,
                          size_t len );
 void sha256_4way_close( sha256_4way_context *sc, void *dst );
+void sha256_4way_full( void *dst, const void *data, size_t len );
 
 #endif  // SSE2
 
@@ -75,6 +76,7 @@ typedef struct {
 void sha256_8way_init( sha256_8way_context *sc );
 void sha256_8way_update( sha256_8way_context *sc, const void *data,
                          size_t len );
 void sha256_8way_close( sha256_8way_context *sc, void *dst );
+void sha256_8way_full( void *dst, const void *data, size_t len );
 
 #endif  // AVX2
 
@@ -92,6 +94,7 @@ typedef struct {
 void sha256_16way_init( sha256_16way_context *sc );
 void sha256_16way_update( sha256_16way_context *sc, const void *data,
                           size_t len );
 void sha256_16way_close( sha256_16way_context *sc, void *dst );
+void sha256_16way_full( void *dst, const void *data, size_t len );
 
 #endif  // AVX512
 
@@ -110,6 +113,7 @@ void sha512_4way_init( sha512_4way_context *sc);
 void sha512_4way_update( sha512_4way_context *sc, const void *data,
                          size_t len );
 void sha512_4way_close( sha512_4way_context *sc, void *dst );
+void sha512_4way_full( void *dst, const void *data, size_t len );
 
 #endif  // AVX2
 
@@ -128,6 +132,7 @@ void sha512_8way_init( sha512_8way_context *sc);
 void sha512_8way_update( sha512_8way_context *sc, const void *data,
                          size_t len );
 void sha512_8way_close( sha512_8way_context *sc, void *dst );
+void sha512_8way_full( void *dst, const void *data, size_t len );
 
 #endif  // AVX512
 
diff --git a/algo/sha/sha256-hash-4way.c b/algo/sha/sha256-hash-4way.c
index ed10673..d9fb503 100644
--- a/algo/sha/sha256-hash-4way.c
+++ b/algo/sha/sha256-hash-4way.c
@@ -330,6 +330,14 @@ void sha256_4way_close( sha256_4way_context *sc, void *dst )
    mm128_block_bswap_32( dst, sc->val );
 }
 
+void sha256_4way_full( void *dst, const void *data, size_t len )
+{
+   sha256_4way_context ctx;
+   sha256_4way_init( &ctx );
+   sha256_4way_update( &ctx, data, len );
+   sha256_4way_close( &ctx, dst );
+}
+
 #if defined(__AVX2__)
 
 // SHA-256 8 way
@@ -498,6 +506,10 @@ void sha256_8way_init( sha256_8way_context *sc )
 */
 }
 
+
+// Need to handle odd byte length for yespower.
+// Assume only the last update is odd.
+
 void sha256_8way_update( sha256_8way_context *sc, const void *data,
                          size_t len )
 {
    __m256i *vdata = (__m256i*)data;
@@ -564,6 +576,13 @@ void sha256_8way_close( sha256_8way_context *sc, void *dst )
    mm256_block_bswap_32( dst, sc->val );
 }
 
+void sha256_8way_full( void *dst, const void *data, size_t len )
+{
+   sha256_8way_context ctx;
+   sha256_8way_init( &ctx );
+   sha256_8way_update( &ctx, data, len );
+   sha256_8way_close( &ctx, dst );
+}
 
 #if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512DQ__) && defined(__AVX512BW__)
 
@@ -791,6 +810,14 @@ void sha256_16way_close( sha256_16way_context *sc, void *dst )
    mm512_block_bswap_32( dst, sc->val );
 }
 
+void sha256_16way_full( void *dst, const void *data, size_t len )
+{
+   sha256_16way_context ctx;
+   sha256_16way_init( &ctx );
+   sha256_16way_update( &ctx, data, len );
+   sha256_16way_close( &ctx, dst );
+}
+
 #endif  // AVX512
 #endif  // __AVX2__
 #endif  // __SSE2__
diff --git a/algo/x17/sonoa-4way.c b/algo/x17/sonoa-4way.c
index 9b9380b..28ddd7e 100644
--- a/algo/x17/sonoa-4way.c
+++ b/algo/x17/sonoa-4way.c
@@ -58,7 +58,7 @@ union _sonoa_8way_context_overlay
 
 typedef union _sonoa_8way_context_overlay sonoa_8way_context_overlay;
 
-void sonoa_8way_hash( void *state, const void *input )
+int sonoa_8way_hash( void *state, const void *input, int thrid )
 {
    uint64_t vhash[8*8] __attribute__ ((aligned (128)));
    uint64_t vhashA[8*8] __attribute__ ((aligned (64)));
@@ -186,6 +186,7 @@ void sonoa_8way_hash( void *state, const void *input )
 
 #endif
 
+   if ( work_restart[thrid].restart ) return 0;
 // 2
 
    bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -301,6 +302,7 @@ void sonoa_8way_hash( void *state, const void *input )
    hamsi512_8way_update( &ctx.hamsi, vhash, 64 );
    hamsi512_8way_close( &ctx.hamsi, vhash );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 3
 
    bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -430,6 +432,7 @@ void sonoa_8way_hash( void *state, const void *input )
    sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
    sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 4
 
    intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -627,6 +630,7 @@ void sonoa_8way_hash( void *state, const void *input )
 
 #endif
 
+   if ( work_restart[thrid].restart ) return 0;
 // 5
 
    bmw512_8way_full( &ctx.bmw, vhash, vhash, 64 );
@@ -779,6 +783,7 @@ void sonoa_8way_hash( void *state, const void *input )
    sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
    sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 6
 
    intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -947,6 +952,7 @@ void sonoa_8way_hash( void *state, const void *input )
    sph_whirlpool512_full( &ctx.whirlpool, hash6, hash6, 64 );
    sph_whirlpool512_full( &ctx.whirlpool, hash7, hash7, 64 );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 7
 
    intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -1108,6 +1114,8 @@ void sonoa_8way_hash( void *state, const void *input )
    haval256_5_8way_init( &ctx.haval );
    haval256_5_8way_update( &ctx.haval, vhashA, 64 );
    haval256_5_8way_close( &ctx.haval, state );
+
+   return 1;
 }
 
 int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done, struct thr_info *mythr )
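The scan loops below rely on a cooperative-abort contract: sonoa chains seven rounds of roughly seventeen hash algorithms, so the per-thread restart flag is polled between rounds and the hash function returns 0 as soon as the work goes stale, 1 when it completes; the scanner only inspects lane results after a completed hash. A self-contained sketch of the shape of that contract, with illustrative stand-in names:

#include <stdbool.h>
#include <stdint.h>

static volatile bool restart_flag;        // stands in for work_restart[thrid].restart

// Returns 0 if the work went stale mid-computation, 1 if out[] is valid.
static int long_hash( uint32_t *out, const uint32_t *in )
{
   for ( int round = 0; round < 7; round++ )
   {
      // ... the many per-round hash algorithms are elided here ...
      if ( restart_flag ) return 0;       // abandon stale work early
   }
   return 1;
}

static void scan( uint32_t *hash, const uint32_t *data )
{
   while ( !restart_flag )
   {
      if ( long_hash( hash, data ) )
      {  /* compare hash against the target, submit if valid */  }
      // on 0 the partial hash is simply discarded; advance the nonce
   }
}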
@@ -1133,8 +1141,7 @@ int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
 
    do
    {
-      sonoa_8way_hash( hash, vdata );
-
+      if ( sonoa_8way_hash( hash, vdata, thr_id ) )
       for ( int lane = 0; lane < 8; lane++ )
       if unlikely( ( hashd7[ lane ] <= targ32 ) )
       {
@@ -1142,7 +1149,7 @@ int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
          if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
          {
             pdata[19] = bswap_32( n + lane );
-            submit_lane_solution( work, lane_hash, mythr, lane );
+            submit_solution( work, lane_hash, mythr );
          }
       }
       *noncev = _mm512_add_epi32( *noncev,
@@ -1179,7 +1186,7 @@ union _sonoa_4way_context_overlay
 
 typedef union _sonoa_4way_context_overlay sonoa_4way_context_overlay;
 
-void sonoa_4way_hash( void *state, const void *input )
+int sonoa_4way_hash( void *state, const void *input, int thrid )
 {
    uint64_t hash0[8] __attribute__ ((aligned (64)));
    uint64_t hash1[8] __attribute__ ((aligned (64)));
@@ -1243,6 +1250,7 @@ void sonoa_4way_hash( void *state, const void *input )
    echo_full( &ctx.echo, (BitSequence *)hash3, 512,
               (const BitSequence *)hash3, 64 );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 2
 
    intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1302,6 +1310,7 @@ void sonoa_4way_hash( void *state, const void *input )
    hamsi512_4way_update( &ctx.hamsi, vhash, 64 );
    hamsi512_4way_close( &ctx.hamsi, vhash );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 3
 
    bmw512_4way_init( &ctx.bmw );
@@ -1366,6 +1375,7 @@ void sonoa_4way_hash( void *state, const void *input )
    sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
    sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 4
 
    intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1462,6 +1472,7 @@ void sonoa_4way_hash( void *state, const void *input )
    shavite512_2way_init( &ctx.shavite );
    shavite512_2way_update_close( &ctx.shavite, vhashB, vhashB, 64 );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 5
 
    rintrlv_2x128_4x64( vhash, vhashA, vhashB, 512 );
@@ -1546,6 +1557,7 @@ void sonoa_4way_hash( void *state, const void *input )
    sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
    sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 6
 
    intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1638,6 +1650,7 @@ void sonoa_4way_hash( void *state, const void *input )
    sph_whirlpool512_full( &ctx.whirlpool, hash2, hash2, 64 );
    sph_whirlpool512_full( &ctx.whirlpool, hash3, hash3, 64 );
 
+   if ( work_restart[thrid].restart ) return 0;
 // 7
 
    intrlv_4x64_512( vhash, hash0, hash1, hash2, hash3 );
@@ -1728,6 +1741,8 @@ void sonoa_4way_hash( void *state, const void *input )
    haval256_5_4way_init( &ctx.haval );
    haval256_5_4way_update( &ctx.haval, vhashB, 64 );
    haval256_5_4way_close( &ctx.haval, state );
+
+   return 1;
 }
 
 int scanhash_sonoa_4way( struct work *work, const uint32_t max_nonce,
@@ -1752,8 +1767,7 @@ int scanhash_sonoa_4way( struct work *work, const uint32_t max_nonce,
 
    do
    {
-      sonoa_4way_hash( hash, vdata );
-
+      if ( sonoa_4way_hash( hash, vdata, thr_id ) )
      for ( int lane = 0; lane < 4; lane++ )
      if ( unlikely( hashd7[ lane ] <= targ32 ) )
      {
@@ -1761,7 +1775,7 @@ int scanhash_sonoa_4way( struct work *work, const uint32_t max_nonce,
         if ( likely( valid_hash( lane_hash, ptarget ) && !opt_benchmark ) )
        {
            pdata[19] = bswap_32( n + lane );
-           submit_lane_solution( work, lane_hash, mythr, lane );
+           submit_solution( work, lane_hash, mythr );
        }
      }
      *noncev = _mm256_add_epi32( *noncev,
diff --git a/algo/x17/sonoa-gate.c b/algo/x17/sonoa-gate.c
index 7dce68f..dfd8a70 100644
--- a/algo/x17/sonoa-gate.c
+++ b/algo/x17/sonoa-gate.c
@@ -4,14 +4,14 @@ bool register_sonoa_algo( algo_gate_t* gate )
 {
 #if defined (SONOA_8WAY)
   gate->scanhash  = (void*)&scanhash_sonoa_8way;
-  gate->hash      = (void*)&sonoa_8way_hash;
+//  gate->hash      = (void*)&sonoa_8way_hash;
 #elif defined (SONOA_4WAY)
   gate->scanhash  = (void*)&scanhash_sonoa_4way;
-  gate->hash      = (void*)&sonoa_4way_hash;
+//  gate->hash      = (void*)&sonoa_4way_hash;
 #else
   init_sonoa_ctx();
   gate->scanhash  = (void*)&scanhash_sonoa;
-  gate->hash      = (void*)&sonoa_hash;
+//  gate->hash      = (void*)&sonoa_hash;
 #endif
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT | VAES_OPT;
   return true;
diff --git a/algo/x17/sonoa-gate.h b/algo/x17/sonoa-gate.h
index aaad2a4..895bcd2 100644
--- a/algo/x17/sonoa-gate.h
+++ b/algo/x17/sonoa-gate.h
@@ -14,19 +14,19 @@ bool register_sonoa_algo( algo_gate_t* gate );
 
 #if defined(SONOA_8WAY)
 
-void sonoa_8way_hash( void *state, const void *input );
+int sonoa_8way_hash( void *state, const void *input, int thrid );
 int scanhash_sonoa_8way( struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done, struct thr_info *mythr );
 
 #elif defined(SONOA_4WAY)
 
-void sonoa_4way_hash( void *state, const void *input );
+int sonoa_4way_hash( void *state, const void *input, int thrid );
 int scanhash_sonoa_4way( struct work *work, uint32_t max_nonce,
                          uint64_t *hashes_done, struct thr_info *mythr );
 
 #else
 
-void sonoa_hash( void *state, const void *input );
+int sonoa_hash( void *state, const void *input, int thrid );
 int scanhash_sonoa( struct work *work, uint32_t max_nonce,
                     uint64_t *hashes_done, struct thr_info *mythr );
 void init_sonoa_ctx();
diff --git a/algo/x17/sonoa.c b/algo/x17/sonoa.c
index 2bf8ade..bad5946 100644
--- a/algo/x17/sonoa.c
+++ b/algo/x17/sonoa.c
@@ -83,27 +83,27 @@ void init_sonoa_ctx()
         sph_haval256_5_init(&sonoa_ctx.haval);
 };
 
-void sonoa_hash( void *state, const void *input )
+int sonoa_hash( void *state, const void *input, int thrid )
 {
    uint8_t hash[128] __attribute__ ((aligned (64)));
-        sonoa_ctx_holder ctx __attribute__ ((aligned (64)));
-        memcpy( &ctx, &sonoa_ctx, sizeof(sonoa_ctx) );
+   sonoa_ctx_holder ctx __attribute__ ((aligned (64)));
+   memcpy( &ctx, &sonoa_ctx, sizeof(sonoa_ctx) );
 
-        sph_blake512(&ctx.blake, input, 80);
+   sph_blake512(&ctx.blake, input, 80);
    sph_blake512_close(&ctx.blake, hash);
 
    sph_bmw512(&ctx.bmw, hash, 64);
    sph_bmw512_close(&ctx.bmw, hash);
 
 #if defined(__AES__)
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
-                                  (const char*)hash, 512 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
+                             (const char*)hash, 512 );
 #else
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif
 
-        sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512(&ctx.skein, hash, 64);
    sph_skein512_close(&ctx.skein, hash);
 
    sph_jh512(&ctx.jh, hash, 64);
@@ -112,454 +112,461 @@ void sonoa_hash( void *state, const void *input )
    sph_keccak512(&ctx.keccak, hash, 64);
    sph_keccak512_close(&ctx.keccak, hash);
 
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
-                                (const BitSequence*)hash, 64 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+                           (const BitSequence*)hash, 64 );
 
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
-                              (const byte*)hash, 64 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+                         (const byte*)hash, 64 );
 
    sph_shavite512(&ctx.shavite, hash, 64);
    sph_shavite512_close(&ctx.shavite, hash);
 
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                     (const BitSequence *)hash, 512 );
 
 #if defined(__AES__)
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 #else
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif
 
+   if ( work_restart[thrid].restart ) return 0;
 //
 
-        sph_bmw512_init( &ctx.bmw);
-        sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);
 
 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
-                                  (const char*)hash, 512 );
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
+                             (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif
 
-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);
 
-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);
 
-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);
 
-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
-                                (const BitSequence*)hash, 64 );
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+                           (const BitSequence*)hash, 64 );
 
-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
-                              (const byte*)hash, 64 );
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+                         (const byte*)hash, 64 );
 
-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);
 
-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 
 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif
 
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);
 
+   if ( work_restart[thrid].restart ) return 0;
 //
 
-        sph_bmw512_init( &ctx.bmw);
-        sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);
 
 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
-                                  (const char*)hash, 512 );
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
+                             (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif
 
-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);
 
-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);
 
-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);
 
-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
-                                (const BitSequence*)hash, 64 );
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+                           (const BitSequence*)hash, 64 );
 
-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
-                              (const byte*)hash, 64 );
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*)hash,
+                         (const byte*)hash, 64 );
 
-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);
 
-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 
 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif
 
-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);
 
-        sph_fugue512(&ctx.fugue, hash, 64);
-        sph_fugue512_close(&ctx.fugue, hash);
+   sph_fugue512(&ctx.fugue, hash, 64);
+   sph_fugue512_close(&ctx.fugue, hash);
 
+   if ( work_restart[thrid].restart ) return 0;
 //
 
-        sph_bmw512_init( &ctx.bmw);
-        sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);
 
 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
                     (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif
 
-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);
 
-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);
 
-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);
 
-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
                     (const BitSequence*)hash, 64 );
 
-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
                     (const byte*)hash, 64 );
 
-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);
 
-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 
 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif
 
-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);
 
-        sph_fugue512_init( &ctx.fugue );
-        sph_fugue512(&ctx.fugue, hash, 64);
-        sph_fugue512_close(&ctx.fugue, hash);
+   sph_fugue512_init( &ctx.fugue );
+   sph_fugue512(&ctx.fugue, hash, 64);
+   sph_fugue512_close(&ctx.fugue, hash);
 
-        sph_shabal512(&ctx.shabal, hash, 64);
-        sph_shabal512_close(&ctx.shabal, hash);
+   sph_shabal512(&ctx.shabal, hash, 64);
+   sph_shabal512_close(&ctx.shabal, hash);
 
-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);
 
 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif
 
-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);
 
+   if ( work_restart[thrid].restart ) return 0;
 //
 
-        sph_bmw512_init( &ctx.bmw);
-        sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);
 
-        sph_shabal512_init( &ctx.shabal );
+   sph_shabal512_init( &ctx.shabal );
    sph_shabal512(&ctx.shabal, hash, 64);
-        sph_shabal512_close(&ctx.shabal, hash);
+   sph_shabal512_close(&ctx.shabal, hash);
 
 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
                     (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif
 
-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);
 
-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);
 
-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);
 
-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
                     (const BitSequence*)hash, 64 );
 
-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
                     (const byte*)hash, 64 );
 
-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);
 
-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 
 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif
 
-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);
 
-        sph_fugue512_init( &ctx.fugue );
-        sph_fugue512(&ctx.fugue, hash, 64);
-        sph_fugue512_close(&ctx.fugue, hash);
+   sph_fugue512_init( &ctx.fugue );
+   sph_fugue512(&ctx.fugue, hash, 64);
+   sph_fugue512_close(&ctx.fugue, hash);
 
-        sph_shabal512_init( &ctx.shabal );
-        sph_shabal512(&ctx.shabal, hash, 64);
-        sph_shabal512_close(&ctx.shabal, hash);
+   sph_shabal512_init( &ctx.shabal );
+   sph_shabal512(&ctx.shabal, hash, 64);
+   sph_shabal512_close(&ctx.shabal, hash);
 
-        sph_whirlpool(&ctx.whirlpool, hash, 64);
-        sph_whirlpool_close(&ctx.whirlpool, hash);
+   sph_whirlpool(&ctx.whirlpool, hash, 64);
+   sph_whirlpool_close(&ctx.whirlpool, hash);
 
+   if ( work_restart[thrid].restart ) return 0;
 //
 
-        sph_bmw512_init( &ctx.bmw);
-        sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);
 
 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
                     (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif
 
-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);
 
-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);
 
-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);
 
-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
                     (const BitSequence*)hash, 64 );
 
-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
                     (const byte*)hash, 64 );
 
-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);
 
-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 
 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif
 
-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);
 
-        sph_fugue512_init( &ctx.fugue );
-        sph_fugue512(&ctx.fugue, hash, 64);
-        sph_fugue512_close(&ctx.fugue, hash);
+   sph_fugue512_init( &ctx.fugue );
+   sph_fugue512(&ctx.fugue, hash, 64);
+   sph_fugue512_close(&ctx.fugue, hash);
 
-        sph_shabal512_init( &ctx.shabal );
-        sph_shabal512(&ctx.shabal, hash, 64);
-        sph_shabal512_close(&ctx.shabal, hash);
+   sph_shabal512_init( &ctx.shabal );
+   sph_shabal512(&ctx.shabal, hash, 64);
+   sph_shabal512_close(&ctx.shabal, hash);
 
-        sph_whirlpool_init( &ctx.whirlpool );
-        sph_whirlpool(&ctx.whirlpool, hash, 64);
-        sph_whirlpool_close(&ctx.whirlpool, hash);
+   sph_whirlpool_init( &ctx.whirlpool );
+   sph_whirlpool(&ctx.whirlpool, hash, 64);
+   sph_whirlpool_close(&ctx.whirlpool, hash);
 
-        SHA512_Update( &ctx.sha512, hash, 64 );
-        SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
+   SHA512_Update( &ctx.sha512, hash, 64 );
+   SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
 
-        sph_whirlpool_init( &ctx.whirlpool );
-        sph_whirlpool(&ctx.whirlpool, hash, 64);
-        sph_whirlpool_close(&ctx.whirlpool, hash);
+   sph_whirlpool_init( &ctx.whirlpool );
+   sph_whirlpool(&ctx.whirlpool, hash, 64);
+   sph_whirlpool_close(&ctx.whirlpool, hash);
 
+   if ( work_restart[thrid].restart ) return 0;
 //
 
-        sph_bmw512_init( &ctx.bmw);
-        sph_bmw512(&ctx.bmw, hash, 64);
-        sph_bmw512_close(&ctx.bmw, hash);
+   sph_bmw512_init( &ctx.bmw);
+   sph_bmw512(&ctx.bmw, hash, 64);
+   sph_bmw512_close(&ctx.bmw, hash);
 
 #if defined(__AES__)
-        init_groestl( &ctx.groestl, 64 );
-        update_and_final_groestl( &ctx.groestl, (char*)hash,
+   init_groestl( &ctx.groestl, 64 );
+   update_and_final_groestl( &ctx.groestl, (char*)hash,
                     (const char*)hash, 512 );
 #else
-        sph_groestl512_init(&ctx.groestl );
-        sph_groestl512(&ctx.groestl, hash, 64);
-        sph_groestl512_close(&ctx.groestl, hash);
+   sph_groestl512_init(&ctx.groestl );
+   sph_groestl512(&ctx.groestl, hash, 64);
+   sph_groestl512_close(&ctx.groestl, hash);
 #endif
 
-        sph_skein512_init( &ctx.skein);
-        sph_skein512(&ctx.skein, hash, 64);
-        sph_skein512_close(&ctx.skein, hash);
+   sph_skein512_init( &ctx.skein);
+   sph_skein512(&ctx.skein, hash, 64);
+   sph_skein512_close(&ctx.skein, hash);
 
-        sph_jh512_init( &ctx.jh);
-        sph_jh512(&ctx.jh, hash, 64);
-        sph_jh512_close(&ctx.jh, hash);
+   sph_jh512_init( &ctx.jh);
+   sph_jh512(&ctx.jh, hash, 64);
+   sph_jh512_close(&ctx.jh, hash);
 
-        sph_keccak512_init( &ctx.keccak );
-        sph_keccak512(&ctx.keccak, hash, 64);
-        sph_keccak512_close(&ctx.keccak, hash);
+   sph_keccak512_init( &ctx.keccak );
+   sph_keccak512(&ctx.keccak, hash, 64);
+   sph_keccak512_close(&ctx.keccak, hash);
 
-        init_luffa( &ctx.luffa, 512 );
-        update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
+   init_luffa( &ctx.luffa, 512 );
+   update_and_final_luffa( &ctx.luffa, (BitSequence*)hash,
                     (const BitSequence*)hash, 64 );
 
-        cubehashInit( &ctx.cubehash, 512, 16, 32 );
-        cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
+   cubehashInit( &ctx.cubehash, 512, 16, 32 );
+   cubehashUpdateDigest( &ctx.cubehash, (byte*) hash,
                     (const byte*)hash, 64 );
 
-        sph_shavite512_init( &ctx.shavite );
-        sph_shavite512(&ctx.shavite, hash, 64);
-        sph_shavite512_close(&ctx.shavite, hash);
+   sph_shavite512_init( &ctx.shavite );
+   sph_shavite512(&ctx.shavite, hash, 64);
+   sph_shavite512_close(&ctx.shavite, hash);
 
-        init_sd( &ctx.simd, 512 );
-        update_final_sd( &ctx.simd, (BitSequence *)hash,
+   init_sd( &ctx.simd, 512 );
+   update_final_sd( &ctx.simd, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 
 #if defined(__AES__)
-        init_echo( &ctx.echo, 512 );
-        update_final_echo ( &ctx.echo, (BitSequence *)hash,
+   init_echo( &ctx.echo, 512 );
+   update_final_echo ( &ctx.echo, (BitSequence *)hash,
                    (const BitSequence *)hash, 512 );
 #else
-        sph_echo512_init( &ctx.echo );
-        sph_echo512(&ctx.echo, hash, 64);
-        sph_echo512_close(&ctx.echo, hash);
+   sph_echo512_init( &ctx.echo );
+   sph_echo512(&ctx.echo, hash, 64);
+   sph_echo512_close(&ctx.echo, hash);
 #endif
 
-        sph_hamsi512_init( &ctx.hamsi );
-        sph_hamsi512(&ctx.hamsi, hash, 64);
-        sph_hamsi512_close(&ctx.hamsi, hash);
+   sph_hamsi512_init( &ctx.hamsi );
+   sph_hamsi512(&ctx.hamsi, hash, 64);
+   sph_hamsi512_close(&ctx.hamsi, hash);
 
-        sph_fugue512_init( &ctx.fugue );
-        sph_fugue512(&ctx.fugue, hash, 64);
-        sph_fugue512_close(&ctx.fugue, hash);
+   sph_fugue512_init( &ctx.fugue );
+   sph_fugue512(&ctx.fugue, hash, 64);
+   sph_fugue512_close(&ctx.fugue, hash);
 
-        sph_shabal512_init( &ctx.shabal );
-        sph_shabal512(&ctx.shabal, hash, 64);
-        sph_shabal512_close(&ctx.shabal, hash);
+   sph_shabal512_init( &ctx.shabal );
+   sph_shabal512(&ctx.shabal, hash, 64);
+   sph_shabal512_close(&ctx.shabal, hash);
 
-        sph_whirlpool_init( &ctx.whirlpool );
-        sph_whirlpool(&ctx.whirlpool, hash, 64);
-        sph_whirlpool_close(&ctx.whirlpool, hash);
+   sph_whirlpool_init( &ctx.whirlpool );
+   sph_whirlpool(&ctx.whirlpool, hash, 64);
+   sph_whirlpool_close(&ctx.whirlpool, hash);
 
-        SHA512_Init( &ctx.sha512 );
-        SHA512_Update( &ctx.sha512, hash, 64 );
-        SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
+   SHA512_Init( &ctx.sha512 );
+   SHA512_Update( &ctx.sha512, hash, 64 );
+   SHA512_Final( (unsigned char*) hash, &ctx.sha512 );
 
-        sph_haval256_5(&ctx.haval,(const void*) hash, 64);
-        sph_haval256_5_close(&ctx.haval, hash);
+   sph_haval256_5(&ctx.haval,(const void*) hash, 64);
+   sph_haval256_5_close(&ctx.haval, hash);
 
    memcpy(state, hash, 32);
+
+   return 1;
 }
 
 int scanhash_sonoa( struct work *work, uint32_t max_nonce,
@@ -579,7 +586,7 @@ int scanhash_sonoa( struct work *work, uint32_t max_nonce,
    do
    {
       edata[19] = n;
-      sonoa_hash( hash64, edata );
+      if ( sonoa_hash( hash64, edata, thr_id ) )
       if ( unlikely( valid_hash( hash64, ptarget ) && !bench ) )
       {
          pdata[19] = bswap_32( n );
diff --git a/algo/yescrypt/yescrypt-platform.h b/algo/yescrypt/yescrypt-platform.h
index a80640c..bf6df91 100644
--- a/algo/yescrypt/yescrypt-platform.h
+++ b/algo/yescrypt/yescrypt-platform.h
@@ -31,6 +31,7 @@
 #undef HUGEPAGE_SIZE
 #endif
 
+/*
 static __inline uint32_t
 le32dec(const void *pp)
 {
@@ -50,6 +51,7 @@ le32enc(void *pp, uint32_t x)
    p[2] = (x >> 16) & 0xff;
    p[3] = (x >> 24) & 0xff;
 }
+*/
 
 static void *
 alloc_region(yescrypt_region_t * region, size_t size)
@@ -154,7 +156,7 @@ int yescrypt_init_shared(yescrypt_shared_t * shared, const uint8_t * param, size
    if (yescrypt_kdf(&dummy, shared1,
        param, paramlen, NULL, 0, N, r, p, 0,
        YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1,
-       salt, sizeof(salt)))
+       salt, sizeof(salt), 0 ) )
       goto out;
 
    half1 = half2 = *shared;
@@ -166,19 +168,19 @@ int yescrypt_init_shared(yescrypt_shared_t * shared, const uint8_t * param, size
    if (p > 1 && yescrypt_kdf(&half1, &half2.shared1,
        param, paramlen, salt, sizeof(salt), N, r, p, 0,
        YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_2,
-       salt, sizeof(salt)))
+       salt, sizeof(salt), 0 ))
       goto out;
    if (yescrypt_kdf(&half2, &half1.shared1,
       param, paramlen, salt, sizeof(salt), N, r, p, 0,
       YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1,
-      salt, sizeof(salt)))
+      salt, sizeof(salt), 0))
      goto out;
    if (yescrypt_kdf(&half1, &half2.shared1,
       param, paramlen, salt, sizeof(salt), N, r, p, 0,
       YESCRYPT_RW | YESCRYPT_PARALLEL_SMIX | __YESCRYPT_INIT_SHARED_1,
-      buf, buflen))
+      buf, buflen, 0))
      goto out;
 
    shared->mask1 = mask;
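With the thrid parameter threaded through, yescrypt_kdf in the file below gains a third outcome: it still returns -1 on allocation or parameter errors, but now returns 0 when a work restart interrupted the derivation and 1 when the hash completed. yescrypt_r treats only -1 as fatal; an interrupted run just yields a partial hash that the scan loop discards once it rechecks the restart flag. A sketch of a caller against that widened contract, with the surrounding variables taken from yescrypt_r's context:

// Assumed return values, per the change below:
// -1 = hard failure, 0 = interrupted by work restart, 1 = completed.
int rc = yescrypt_kdf( shared, local, passwd, passwdlen, salt, saltlen,
                       N, r, p, 0, flags, hash, sizeof(hash), thrid );
if ( rc == -1 )
   return NULL;   // real error: report and abandon this work item
// rc == 0: partial hash, harmless; the scanner sees the restart flag
// rc == 1: hash[] holds the derived key and can be encoded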
yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local, const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p, uint32_t t, yescrypt_flags_t flags, - uint8_t * buf, size_t buflen) + uint8_t * buf, size_t buflen, int thrid ) { uint8_t _ALIGN(128) sha256[32]; yescrypt_region_t tmp; @@ -1157,6 +1157,7 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local, size_t B_size, V_size, XY_size, need; uint8_t * B, * S; salsa20_blk_t * V, * XY; + int retval = 1; /* * YESCRYPT_PARALLEL_SMIX is a no-op at p = 1 for its intended purpose, @@ -1312,6 +1313,12 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local, /* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */ PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1, B, B_size); + if ( work_restart[thrid].restart ) + { + retval = 0; + goto out; + } + if (t || flags) memcpy(sha256, B, sizeof(sha256)); @@ -1339,9 +1346,21 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local, } } + if ( work_restart[thrid].restart ) + { + retval = 0; + goto out; + } + /* 5: DK <-- PBKDF2(P, B, 1, dkLen) */ PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, buf, buflen); + if ( work_restart[thrid].restart ) + { + retval = 0; + goto out; + } + /* * Except when computing classic scrypt, allow all computation so far * to be performed on the client. The final steps below match those of @@ -1370,9 +1389,10 @@ yescrypt_kdf(const yescrypt_shared_t * shared, yescrypt_local_t * local, } } +out: if (free_region(&tmp)) return -1; /* Success! */ - return 0; + return retval; } diff --git a/algo/yescrypt/yescrypt.c b/algo/yescrypt/yescrypt.c index f4adc47..fc78ff2 100644 --- a/algo/yescrypt/yescrypt.c +++ b/algo/yescrypt/yescrypt.c @@ -106,7 +106,8 @@ static const uint8_t* decode64_uint32(uint32_t* dst, uint32_t dstbits, const uin } uint8_t* yescrypt_r(const yescrypt_shared_t* shared, yescrypt_local_t* local, - const uint8_t* passwd, size_t passwdlen, const uint8_t* setting, uint8_t* buf, size_t buflen) + const uint8_t* passwd, size_t passwdlen, const uint8_t* setting, + uint8_t* buf, size_t buflen, int thrid ) { uint8_t hash[HASH_SIZE]; const uint8_t * src, * salt; @@ -210,7 +211,9 @@ uint8_t* yescrypt_r(const yescrypt_shared_t* shared, yescrypt_local_t* local, return NULL; } - if (yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, N, r, p, 0, flags, hash, sizeof(hash))) { + if ( yescrypt_kdf( shared, local, passwd, passwdlen, salt, saltlen, N, r, p, + 0, flags, hash, sizeof(hash), thrid ) == -1 ) + { printf("died10 ..."); fflush(stdout); return NULL; @@ -237,7 +240,7 @@ uint8_t* yescrypt_r(const yescrypt_shared_t* shared, yescrypt_local_t* local, return buf; } -uint8_t* yescrypt(const uint8_t* passwd, const uint8_t* setting) +uint8_t* yescrypt(const uint8_t* passwd, const uint8_t* setting, int thrid ) { static uint8_t buf[4 + 1 + 5 + 5 + BYTES2CHARS(32) + 1 + HASH_LEN + 1]; yescrypt_shared_t shared; @@ -252,7 +255,7 @@ uint8_t* yescrypt(const uint8_t* passwd, const uint8_t* setting) return NULL; } retval = yescrypt_r(&shared, &local, - passwd, 80, setting, buf, sizeof(buf)); + passwd, 80, setting, buf, sizeof(buf), thrid ); //printf("hashse='%s'\n", (char *)retval); if (yescrypt_free_local(&local)) { yescrypt_free_shared(&shared); @@ -329,7 +332,7 @@ uint8_t* yescrypt_gensalt(uint32_t N_log2, uint32_t r, uint32_t p, yescrypt_flag static int yescrypt_bsty(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, size_t saltlen, uint64_t 
N, uint32_t r, uint32_t p, - uint8_t * buf, size_t buflen) + uint8_t * buf, size_t buflen, int thrid ) { static __thread int initialized = 0; static __thread yescrypt_shared_t shared; @@ -349,7 +352,7 @@ static int yescrypt_bsty(const uint8_t * passwd, size_t passwdlen, } retval = yescrypt_kdf(&shared, &local, passwd, passwdlen, salt, saltlen, N, r, p, 0, YESCRYPT_FLAGS, - buf, buflen); + buf, buflen, thrid ); #if 0 if (yescrypt_free_local(&local)) { yescrypt_free_shared(&shared); @@ -370,16 +373,16 @@ char *yescrypt_client_key = NULL; int yescrypt_client_key_len = 0; /* main hash 80 bytes input */ -void yescrypt_hash( const char *input, char *output, uint32_t len ) +int yescrypt_hash( const char *input, char *output, uint32_t len, int thrid ) { - yescrypt_bsty( (uint8_t*)input, len, (uint8_t*)input, len, YESCRYPT_N, - YESCRYPT_R, YESCRYPT_P, (uint8_t*)output, 32 ); + return yescrypt_bsty( (uint8_t*)input, len, (uint8_t*)input, len, YESCRYPT_N, + YESCRYPT_R, YESCRYPT_P, (uint8_t*)output, 32, thrid ); } /* for util.c test */ -void yescrypthash(void *output, const void *input) +int yescrypthash(void *output, const void *input, int thrid) { - yescrypt_hash((char*) input, (char*) output, 80); + return yescrypt_hash((char*) input, (char*) output, 80, thrid); } int scanhash_yescrypt( struct work *work, uint32_t max_nonce, @@ -392,13 +395,13 @@ int scanhash_yescrypt( struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; const uint32_t last_nonce = max_nonce; uint32_t n = first_nonce; - int thr_id = mythr->id; // thr_id arg is deprecated + int thr_id = mythr->id; for ( int k = 0; k < 19; k++ ) be32enc( &endiandata[k], pdata[k] ); endiandata[19] = n; do { - yescrypt_hash((char*) endiandata, (char*) vhash, 80); + if ( yescrypt_hash((char*) endiandata, (char*) vhash, 80, thr_id ) ) if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark ) { be32enc( pdata+19, n ); diff --git a/algo/yescrypt/yescrypt.h b/algo/yescrypt/yescrypt.h index c33ba40..51be262 100644 --- a/algo/yescrypt/yescrypt.h +++ b/algo/yescrypt/yescrypt.h @@ -38,12 +38,13 @@ extern "C" { #include #include /* for size_t */ #include +#include "miner.h" //#define __SSE4_1__ -void yescrypt_hash(const char* input, char* output, uint32_t len); +int yescrypt_hash(const char* input, char* output, uint32_t len, int thrid ); -void yescrypthash(void *output, const void *input); +int yescrypthash(void *output, const void *input, int thrid ); /** * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen): @@ -301,7 +302,7 @@ extern int yescrypt_kdf(const yescrypt_shared_t * __shared, const uint8_t * __salt, size_t __saltlen, uint64_t __N, uint32_t __r, uint32_t __p, uint32_t __t, yescrypt_flags_t __flags, - uint8_t * __buf, size_t __buflen); + uint8_t * __buf, size_t __buflen, int thrid); /** * yescrypt_r(shared, local, passwd, passwdlen, setting, buf, buflen): @@ -321,7 +322,7 @@ extern uint8_t * yescrypt_r(const yescrypt_shared_t * __shared, yescrypt_local_t * __local, const uint8_t * __passwd, size_t __passwdlen, const uint8_t * __setting, - uint8_t * __buf, size_t __buflen); + uint8_t * __buf, size_t __buflen, int thrid); /** * yescrypt(passwd, setting): @@ -339,7 +340,7 @@ extern uint8_t * yescrypt_r(const yescrypt_shared_t * __shared, * * MT-unsafe. 
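 *
 * Usage sketch for the thrid parameter added below (illustrative only;
 * check_and_submit() is a hypothetical helper, not part of this tree):
 * the miner thread index is passed down from scanhash so the KDF can
 * poll work_restart[thrid].restart and abandon a stale hash early:
 *
 *   if ( yescrypt_hash( (char*)endiandata, (char*)vhash, 80, thr_id ) )
 *       check_and_submit( vhash );  // only submit fully computed hashes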
*/ -extern uint8_t * yescrypt(const uint8_t * __passwd, const uint8_t * __setting); +extern uint8_t * yescrypt(const uint8_t * __passwd, const uint8_t * __setting, int thrid ); /** * yescrypt_gensalt_r(N_log2, r, p, flags, src, srclen, buf, buflen): diff --git a/algo/yespower/yescrypt-r8g.c b/algo/yespower/yescrypt-r8g.c index d2df431..5ee79ea 100644 --- a/algo/yespower/yescrypt-r8g.c +++ b/algo/yespower/yescrypt-r8g.c @@ -51,6 +51,10 @@ int scanhash_yespower_r8g( struct work *work, uint32_t max_nonce, be32enc( &endiandata[ i], pdata[ i ]); endiandata[19] = n; +// do sha256 prehash + SHA256_Init( &sha256_prehash_ctx ); + SHA256_Update( &sha256_prehash_ctx, endiandata, 64 ); + do { yespower_tls( (unsigned char *)endiandata, params.perslen, ¶ms, (yespower_binary_t*)hash, thr_id ); diff --git a/algo/yespower/yespower-4way.c b/algo/yespower/yespower-4way.c new file mode 100644 index 0000000..8f38e90 --- /dev/null +++ b/algo/yespower/yespower-4way.c @@ -0,0 +1,692 @@ +/*- + * Copyright 2009 Colin Percival + * Copyright 2013-2018 Alexander Peslyak + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file was originally written by Colin Percival as part of the Tarsnap + * online backup system. + * + * This is a proof-of-work focused fork of yescrypt, including reference and + * cut-down implementation of the obsolete yescrypt 0.5 (based off its first + * submission to PHC back in 2014) and a new proof-of-work specific variation + * known as yespower 1.0. The former is intended as an upgrade for + * cryptocurrencies that already use yescrypt 0.5 and the latter may be used + * as a further upgrade (hard fork) by those and other cryptocurrencies. The + * version of algorithm to use is requested through parameters, allowing for + * both algorithms to co-exist in client and miner implementations (such as in + * preparation for a hard-fork). + * + * This is the reference implementation. Its purpose is to provide a simple + * human- and machine-readable specification that implementations intended + * for actual use should be tested against. It is deliberately mostly not + * optimized, and it is not meant to be used in production. Instead, use + * yespower-opt.c. 
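+ *
+ * The 8-way AVX2 code below evaluates eight candidate headers at once,
+ * one per 32-bit lane of each __m256i register. Purely arithmetic steps
+ * vectorize directly; the data-dependent V_j lookups (Integerify) do
+ * not, which is why those call sites are still commented out further
+ * down.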
+ */ +/* +#warning "This reference implementation is deliberately mostly not optimized. Use yespower-opt.c instead unless you're testing (against) the reference implementation on purpose." +*/ +#include +#include +#include +#include + +#include "algo/sha/hmac-sha256-hash-4way.h" +//#include "sysendian.h" + +#include "yespower.h" + + +#if defined(__AVX2__) + + +static void blkcpy_8way( __m256i *dst, const __m256i *src, size_t count ) +{ + do { + *dst++ = *src++; + } while (--count); +} + +static void blkxor_8way( __m256i *dst, const __m256i *src, size_t count ) +{ + do { + *dst++ ^= *src++; + } while (--count); +} + +/** + * salsa20(B): + * Apply the Salsa20 core to the provided block. + */ +static void salsa20_8way( __m256i B[16], uint32_t rounds ) +{ + __m256i x[16]; + size_t i; + + /* SIMD unshuffle */ + for ( i = 0; i < 16; i++ ) + x[i * 5 % 16] = B[i]; + + for ( i = 0; i < rounds; i += 2 ) + { +#define R( a, b, c ) mm256_rol_32( _mm256_add_epi32( a, b ), c ) + /* Operate on columns */ + + x[ 4] = _mm256_xor_si256( x[ 4], R( x[ 0], x[12], 7 ) ); + x[ 8] = _mm256_xor_si256( x[ 8], R( x[ 4], x[ 0], 9 ) ); + x[12] = _mm256_xor_si256( x[12], R( x[ 8], x[ 4], 13 ) ); + x[ 0] = _mm256_xor_si256( x[ 0], R( x[12], x[ 8], 18 ) ); + + x[ 9] = _mm256_xor_si256( x[ 9], R( x[ 5], x[ 1], 7 ) ); + x[13] = _mm256_xor_si256( x[13], R( x[ 9], x[ 5], 9 ) ); + x[ 1] = _mm256_xor_si256( x[ 1], R( x[13], x[ 9], 13 ) ); + x[ 5] = _mm256_xor_si256( x[ 5], R( x[ 1], x[13], 18 ) ); + + x[14] = _mm256_xor_si256( x[14], R( x[10], x[ 6], 7 ) ); + x[ 2] = _mm256_xor_si256( x[ 2], R( x[14], x[10], 9 ) ); + x[ 6] = _mm256_xor_si256( x[ 6], R( x[ 2], x[14], 13 ) ); + x[10] = _mm256_xor_si256( x[10], R( x[ 6], x[ 2], 18 ) ); + + x[ 3] = _mm256_xor_si256( x[ 3], R( x[15], x[11], 7 ) ); + x[ 7] = _mm256_xor_si256( x[ 7], R( x[ 3], x[15], 9 ) ); + x[11] = _mm256_xor_si256( x[11], R( x[ 7], x[ 3], 13 ) ); + x[15] = _mm256_xor_si256( x[15], R( x[11], x[ 7], 18 ) ); + + /* Operate on rows */ + + x[ 1] = _mm256_xor_si256( x[ 1], R( x[ 0], x[ 3], 7 ) ); + x[ 2] = _mm256_xor_si256( x[ 2], R( x[ 1], x[ 0], 9 ) ); + x[ 3] = _mm256_xor_si256( x[ 3], R( x[ 2], x[ 1], 13 ) ); + x[ 0] = _mm256_xor_si256( x[ 0], R( x[ 3], x[ 2], 18 ) ); + + x[ 6] = _mm256_xor_si256( x[ 6], R( x[ 5], x[ 4], 7 ) ); + x[ 7] = _mm256_xor_si256( x[ 7], R( x[ 6], x[ 5], 9 ) ); + x[ 4] = _mm256_xor_si256( x[ 4], R( x[ 7], x[ 6], 13 ) ); + x[ 5] = _mm256_xor_si256( x[ 5], R( x[ 4], x[ 7], 18 ) ); + + x[11] = _mm256_xor_si256( x[11], R( x[10], x[ 9], 7 ) ); + x[ 8] = _mm256_xor_si256( x[ 8], R( x[11], x[10], 9 ) ); + x[ 9] = _mm256_xor_si256( x[ 9], R( x[ 8], x[11], 13 ) ); + x[10] = _mm256_xor_si256( x[10], R( x[ 9], x[ 8], 18 ) ); + + x[12] = _mm256_xor_si256( x[12], R( x[15], x[14], 7 ) ); + x[13] = _mm256_xor_si256( x[13], R( x[12], x[15], 9 ) ); + x[14] = _mm256_xor_si256( x[14], R( x[13], x[12], 13 ) ); + x[15] = _mm256_xor_si256( x[15], R( x[14], x[13], 18 ) ); + +#undef R + } + + /* SIMD shuffle */ + for (i = 0; i < 16; i++) + B[i] = _mm256_add_epi32( B[i], x[i * 5 % 16] ); +} + +/** + * blockmix_salsa(B): + * Compute B = BlockMix_{salsa20, 1}(B). The input B must be 128 bytes in + * length. 
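+ *
+ * For reference, each vector statement in salsa20_8way above applies the
+ * scalar Salsa20 operation below independently in every 32-bit lane
+ * (sketch, with R(a,b,c) = rotate-left(a + b, c)):
+ *
+ *   x[ 4] ^= R( x[ 0], x[12],  7 );   // first column operation
+ *   x[ 8] ^= R( x[ 4], x[ 0],  9 );
+ *   x[12] ^= R( x[ 8], x[ 4], 13 );
+ *   x[ 0] ^= R( x[12], x[ 8], 18 );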
+ */ +static void blockmix_salsa_8way( __m256i *B, uint32_t rounds ) +{ + __m256i X[16]; + size_t i; + + /* 1: X <-- B_{2r - 1} */ + blkcpy_8way( X, &B[16], 16 ); + + /* 2: for i = 0 to 2r - 1 do */ + for ( i = 0; i < 2; i++ ) + { + /* 3: X <-- H(X xor B_i) */ + blkxor_8way( X, &B[i * 16], 16 ); + salsa20_8way( X, rounds ); + + /* 4: Y_i <-- X */ + /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ + blkcpy_8way( &B[i * 16], X, 16 ); + } +} + +/* + * These are tunable, but they must meet certain constraints and are part of + * what defines a yespower version. + */ +#define PWXsimple 2 +#define PWXgather 4 +/* Version 0.5 */ +#define PWXrounds_0_5 6 +#define Swidth_0_5 8 +/* Version 1.0 */ +#define PWXrounds_1_0 3 +#define Swidth_1_0 11 + +/* Derived values. Not tunable on their own. */ +#define PWXbytes (PWXgather * PWXsimple * 8) +#define PWXwords (PWXbytes / sizeof(uint32_t)) +#define rmin ((PWXbytes + 127) / 128) + +/* Runtime derived values. Not tunable on their own. */ +#define Swidth_to_Sbytes1(Swidth) ((1 << Swidth) * PWXsimple * 8) +#define Swidth_to_Smask(Swidth) (((1 << Swidth) - 1) * PWXsimple * 8) + +typedef struct { + __m256i (*S0)[2], (*S1)[2], (*S2)[2]; + __m256i *S; + yespower_version_t version; + uint32_t salsa20_rounds; + uint32_t PWXrounds, Swidth, Sbytes, Smask; + size_t w; +} pwxform_8way_ctx_t __attribute__ ((aligned (128))); + +/** + * pwxform(B): + * Transform the provided block using the provided S-boxes. + */ +static void pwxform_8way( __m256i *B, pwxform_8way_ctx_t *ctx ) +{ + __m256i (*X)[PWXsimple][2] = (__m256i (*)[PWXsimple][2])B; + __m256i (*S0)[2] = ctx->S0, (*S1)[2] = ctx->S1, (*S2)[2] = ctx->S2; + __m256i Smask = _mm256_set1_epi32( ctx->Smask ); + size_t w = ctx->w; + size_t i, j, k; + + /* 1: for i = 0 to PWXrounds - 1 do */ + for ( i = 0; i < ctx->PWXrounds; i++ ) + { + /* 2: for j = 0 to PWXgather - 1 do */ + for ( j = 0; j < PWXgather; j++ ) + { +// Are these pointers or data? 
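+// They are pointers in the scalar reference: p0/p1 select S-box entries
+// using the low/high words of B_{j,0}, i.e. (reference sketch):
+//   p0 = S0 + (xl & Smask) / sizeof(*S0);
+//   p1 = S1 + (xh & Smask) / sizeof(*S1);
+// Here each of the eight lanes wants a different entry, so a per-lane
+// gather is required; p0/p1 are not yet assigned in this draft.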
+ __m256i xl = X[j][0][0]; + __m256i xh = X[j][0][1]; + __m256i (*p0)[2], (*p1)[2]; + + // 3: p0 <-- (lo(B_{j,0}) & Smask) / (PWXsimple * 8) + +// playing with pointers +/* + p0 = S0 + (xl & Smask) / sizeof(*S0); + // 4: p1 <-- (hi(B_{j,0}) & Smask) / (PWXsimple * 8) + p1 = S1 + (xh & Smask) / sizeof(*S1); +*/ + /* 5: for k = 0 to PWXsimple - 1 do */ + for ( k = 0; k < PWXsimple; k++ ) + { + +// shift from 32 bit data to 64 bit data + __m256i x0, x1, s00, s01, s10, s11; + __m128i *p0k = (__m128i*)p0[k]; + __m128i *p1k = (__m128i*)p1[k]; + + + s00 = _mm256_add_epi64( _mm256_cvtepu32_epi64( p0k[0] ), + _mm256_slli_epi64( _mm256_cvtepu32_epi64( p0k[2] ), 32 ) ); + s01 = _mm256_add_epi64( _mm256_cvtepu32_epi64( p0k[1] ), + _mm256_slli_epi64( _mm256_cvtepu32_epi64( p0k[3] ), 32 ) ); + s10 = _mm256_add_epi64( _mm256_cvtepu32_epi64( p1k[0] ), + _mm256_slli_epi64( _mm256_cvtepu32_epi64( p1k[2] ), 32 ) ); + s11 = _mm256_add_epi64( _mm256_cvtepu32_epi64( p1k[1] ), + _mm256_slli_epi64( _mm256_cvtepu32_epi64( p1k[3] ), 32 ) ); + + __m128i *xx = (__m128i*)X[j][k]; + x0 = _mm256_mul_epu32( _mm256_cvtepu32_epi64( xx[0] ), + _mm256_cvtepu32_epi64( xx[2] ) ); + x1 = _mm256_mul_epu32( _mm256_cvtepu32_epi64( xx[1] ), + _mm256_cvtepu32_epi64( xx[3] ) ); + + x0 = _mm256_add_epi64( x0, s00 ); + x1 = _mm256_add_epi64( x1, s01 ); + + x0 = _mm256_xor_si256( x0, s10 ); + x1 = _mm256_xor_si256( x1, s11 ); + + X[j][k][0] = x0; + X[j][k][1] = x1; + } + + if ( ctx->version != YESPOWER_0_5 && + ( i == 0 || j < PWXgather / 2 ) ) + { + if ( j & 1 ) + { + for ( k = 0; k < PWXsimple; k++ ) + { + S1[w][0] = X[j][k][0]; + S1[w][1] = X[j][k][1]; + w++; + } + } + else + { + for ( k = 0; k < PWXsimple; k++ ) + { + S0[w + k][0] = X[j][k][0]; + S0[w + k][1] = X[j][k][1]; + } + } + } + } + } + + if ( ctx->version != YESPOWER_0_5 ) + { + /* 14: (S0, S1, S2) <-- (S2, S0, S1) */ + ctx->S0 = S2; + ctx->S1 = S0; + ctx->S2 = S1; + /* 15: w <-- w mod 2^Swidth */ + ctx->w = w & ( ( 1 << ctx->Swidth ) * PWXsimple - 1 ); + } +} + +/** + * blockmix_pwxform(B, ctx, r): + * Compute B = BlockMix_pwxform{salsa20, ctx, r}(B). The input B must be + * 128r bytes in length. + */ +static void blockmix_pwxform_8way( uint32_t *B, pwxform_8way_ctx_t *ctx, + size_t r ) +{ + __m256i X[PWXwords]; + size_t r1, i; + + /* Convert 128-byte blocks to PWXbytes blocks */ + /* 1: r_1 <-- 128r / PWXbytes */ + r1 = 128 * r / PWXbytes; + + /* 2: X <-- B'_{r_1 - 1} */ + blkcpy_8way( X, &B[ (r1 - 1) * PWXwords ], PWXwords ); + + /* 3: for i = 0 to r_1 - 1 do */ + for ( i = 0; i < r1; i++ ) + { + /* 4: if r_1 > 1 */ + if ( r1 > 1 ) + { + /* 5: X <-- X xor B'_i */ + blkxor_8way( X, &B[ i * PWXwords ], PWXwords ); + } + + /* 7: X <-- pwxform(X) */ + pwxform_8way( X, ctx ); + + /* 8: B'_i <-- X */ + blkcpy_8way( &B[ i * PWXwords ], X, PWXwords ); + } + + /* 10: i <-- floor((r_1 - 1) * PWXbytes / 64) */ + i = ( r1 - 1 ) * PWXbytes / 64; + + /* 11: B_i <-- H(B_i) */ + salsa20_8way( &B[i * 16], ctx->salsa20_rounds ); + +#if 1 /* No-op with our current pwxform settings, but do it to make sure */ + /* 12: for i = i + 1 to 2r - 1 do */ + for ( i++; i < 2 * r; i++ ) + { + /* 13: B_i <-- H(B_i xor B_{i-1}) */ + blkxor_8way( &B[i * 16], &B[ (i - 1) * 16 ], 16 ); + salsa20_8way( &B[i * 16], ctx->salsa20_rounds ); + } +#endif +} + +// This looks a lot like data dependent addressing + +/** + * integerify(B, r): + * Return the result of parsing B_{2r-1} as a little-endian integer. 
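+ *
+ * In this 8-way port each 32-bit lane of the returned __m256i is the
+ * index for one interleaved state, so smix1/smix2 below would need a
+ * per-lane gather of V_j; those call sites remain commented out.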
+ */ +static __m256i integerify8( const __m256i *B, size_t r ) +{ +/* + * Our 32-bit words are in host byte order. Also, they are SIMD-shuffled, but + * we only care about the least significant 32 bits anyway. + */ + const __m256i *X = &B[ (2 * r - 1) * 16 ]; + return X[0]; +} + +/** + * p2floor(x): + * Largest power of 2 not greater than argument. + */ +static uint32_t p2floor8( uint32_t x ) +{ + uint32_t y; + while ( ( y = x & (x - 1) ) ) + x = y; + return x; +} + +/** + * wrap(x, i): + * Wrap x to the range 0 to i-1. + */ +static uint32_t wrap8( uint32_t x, uint32_t i ) +{ + uint32_t n = p2floor( i ); + return ( x & (n - 1) ) + (i - n); +} + +/** + * smix1(B, r, N, V, X, ctx): + * Compute first loop of B = SMix_r(B, N). The input B must be 128r bytes in + * length; the temporary storage V must be 128rN bytes in length; the temporary + * storage X must be 128r bytes in length. + */ +static void smix1_8way( __m256i *B, size_t r, uint32_t N, + __m256i *V, __m256i *X, pwxform_8way_ctx_t *ctx ) +{ + size_t s = 32 * r; + uint32_t i, j; + size_t k; + + /* 1: X <-- B */ + for ( k = 0; k < 2 * r; k++ ) + for ( i = 0; i < 16; i++ ) + X[ k * 16 + i ] = B[ k * 16 + ( i * 5 % 16 ) ]; + + if ( ctx->version != YESPOWER_0_5 ) + { + for ( k = 1; k < r; k++ ) + { + blkcpy_8way( &X[k * 32], &X[ (k - 1) * 32 ], 32 ); + blockmix_pwxform_8way( &X[k * 32], ctx, 1 ); + } + } + + /* 2: for i = 0 to N - 1 do */ + for ( i = 0; i < N; i++ ) + { + /* 3: V_i <-- X */ + blkcpy_8way( &V[i * s], X, s ); + + if ( i > 1 ) + { + +// is j int or vector? Integrify has data dependent addressing? + + /* j <-- Wrap(Integerify(X), i) */ +// j = wrap8( integerify8( X, r ), i ); + + /* X <-- X xor V_j */ + blkxor_8way( X, &V[j * s], s ); + } + + /* 4: X <-- H(X) */ + if ( V != ctx->S ) + blockmix_pwxform_8way( X, ctx, r ); + else + blockmix_salsa_8way( X, ctx->salsa20_rounds ); + } + + /* B' <-- X */ + for ( k = 0; k < 2 * r; k++ ) + for ( i = 0; i < 16; i++ ) + B[ k * 16 + ( i * 5 % 16 ) ] = X[ k * 16 + i ]; +} + +/** + * smix2(B, r, N, Nloop, V, X, ctx): + * Compute second loop of B = SMix_r(B, N). The input B must be 128r bytes in + * length; the temporary storage V must be 128rN bytes in length; the temporary + * storage X must be 128r bytes in length. The value N must be a power of 2 + * greater than 1. + */ +static void smix2_8way( __m256i *B, size_t r, uint32_t N, uint32_t Nloop, + __m256i *V, __m256i *X, pwxform_8way_ctx_t *ctx ) +{ + size_t s = 32 * r; + uint32_t i, j; + size_t k; + + /* X <-- B */ + for ( k = 0; k < 2 * r; k++ ) + for ( i = 0; i < 16; i++ ) + X[ k * 16 + i ] = B[ k * 16 + ( i * 5 % 16 ) ]; + + /* 6: for i = 0 to N - 1 do */ + for ( i = 0; i < Nloop; i++ ) + { + /* 7: j <-- Integerify(X) mod N */ +// j = integerify8(X, r) & (N - 1); + + /* 8.1: X <-- X xor V_j */ + blkxor_8way( X, &V[j * s], s ); + /* V_j <-- X */ + if ( Nloop != 2 ) + blkcpy_8way( &V[j * s], X, s ); + + /* 8.2: X <-- H(X) */ + blockmix_pwxform_8way( X, ctx, r ); + } + + /* 10: B' <-- X */ + for ( k = 0; k < 2 * r; k++ ) + for ( i = 0; i < 16; i++ ) + B[ k * 16 + ( i * 5 % 16 ) ] = X[ k * 16 + i ]; +} + +/** + * smix(B, r, N, p, t, V, X, ctx): + * Compute B = SMix_r(B, N). The input B must be 128rp bytes in length; the + * temporary storage V must be 128rN bytes in length; the temporary storage + * X must be 128r bytes in length. The value N must be a power of 2 and at + * least 16. 
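+ *
+ * Worked example of the Wrap() used by smix1: for i = 100, p2floor(100)
+ * gives n = 64, so wrap(x, 100) = (x & 63) + 36; e.g. x = 2950 maps to
+ * (2950 & 63) + 36 = 6 + 36 = 42, always an index of an already-filled
+ * V block.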
+ */ +static void smix_8way( __m256i *B, size_t r, uint32_t N, + __m256i *V, __m256i *X, pwxform_8way_ctx_t *ctx) +{ + uint32_t Nloop_all = (N + 2) / 3; /* 1/3, round up */ + uint32_t Nloop_rw = Nloop_all; + + Nloop_all++; Nloop_all &= ~(uint32_t)1; /* round up to even */ + + if ( ctx->version == YESPOWER_0_5 ) + Nloop_rw &= ~(uint32_t)1; /* round down to even */ + else + Nloop_rw++; Nloop_rw &= ~(uint32_t)1; /* round up to even */ + + smix1_8way( B, 1, ctx->Sbytes / 128, ctx->S, X, ctx ); + smix1_8way( B, r, N, V, X, ctx ); + smix2_8way( B, r, N, Nloop_rw /* must be > 2 */, V, X, ctx ); + smix2_8way( B, r, N, Nloop_all - Nloop_rw /* 0 or 2 */, V, X, ctx ); +} + +/** + * yespower(local, src, srclen, params, dst): + * Compute yespower(src[0 .. srclen - 1], N, r), to be checked for "< target". + * + * Return 0 on success; or -1 on error. + */ +int yespower_8way( yespower_local_t *local, const __m256i *src, size_t srclen, + const yespower_params_t *params, yespower_8way_binary_t *dst, + int thrid ) +{ + yespower_version_t version = params->version; + uint32_t N = params->N; + uint32_t r = params->r; + const uint8_t *pers = params->pers; + size_t perslen = params->perslen; + int retval = -1; + size_t B_size, V_size; + uint32_t *B, *V, *X, *S; + pwxform_8way_ctx_t ctx; + __m256i sha256[8]; + + /* Sanity-check parameters */ + if ( (version != YESPOWER_0_5 && version != YESPOWER_1_0 ) || + N < 1024 || N > 512 * 1024 || r < 8 || r > 32 || + (N & (N - 1)) != 0 || r < rmin || + (!pers && perslen) ) + { + errno = EINVAL; + return -1; + } + + /* Allocate memory */ + B_size = (size_t)128 * r; + V_size = B_size * N; + if ((V = malloc(V_size)) == NULL) + return -1; + if ((B = malloc(B_size)) == NULL) + goto free_V; + if ((X = malloc(B_size)) == NULL) + goto free_B; + ctx.version = version; + if (version == YESPOWER_0_5) { + ctx.salsa20_rounds = 8; + ctx.PWXrounds = PWXrounds_0_5; + ctx.Swidth = Swidth_0_5; + ctx.Sbytes = 2 * Swidth_to_Sbytes1(ctx.Swidth); + } else { + ctx.salsa20_rounds = 2; + ctx.PWXrounds = PWXrounds_1_0; + ctx.Swidth = Swidth_1_0; + ctx.Sbytes = 3 * Swidth_to_Sbytes1(ctx.Swidth); + } + if ((S = malloc(ctx.Sbytes)) == NULL) + goto free_X; + ctx.S = S; + ctx.S0 = (__m256i (*)[2])S; + ctx.S1 = ctx.S0 + (1 << ctx.Swidth) * PWXsimple; + ctx.S2 = ctx.S1 + (1 << ctx.Swidth) * PWXsimple; + ctx.Smask = Swidth_to_Smask(ctx.Swidth); + ctx.w = 0; + + // do prehash + sha256_8way_full( sha256, src, srclen ); + + + // need flexible size, use malloc; + __m256i vpers[128]; + + if ( version != YESPOWER_0_5 && perslen ) + for ( int i = 0; i < perslen/4 + 1; i++ ) + vpers[i] = _mm256_set1_epi32( pers[i] ); + + /* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */ + pbkdf2_sha256_8way( B, B_size, sha256, sizeof(sha256), vpers, perslen, 1 ); + + blkcpy_8way( sha256, B, sizeof(sha256) / sizeof(sha256[0] ) ); + + /* 3: B_i <-- MF(B_i, N) */ + smix_8way( B, r, N, V, X, &ctx ); + + if ( version == YESPOWER_0_5 ) + { + /* 5: DK <-- PBKDF2(P, B, 1, dkLen) */ + pbkdf2_sha256_8way( dst, sizeof(*dst), sha256, sizeof(sha256), + B, B_size, 1 ); + + if ( pers ) + { + hmac_sha256_8way_full( dst, sizeof(*dst), vpers, perslen, sha256 ); + sha256_8way_full( dst, sha256, sizeof(sha256) ); + } + } + else + hmac_sha256_8way_full( dst, B + B_size - 64, 64, sha256, sizeof(sha256) ); + + /* Success! 
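+ *
+ * Note on the pbkdf2_sha256_8way() calls above: with an iteration count
+ * of 1, PBKDF2-SHA256 reduces to DK = T_1 || T_2 || ..., where each
+ * T_i = HMAC-SHA256( P, S || INT_32_BE(i) ), so no inner loop is needed.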
*/ + retval = 1; + + /* Free memory */ + free(S); +free_X: + free(X); +free_B: + free(B); +free_V: + free(V); + + return retval; +} + +int yespower_8way_tls( const __m256i *src, size_t srclen, + const yespower_params_t *params, yespower_8way_binary_t *dst, int trhid ) +{ +/* The reference implementation doesn't use thread-local storage */ + return yespower_8way( NULL, src, srclen, params, dst, trhid ); +} + +int yespower_init_local8( yespower_local_t *local ) +{ +/* The reference implementation doesn't use the local structure */ + local->base = local->aligned = NULL; + local->base_size = local->aligned_size = 0; + return 0; +} + +int yespower_free_local8( yespower_local_t *local ) +{ +/* The reference implementation frees its memory in yespower() */ + (void)local; /* unused */ + return 0; +} + +int yespower_8way_hash( const char *input, char *output, uint32_t len, + int thrid ) +{ + return yespower_8way_tls( input, len, &yespower_params, + (yespower_binary_t*)output, thrid ); +} + +int scanhash_yespower_8way( struct work *work, uint32_t max_nonce, + uint64_t *hashes_done, struct thr_info *mythr ) +{ + uint32_t _ALIGN(128) hash[8*8]; + uint32_t _ALIGN(128) vdata[20*8]; + uint32_t _ALIGN(128) endiandata[20]; + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + const uint32_t first_nonce = pdata[19]; + const uint32_t last_nonce = max_nonce; + uint32_t n = first_nonce; + const int thr_id = mythr->id; + + for ( int k = 0; k < 19; k++ ) + be32enc( &endiandata[k], pdata[k] ); + endiandata[19] = n; + +// do sha256 prehash + SHA256_Init( &sha256_prehash_ctx ); + SHA256_Update( &sha256_prehash_ctx, endiandata, 64 ); + + do { + if ( yespower_hash( vdata, hash, 80, thr_id ) ) + if unlikely( valid_hash( hash, ptarget ) && !opt_benchmark ) + { + be32enc( pdata+19, n ); + submit_solution( work, hash, mythr ); + } + endiandata[19] = ++n; + } while ( n < last_nonce && !work_restart[thr_id].restart ); + *hashes_done = n - first_nonce; + pdata[19] = n; + return 0; +} + +#endif // AVX2 diff --git a/algo/yespower/yespower-gate.c b/algo/yespower/yespower-gate.c index 384d434..a2255ac 100644 --- a/algo/yespower/yespower-gate.c +++ b/algo/yespower/yespower-gate.c @@ -30,7 +30,9 @@ #include "algo-gate-api.h" -static yespower_params_t yespower_params; +yespower_params_t yespower_params; + +SHA256_CTX sha256_prehash_ctx; // YESPOWER @@ -55,6 +57,11 @@ int scanhash_yespower( struct work *work, uint32_t max_nonce, for ( int k = 0; k < 19; k++ ) be32enc( &endiandata[k], pdata[k] ); endiandata[19] = n; + +// do sha256 prehash + SHA256_Init( &sha256_prehash_ctx ); + SHA256_Update( &sha256_prehash_ctx, endiandata, 64 ); + do { if ( yespower_hash( (char*)endiandata, (char*)vhash, 80, thr_id ) ) if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark ) @@ -86,11 +93,16 @@ int scanhash_yespower_b2b( struct work *work, uint32_t max_nonce, const uint32_t first_nonce = pdata[19]; uint32_t n = first_nonce; const uint32_t last_nonce = max_nonce; - const int thr_id = mythr->id; // thr_id arg is deprecated + const int thr_id = mythr->id; for ( int k = 0; k < 19; k++ ) be32enc( &endiandata[k], pdata[k] ); endiandata[19] = n; + +// do sha256 prehash + SHA256_Init( &sha256_prehash_ctx ); + SHA256_Update( &sha256_prehash_ctx, endiandata, 64 ); + do { if (yespower_b2b_hash( (char*) endiandata, (char*) vhash, 80, thr_id ) ) if unlikely( valid_hash( vhash, ptarget ) && !opt_benchmark ) @@ -152,7 +164,7 @@ bool register_yespowerr16_algo( algo_gate_t* gate ) return true; }; -/* not used +/* not used, doesn't work bool 
register_yescrypt_05_algo( algo_gate_t* gate ) { gate->optimizations = SSE2_OPT | SHA_OPT; @@ -166,6 +178,40 @@ bool register_yescrypt_05_algo( algo_gate_t* gate ) return true; } +bool register_yescrypt_05_algo( algo_gate_t* gate ) +{ + gate->optimizations = SSE2_OPT | SHA_OPT; + gate->scanhash = (void*)&scanhash_yespower; + yespower_params.version = YESPOWER_0_5; + + if ( opt_param_n ) yespower_params.N = opt_param_n; + else yespower_params.N = 2048; + + if ( opt_param_r ) yespower_params.r = opt_param_r; + else yespower_params.r = 8; + + if ( opt_param_key ) + { + yespower_params.pers = opt_param_key; + yespower_params.perslen = strlen( opt_param_key ); + } + else + { + yespower_params.pers = NULL; + yespower_params.perslen = 0; + } + +// YESCRYPT_P = 1; + + applog( LOG_NOTICE,"Yescrypt parameters: N= %d, R= %d.", + yespower_params.N, yespower_params.r ); + if ( yespower_params.pers ) + applog( LOG_NOTICE,"Key= \"%s\"\n", yespower_params.pers ); + + return true; +} + + bool register_yescryptr8_05_algo( algo_gate_t* gate ) { gate->optimizations = SSE2_OPT | SHA_OPT; diff --git a/algo/yespower/yespower-opt.c b/algo/yespower/yespower-opt.c index 8c1ddb7..f26de51 100644 --- a/algo/yespower/yespower-opt.c +++ b/algo/yespower/yespower-opt.c @@ -96,6 +96,8 @@ #include #include #include "algo/sha/hmac-sha256-hash.h" +#include "algo/sha/hmac-sha256-hash-4way.h" + #include "yespower.h" #include "yespower-platform.c" @@ -1038,12 +1040,13 @@ int yespower(yespower_local_t *local, salsa20_blk_t *V, *XY; pwxform_ctx_t ctx; uint8_t sha256[32]; + SHA256_CTX sha256_ctx; /* Sanity-check parameters */ - if ((version != YESPOWER_0_5 && version != YESPOWER_1_0) || - N < 1024 || N > 512 * 1024 || r < 8 || r > 32 || - (N & (N - 1)) != 0 || - (!pers && perslen)) { + if ( (version != YESPOWER_0_5 && version != YESPOWER_1_0) + || N < 1024 || N > 512 * 1024 || r < 8 || r > 32 + || (N & (N - 1)) != 0 || ( !pers && perslen ) ) + { errno = EINVAL; return -1; } @@ -1051,20 +1054,22 @@ int yespower(yespower_local_t *local, /* Allocate memory */ B_size = (size_t)128 * r; V_size = B_size * N; - if (version == YESPOWER_0_5) { + if ( version == YESPOWER_0_5 ) + { XY_size = B_size * 2; Swidth = Swidth_0_5; - ctx.Sbytes = 2 * Swidth_to_Sbytes1(Swidth); + ctx.Sbytes = 2 * Swidth_to_Sbytes1( Swidth ); } else { XY_size = B_size + 64; Swidth = Swidth_1_0; - ctx.Sbytes = 3 * Swidth_to_Sbytes1(Swidth); + ctx.Sbytes = 3 * Swidth_to_Sbytes1( Swidth ); } need = B_size + V_size + XY_size + ctx.Sbytes; - if (local->aligned_size < need) { - if (free_region(local)) + if ( local->aligned_size < need ) + { + if ( free_region( local ) ) return -1; - if (!alloc_region(local, need)) + if ( !alloc_region( local, need ) ) return -1; } B = (uint8_t *)local->aligned; @@ -1072,48 +1077,81 @@ int yespower(yespower_local_t *local, XY = (salsa20_blk_t *)((uint8_t *)V + V_size); S = (uint8_t *)XY + XY_size; ctx.S0 = S; - ctx.S1 = S + Swidth_to_Sbytes1(Swidth); + ctx.S1 = S + Swidth_to_Sbytes1( Swidth ); - SHA256_Buf(src, srclen, sha256); - if (version == YESPOWER_0_5) { - PBKDF2_SHA256(sha256, sizeof(sha256), src, srclen, 1, - B, B_size); +// copy prehash, do tail + memcpy( &sha256_ctx, &sha256_prehash_ctx, sizeof sha256_ctx ); + SHA256_Update( &sha256_ctx, src+64, srclen-64 ); + SHA256_Final( sha256, &sha256_ctx ); - if ( work_restart[thrid].restart ) return false; +// SHA256_Buf(src, srclen, sha256); + + if ( version == YESPOWER_0_5 ) + { + PBKDF2_SHA256( sha256, sizeof(sha256), src, srclen, 1, B, B_size ); + + if ( work_restart[thrid].restart ) 
return 0; - memcpy(sha256, B, sizeof(sha256)); - smix(B, r, N, V, XY, &ctx); + memcpy( sha256, B, sizeof(sha256) ); + smix( B, r, N, V, XY, &ctx ); - if ( work_restart[thrid].restart ) return false; + if ( work_restart[thrid].restart ) return 0; - PBKDF2_SHA256(sha256, sizeof(sha256), B, B_size, 1, - (uint8_t *)dst, sizeof(*dst)); + PBKDF2_SHA256( sha256, sizeof(sha256), B, B_size, 1, (uint8_t *)dst, + sizeof(*dst) ); - if (pers) { - HMAC_SHA256_Buf(dst, sizeof(*dst), pers, perslen, - sha256); + if ( work_restart[thrid].restart ) return 0; - if ( work_restart[thrid].restart ) return false; + if ( pers ) + { + src = pers; + srclen = perslen; + } + else + srclen = 0; + + HMAC_SHA256_CTX ctx; + HMAC_SHA256_Init( &ctx, dst, sizeof(*dst) ); + HMAC_SHA256_Update( &ctx, src, srclen ); + HMAC_SHA256_Final( sha256, &ctx ); - SHA256_Buf(sha256, sizeof(sha256), (uint8_t *)dst); +// SHA256_CTX ctx; + SHA256_Init( &sha256_ctx ); + SHA256_Update( &sha256_ctx, sha256, sizeof(sha256) ); + SHA256_Final( (unsigned char*)dst, &sha256_ctx ); + + +/* + if ( pers ) + { + HMAC_SHA256_Buf( dst, sizeof(*dst), pers, perslen, sha256 ); + SHA256_Buf( sha256, sizeof(sha256), (uint8_t *)dst ); } - } else { - ctx.S2 = S + 2 * Swidth_to_Sbytes1(Swidth); +*/ + } + else + { + ctx.S2 = S + 2 * Swidth_to_Sbytes1( Swidth ); ctx.w = 0; - if (pers) { + if ( pers ) + { src = pers; srclen = perslen; - } else { - srclen = 0; } + else + srclen = 0; - PBKDF2_SHA256(sha256, sizeof(sha256), src, srclen, 1, B, 128); - memcpy(sha256, B, sizeof(sha256)); - smix_1_0(B, r, N, V, XY, &ctx); - HMAC_SHA256_Buf(B + B_size - 64, 64, - sha256, sizeof(sha256), (uint8_t *)dst); + PBKDF2_SHA256( sha256, sizeof(sha256), src, srclen, 1, B, 128 ); + memcpy( sha256, B, sizeof(sha256) ); + + if ( work_restart[thrid].restart ) return 0; + + smix_1_0( B, r, N, V, XY, &ctx ); + + HMAC_SHA256_Buf( B + B_size - 64, 64, sha256, sizeof(sha256), + (uint8_t *)dst ); } /* Success! */ diff --git a/algo/yespower/yespower-ref.c b/algo/yespower/yespower-ref.c index b04cfbb..e9a498a 100644 --- a/algo/yespower/yespower-ref.c +++ b/algo/yespower/yespower-ref.c @@ -453,9 +453,8 @@ static void smix(uint32_t *B, size_t r, uint32_t N, * * Return 0 on success; or -1 on error. */ -int yespower(yespower_local_t *local, - const uint8_t *src, size_t srclen, - const yespower_params_t *params, yespower_binary_t *dst) +int yespower( yespower_local_t *local, const uint8_t *src, size_t srclen, + const yespower_params_t *params, yespower_binary_t *dst, int thrid ) { yespower_version_t version = params->version; uint32_t N = params->N; @@ -534,17 +533,16 @@ int yespower(yespower_local_t *local, if (pers) { HMAC_SHA256_Buf(dst, sizeof(*dst), pers, perslen, - return true; (uint8_t *)sha256); SHA256_Buf(sha256, sizeof(sha256), (uint8_t *)dst); } } else { - HMAC_SHA256_Buf_P((uint8_t *)B + B_size - 64, 64, + HMAC_SHA256_Buf((uint8_t *)B + B_size - 64, 64, sha256, sizeof(sha256), (uint8_t *)dst); } /* Success! 
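 *
 * Return convention after this change: 1 = hash computed, 0 = abandoned
 * on a work restart, -1 = error. Upstream yespower returned 0 on
 * success, so callers that tested for any nonzero result (as yescrypt.c
 * did) must now test for -1 explicitly.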
*/ - retval = 0; + retval = 1; /* Free memory */ free(S); @@ -559,10 +557,10 @@ free_V: } int yespower_tls(const uint8_t *src, size_t srclen, - const yespower_params_t *params, yespower_binary_t *dst) + const yespower_params_t *params, yespower_binary_t *dst, int thrid ) { /* The reference implementation doesn't use thread-local storage */ - return yespower(NULL, src, srclen, params, dst); + return yespower(NULL, src, srclen, params, dst, thrid ); } int yespower_init_local(yespower_local_t *local) diff --git a/algo/yespower/yespower.h b/algo/yespower/yespower.h index 718ecb7..4436780 100644 --- a/algo/yespower/yespower.h +++ b/algo/yespower/yespower.h @@ -33,6 +33,8 @@ #include #include /* for size_t */ #include "miner.h" +#include "simd-utils.h" +#include #ifdef __cplusplus extern "C" { @@ -74,6 +76,10 @@ typedef struct { unsigned char uc[32]; } yespower_binary_t __attribute__ ((aligned (64))); +yespower_params_t yespower_params; + +SHA256_CTX sha256_prehash_ctx; + /** * yespower_init_local(local): * Initialize the thread-local (RAM) data structure. Actual memory allocation @@ -131,6 +137,24 @@ extern int yespower_tls(const uint8_t *src, size_t srclen, extern int yespower_b2b_tls(const uint8_t *src, size_t srclen, const yespower_params_t *params, yespower_binary_t *dst, int thr_id); + +#if defined(__AVX2__) + +typedef struct +{ + __m256i uc[8]; +} yespower_8way_binary_t __attribute__ ((aligned (128))); + +extern int yespower_8way( yespower_local_t *local, const __m256i *src, + size_t srclen, const yespower_params_t *params, + yespower_8way_binary_t *dst, int thrid ); + + +extern int yespower_8way_tls( const __m256i *src, size_t srclen, + const yespower_params_t *params, yespower_8way_binary_t *dst, int thr_id ); + +#endif // AVX2 + #ifdef __cplusplus } #endif diff --git a/configure b/configure index 2ed21e8..c66f4d9 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.6.1. +# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.12.7. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -577,8 +577,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='cpuminer-opt' PACKAGE_TARNAME='cpuminer-opt' -PACKAGE_VERSION='3.12.6.1' -PACKAGE_STRING='cpuminer-opt 3.12.6.1' +PACKAGE_VERSION='3.12.7' +PACKAGE_STRING='cpuminer-opt 3.12.7' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures cpuminer-opt 3.12.6.1 to adapt to many kinds of systems. +\`configure' configures cpuminer-opt 3.12.7 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1404,7 +1404,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of cpuminer-opt 3.12.6.1:";; + short | recursive ) echo "Configuration of cpuminer-opt 3.12.7:";; esac cat <<\_ACEOF @@ -1509,7 +1509,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -cpuminer-opt configure 3.12.6.1 +cpuminer-opt configure 3.12.7 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. 
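Stepping back from the version bump below: the SHA-256 prehash added in yespower-gate.c and consumed in yespower-opt.c above is a midstate cache. The first 64 of the 80 header bytes are constant for a given work item, so the first SHA-256 block is compressed once per work and only the nonce-bearing 16-byte tail is hashed per candidate. A minimal sketch of the pattern, assuming the OpenSSL SHA256_* API already used in this tree; prehash_header() and hash_header_tail() are illustrative names, not functions from this codebase:

#include <openssl/sha.h>
#include <stdint.h>
#include <string.h>

static SHA256_CTX sha256_prehash_ctx;  // the diff uses one shared global;
                                       // __thread storage would avoid races

// Once per new work: compress the constant first 64-byte block.
static void prehash_header( const void *header )
{
   SHA256_Init( &sha256_prehash_ctx );
   SHA256_Update( &sha256_prehash_ctx, header, 64 );
}

// Once per nonce: copy the saved midstate, hash only bytes 64..79.
static void hash_header_tail( uint8_t out[32], const uint8_t *header )
{
   SHA256_CTX ctx;
   memcpy( &ctx, &sha256_prehash_ctx, sizeof ctx );
   SHA256_Update( &ctx, header + 64, 16 );
   SHA256_Final( out, &ctx );
}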
-It was created by cpuminer-opt $as_me 3.12.6.1, which was +It was created by cpuminer-opt $as_me 3.12.7, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2993,7 +2993,7 @@ fi # Define the identity of the package. PACKAGE='cpuminer-opt' - VERSION='3.12.6.1' + VERSION='3.12.7' cat >>confdefs.h <<_ACEOF @@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by cpuminer-opt $as_me 3.12.6.1, which was +This file was extended by cpuminer-opt $as_me 3.12.7, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -6756,7 +6756,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -cpuminer-opt config.status 3.12.6.1 +cpuminer-opt config.status 3.12.7 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index 056d5ee..4184b18 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([cpuminer-opt], [3.12.6.1]) +AC_INIT([cpuminer-opt], [3.12.7]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/cpu-miner.c b/cpu-miner.c index 1e4615d..bd97641 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -193,6 +193,7 @@ static uint64_t submit_sum = 0; static uint64_t accept_sum = 0; static uint64_t stale_sum = 0; static uint64_t reject_sum = 0; +static uint64_t solved_sum = 0; static double norm_diff_sum = 0.; static uint32_t last_block_height = 0; static double highest_share = 0; // all shares include discard and reject @@ -954,6 +955,7 @@ void report_summary_log( bool force ) uint64_t accepts = accept_sum; accept_sum = 0; uint64_t rejects = reject_sum; reject_sum = 0; uint64_t stales = stale_sum; stale_sum = 0; + uint64_t solved = solved_sum; solved_sum = 0; memcpy( &start_time, &five_min_start, sizeof start_time ); memcpy( &five_min_start, &now, sizeof now ); @@ -1020,8 +1022,8 @@ void report_summary_log( bool force ) applog2( LOG_INFO,"Rejected %6d %6d", rejects, rejected_share_count ); if ( solved_block_count ) - applog2( LOG_INFO,"Blocks Solved %6d", - solved_block_count ); + applog2( LOG_INFO,"Blocks Solved %6d %6d", + solved, solved_block_count ); applog2( LOG_INFO, "Hi/Lo Share Diff %.5g / %.5g", highest_share, lowest_share ); @@ -1132,6 +1134,7 @@ static int share_result( int result, struct work *work, { accept_sum++; norm_diff_sum += my_stats.target_diff; + if ( solved ) solved_sum++; } else { @@ -2197,16 +2200,18 @@ static void *miner_thread( void *userdata ) } #if !(defined(__WINDOWS__) || defined(_WIN64) || defined(_WIN32)) + // Display CPU temperature and clock rate. if (!opt_quiet && mythr->id == 0 ) { int temp = cpu_temp(0); static struct timeval cpu_temp_time = {0}; timeval_subtract( &diff, &tv_end, &cpu_temp_time ); int wait = temp >= 80 ? 30 : temp >= 70 ? 
60 : 120; + if ( ( diff.tv_sec > wait ) || ( temp > hi_temp ) ) { char tempstr[32]; - int lo_freq, hi_freq; + float lo_freq = 0., hi_freq = 0.; linux_cpu_hilo_freq( &lo_freq, &hi_freq ); memcpy( &cpu_temp_time, &tv_end, sizeof(cpu_temp_time) ); if ( use_colors && ( temp >= 70 ) ) @@ -2219,11 +2224,12 @@ static void *miner_thread( void *userdata ) else sprintf( tempstr, "%d C", temp ); applog( LOG_NOTICE,"CPU temp: curr %s (max %d), Freq: %.3f/%.3f GHz", - tempstr, hi_temp, (float)lo_freq / 1e6, (float)hi_freq/ 1e6 ); + tempstr, hi_temp, lo_freq / 1e6, hi_freq / 1e6 ); if ( temp > hi_temp ) hi_temp = temp; } } #endif + // display hashrate if ( unlikely( opt_hash_meter ) ) { diff --git a/simd-utils/intrlv.h b/simd-utils/intrlv.h index b923fd3..93a5e19 100644 --- a/simd-utils/intrlv.h +++ b/simd-utils/intrlv.h @@ -676,6 +676,14 @@ static inline void mm128_bswap32_intrlv80_4x32( void *d, const void *src ) d[7] = *( (const uint32_t*)(s7) +(i) ); \ } while(0) +static inline void intrlv_8x32b( void *dst, const void *s0, const void *s1, + const void *s2, const void *s3, const void *s4, const void *s5, + const void *s6, const void *s7, const int bit_len ) +{ + for ( int i = 0; i < bit_len/32; i++ ) + ILEAVE_8x32( i ); +} + static inline void intrlv_8x32( void *dst, const void *s0, const void *s1, const void *s2, const void *s3, const void *s4, const void *s5, const void *s6, const void *s7, const int bit_len ) @@ -730,6 +738,14 @@ static inline void intrlv_8x32_512( void *dst, const void *s0, const void *s1, *( (uint32_t*)(d7) +(i) ) = s[7]; \ } while(0) +static inline void dintrlv_8x32b( void *d0, void *d1, void *d2, void *d3, + void *d4, void *d5, void *d6, void *d7, const void *src, + const int bit_len ) +{ + for ( int i = 0; i < bit_len/32; i++ ) + DLEAVE_8x32( i ); +} + static inline void dintrlv_8x32( void *d0, void *d1, void *d2, void *d3, void *d4, void *d5, void *d6, void *d7, const void *src, const int bit_len ) diff --git a/sysinfos.c b/sysinfos.c index 0f74a75..17aa69e 100644 --- a/sysinfos.c +++ b/sysinfos.c @@ -67,7 +67,6 @@ #define HWMON_ALT5 \ "/sys/class/hwmon/hwmon0/device/temp1_input" - static inline float linux_cputemp(int core) { float tc = 0.0; @@ -97,49 +96,43 @@ static inline float linux_cputemp(int core) return tc; } -#define CPUFREQ_PATH \ + +#define CPUFREQ_PATH0\ "/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq" #define CPUFREQ_PATHn \ "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq" - -// "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq" -static inline uint32_t linux_cpufreq(int core) +static inline float linux_cpufreq(int core) { - FILE *fd = fopen(CPUFREQ_PATH, "r"); - uint32_t freq = 0; + FILE *fd = fopen( CPUFREQ_PATH0, "r" ); + long int freq = 0; - if (!fd) - return freq; - - if (!fscanf(fd, "%d", &freq)) - return freq; - - return freq; + if ( !fd ) return (float)freq; + if ( !fscanf( fd, "%ld", &freq ) ) freq = 0; + fclose( fd ); + return (float)freq; } -static inline void linux_cpu_hilo_freq( uint32_t* lo, uint32_t *hi ) +static inline void linux_cpu_hilo_freq( float *lo, float *hi ) { - uint64_t freq = 0, hi_freq = 0, lo_freq = 0xffffffffffffffff; + long int freq = 0, hi_freq = 0, lo_freq = 0x7fffffff; for ( int i = 0; i < num_cpus; i++ ) { char path[64]; sprintf( path, CPUFREQ_PATHn, i ); - FILE *fd = fopen( path, "r" ); - if ( fd ) + if ( !fd ) return; + else if ( fscanf( fd, "%ld", &freq ) ) { - if ( fscanf( fd, "%ld", &freq ) ) - { - if ( freq > hi_freq ) hi_freq = freq; - if ( freq < lo_freq ) lo_freq = freq; - } + if ( freq > hi_freq ) 
hi_freq = freq;
+         if ( freq < lo_freq ) lo_freq = freq;
       }
+      fclose( fd );
    }
-   *hi = hi_freq;
-   *lo = lo_freq;
+   *hi = (float)hi_freq;
+   *lo = (float)lo_freq;
 }
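The sysinfos.c rewrite above pairs every fopen() with an fclose(), closing a per-poll file descriptor leak in the CPU temperature and frequency reporting. A minimal sketch of the corrected read pattern, with an illustrative function name:

#include <stdio.h>

static long read_sysfs_long( const char *path )
{
   long val = 0;
   FILE *fd = fopen( path, "r" );
   if ( !fd ) return 0;                  // nothing opened, nothing to close
   if ( fscanf( fd, "%ld", &val ) != 1 )
      val = 0;
   fclose( fd );                         // previously skipped on some paths
   return val;
}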