/* ====================================================================
 * Copyright (c) 2014 - 2017 The GmSSL Project. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the GmSSL Project.
 *    (http://gmssl.org/)"
 *
 * 4. The name "GmSSL Project" must not be used to endorse or promote
 *    products derived from this software without prior written
 *    permission. For written permission, please contact
 *    guanzhi1980@gmail.com.
 *
 * 5. Products derived from this software may not be called "GmSSL"
 *    nor may "GmSSL" appear in their names without prior written
 *    permission of the GmSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the GmSSL Project
 *    (http://gmssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE GmSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE GmSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 */

#include <string.h>
#include "sm3-hash-4way.h"

#ifdef __SSE4_2__
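
// SM3 with 4 lanes interleaved: each __m128i in the context holds the
// same 32-bit word position for four independent messages, so one pass
// of the compression function advances four hashes in parallel.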
void sm3_4way_init( sm3_4way_ctx_t *ctx )
{
   // SM3 initial value from the standard, broadcast to all four lanes.
   ctx->digest[0] = _mm_set1_epi32( 0x7380166F );
   ctx->digest[1] = _mm_set1_epi32( 0x4914B2B9 );
   ctx->digest[2] = _mm_set1_epi32( 0x172442D7 );
   ctx->digest[3] = _mm_set1_epi32( 0xDA8A0600 );
   ctx->digest[4] = _mm_set1_epi32( 0xA96F30BC );
   ctx->digest[5] = _mm_set1_epi32( 0x163138AA );
   ctx->digest[6] = _mm_set1_epi32( 0xE38DEE4D );
   ctx->digest[7] = _mm_set1_epi32( 0xB0FB0E4E );
   ctx->nblocks = 0;
   ctx->num = 0;
}

void sm3_4way( void *cc, const void *data, size_t len )
{
   sm3_4way_ctx_t *ctx = (sm3_4way_ctx_t*)cc;
   __m128i *block = (__m128i*)ctx->block;
   __m128i *vdata = (__m128i*)data;

   // ctx->num counts buffered bytes per lane; the interleaved copies
   // below assume len and num are multiples of 4 (whole 32-bit words).
   if ( ctx->num )
   {
      unsigned int left = SM3_BLOCK_SIZE - ctx->num;
      if ( len < left )
      {
         memcpy_128( block + (ctx->num >> 2), vdata, len >> 2 );
         ctx->num += len;
         return;
      }
      else
      {
         memcpy_128( block + (ctx->num >> 2), vdata, left >> 2 );
         sm3_4way_compress( ctx->digest, block );
         ctx->nblocks++;
         vdata += left >> 2;
         len -= left;
      }
   }
   while ( len >= SM3_BLOCK_SIZE )
   {
      sm3_4way_compress( ctx->digest, vdata );
      ctx->nblocks++;
      vdata += SM3_BLOCK_SIZE >> 2;
      len -= SM3_BLOCK_SIZE;
   }
   ctx->num = len;
   if ( len )
      memcpy_128( block, vdata, len >> 2 );
}

void sm3_4way_close( void *cc, void *dst )
{
   sm3_4way_ctx_t *ctx = (sm3_4way_ctx_t*)cc;
   __m128i *hash = (__m128i*)dst;
   __m128i *count = (__m128i*)(ctx->block + ( (SM3_BLOCK_SIZE - 8) >> 2 ) );
   __m128i *block = (__m128i*)ctx->block;
   int i;

   // Append the 0x80 padding byte. block[] is indexed in 32-bit words,
   // so the byte offset ctx->num is scaled down; the words are byte
   // swapped in the compress function, so 0x00000080 stored here
   // becomes the big endian 0x80000000 padding word.
   block[ ctx->num >> 2 ] = _mm_set1_epi32( 0x80 );

   if ( ctx->num + 8 <= SM3_BLOCK_SIZE )
   {
      memset_zero_128( block + (ctx->num >> 2) + 1,
                       ( SM3_BLOCK_SIZE - ctx->num - 8 ) >> 2 );
   }
   else
   {
      // No room left for the 64 bit length: zero the rest of this
      // block (count is in words, not bytes), compress, then build a
      // final block holding only the length.
      memset_zero_128( block + (ctx->num >> 2) + 1,
                       ( SM3_BLOCK_SIZE >> 2 ) - (ctx->num >> 2) - 1 );
      sm3_4way_compress( ctx->digest, block );
      memset_zero_128( block, ( SM3_BLOCK_SIZE - 8 ) >> 2 );
   }

   // 64 bit big endian message length in bits, split across the last
   // two words: bits = nblocks * 512 + num * 8.
   count[0] = mm128_bswap_32( _mm_set1_epi32( ctx->nblocks >> 23 ) );
   count[1] = mm128_bswap_32( _mm_set1_epi32( ( ctx->nblocks << 9 ) +
                                              ( ctx->num << 3 ) ) );
   sm3_4way_compress( ctx->digest, block );

   for ( i = 0; i < 8; i++ )
      hash[i] = mm128_bswap_32( ctx->digest[i] );
}
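
/*
 * Minimal usage sketch: hashing four independent 64-byte messages at
 * once. Inputs must first be interleaved one 32-bit word per lane,
 * here assumed to be done with the intrlv_4x32() helper from this
 * codebase's simd-utils (helper name and bit-length argument assumed).
 *
 *    uint32_t msg0[16], msg1[16], msg2[16], msg3[16];  // four inputs
 *    __m128i vdata[16] __attribute__ ((aligned (64)));
 *    __m128i vhash[8];
 *    sm3_4way_ctx_t ctx;
 *
 *    intrlv_4x32( vdata, msg0, msg1, msg2, msg3, 512 );
 *    sm3_4way_init( &ctx );
 *    sm3_4way( &ctx, vdata, 64 );      // len is bytes per lane
 *    sm3_4way_close( &ctx, vhash );    // vhash[i] = word i of all 4 digests
 */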

// SM3 permutations P0, P1 and boolean functions FF, GG from the
// specification, vectorized across four lanes.
#define P0(x) _mm_xor_si128( x, _mm_xor_si128( mm128_rol_32( x,  9 ), \
                                               mm128_rol_32( x, 17 ) ) )
#define P1(x) _mm_xor_si128( x, _mm_xor_si128( mm128_rol_32( x, 15 ), \
                                               mm128_rol_32( x, 23 ) ) )

// Rounds 0-15 use XOR; rounds 16-63 use majority (FF1) and choose (GG1).
#define FF0(x,y,z) _mm_xor_si128( x, _mm_xor_si128( y, z ) )
#define FF1(x,y,z) _mm_or_si128( _mm_or_si128( _mm_and_si128( x, y ), \
                                               _mm_and_si128( x, z ) ), \
                                 _mm_and_si128( y, z ) )

#define GG0(x,y,z) FF0(x,y,z)
#define GG1(x,y,z) _mm_or_si128( _mm_and_si128( x, y ), \
                                 _mm_andnot_si128( x, z ) )

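// One SM3 compression over a 64-byte block per lane: expand the 16
// message words to W[0..67], derive W1[j] = W[j] ^ W[j+4], run 64
// rounds in two groups with round constants 0x79CC4519 and 0x7A879D8A,
// then fold the working variables back into the digest with XOR.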
void sm3_4way_compress( __m128i *digest, __m128i *block )
{
   __m128i W[68], W1[64];
   __m128i A = digest[ 0 ];
   __m128i B = digest[ 1 ];
   __m128i C = digest[ 2 ];
   __m128i D = digest[ 3 ];
   __m128i E = digest[ 4 ];
   __m128i F = digest[ 5 ];
   __m128i G = digest[ 6 ];
   __m128i H = digest[ 7 ];
   __m128i SS1, SS2, TT1, TT2, T;
   int j;

   // Load the block big endian, then expand the message schedule.
   for ( j = 0; j < 16; j++ )
      W[j] = mm128_bswap_32( block[j] );

   for ( j = 16; j < 68; j++ )
      W[j] = _mm_xor_si128( P1( _mm_xor_si128( _mm_xor_si128( W[ j-16 ],
                                                              W[ j-9 ] ),
                                          mm128_rol_32( W[ j-3 ], 15 ) ) ),
                            _mm_xor_si128( mm128_rol_32( W[ j-13 ], 7 ),
                                           W[ j-6 ] ) );

   for ( j = 0; j < 64; j++ )
      W1[j] = _mm_xor_si128( W[j], W[j+4] );

   // Rounds 0-15.
   T = _mm_set1_epi32( 0x79CC4519UL );
   for ( j = 0; j < 16; j++ )
   {
      SS1 = mm128_rol_32( _mm_add_epi32( _mm_add_epi32( mm128_rol_32( A, 12 ), E ),
                                         mm128_rol_var_32( T, j ) ), 7 );
      SS2 = _mm_xor_si128( SS1, mm128_rol_32( A, 12 ) );
      TT1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( FF0( A, B, C ), D ),
                                          SS2 ), W1[j] );
      TT2 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( GG0( E, F, G ), H ),
                                          SS1 ), W[j] );
      D = C;
      C = mm128_rol_32( B, 9 );
      B = A;
      A = TT1;
      H = G;
      G = mm128_rol_32( F, 19 );
      F = E;
      E = P0( TT2 );
   }

   // Rounds 16-63; the rotate count for T wraps modulo 32.
   T = _mm_set1_epi32( 0x7A879D8AUL );
   for ( j = 16; j < 64; j++ )
   {
      SS1 = mm128_rol_32( _mm_add_epi32( _mm_add_epi32( mm128_rol_32( A, 12 ), E ),
                                         mm128_rol_var_32( T, j & 31 ) ), 7 );
      SS2 = _mm_xor_si128( SS1, mm128_rol_32( A, 12 ) );
      TT1 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( FF1( A, B, C ), D ),
                                          SS2 ), W1[j] );
      TT2 = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( GG1( E, F, G ), H ),
                                          SS1 ), W[j] );
      D = C;
      C = mm128_rol_32( B, 9 );
      B = A;
      A = TT1;
      H = G;
      G = mm128_rol_32( F, 19 );
      F = E;
      E = P0( TT2 );
   }

   digest[0] = _mm_xor_si128( digest[0], A );
   digest[1] = _mm_xor_si128( digest[1], B );
   digest[2] = _mm_xor_si128( digest[2], C );
   digest[3] = _mm_xor_si128( digest[3], D );
   digest[4] = _mm_xor_si128( digest[4], E );
   digest[5] = _mm_xor_si128( digest[5], F );
   digest[6] = _mm_xor_si128( digest[6], G );
   digest[7] = _mm_xor_si128( digest[7], H );
}

#endif