This commit is contained in:
Jay D Dee
2017-01-12 19:40:17 -05:00
parent 06f82c5b97
commit badc80f071
54 changed files with 410 additions and 25234 deletions

View File

@@ -12,40 +12,36 @@ void blakecoin_close(void *cc, void *dst);
#include <memory.h>
#include <openssl/sha.h>
/* Move init out of loop, so init once externally,
* and then use one single memcpy */
static sph_blake256_context blake_mid;
static bool ctx_midstate_done = false;
// context management is staged for efficiency.
// 1. global initial ctx cached on startup
// 2. per-thread midstate ctx cache refreshed every scan
// 3. local ctx for final hash calculation
static void init_blake_hash(void)
static sph_blake256_context blake_init_ctx;
static __thread sph_blake256_context blake_mid_ctx;
static void blake_midstate_init( const void* input )
{
blakecoin_init(&blake_mid);
ctx_midstate_done = true;
// copy cached initial state
memcpy( &blake_mid_ctx, &blake_init_ctx, sizeof blake_mid_ctx );
blakecoin( &blake_mid_ctx, input, 64 );
}
void blakecoinhash(void *state, const void *input)
void blakecoinhash( void *state, const void *input )
{
sph_blake256_context ctx;
uint8_t hash[64];
uint8_t *ending = (uint8_t*) input;
ending += 64;
uint8_t *ending = (uint8_t*) input + 64;
// do one memcopy to get a fresh context
if (!ctx_midstate_done) {
init_blake_hash();
blakecoin(&blake_mid, input, 64);
}
memcpy(&ctx, &blake_mid, sizeof(blake_mid));
blakecoin(&ctx, ending, 16);
blakecoin_close(&ctx, hash);
memcpy(state, hash, 32);
// copy cached midstate
memcpy( &ctx, &blake_mid_ctx, sizeof ctx );
blakecoin( &ctx, ending, 16 );
blakecoin_close( &ctx, hash );
memcpy( state, hash, 32 );
}
int scanhash_blakecoin(int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done)
int scanhash_blakecoin( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
{
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
@@ -57,16 +53,14 @@ int scanhash_blakecoin(int thr_id, struct work *work, uint32_t max_nonce,
uint32_t n = first_nonce;
ctx_midstate_done = false;
if (opt_benchmark)
HTarget = 0x7f;
// we need big endian data...
// be32enc_array( endiandata, pdata, 19 );
for (int kk=0; kk < 19; kk++)
be32enc(&endiandata[kk], ((uint32_t*)pdata)[kk]);
blake_midstate_init( endiandata );
#ifdef DEBUG_ALGO
applog(LOG_DEBUG,"[%d] Target=%08x %08x", thr_id, ptarget[6], ptarget[7]);
@@ -117,6 +111,7 @@ bool register_vanilla_algo( algo_gate_t* gate )
gate->hash = (void*)&blakecoinhash;
gate->hash_alt = (void*)&blakecoinhash;
gate->get_max64 = (void*)&blakecoin_get_max64;
blakecoin_init( &blake_init_ctx );
return true;
}

View File

@@ -317,7 +317,6 @@ static const sph_u64 blkIV512[8] = {
#define COMPRESS64 do { \
int r; \
int b=0; \
sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; \
sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; \

View File

@@ -1,133 +0,0 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
LICENSE TERMS
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
DISCLAIMER
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
*/
#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H
/* Numeric tags for byte order; exactly one is assigned to
   PLATFORM_BYTE_ORDER by the detection logic below. */
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
# include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
#endif
/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */
/* If more than one group below matches, each defines PLATFORM_BYTE_ORDER
   to the same value, so the benign redefinition is accepted. */
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( _BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( _LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
/* NOTE(review): modern GCC/Clang define __ORDER_BIG_ENDIAN__ rather than
   __BIG_ENDIAN__, so this group rarely fires there; the groups above or the
   machine-define fallback below normally decide instead — confirm before
   relying on this branch. */
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
/* if the platform byte order could not be determined, then try to */
/* set this define using common machine defines */
#if !defined(PLATFORM_BYTE_ORDER)
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
defined( vax ) || defined( vms ) || defined( VMS ) || \
defined( __VMS ) || defined( _M_X64 )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#else
/* Fixed: the old message cited stale line numbers ("126 or 128") that no
   longer match this file; point at the markers instead. */
# error Please change one of the two "EDIT HERE IF NECESSARY" lines above in brg_endian.h to set the platform byte order
#endif
#endif
#endif

View File

@@ -1,231 +0,0 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
(a few lines added by Soeren S. Thomsen, October 2008)
LICENSE TERMS
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
DISCLAIMER
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
The unsigned integer types defined here are of the form uint_<nn>t where
<nn> is the length of the type; for example, the unsigned 32-bit type is
'uint_32t'. These are NOT the same as the 'C99 integer types' that are
defined in the inttypes.h and stdint.h headers since attempts to use these
types have shown that support for them is still highly variable. However,
since the latter are of the form uint<nn>_t, a regular expression search
and replace (in VC++ search on 'uint_{:z}t' and replace with 'uint\1_t')
can be used to convert the types used here to the C99 standard types.
*/
#ifndef _BRG_TYPES_H
#define _BRG_TYPES_H
#if defined(__cplusplus)
extern "C" {
#endif
#include <limits.h>
/* ptrint_t: an integer type wide enough to hold a pointer value; used by
   the ALIGN_* macros below. Falls back to plain int on pre-C99 compilers
   without intptr_t. */
#if defined( _MSC_VER ) && ( _MSC_VER >= 1300 )
# include <stddef.h>
# define ptrint_t intptr_t
#elif defined( __GNUC__ ) && ( __GNUC__ >= 3 )
# include <stdint.h>
# define ptrint_t intptr_t
#else
# define ptrint_t int
#endif
/* Each BRG_UI<n> section derives a fixed-width unsigned type from the
   <limits.h> maxima, erroring out when no suitable native type exists. */
#ifndef BRG_UI8
# define BRG_UI8
# if UCHAR_MAX == 255u
typedef unsigned char uint_8t;
# else
# error Please define uint_8t as an 8-bit unsigned integer type in brg_types.h
# endif
#endif
#ifndef BRG_UI16
# define BRG_UI16
# if USHRT_MAX == 65535u
typedef unsigned short uint_16t;
# else
# error Please define uint_16t as a 16-bit unsigned short type in brg_types.h
# endif
#endif
/* li_32(h)/li_64(h) build literal constants of the right width and
   suffix from a hex digit string, e.g. li_32(0a0b0c0d). */
#ifndef BRG_UI32
# define BRG_UI32
# if UINT_MAX == 4294967295u
# define li_32(h) 0x##h##u
typedef unsigned int uint_32t;
# elif ULONG_MAX == 4294967295u
# define li_32(h) 0x##h##ul
typedef unsigned long uint_32t;
# elif defined( _CRAY )
# error This code needs 32-bit data types, which Cray machines do not provide
# else
# error Please define uint_32t as a 32-bit unsigned integer type in brg_types.h
# endif
#endif
/* 64-bit type detection: compiler-specific branches first (Borland,
   old MSVC, Sun, MVS), then generic probes of the <limits.h> maxima. */
#ifndef BRG_UI64
# if defined( __BORLANDC__ ) && !defined( __MSDOS__ )
# define BRG_UI64
# define li_64(h) 0x##h##ui64
typedef unsigned __int64 uint_64t;
# elif defined( _MSC_VER ) && ( _MSC_VER < 1300 ) /* 1300 == VC++ 7.0 */
# define BRG_UI64
# define li_64(h) 0x##h##ui64
typedef unsigned __int64 uint_64t;
# elif defined( __sun ) && defined( ULONG_MAX ) && ULONG_MAX == 0xfffffffful
# define BRG_UI64
# define li_64(h) 0x##h##ull
typedef unsigned long long uint_64t;
# elif defined( __MVS__ )
# define BRG_UI64
# define li_64(h) 0x##h##ull
typedef unsigned int long long uint_64t;
# elif defined( UINT_MAX ) && UINT_MAX > 4294967295u
# if UINT_MAX == 18446744073709551615u
# define BRG_UI64
# define li_64(h) 0x##h##u
typedef unsigned int uint_64t;
# endif
# elif defined( ULONG_MAX ) && ULONG_MAX > 4294967295u
# if ULONG_MAX == 18446744073709551615ul
# define BRG_UI64
# define li_64(h) 0x##h##ul
typedef unsigned long uint_64t;
# endif
# elif defined( ULLONG_MAX ) && ULLONG_MAX > 4294967295u
# if ULLONG_MAX == 18446744073709551615ull
# define BRG_UI64
# define li_64(h) 0x##h##ull
typedef unsigned long long uint_64t;
# endif
# elif defined( ULONG_LONG_MAX ) && ULONG_LONG_MAX > 4294967295u
# if ULONG_LONG_MAX == 18446744073709551615ull
# define BRG_UI64
# define li_64(h) 0x##h##ull
typedef unsigned long long uint_64t;
# endif
# endif
#endif
/* A missing 64-bit type is only fatal when the including code asks for
   one via NEED_UINT_64T. */
#if !defined( BRG_UI64 )
# if defined( NEED_UINT_64T )
# error Please define uint_64t as an unsigned 64 bit type in brg_types.h
# endif
#endif
/* VOID_RETURN / INT_RETURN: return-type macros carrying the DLL
   export/import decoration and calling convention for the active build. */
#ifndef RETURN_VALUES
# define RETURN_VALUES
# if defined( DLL_EXPORT )
# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
# define VOID_RETURN __declspec( dllexport ) void __stdcall
# define INT_RETURN __declspec( dllexport ) int __stdcall
# elif defined( __GNUC__ )
# define VOID_RETURN __declspec( __dllexport__ ) void
# define INT_RETURN __declspec( __dllexport__ ) int
# else
# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
# endif
# elif defined( DLL_IMPORT )
# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
# define VOID_RETURN __declspec( dllimport ) void __stdcall
# define INT_RETURN __declspec( dllimport ) int __stdcall
# elif defined( __GNUC__ )
# define VOID_RETURN __declspec( __dllimport__ ) void
# define INT_RETURN __declspec( __dllimport__ ) int
# else
# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
# endif
# elif defined( __WATCOMC__ )
# define VOID_RETURN void __cdecl
# define INT_RETURN int __cdecl
# else
# define VOID_RETURN void
# define INT_RETURN int
# endif
#endif
/* These defines are used to detect and set the memory alignment of pointers.
Note that offsets are in bytes.
ALIGN_OFFSET(x,n) return the positive or zero offset of
the memory addressed by the pointer 'x'
from an address that is aligned on an
'n' byte boundary ('n' is a power of 2)
ALIGN_FLOOR(x,n) return a pointer that points to memory
that is aligned on an 'n' byte boundary
and is not higher than the memory address
pointed to by 'x' ('n' is a power of 2)
ALIGN_CEIL(x,n) return a pointer that points to memory
that is aligned on an 'n' byte boundary
and is not lower than the memory address
pointed to by 'x' ('n' is a power of 2)
*/
#define ALIGN_OFFSET(x,n) (((ptrint_t)(x)) & ((n) - 1))
#define ALIGN_FLOOR(x,n) ((uint_8t*)(x) - ( ((ptrint_t)(x)) & ((n) - 1)))
#define ALIGN_CEIL(x,n) ((uint_8t*)(x) + (-((ptrint_t)(x)) & ((n) - 1)))
/* These defines are used to declare buffers in a way that allows
faster operations on longer variables to be used. In all these
defines 'size' must be a power of 2 and >= 8. NOTE that the
buffer size is in bytes but the type length is in bits
UNIT_TYPEDEF(x,size) declares a variable 'x' of length
'size' bits
BUFR_TYPEDEF(x,size,bsize) declares a buffer 'x' of length 'bsize'
bytes defined as an array of variables
each of 'size' bits (bsize must be a
multiple of size / 8)
UNIT_CAST(x,size) casts a variable to a type of
length 'size' bits
UPTR_CAST(x,size) casts a pointer to a pointer to a
varaiable of length 'size' bits
*/
#define UI_TYPE(size) uint_##size##t
#define UNIT_TYPEDEF(x,size) typedef UI_TYPE(size) x
#define BUFR_TYPEDEF(x,size,bsize) typedef UI_TYPE(size) x[bsize / (size >> 3)]
#define UNIT_CAST(x,size) ((UI_TYPE(size) )(x))
#define UPTR_CAST(x,size) ((UI_TYPE(size)*)(x))
/* Added by Soeren S. Thomsen (begin) */
/* NOTE(review): these macros shadow the common identifiers u8/u32/u64 for
   every file that includes this header — verify no translation unit
   declares its own u8/u32/u64 before relying on them. */
#define u8 uint_8t
#define u32 uint_32t
#define u64 uint_64t
/* (end) */
#if defined(__cplusplus)
}
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -1,956 +0,0 @@
/* groestl-intr-vperm.h Aug 2011
*
* Groestl implementation with intrinsics using ssse3 instructions.
* Author: Günther A. Roland, Martin Schläffer
*
* Based on the vperm and aes_ni implementations of the hash function Groestl
* by Cagdas Calik <ccalik@metu.edu.tr> http://www.metu.edu.tr/~ccalik/
* Institute of Applied Mathematics, Middle East Technical University, Turkey
*
* This code is placed in the public domain
*/
#include <tmmintrin.h>
#include "grsi.h"
/* Define 16-byte data alignment for different C compilers
   (GCC attribute syntax vs. MSVC __declspec syntax). */
#if defined(__GNUC__)
#define DATA_ALIGN16(x) x __attribute__ ((aligned(16)))
#else
#define DATA_ALIGN16(x) __declspec(align(16)) x
#endif
/* GLOBAL/IFUN are currently hard-wired (#if 1) to expand to nothing, i.e.
   every tagged symbol is a definition here; the commented DECLARE_* guards
   show the intended extern-declaration switch. */
//#if defined(DECLARE_GLOBAL)
#if 1
#define GLOBAL
#else
#define GLOBAL extern
#endif
//#if defined(DECLARE_IFUN)
#if 1
#define IFUN
#else
#define IFUN extern
#endif
/* global constants */
//GLOBAL __m128i grsiROUND_CONST_Lx;
//GLOBAL __m128i grsiROUND_CONST_L0[grsiROUNDS512];
//GLOBAL __m128i grsiROUND_CONST_L7[grsiROUNDS512];
/* ShiftBytes byte-permutation table: eight 16-byte shuffle masks (stored
 * as 8 rows x 4 int32 words) consumed by _mm_shuffle_epi8 in the round
 * macros below. Declared 16-byte aligned so it can be read as __m128i. */
DATA_ALIGN16(int32_t grsiSUBSH_MASK_short[8*4]) = {
0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c,
0x04030201, 0x08070605, 0x0c0b0a09, 0x000f0e0d,
0x05040302, 0x09080706, 0x0d0c0b0a, 0x01000f0e,
0x06050403, 0x0a090807, 0x0e0d0c0b, 0x0201000f,
0x07060504, 0x0b0a0908, 0x0f0e0d0c, 0x03020100,
0x08070605, 0x0c0b0a09, 0x000f0e0d, 0x04030201,
0x09080706, 0x0d0c0b0a, 0x01000f0e, 0x05040302,
0x0e0d0c0b, 0x0201000f, 0x06050403, 0x0a090807
};
/* Fixed: int32_t* is not implicitly convertible to __m128i* (incompatible
 * pointer types; a hard error under -Werror). The explicit cast makes the
 * reinterpretation intentional; alignment is guaranteed by DATA_ALIGN16. */
GLOBAL __m128i *grsiSUBSH_MASK = (__m128i*)grsiSUBSH_MASK_short;
/* NOTE(review): brace-initializing __m128i from four 32-bit values relies on
 * compiler-specific vector-initializer semantics. grsiALL_0F and grsiALL_FF
 * are re-assigned at runtime by grsiSET_SHARED_CONSTANTS/grsiSET_CONSTANTS
 * before use, but grsiALL_1B is not re-set in this file — verify its static
 * value on the target compiler before relying on it. */
GLOBAL __m128i grsiALL_0F = {0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f};
GLOBAL __m128i grsiALL_1B = {0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b};
GLOBAL __m128i grsiALL_FF = {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff};
/* global unknown */
/* vperm lookup tables: each is a pair of 16-byte shuffle tables (low/high
   nibble halves); values are assigned in grsiSET_SHARED_CONSTANTS. */
GLOBAL __m128i grsiVPERM_OPT[2];
GLOBAL __m128i grsiVPERM_INV[2];
GLOBAL __m128i grsiVPERM_SB1[2];
GLOBAL __m128i grsiVPERM_SB2[2];
GLOBAL __m128i grsiVPERM_SB4[2];
/* NOTE(review): grsiVPERM_SBO is declared but not referenced anywhere in
   the visible portion of this file — confirm before removing. */
GLOBAL __m128i grsiVPERM_SBO[2];
/* state vars */
GLOBAL __m128i grsiTRANSP_MASK;
GLOBAL __m128i grsiVPERM_IPT[2];
GLOBAL __m128i grsiALL_15;
GLOBAL __m128i grsiALL_63;
GLOBAL __m128i grsiROUND_CONST_P[grsiROUNDS1024];
GLOBAL __m128i grsiROUND_CONST_Q[grsiROUNDS1024];
/* Stringize helpers: grsitostr expands its argument before quoting. */
#define grsitos(a) #a
#define grsitostr(a) grsitos(a)
/* Constants intentionally NOT set by grsiSET_SHARED_CONSTANTS (kept here
   for reference): */
/*
grsiALL_1B = _mm_set_epi32(0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b);\
grsiALL_63 = _mm_set_epi32(0x63636363, 0x63636363, 0x63636363, 0x63636363);\
*/
/* Populate the shared vperm constants: the transpose mask, nibble mask,
   round-constant bias, and the ipt/opt/inv/sbox table pairs. Must run once
   before any grsiVPERM_* macro is used. The table values originate from
   the constant-time SSSE3 AES technique (see file header credits). */
#define grsiSET_SHARED_CONSTANTS(){\
grsiTRANSP_MASK = _mm_set_epi32(0x0f070b03, 0x0e060a02, 0x0d050901, 0x0c040800);\
grsiALL_0F = _mm_set_epi32(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f);\
grsiALL_15 = _mm_set_epi32(0x15151515, 0x15151515, 0x15151515, 0x15151515);\
\
grsiVPERM_IPT[0] = _mm_set_epi32(0xCD80B1FC, 0xB0FDCC81, 0x4C01307D, 0x317C4D00);\
grsiVPERM_IPT[1] = _mm_set_epi32(0xCABAE090, 0x52227808, 0xC2B2E898, 0x5A2A7000);\
grsiVPERM_OPT[0] = _mm_set_epi32(0xE10D5DB1, 0xB05C0CE0, 0x01EDBD51, 0x50BCEC00);\
grsiVPERM_OPT[1] = _mm_set_epi32(0xF7974121, 0xDEBE6808, 0xFF9F4929, 0xD6B66000);\
grsiVPERM_INV[0] = _mm_set_epi32(0x030D0E0C, 0x02050809, 0x01040A06, 0x0F0B0780);\
grsiVPERM_INV[1] = _mm_set_epi32(0x04070309, 0x0A0B0C02, 0x0E05060F, 0x0D080180);\
grsiVPERM_SB1[0] = _mm_set_epi32(0x3BF7CCC1, 0x0D2ED9EF, 0x3618D415, 0xFAE22300);\
grsiVPERM_SB1[1] = _mm_set_epi32(0xA5DF7A6E, 0x142AF544, 0xB19BE18F, 0xCB503E00);\
grsiVPERM_SB2[0] = _mm_set_epi32(0xC2A163C8, 0xAB82234A, 0x69EB8840, 0x0AE12900);\
grsiVPERM_SB2[1] = _mm_set_epi32(0x5EB7E955, 0xBC982FCD, 0xE27A93C6, 0x0B712400);\
grsiVPERM_SB4[0] = _mm_set_epi32(0xBA44FE79, 0x876D2914, 0x3D50AED7, 0xC393EA00);\
grsiVPERM_SB4[1] = _mm_set_epi32(0xA876DE97, 0x49087E9F, 0xE1E937A0, 0x3FD64100);\
}/**/
/* grsiVPERM
* Transform w/o setting the c* constants first
* transforms 2 rows to/from "vperm mode"
* this function is derived from:
* vperm and aes_ni implementations of hash function Grostl
* by Cagdas CALIK
* inputs:
* a0, a1 = 2 rows
* table = transformation table to use
* t*, c* = clobbers
* outputs:
* a0, a1 = 2 rows transformed with table
* Caller must have loaded c0/c1/c2 (e.g. via grsiVPERM_Transform_Set_Const);
* the low/high nibbles of each byte are looked up in the two table halves
* and XORed together.
* */
#define grsiVPERM_Transform_No_Const(a0, a1, t0, t1, t2, t3, c0, c1, c2){\
t0 = c0;\
t1 = c0;\
t0 = _mm_andnot_si128(t0, a0);\
t1 = _mm_andnot_si128(t1, a1);\
t0 = _mm_srli_epi32(t0, 4);\
t1 = _mm_srli_epi32(t1, 4);\
a0 = _mm_and_si128(a0, c0);\
a1 = _mm_and_si128(a1, c0);\
t2 = c2;\
t3 = c2;\
t2 = _mm_shuffle_epi8(t2, a0);\
t3 = _mm_shuffle_epi8(t3, a1);\
a0 = c1;\
a1 = c1;\
a0 = _mm_shuffle_epi8(a0, t0);\
a1 = _mm_shuffle_epi8(a1, t1);\
a0 = _mm_xor_si128(a0, t2);\
a1 = _mm_xor_si128(a1, t3);\
}/**/
/* Load the nibble mask and the two halves of a vperm table into c0..c2. */
#define grsiVPERM_Transform_Set_Const(table, c0, c1, c2){\
c0 = grsiALL_0F;\
c1 = ((__m128i*) table )[0];\
c2 = ((__m128i*) table )[1];\
}/**/
/* grsiVPERM
* Transform
* transforms 2 rows to/from "vperm mode"
* this function is derived from:
* vperm and aes_ni implementations of hash function Grostl
* by Cagdas CALIK
* inputs:
* a0, a1 = 2 rows
* table = transformation table to use
* t*, c* = clobbers
* outputs:
* a0, a1 = 2 rows transformed with table
* Convenience wrapper: sets the constants, then applies the transform.
* */
#define grsiVPERM_Transform(a0, a1, table, t0, t1, t2, t3, c0, c1, c2){\
grsiVPERM_Transform_Set_Const(table, c0, c1, c2);\
grsiVPERM_Transform_No_Const(a0, a1, t0, t1, t2, t3, c0, c1, c2);\
}/**/
/* grsiVPERM
* Transform State
* inputs:
* a0-a3 = state (4 rows; constants are loaded once, then the transform is
* applied to the two row pairs a0/a1 and a2/a3)
* table = transformation table to use
* t* = clobbers
* outputs:
* a0-a3 = transformed state
* */
#define grsiVPERM_Transform_State(a0, a1, a2, a3, table, t0, t1, t2, t3, c0, c1, c2){\
grsiVPERM_Transform_Set_Const(table, c0, c1, c2);\
grsiVPERM_Transform_No_Const(a0, a1, t0, t1, t2, t3, c0, c1, c2);\
grsiVPERM_Transform_No_Const(a2, a3, t0, t1, t2, t3, c0, c1, c2);\
}/**/
/* grsiVPERM
* Add Constant to State
* XORs the same 128-bit constant into all eight rows.
* inputs:
* a0-a7 = state
* constant = constant to add
* t0 = clobber
* outputs:
* a0-a7 = state + constant
* */
#define grsiVPERM_Add_Constant(a0, a1, a2, a3, a4, a5, a6, a7, constant, t0){\
t0 = constant;\
a0 = _mm_xor_si128(a0, t0);\
a1 = _mm_xor_si128(a1, t0);\
a2 = _mm_xor_si128(a2, t0);\
a3 = _mm_xor_si128(a3, t0);\
a4 = _mm_xor_si128(a4, t0);\
a5 = _mm_xor_si128(a5, t0);\
a6 = _mm_xor_si128(a6, t0);\
a7 = _mm_xor_si128(a7, t0);\
}/**/
/* grsiVPERM
* Set Substitute Core Constants (loads the sbox-inverse table pair)
* */
#define grsiVPERM_Substitute_Core_Set_Const(c0, c1, c2){\
grsiVPERM_Transform_Set_Const(grsiVPERM_INV, c0, c1, c2);\
}/**/
/* grsiVPERM
* Substitute Core
* first part of sbox inverse computation
* this function is derived from:
* vperm and aes_ni implementations of hash function Grostl
* by Cagdas CALIK
* inputs:
* a0 = 1 row
* t*, c* = clobbers (c0 = nibble mask, c1/c2 = inverse table halves)
* outputs:
* b0a, b0b = inputs for lookup step
* NOTE: clobbers a0; statement order matters — do not reorder.
* */
#define grsiVPERM_Substitute_Core(a0, b0a, b0b, t0, t1, c0, c1, c2){\
t0 = c0;\
t0 = _mm_andnot_si128(t0, a0);\
t0 = _mm_srli_epi32(t0, 4);\
a0 = _mm_and_si128(a0, c0);\
b0a = c1;\
b0a = _mm_shuffle_epi8(b0a, a0);\
a0 = _mm_xor_si128(a0, t0);\
b0b = c2;\
b0b = _mm_shuffle_epi8(b0b, t0);\
b0b = _mm_xor_si128(b0b, b0a);\
t1 = c2;\
t1 = _mm_shuffle_epi8(t1, a0);\
t1 = _mm_xor_si128(t1, b0a);\
b0a = c2;\
b0a = _mm_shuffle_epi8(b0a, b0b);\
b0a = _mm_xor_si128(b0a, a0);\
b0b = c2;\
b0b = _mm_shuffle_epi8(b0b, t1);\
b0b = _mm_xor_si128(b0b, t0);\
}/**/
/* grsiVPERM
* Lookup
* second part of sbox inverse computation
* this function is derived from:
* vperm and aes_ni implementations of hash function Grostl
* by Cagdas CALIK
* inputs:
* a0a, a0b = output of Substitution Core
* table = lookup table to use (*1 / *2 / *4)
* t0 = clobber
* outputs:
* b0 = output of sbox + multiplication
* */
#define grsiVPERM_Lookup(a0a, a0b, table, b0, t0){\
b0 = ((__m128i*) table )[0];\
t0 = ((__m128i*) table )[1];\
b0 = _mm_shuffle_epi8(b0, a0b);\
t0 = _mm_shuffle_epi8(t0, a0a);\
b0 = _mm_xor_si128(b0, t0);\
}/**/
/* grsiVPERM
* SubBytes and *2 / *4
* this function is derived from:
* Constant-time SSSE3 AES core implementation
* by Mike Hamburg
* and
* vperm and aes_ni implementations of hash function Grostl
* by Cagdas CALIK
* inputs:
* a0-a7 = state
* t*, c* = clobbers
* outputs:
* a0-a7 = state * 4
* c2 = row0 * 2 -> b0
* c1 = row7 * 2 -> b3
* c0 = row7 * 1 -> b4
* t2 = row4 * 1 -> b7
* TEMP_MUL1 = row(i) * 1
* TEMP_MUL2 = row(i) * 2
*
* Rows are processed in the order 1,2,3,5,6,7,4,0 so that the *1/*2 values
* of rows 0,4,7 can be left in registers (c0/c1/c2/t2) instead of stored.
* Rows 4 and 0 re-pass grsiVPERM_INV[0] explicitly because c1 was
* repurposed as an output for row 7.
*
* call:grsiVPERM_SUB_MULTIPLY(a0, a1, a2, a3, a4, a5, a6, a7, b1, b2, b5, b6, b0, b3, b4, b7) */
#define grsiVPERM_SUB_MULTIPLY(a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t3, t4, c2, c1, c0, t2){\
/* set Constants */\
grsiVPERM_Substitute_Core_Set_Const(c0, c1, c2);\
/* row 1 */\
grsiVPERM_Substitute_Core(a1, t0, t1, t3, t4, c0, c1, c2);\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
TEMP_MUL1[1] = t2;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
TEMP_MUL2[1] = t3;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a1, t4);\
/* --- */\
/* row 2 */\
grsiVPERM_Substitute_Core(a2, t0, t1, t3, t4, c0, c1, c2);\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
TEMP_MUL1[2] = t2;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
TEMP_MUL2[2] = t3;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a2, t4);\
/* --- */\
/* row 3 */\
grsiVPERM_Substitute_Core(a3, t0, t1, t3, t4, c0, c1, c2);\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
TEMP_MUL1[3] = t2;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
TEMP_MUL2[3] = t3;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a3, t4);\
/* --- */\
/* row 5 */\
grsiVPERM_Substitute_Core(a5, t0, t1, t3, t4, c0, c1, c2);\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
TEMP_MUL1[5] = t2;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
TEMP_MUL2[5] = t3;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a5, t4);\
/* --- */\
/* row 6 */\
grsiVPERM_Substitute_Core(a6, t0, t1, t3, t4, c0, c1, c2);\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
TEMP_MUL1[6] = t2;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
TEMP_MUL2[6] = t3;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a6, t4);\
/* --- */\
/* row 7 */\
grsiVPERM_Substitute_Core(a7, t0, t1, t3, t4, c0, c1, c2);\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
TEMP_MUL1[7] = t2;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, c1, t4); /*c1 -> b3*/\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a7, t4);\
/* --- */\
/* row 4 */\
grsiVPERM_Substitute_Core(a4, t0, t1, t3, t4, c0, (grsiVPERM_INV[0]), c2);\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4); /*t2 -> b7*/\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
TEMP_MUL2[4] = t3;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a4, t4);\
/* --- */\
/* row 0 */\
grsiVPERM_Substitute_Core(a0, t0, t1, t3, t4, c0, (grsiVPERM_INV[0]), c2);\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, c0, t4); /*c0 -> b4*/\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, c2, t4); /*c2 -> b0*/\
TEMP_MUL2[0] = c2;\
grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a0, t4);\
/* --- */\
}/**/
/* Optimized grsiMixBytes
* inputs:
* a0-a7 = (row0-row7) * 4
* b0 = row0 * 2
* b3 = row7 * 2
* b4 = row7 * 1
* b7 = row4 * 1
* all *1 and *2 values must also be in TEMP_MUL1, TEMP_MUL2
* output: b0-b7
* The numbered steps below combine the precomputed *1/*2/*4 row multiples
* into the MixBytes linear layer; steps marked "helper!" reuse values
* already held in registers instead of reloading from TEMP_MUL*.
* Statement order is load-bearing — do not reorder.
* */
#define grsiMixBytes(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
/* save one value */\
TEMP_MUL4 = a3;\
/* 1 */\
b1 = a0;\
b1 = _mm_xor_si128(b1, a5);\
b1 = _mm_xor_si128(b1, b4); /* -> helper! */\
b1 = _mm_xor_si128(b1, (TEMP_MUL2[3]));\
b2 = b1;\
\
/* 2 */\
b5 = a1;\
b5 = _mm_xor_si128(b5, a4);\
b5 = _mm_xor_si128(b5, b7); /* -> helper! */\
b5 = _mm_xor_si128(b5, b3); /* -> helper! */\
b6 = b5;\
\
/* 4 */\
b7 = _mm_xor_si128(b7, a6);\
/*b7 = _mm_xor_si128(b7, (TEMP_MUL1[4])); -> helper! */\
b7 = _mm_xor_si128(b7, (TEMP_MUL1[6]));\
b7 = _mm_xor_si128(b7, (TEMP_MUL2[1]));\
b7 = _mm_xor_si128(b7, b3); /* -> helper! */\
b2 = _mm_xor_si128(b2, b7);\
\
/* 3 */\
b0 = _mm_xor_si128(b0, a7);\
b0 = _mm_xor_si128(b0, (TEMP_MUL1[5]));\
b0 = _mm_xor_si128(b0, (TEMP_MUL1[7]));\
/*b0 = _mm_xor_si128(b0, (TEMP_MUL2[0])); -> helper! */\
b0 = _mm_xor_si128(b0, (TEMP_MUL2[2]));\
b3 = b0;\
b1 = _mm_xor_si128(b1, b0);\
b0 = _mm_xor_si128(b0, b7); /* moved from 4 */\
\
/* 5 */\
b4 = _mm_xor_si128(b4, a2);\
/*b4 = _mm_xor_si128(b4, (TEMP_MUL1[0])); -> helper! */\
b4 = _mm_xor_si128(b4, (TEMP_MUL1[2]));\
b4 = _mm_xor_si128(b4, (TEMP_MUL2[3]));\
b4 = _mm_xor_si128(b4, (TEMP_MUL2[5]));\
b3 = _mm_xor_si128(b3, b4);\
b6 = _mm_xor_si128(b6, b4);\
\
/* 6 */\
a3 = _mm_xor_si128(a3, (TEMP_MUL1[1]));\
a3 = _mm_xor_si128(a3, (TEMP_MUL1[3]));\
a3 = _mm_xor_si128(a3, (TEMP_MUL2[4]));\
a3 = _mm_xor_si128(a3, (TEMP_MUL2[6]));\
b4 = _mm_xor_si128(b4, a3);\
b5 = _mm_xor_si128(b5, a3);\
b7 = _mm_xor_si128(b7, a3);\
\
/* 7 */\
a1 = _mm_xor_si128(a1, (TEMP_MUL1[1]));\
a1 = _mm_xor_si128(a1, (TEMP_MUL2[4]));\
b2 = _mm_xor_si128(b2, a1);\
b3 = _mm_xor_si128(b3, a1);\
\
/* 8 */\
a5 = _mm_xor_si128(a5, (TEMP_MUL1[5]));\
a5 = _mm_xor_si128(a5, (TEMP_MUL2[0]));\
b6 = _mm_xor_si128(b6, a5);\
b7 = _mm_xor_si128(b7, a5);\
\
/* 9 */\
a3 = TEMP_MUL1[2];\
a3 = _mm_xor_si128(a3, (TEMP_MUL2[5]));\
b0 = _mm_xor_si128(b0, a3);\
b5 = _mm_xor_si128(b5, a3);\
\
/* 10 */\
a1 = TEMP_MUL1[6];\
a1 = _mm_xor_si128(a1, (TEMP_MUL2[1]));\
b1 = _mm_xor_si128(b1, a1);\
b4 = _mm_xor_si128(b4, a1);\
\
/* 11 */\
a5 = TEMP_MUL1[3];\
a5 = _mm_xor_si128(a5, (TEMP_MUL2[6]));\
b1 = _mm_xor_si128(b1, a5);\
b6 = _mm_xor_si128(b6, a5);\
\
/* 12 */\
a3 = TEMP_MUL1[7];\
a3 = _mm_xor_si128(a3, (TEMP_MUL2[2]));\
b2 = _mm_xor_si128(b2, a3);\
b5 = _mm_xor_si128(b5, a3);\
\
/* 13 */\
b0 = _mm_xor_si128(b0, (TEMP_MUL4));\
b0 = _mm_xor_si128(b0, a4);\
b1 = _mm_xor_si128(b1, a4);\
b3 = _mm_xor_si128(b3, a6);\
b4 = _mm_xor_si128(b4, a0);\
b4 = _mm_xor_si128(b4, a7);\
b5 = _mm_xor_si128(b5, a0);\
b7 = _mm_xor_si128(b7, a2);\
}/**/
/* Reference values for the ShiftBytes masks, kept as documentation of the
   grsiSUBSH_MASK_short table contents above: */
/*
grsiSUBSH_MASK[0] = _mm_set_epi32(0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100);\
grsiSUBSH_MASK[1] = _mm_set_epi32(0x000f0e0d, 0x0c0b0a09, 0x08070605, 0x04030201);\
grsiSUBSH_MASK[2] = _mm_set_epi32(0x01000f0e, 0x0d0c0b0a, 0x09080706, 0x05040302);\
grsiSUBSH_MASK[3] = _mm_set_epi32(0x0201000f, 0x0e0d0c0b, 0x0a090807, 0x06050403);\
grsiSUBSH_MASK[4] = _mm_set_epi32(0x03020100, 0x0f0e0d0c, 0x0b0a0908, 0x07060504);\
grsiSUBSH_MASK[5] = _mm_set_epi32(0x04030201, 0x000f0e0d, 0x0c0b0a09, 0x08070605);\
grsiSUBSH_MASK[6] = _mm_set_epi32(0x05040302, 0x01000f0e, 0x0d0c0b0a, 0x09080706);\
grsiSUBSH_MASK[7] = _mm_set_epi32(0x0a090807, 0x06050403, 0x0201000f, 0x0e0d0c0b);\
*/
/* Full constant setup for Groestl-1024: shared vperm tables plus the P and
   Q round constants. NOTE: expects a loop variable `i` to be declared in
   the enclosing scope. */
#define grsiSET_CONSTANTS(){\
grsiSET_SHARED_CONSTANTS();\
grsiALL_FF = _mm_set_epi32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff);\
for(i = 0; i < grsiROUNDS1024; i++)\
{\
grsiROUND_CONST_P[i] = _mm_set_epi32(0xf0e0d0c0 ^ (i * 0x01010101), 0xb0a09080 ^ (i * 0x01010101), 0x70605040 ^ (i * 0x01010101), 0x30201000 ^ (i * 0x01010101));\
grsiROUND_CONST_Q[i] = _mm_set_epi32(0x0f1f2f3f ^ (i * 0x01010101), 0x4f5f6f7f ^ (i * 0x01010101), 0x8f9fafbf ^ (i * 0x01010101), 0xcfdfefff ^ (i * 0x01010101));\
}\
}/**/
/* one round
* a0-a7 = input rows
* b0-b7 = output rows
* (SubBytes with multiplication, then the MixBytes linear layer)
*/
#define grsiSUBMIX(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
/* SubBytes + Multiplication */\
grsiVPERM_SUB_MULTIPLY(a0, a1, a2, a3, a4, a5, a6, a7, b1, b2, b5, b6, b0, b3, b4, b7);\
/* grsiMixBytes */\
grsiMixBytes(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7);\
}/**/
/* All 14 rounds of the Groestl-1024 P permutation, two rounds per loop
   iteration (state ping-pongs between xmm8-15 and xmm0-7). Expects the
   state in xmm8-15 and the round constants/masks already initialised via
   grsiSET_CONSTANTS. */
#define grsiROUNDS_P(){\
u32 round_counter;\
for(round_counter = 0; round_counter < 14; round_counter+=2) {\
/* AddRoundConstant P1024 */\
xmm8 = _mm_xor_si128(xmm8, (grsiROUND_CONST_P[round_counter]));\
/* ShiftBytes P1024 + pre-AESENCLAST */\
xmm8 = _mm_shuffle_epi8(xmm8, (grsiSUBSH_MASK[0]));\
xmm9 = _mm_shuffle_epi8(xmm9, (grsiSUBSH_MASK[1]));\
xmm10 = _mm_shuffle_epi8(xmm10, (grsiSUBSH_MASK[2]));\
xmm11 = _mm_shuffle_epi8(xmm11, (grsiSUBSH_MASK[3]));\
xmm12 = _mm_shuffle_epi8(xmm12, (grsiSUBSH_MASK[4]));\
xmm13 = _mm_shuffle_epi8(xmm13, (grsiSUBSH_MASK[5]));\
xmm14 = _mm_shuffle_epi8(xmm14, (grsiSUBSH_MASK[6]));\
xmm15 = _mm_shuffle_epi8(xmm15, (grsiSUBSH_MASK[7]));\
/* SubBytes + grsiMixBytes */\
grsiSUBMIX(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
grsiVPERM_Add_Constant(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, grsiALL_15, xmm8);\
\
/* AddRoundConstant P1024 */\
xmm0 = _mm_xor_si128(xmm0, (grsiROUND_CONST_P[round_counter+1]));\
/* ShiftBytes P1024 + pre-AESENCLAST */\
xmm0 = _mm_shuffle_epi8(xmm0, (grsiSUBSH_MASK[0]));\
xmm1 = _mm_shuffle_epi8(xmm1, (grsiSUBSH_MASK[1]));\
xmm2 = _mm_shuffle_epi8(xmm2, (grsiSUBSH_MASK[2]));\
xmm3 = _mm_shuffle_epi8(xmm3, (grsiSUBSH_MASK[3]));\
xmm4 = _mm_shuffle_epi8(xmm4, (grsiSUBSH_MASK[4]));\
xmm5 = _mm_shuffle_epi8(xmm5, (grsiSUBSH_MASK[5]));\
xmm6 = _mm_shuffle_epi8(xmm6, (grsiSUBSH_MASK[6]));\
xmm7 = _mm_shuffle_epi8(xmm7, (grsiSUBSH_MASK[7]));\
/* SubBytes + grsiMixBytes */\
grsiSUBMIX(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
grsiVPERM_Add_Constant(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, grsiALL_15, xmm0);\
}\
}/**/
/* All 14 rounds of Groestl-1024 permutation Q on the state in xmm8-xmm15.
 * Mirrors grsiROUNDS_P, but with the Q round constants (0xFF into every
 * register except the last, which takes grsiROUND_CONST_Q) and the Q
 * ShiftBytes pattern. Two rounds per iteration; the state ends back in
 * xmm8-xmm15.
 * Fix: the loop-counter declaration now precedes the first statement,
 * matching grsiROUNDS_P (a declaration after a statement broke C90 builds
 * and was inconsistent), and its redundant '= 0' initializer -- which was
 * immediately overwritten by the for() init -- was dropped. */
#define grsiROUNDS_Q(){\
u32 round_counter;\
grsiVPERM_Add_Constant(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, grsiALL_15, xmm1);\
for(round_counter = 0; round_counter < 14; round_counter+=2) {\
/* AddRoundConstant Q1024 */\
xmm1 = grsiALL_FF;\
xmm8 = _mm_xor_si128(xmm8, xmm1);\
xmm9 = _mm_xor_si128(xmm9, xmm1);\
xmm10 = _mm_xor_si128(xmm10, xmm1);\
xmm11 = _mm_xor_si128(xmm11, xmm1);\
xmm12 = _mm_xor_si128(xmm12, xmm1);\
xmm13 = _mm_xor_si128(xmm13, xmm1);\
xmm14 = _mm_xor_si128(xmm14, xmm1);\
xmm15 = _mm_xor_si128(xmm15, (grsiROUND_CONST_Q[round_counter]));\
/* ShiftBytes Q1024 + pre-AESENCLAST */\
xmm8 = _mm_shuffle_epi8(xmm8, (grsiSUBSH_MASK[1]));\
xmm9 = _mm_shuffle_epi8(xmm9, (grsiSUBSH_MASK[3]));\
xmm10 = _mm_shuffle_epi8(xmm10, (grsiSUBSH_MASK[5]));\
xmm11 = _mm_shuffle_epi8(xmm11, (grsiSUBSH_MASK[7]));\
xmm12 = _mm_shuffle_epi8(xmm12, (grsiSUBSH_MASK[0]));\
xmm13 = _mm_shuffle_epi8(xmm13, (grsiSUBSH_MASK[2]));\
xmm14 = _mm_shuffle_epi8(xmm14, (grsiSUBSH_MASK[4]));\
xmm15 = _mm_shuffle_epi8(xmm15, (grsiSUBSH_MASK[6]));\
/* SubBytes + grsiMixBytes */\
grsiSUBMIX(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
\
/* AddRoundConstant Q1024 */\
xmm9 = grsiALL_FF;\
xmm0 = _mm_xor_si128(xmm0, xmm9);\
xmm1 = _mm_xor_si128(xmm1, xmm9);\
xmm2 = _mm_xor_si128(xmm2, xmm9);\
xmm3 = _mm_xor_si128(xmm3, xmm9);\
xmm4 = _mm_xor_si128(xmm4, xmm9);\
xmm5 = _mm_xor_si128(xmm5, xmm9);\
xmm6 = _mm_xor_si128(xmm6, xmm9);\
xmm7 = _mm_xor_si128(xmm7, (grsiROUND_CONST_Q[round_counter+1]));\
/* ShiftBytes Q1024 + pre-AESENCLAST */\
xmm0 = _mm_shuffle_epi8(xmm0, (grsiSUBSH_MASK[1]));\
xmm1 = _mm_shuffle_epi8(xmm1, (grsiSUBSH_MASK[3]));\
xmm2 = _mm_shuffle_epi8(xmm2, (grsiSUBSH_MASK[5]));\
xmm3 = _mm_shuffle_epi8(xmm3, (grsiSUBSH_MASK[7]));\
xmm4 = _mm_shuffle_epi8(xmm4, (grsiSUBSH_MASK[0]));\
xmm5 = _mm_shuffle_epi8(xmm5, (grsiSUBSH_MASK[2]));\
xmm6 = _mm_shuffle_epi8(xmm6, (grsiSUBSH_MASK[4]));\
xmm7 = _mm_shuffle_epi8(xmm7, (grsiSUBSH_MASK[6]));\
/* SubBytes + grsiMixBytes*/ \
grsiSUBMIX(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
}\
grsiVPERM_Add_Constant(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, grsiALL_15, xmm1);\
}/**/
/* Matrix Transpose
* input is a 1024-bit state with two columns in one xmm
* output is a 1024-bit state with two rows in one xmm
* inputs: i0-i7
* outputs: i0-i7
* clobbers: t0-t7
*/
#define grsiMatrix_Transpose(i0, i1, i2, i3, i4, i5, i6, i7, t0, t1, t2, t3, t4, t5, t6, t7){\
t0 = grsiTRANSP_MASK;\
\
i6 = _mm_shuffle_epi8(i6, t0);\
i0 = _mm_shuffle_epi8(i0, t0);\
i1 = _mm_shuffle_epi8(i1, t0);\
i2 = _mm_shuffle_epi8(i2, t0);\
i3 = _mm_shuffle_epi8(i3, t0);\
t1 = i2;\
i4 = _mm_shuffle_epi8(i4, t0);\
i5 = _mm_shuffle_epi8(i5, t0);\
t2 = i4;\
t3 = i6;\
i7 = _mm_shuffle_epi8(i7, t0);\
\
/* continue with unpack using 4 temp registers */\
t0 = i0;\
t2 = _mm_unpackhi_epi16(t2, i5);\
i4 = _mm_unpacklo_epi16(i4, i5);\
t3 = _mm_unpackhi_epi16(t3, i7);\
i6 = _mm_unpacklo_epi16(i6, i7);\
t0 = _mm_unpackhi_epi16(t0, i1);\
t1 = _mm_unpackhi_epi16(t1, i3);\
i2 = _mm_unpacklo_epi16(i2, i3);\
i0 = _mm_unpacklo_epi16(i0, i1);\
\
/* shuffle with immediate */\
t0 = _mm_shuffle_epi32(t0, 216);\
t1 = _mm_shuffle_epi32(t1, 216);\
t2 = _mm_shuffle_epi32(t2, 216);\
t3 = _mm_shuffle_epi32(t3, 216);\
i0 = _mm_shuffle_epi32(i0, 216);\
i2 = _mm_shuffle_epi32(i2, 216);\
i4 = _mm_shuffle_epi32(i4, 216);\
i6 = _mm_shuffle_epi32(i6, 216);\
\
/* continue with unpack */\
t4 = i0;\
i0 = _mm_unpacklo_epi32(i0, i2);\
t4 = _mm_unpackhi_epi32(t4, i2);\
t5 = t0;\
t0 = _mm_unpacklo_epi32(t0, t1);\
t5 = _mm_unpackhi_epi32(t5, t1);\
t6 = i4;\
i4 = _mm_unpacklo_epi32(i4, i6);\
t7 = t2;\
t6 = _mm_unpackhi_epi32(t6, i6);\
i2 = t0;\
t2 = _mm_unpacklo_epi32(t2, t3);\
i3 = t0;\
t7 = _mm_unpackhi_epi32(t7, t3);\
\
/* there are now 2 rows in each xmm */\
/* unpack to get 1 row of CV in each xmm */\
i1 = i0;\
i1 = _mm_unpackhi_epi64(i1, i4);\
i0 = _mm_unpacklo_epi64(i0, i4);\
i4 = t4;\
i3 = _mm_unpackhi_epi64(i3, t2);\
i5 = t4;\
i2 = _mm_unpacklo_epi64(i2, t2);\
i6 = t5;\
i5 = _mm_unpackhi_epi64(i5, t6);\
i7 = t5;\
i4 = _mm_unpacklo_epi64(i4, t6);\
i7 = _mm_unpackhi_epi64(i7, t7);\
i6 = _mm_unpacklo_epi64(i6, t7);\
/* transpose done */\
}/**/
/* Matrix Transpose Inverse
* input is a 1024-bit state with two rows in one xmm
* output is a 1024-bit state with two columns in one xmm
* inputs: i0-i7
* outputs: (i0, o0, i1, i3, o1, o2, i5, i7)
* clobbers: t0-t4
*/
#define grsiMatrix_Transpose_INV(i0, i1, i2, i3, i4, i5, i6, i7, o0, o1, o2, t0, t1, t2, t3, t4){\
/* transpose matrix to get output format */\
o1 = i0;\
i0 = _mm_unpacklo_epi64(i0, i1);\
o1 = _mm_unpackhi_epi64(o1, i1);\
t0 = i2;\
i2 = _mm_unpacklo_epi64(i2, i3);\
t0 = _mm_unpackhi_epi64(t0, i3);\
t1 = i4;\
i4 = _mm_unpacklo_epi64(i4, i5);\
t1 = _mm_unpackhi_epi64(t1, i5);\
t2 = i6;\
o0 = grsiTRANSP_MASK;\
i6 = _mm_unpacklo_epi64(i6, i7);\
t2 = _mm_unpackhi_epi64(t2, i7);\
/* load transpose mask into a register, because it will be used 8 times */\
i0 = _mm_shuffle_epi8(i0, o0);\
i2 = _mm_shuffle_epi8(i2, o0);\
i4 = _mm_shuffle_epi8(i4, o0);\
i6 = _mm_shuffle_epi8(i6, o0);\
o1 = _mm_shuffle_epi8(o1, o0);\
t0 = _mm_shuffle_epi8(t0, o0);\
t1 = _mm_shuffle_epi8(t1, o0);\
t2 = _mm_shuffle_epi8(t2, o0);\
/* continue with unpack using 4 temp registers */\
t3 = i4;\
o2 = o1;\
o0 = i0;\
t4 = t1;\
\
t3 = _mm_unpackhi_epi16(t3, i6);\
i4 = _mm_unpacklo_epi16(i4, i6);\
o0 = _mm_unpackhi_epi16(o0, i2);\
i0 = _mm_unpacklo_epi16(i0, i2);\
o2 = _mm_unpackhi_epi16(o2, t0);\
o1 = _mm_unpacklo_epi16(o1, t0);\
t4 = _mm_unpackhi_epi16(t4, t2);\
t1 = _mm_unpacklo_epi16(t1, t2);\
/* shuffle with immediate */\
i4 = _mm_shuffle_epi32(i4, 216);\
t3 = _mm_shuffle_epi32(t3, 216);\
o1 = _mm_shuffle_epi32(o1, 216);\
o2 = _mm_shuffle_epi32(o2, 216);\
i0 = _mm_shuffle_epi32(i0, 216);\
o0 = _mm_shuffle_epi32(o0, 216);\
t1 = _mm_shuffle_epi32(t1, 216);\
t4 = _mm_shuffle_epi32(t4, 216);\
/* continue with unpack */\
i1 = i0;\
i3 = o0;\
i5 = o1;\
i7 = o2;\
i0 = _mm_unpacklo_epi32(i0, i4);\
i1 = _mm_unpackhi_epi32(i1, i4);\
o0 = _mm_unpacklo_epi32(o0, t3);\
i3 = _mm_unpackhi_epi32(i3, t3);\
o1 = _mm_unpacklo_epi32(o1, t1);\
i5 = _mm_unpackhi_epi32(i5, t1);\
o2 = _mm_unpacklo_epi32(o2, t4);\
i7 = _mm_unpackhi_epi32(i7, t4);\
/* transpose done */\
}/**/
/* transform round constants into grsiVPERM mode */
/* Convert round-constant pair (i, j) of both P and Q into the vperm
 * input representation, in place. The Q constants additionally absorb the
 * grsiALL_15 offset that the vperm code path keeps on the state.
 * Clobbers xmm0-xmm10. */
#define grsiVPERM_Transform_RoundConst_CNT2(i, j){\
xmm0 = grsiROUND_CONST_P[i];\
xmm1 = grsiROUND_CONST_P[j];\
xmm2 = grsiROUND_CONST_Q[i];\
xmm3 = grsiROUND_CONST_Q[j];\
grsiVPERM_Transform_State(xmm0, xmm1, xmm2, xmm3, grsiVPERM_IPT, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10);\
xmm2 = _mm_xor_si128(xmm2, (grsiALL_15));\
xmm3 = _mm_xor_si128(xmm3, (grsiALL_15));\
grsiROUND_CONST_P[i] = xmm0;\
grsiROUND_CONST_P[j] = xmm1;\
grsiROUND_CONST_Q[i] = xmm2;\
grsiROUND_CONST_Q[j] = xmm3;\
}/**/
/* transform round constants into grsiVPERM mode */
/* Transform all 14 P/Q round constants and grsiALL_FF into vperm mode.
 * NOTE(review): this rewrites the global constant tables IN PLACE, so it
 * must run exactly once per initialized table -- applying it twice would
 * double-transform the constants. Confirm callers guarantee this.
 * Clobbers xmm0-xmm10. */
#define grsiVPERM_Transform_RoundConst(){\
grsiVPERM_Transform_RoundConst_CNT2(0, 1);\
grsiVPERM_Transform_RoundConst_CNT2(2, 3);\
grsiVPERM_Transform_RoundConst_CNT2(4, 5);\
grsiVPERM_Transform_RoundConst_CNT2(6, 7);\
grsiVPERM_Transform_RoundConst_CNT2(8, 9);\
grsiVPERM_Transform_RoundConst_CNT2(10, 11);\
grsiVPERM_Transform_RoundConst_CNT2(12, 13);\
xmm0 = grsiALL_FF;\
grsiVPERM_Transform(xmm0, xmm1, grsiVPERM_IPT, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10);\
xmm0 = _mm_xor_si128(xmm0, (grsiALL_15));\
grsiALL_FF = xmm0;\
}/**/
/* Initialize the Groestl-1024 chaining value: transform the round-constant
 * tables into vperm mode, then convert the IV at h (8 x 128 bits) from
 * column ordering into the row-ordered, vperm-domain form used internally. */
IFUN void grsiINIT(u64* h)
#if !defined(DECLARE_IFUN)
;
#else
{
__m128i* const chaining = (__m128i*) h;
/* FIX(review): these locals were 'static', which made the function
 * non-reentrant and unsafe when several miner threads run concurrently.
 * Every register is written before it is read by the macros used here,
 * so automatic storage preserves behavior. */
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
__m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
/* transform round constants into grsiVPERM mode */
/* NOTE(review): this mutates the global constant tables in place -- it must
 * only be applied once per constant set; confirm callers invoke grsiINIT
 * accordingly. */
grsiVPERM_Transform_RoundConst();
/* load IV into registers xmm8 - xmm15 */
xmm8 = chaining[0];
xmm9 = chaining[1];
xmm10 = chaining[2];
xmm11 = chaining[3];
xmm12 = chaining[4];
xmm13 = chaining[5];
xmm14 = chaining[6];
xmm15 = chaining[7];
/* transform chaining value from column ordering into row ordering */
grsiVPERM_Transform_State(xmm8, xmm9, xmm10, xmm11, grsiVPERM_IPT, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
grsiVPERM_Transform_State(xmm12, xmm13, xmm14, xmm15, grsiVPERM_IPT, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
grsiMatrix_Transpose(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
/* store transposed IV */
chaining[0] = xmm8;
chaining[1] = xmm9;
chaining[2] = xmm10;
chaining[3] = xmm11;
chaining[4] = xmm12;
chaining[5] = xmm13;
chaining[6] = xmm14;
chaining[7] = xmm15;
}
#endif
/* Groestl-1024 compression function: given chaining value CV at h and one
 * 128-byte message block at m (both already in the internal row-ordered,
 * vperm-domain layout), compute CV <- P(CV+M) + Q(M) + CV in place. */
IFUN void grsiTF1024(u64* h, u64* m)
#if !defined(DECLARE_IFUN)
;
#else
{
__m128i* const chaining = (__m128i*) h;
__m128i* const message = (__m128i*) m;
/* FIX(review): these locals and scratch arrays were 'static', making the
 * compression function non-reentrant and thread-unsafe. QTEMP is written
 * below before it is read; TEMP_MUL1/2/4 are scratch for grsiMixBytes --
 * assumed written before read there (TODO confirm in grsiMixBytes) -- so
 * automatic storage preserves behavior. */
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
__m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
__m128i TEMP_MUL1[8];
__m128i TEMP_MUL2[8];
__m128i TEMP_MUL4;
__m128i QTEMP[8];
/* load message into registers xmm8 - xmm15 (Q = message) */
xmm8 = message[0];
xmm9 = message[1];
xmm10 = message[2];
xmm11 = message[3];
xmm12 = message[4];
xmm13 = message[5];
xmm14 = message[6];
xmm15 = message[7];
/* transform message M from column ordering into row ordering */
grsiVPERM_Transform_State(xmm8, xmm9, xmm10, xmm11, grsiVPERM_IPT, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
grsiVPERM_Transform_State(xmm12, xmm13, xmm14, xmm15, grsiVPERM_IPT, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
grsiMatrix_Transpose(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
/* store message M (Q input) for later */
QTEMP[0] = xmm8;
QTEMP[1] = xmm9;
QTEMP[2] = xmm10;
QTEMP[3] = xmm11;
QTEMP[4] = xmm12;
QTEMP[5] = xmm13;
QTEMP[6] = xmm14;
QTEMP[7] = xmm15;
/* xor CV to message to get P input */
/* result: CV+M in xmm8...xmm15 */
xmm8 = _mm_xor_si128(xmm8, (chaining[0]));
xmm9 = _mm_xor_si128(xmm9, (chaining[1]));
xmm10 = _mm_xor_si128(xmm10, (chaining[2]));
xmm11 = _mm_xor_si128(xmm11, (chaining[3]));
xmm12 = _mm_xor_si128(xmm12, (chaining[4]));
xmm13 = _mm_xor_si128(xmm13, (chaining[5]));
xmm14 = _mm_xor_si128(xmm14, (chaining[6]));
xmm15 = _mm_xor_si128(xmm15, (chaining[7]));
/* compute permutation P */
/* result: P(CV+M) in xmm8...xmm15 */
grsiROUNDS_P();
/* xor CV to P output (feed-forward) */
/* result: P(CV+M)+CV in xmm8...xmm15 */
xmm8 = _mm_xor_si128(xmm8, (chaining[0]));
xmm9 = _mm_xor_si128(xmm9, (chaining[1]));
xmm10 = _mm_xor_si128(xmm10, (chaining[2]));
xmm11 = _mm_xor_si128(xmm11, (chaining[3]));
xmm12 = _mm_xor_si128(xmm12, (chaining[4]));
xmm13 = _mm_xor_si128(xmm13, (chaining[5]));
xmm14 = _mm_xor_si128(xmm14, (chaining[6]));
xmm15 = _mm_xor_si128(xmm15, (chaining[7]));
/* store P(CV+M)+CV */
chaining[0] = xmm8;
chaining[1] = xmm9;
chaining[2] = xmm10;
chaining[3] = xmm11;
chaining[4] = xmm12;
chaining[5] = xmm13;
chaining[6] = xmm14;
chaining[7] = xmm15;
/* load message M (Q input) into xmm8-15 */
xmm8 = QTEMP[0];
xmm9 = QTEMP[1];
xmm10 = QTEMP[2];
xmm11 = QTEMP[3];
xmm12 = QTEMP[4];
xmm13 = QTEMP[5];
xmm14 = QTEMP[6];
xmm15 = QTEMP[7];
/* compute permutation Q */
/* result: Q(M) in xmm8...xmm15 */
grsiROUNDS_Q();
/* xor Q output */
/* result: P(CV+M)+CV+Q(M) in xmm8...xmm15 */
xmm8 = _mm_xor_si128(xmm8, (chaining[0]));
xmm9 = _mm_xor_si128(xmm9, (chaining[1]));
xmm10 = _mm_xor_si128(xmm10, (chaining[2]));
xmm11 = _mm_xor_si128(xmm11, (chaining[3]));
xmm12 = _mm_xor_si128(xmm12, (chaining[4]));
xmm13 = _mm_xor_si128(xmm13, (chaining[5]));
xmm14 = _mm_xor_si128(xmm14, (chaining[6]));
xmm15 = _mm_xor_si128(xmm15, (chaining[7]));
/* store CV */
chaining[0] = xmm8;
chaining[1] = xmm9;
chaining[2] = xmm10;
chaining[3] = xmm11;
chaining[4] = xmm12;
chaining[5] = xmm13;
chaining[6] = xmm14;
chaining[7] = xmm15;
return;
}
#endif
/* Groestl-1024 output transformation: CV <- P(CV) + CV, then convert back
 * from the internal row-ordered/vperm layout to column ordering. Only the
 * truncated second half of the state (chaining[4..7]) is written back. */
IFUN void grsiOF1024(u64* h)
#if !defined(DECLARE_IFUN)
;
#else
{
__m128i* const chaining = (__m128i*) h;
/* FIX(review): locals and grsiMixBytes scratch were 'static', making this
 * function non-reentrant and thread-unsafe; all are written before read
 * (TEMP_MUL* inside grsiMixBytes -- TODO confirm), so automatic storage
 * preserves behavior. */
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
__m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
__m128i TEMP_MUL1[8];
__m128i TEMP_MUL2[8];
__m128i TEMP_MUL4;
/* load CV into registers xmm8 - xmm15 */
xmm8 = chaining[0];
xmm9 = chaining[1];
xmm10 = chaining[2];
xmm11 = chaining[3];
xmm12 = chaining[4];
xmm13 = chaining[5];
xmm14 = chaining[6];
xmm15 = chaining[7];
/* compute permutation P */
/* result: P(CV) in xmm8...xmm15 */
grsiROUNDS_P();
/* xor CV to P output (feed-forward) */
/* result: P(CV)+CV in xmm8...xmm15 */
xmm8 = _mm_xor_si128(xmm8, (chaining[0]));
xmm9 = _mm_xor_si128(xmm9, (chaining[1]));
xmm10 = _mm_xor_si128(xmm10, (chaining[2]));
xmm11 = _mm_xor_si128(xmm11, (chaining[3]));
xmm12 = _mm_xor_si128(xmm12, (chaining[4]));
xmm13 = _mm_xor_si128(xmm13, (chaining[5]));
xmm14 = _mm_xor_si128(xmm14, (chaining[6]));
xmm15 = _mm_xor_si128(xmm15, (chaining[7]));
/* transpose CV back from row ordering to column ordering */
/* result: final hash value in xmm0, xmm6, xmm13, xmm15 */
grsiMatrix_Transpose_INV(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm4, xmm0, xmm6, xmm1, xmm2, xmm3, xmm5, xmm7);
grsiVPERM_Transform_State(xmm0, xmm6, xmm13, xmm15, grsiVPERM_OPT, xmm1, xmm2, xmm3, xmm5, xmm7, xmm10, xmm12);
/* we only need to return the truncated half of the state */
chaining[4] = xmm0;
chaining[5] = xmm6;
chaining[6] = xmm13;
chaining[7] = xmm15;
return;
}
#endif

View File

@@ -1,273 +0,0 @@
/* hash.c Aug 2011
*
* Groestl implementation for different versions.
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
*
* This code is placed in the public domain
*/
#include "grsi.h"
#include "grsi-asm.h"
/* void grsiInit(grsiState* ctx) { */
#define GRS_I \
do { \
grsiState *ctx = &sts_grs; \
u8 i = 0; \
\
/* set number of state columns and state size depending on \
variant */ \
ctx->grsicolumns = grsiCOLS; \
ctx->grsistatesize = grsiSIZE; \
ctx->grsiv = LONG; \
\
grsiSET_CONSTANTS(); \
\
memset(ctx->grsichaining, 0, sizeof(u64)*grsiSIZE/8); \
memset(ctx->grsibuffer, 0, sizeof(grsiBitSequence)*grsiSIZE); \
\
if (ctx->grsichaining == NULL || ctx->grsibuffer == NULL) \
return; \
\
/* set initial value */ \
ctx->grsichaining[ctx->grsicolumns-1] = grsiU64BIG((u64)grsiLENGTH); \
\
grsiINIT(ctx->grsichaining); \
\
/* set other variables */ \
ctx->grsibuf_ptr = 0; \
ctx->grsiblock_counter = 0; \
ctx->grsibits_in_last_byte = 0; \
\
} while (0)
/* digest up to len bytes of input (full blocks only) */
/* Digest up to len bytes of input; only whole grsiSIZE-byte blocks are
 * consumed, any partial trailing block must be buffered by the caller. */
void grsiTransform(grsiState *ctx, const u8 *in, unsigned long long len)
{
  /* account up front for every full block about to be processed */
  ctx->grsiblock_counter += len / grsiSIZE;
  while (len >= grsiSIZE) {
    grsiTF1024((u64*)ctx->grsichaining, (u64*)in);
    in  += grsiSIZE;
    len -= grsiSIZE;
  }
  /* EMMS clears the MMX/x87 tag word so later floating-point code is safe */
  asm volatile ("emms");
}
/* given state h, do h <- P(h)+h */
void grsiOutputTransformation(grsiState *ctx) {
/* determine variant */
grsiOF1024((u64*)ctx->grsichaining);
asm volatile ("emms");
}
/* initialise context */
/* Initialise a grsiState: select the 1024-bit variant, zero the state and
 * buffer, seed the chaining value with the output length, and run the
 * SIMD-side initialisation. */
void grsiInit(grsiState* ctx) {
  /* output size (in bits) must be a positive integer less than or
     equal to 512, and divisible by 8 */
  if (grsiLENGTH <= 0 || (grsiLENGTH%8) || grsiLENGTH > 512)
    return;
  /* set number of state columns and state size depending on variant */
  ctx->grsicolumns = grsiCOLS;
  ctx->grsistatesize = grsiSIZE;
  ctx->grsiv = LONG;
  grsiSET_CONSTANTS();
  /* FIX(review): replaced the two element-wise zeroing loops with memset
   * (already used by GRS_I in this file) and removed the always-false
   * NULL comparison -- grsichaining/grsibuffer are in-struct arrays and
   * can never be NULL. */
  memset(ctx->grsichaining, 0, sizeof(ctx->grsichaining));
  memset(ctx->grsibuffer, 0, sizeof(ctx->grsibuffer));
  /* set initial value */
  ctx->grsichaining[ctx->grsicolumns-1] = grsiU64BIG((u64)grsiLENGTH);
  grsiINIT(ctx->grsichaining);
  /* set other variables */
  ctx->grsibuf_ptr = 0;
  ctx->grsiblock_counter = 0;
  ctx->grsibits_in_last_byte = 0;
}
/* update state with databitlen bits of input */
void grsiUpdate(grsiState* ctx,
const grsiBitSequence* input,
grsiDataLength databitlen) {
int index = 0;
int msglen = (int)(databitlen/8);
int rem = (int)(databitlen%8);
/* non-integral number of message bytes can only be supplied in the
last call to this function */
if (ctx->grsibits_in_last_byte) return;
/* if the buffer contains data that has not yet been digested, first
add data to buffer until full */
if (ctx->grsibuf_ptr) {
while (ctx->grsibuf_ptr < ctx->grsistatesize && index < msglen) {
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index++];
}
if (ctx->grsibuf_ptr < ctx->grsistatesize) {
/* buffer still not full, return */
if (rem) {
ctx->grsibits_in_last_byte = rem;
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index];
}
return;
}
/* digest buffer */
ctx->grsibuf_ptr = 0;
printf("error\n");
grsiTransform(ctx, ctx->grsibuffer, ctx->grsistatesize);
}
/* digest bulk of message */
grsiTransform(ctx, input+index, msglen-index);
index += ((msglen-index)/ctx->grsistatesize)*ctx->grsistatesize;
/* store remaining data in buffer */
while (index < msglen) {
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index++];
}
/* if non-integral number of bytes have been supplied, store
remaining bits in last byte, together with information about
number of bits */
if (rem) {
ctx->grsibits_in_last_byte = rem;
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index];
}
return;
}
/* update state with databitlen bits of input */
void grsiUpdateq(grsiState* ctx, const grsiBitSequence* input)
{
grsiDataLength databitlen= 64*8;
int index = 0;
int msglen = (int)(databitlen/8);
int rem = (int)(databitlen%8);
/* non-integral number of message bytes can only be supplied in the
last call to this function */
if (ctx->grsibits_in_last_byte) return;
/* if the buffer contains data that has not yet been digested, first
add data to buffer until full */
if (ctx->grsibuf_ptr) {
while (ctx->grsibuf_ptr < ctx->grsistatesize && index < msglen) {
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index++];
}
if (ctx->grsibuf_ptr < ctx->grsistatesize) {
/* buffer still not full, return */
if (rem) {
ctx->grsibits_in_last_byte = rem;
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index];
}
return;
}
/* digest buffer */
ctx->grsibuf_ptr = 0;
printf("error\n");
grsiTransform(ctx, ctx->grsibuffer, ctx->grsistatesize);
}
/* digest bulk of message */
grsiTransform(ctx, input+index, msglen-index);
index += ((msglen-index)/ctx->grsistatesize)*ctx->grsistatesize;
/* store remaining data in buffer */
while (index < msglen) {
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index++];
}
/* if non-integral number of bytes have been supplied, store
remaining bits in last byte, together with information about
number of bits */
if (rem) {
ctx->grsibits_in_last_byte = rem;
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index];
}
return;
}
#define BILB ctx->grsibits_in_last_byte
/* finalise: process remaining data (including padding), perform
output transformation, and write hash result to 'output' */
/* Finalise: append Groestl padding (0x80, zeros, 64-bit big-endian block
 * count), run the output transformation, copy the truncated digest to
 * 'output', and wipe the context. */
void grsiFinal(grsiState* ctx,
grsiBitSequence* output) {
int i, j = 0, grsibytelen = grsiLENGTH/8;
u8 *s = (grsiBitSequence*)ctx->grsichaining;
/* pad with '1'-bit and first few '0'-bits */
if (BILB) {
ctx->grsibuffer[(int)ctx->grsibuf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
ctx->grsibuffer[(int)ctx->grsibuf_ptr-1] ^= 0x1<<(7-BILB);
BILB = 0;
}
else ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = 0x80;
/* pad with '0'-bits */
if (ctx->grsibuf_ptr > ctx->grsistatesize-grsiLENGTHFIELDLEN) {
/* padding requires two blocks */
while (ctx->grsibuf_ptr < ctx->grsistatesize) {
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = 0;
}
/* digest first padding block */
grsiTransform(ctx, ctx->grsibuffer, ctx->grsistatesize);
ctx->grsibuf_ptr = 0;
}
while (ctx->grsibuf_ptr < ctx->grsistatesize-grsiLENGTHFIELDLEN) {
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = 0;
}
/* length padding */
/* write the block counter big-endian into the last grsiLENGTHFIELDLEN bytes */
ctx->grsiblock_counter++;
ctx->grsibuf_ptr = ctx->grsistatesize;
while (ctx->grsibuf_ptr > ctx->grsistatesize-grsiLENGTHFIELDLEN) {
ctx->grsibuffer[(int)--ctx->grsibuf_ptr] = (u8)ctx->grsiblock_counter;
ctx->grsiblock_counter >>= 8;
}
/* digest final padding block */
grsiTransform(ctx, ctx->grsibuffer, ctx->grsistatesize);
/* perform output transformation */
grsiOutputTransformation(ctx);
/* store hash result in output */
/* the digest is the last grsibytelen bytes of the transformed state */
for (i = ctx->grsistatesize-grsibytelen; i < ctx->grsistatesize; i++,j++) {
output[j] = s[i];
}
/* zeroise relevant variables and deallocate memory */
/* NOTE(review): plain stores used for wiping may be elided by the
   optimizer; use explicit_bzero/memset_s if wiping must be guaranteed */
for (i = 0; i < ctx->grsicolumns; i++) {
ctx->grsichaining[i] = 0;
}
for (i = 0; i < ctx->grsistatesize; i++) {
ctx->grsibuffer[i] = 0;
}
// free(ctx->grsichaining);
// free(ctx->grsibuffer);
return;
}

View File

@@ -1,79 +0,0 @@
/* hash.h Aug 2011
*
* Groestl implementation for different versions.
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
*
* This code is placed in the public domain
*/
#ifndef __grsi_h
#define __grsi_h
#include <stdio.h>
#include <stdlib.h>
#include "brg_endian.h"
#define NEED_UINT_64T
#include "brg_types.h"
#define grsiLENGTH 512
/* some sizes (number of bytes) */
#define grsiROWS 8
#define grsiLENGTHFIELDLEN grsiROWS
#define grsiCOLS512 8
#define grsiCOLS1024 16
#define grsiSIZE512 (grsiROWS*grsiCOLS512)
#define grsiSIZE1024 (grsiROWS*grsiCOLS1024)
#define grsiROUNDS512 10
#define grsiROUNDS1024 14
#if grsiLENGTH<=256
#define grsiCOLS grsiCOLS512
#define grsiSIZE grsiSIZE512
#define grsiROUNDS grsiROUNDS512
#else
#define grsiCOLS grsiCOLS1024
#define grsiSIZE grsiSIZE1024
#define grsiROUNDS grsiROUNDS1024
#endif
#define ROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&li_64(ffffffffffffffff))
#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
#define grsiEXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(7-(n)))))
#define grsiU64BIG(a) (a)
#endif /* IS_BIG_ENDIAN */
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#define grsiEXT_BYTE(var,n) ((u8)((u64)(var) >> (8*n)))
#define grsiU64BIG(a) \
((ROTL64(a, 8) & li_64(000000FF000000FF)) | \
(ROTL64(a,24) & li_64(0000FF000000FF00)) | \
(ROTL64(a,40) & li_64(00FF000000FF0000)) | \
(ROTL64(a,56) & li_64(FF000000FF000000)))
#endif /* IS_LITTLE_ENDIAN */
typedef enum { LONG, SHORT } grsiVar;
/* NIST API begin */
typedef unsigned char grsiBitSequence;
typedef unsigned long long grsiDataLength;
typedef struct {
__attribute__ ((aligned (32))) u64 grsichaining[grsiSIZE/8]; /* actual state */
__attribute__ ((aligned (32))) grsiBitSequence grsibuffer[grsiSIZE]; /* data buffer */
u64 grsiblock_counter; /* message block counter */
int grsibuf_ptr; /* data buffer pointer */
int grsibits_in_last_byte; /* no. of message bits in last byte of
data buffer */
int grsicolumns; /* no. of columns in state */
int grsistatesize; /* total no. of bytes in state */
grsiVar grsiv; /* LONG or SHORT */
} grsiState;
void grsiInit(grsiState*);
void grsiUpdate(grsiState*, const grsiBitSequence*, grsiDataLength);
void grsiFinal(grsiState*, grsiBitSequence*);
/* NIST API end */
#endif /* __hash_h */

File diff suppressed because it is too large Load Diff

View File

@@ -1,247 +0,0 @@
/* hash.c Aug 2011
*
* Groestl implementation for different versions.
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
*
* This code is placed in the public domain
*/
#include "grsn-asm.h"
/* digest up to len bytes of input (full blocks only) */
/* Digest up to len bytes of input; only whole grsnSIZE-byte blocks are
 * consumed, any partial trailing block must be buffered by the caller. */
void grsnTransform(grsnState *ctx, const u8 *in, unsigned long long len)
{
  /* account up front for every full block about to be processed */
  ctx->block_counter += len / grsnSIZE;
  while (len >= grsnSIZE) {
#if grsnLENGTH<=256
    TF512((u64*)ctx->chaining, (u64*)in);
#else
    TF1024((u64*)ctx->chaining, (u64*)in);
#endif
    in  += grsnSIZE;
    len -= grsnSIZE;
  }
  /* EMMS clears the MMX/x87 tag word so later floating-point code is safe */
  asm volatile ("emms");
}
/* given state h, do h <- P(h)+h */
void grsnOutputTransformation(grsnState *ctx) {
/* determine variant */
#if (grsnLENGTH <= 256)
OF512((u64*)ctx->chaining);
#else
OF1024((u64*)ctx->chaining);
#endif
asm volatile ("emms");
}
/* initialise context */
/* Initialise a grsnState: select the variant from grsnLENGTH, zero the
 * state and buffer, seed the chaining value with the output length, and
 * run the SIMD-side initialisation.
 * FIX(review): removed the always-false NULL comparison -- 'chaining' and
 * 'buffer' are in-struct arrays and can never compare equal to NULL. */
void grsnInit(grsnState* ctx) {
  u8 i = 0;
  /* output size (in bits) must be a positive integer less than or
     equal to 512, and divisible by 8 */
  if (grsnLENGTH <= 0 || (grsnLENGTH%8) || grsnLENGTH > 512)
    return;
  /* set number of state columns and state size depending on variant */
  ctx->columns = grsnCOLS;
  ctx->statesize = grsnSIZE;
#if (grsnLENGTH <= 256)
  ctx->v = SHORT;
#else
  ctx->v = LONG;
#endif
  SET_CONSTANTS();
  /* zero chaining value and data buffer */
  for (i=0; i<grsnSIZE/8; i++)
    ctx->chaining[i] = 0;
  for (i=0; i<grsnSIZE; i++)
    ctx->buffer[i] = 0;
  /* set initial value: output length in bits, big-endian, in last column */
  ctx->chaining[ctx->columns-1] = U64BIG((u64)grsnLENGTH);
  INIT(ctx->chaining);
  /* set other variables */
  ctx->buf_ptr = 0;
  ctx->block_counter = 0;
  ctx->bits_in_last_byte = 0;
}
/* update state with databitlen bits of input */
void grsnUpdate(grsnState* ctx,
const BitSequence* input,
DataLength databitlen) {
int index = 0;
int msglen = (int)(databitlen/8);
int rem = (int)(databitlen%8);
/* non-integral number of message bytes can only be supplied in the
last call to this function */
if (ctx->bits_in_last_byte) return;
/* if the buffer contains data that has not yet been digested, first
add data to buffer until full */
if (ctx->buf_ptr) {
while (ctx->buf_ptr < ctx->statesize && index < msglen) {
ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
}
if (ctx->buf_ptr < ctx->statesize) {
/* buffer still not full, return */
if (rem) {
ctx->bits_in_last_byte = rem;
ctx->buffer[(int)ctx->buf_ptr++] = input[index];
}
return;
}
/* digest buffer */
ctx->buf_ptr = 0;
printf("error\n");
grsnTransform(ctx, ctx->buffer, ctx->statesize);
}
/* digest bulk of message */
grsnTransform(ctx, input+index, msglen-index);
index += ((msglen-index)/ctx->statesize)*ctx->statesize;
/* store remaining data in buffer */
while (index < msglen) {
ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
}
/* if non-integral number of bytes have been supplied, store
remaining bits in last byte, together with information about
number of bits */
if (rem) {
ctx->bits_in_last_byte = rem;
ctx->buffer[(int)ctx->buf_ptr++] = input[index];
}
return;
}
/* update state with databitlen bits of input */
/* Absorb exactly 64 bytes of input: the fixed-size variant of grsnUpdate.
 * FIX(review): removed a leftover debug printf("error\n"), and removed the
 * dead partial-bit handling -- the length was hard-wired to 64*8 bits, so
 * rem == (64*8)%8 was always 0 and the 'if (rem)' branches never ran. */
void grsnUpdateq(grsnState* ctx, const BitSequence* input)
{
  int index = 0;
  const int msglen = 64;   /* fixed message length in bytes */

  /* if the buffer contains data that has not yet been digested, first
     add data to buffer until full */
  if (ctx->buf_ptr) {
    while (ctx->buf_ptr < ctx->statesize && index < msglen) {
      ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
    }
    if (ctx->buf_ptr < ctx->statesize) {
      /* buffer still not full, return */
      return;
    }
    /* digest buffer */
    ctx->buf_ptr = 0;
    grsnTransform(ctx, ctx->buffer, ctx->statesize);
  }

  /* digest bulk of message */
  grsnTransform(ctx, input+index, msglen-index);
  index += ((msglen-index)/ctx->statesize)*ctx->statesize;

  /* store remaining data in buffer */
  while (index < msglen) {
    ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
  }
}
#define BILB ctx->bits_in_last_byte
/* finalise: process remaining data (including padding), perform
output transformation, and write hash result to 'output' */
/* Finalise: append Groestl padding (0x80, zeros, 64-bit big-endian block
 * count), run the output transformation, copy the truncated digest to
 * 'output', and wipe the context. */
void grsnFinal(grsnState* ctx,
BitSequence* output) {
int i, j = 0, grsnbytelen = grsnLENGTH/8;
u8 *s = (BitSequence*)ctx->chaining;
/* pad with '1'-bit and first few '0'-bits */
if (BILB) {
ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-BILB);
BILB = 0;
}
else ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
/* pad with '0'-bits */
if (ctx->buf_ptr > ctx->statesize-grsnLENGTHFIELDLEN) {
/* padding requires two blocks */
while (ctx->buf_ptr < ctx->statesize) {
ctx->buffer[(int)ctx->buf_ptr++] = 0;
}
/* digest first padding block */
grsnTransform(ctx, ctx->buffer, ctx->statesize);
ctx->buf_ptr = 0;
}
while (ctx->buf_ptr < ctx->statesize-grsnLENGTHFIELDLEN) {
ctx->buffer[(int)ctx->buf_ptr++] = 0;
}
/* length padding */
/* write the block counter big-endian into the last grsnLENGTHFIELDLEN bytes */
ctx->block_counter++;
ctx->buf_ptr = ctx->statesize;
while (ctx->buf_ptr > ctx->statesize-grsnLENGTHFIELDLEN) {
ctx->buffer[(int)--ctx->buf_ptr] = (u8)ctx->block_counter;
ctx->block_counter >>= 8;
}
/* digest final padding block */
grsnTransform(ctx, ctx->buffer, ctx->statesize);
/* perform output transformation */
grsnOutputTransformation(ctx);
/* store hash result in output */
/* the digest is the last grsnbytelen bytes of the transformed state */
for (i = ctx->statesize-grsnbytelen; i < ctx->statesize; i++,j++) {
output[j] = s[i];
}
/* zeroise relevant variables and deallocate memory */
/* NOTE(review): plain stores used for wiping may be elided by the
   optimizer; use explicit_bzero/memset_s if wiping must be guaranteed */
for (i = 0; i < ctx->columns; i++) {
ctx->chaining[i] = 0;
}
for (i = 0; i < ctx->statesize; i++) {
ctx->buffer[i] = 0;
}
// free(ctx->chaining);
// free(ctx->buffer);
return;
}

View File

@@ -1,80 +0,0 @@
/* hash.h Aug 2011
*
* Groestl implementation for different versions.
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
*
* This code is placed in the public domain
*/
#ifndef __grsn_h
#define __grsn_h
#include <stdio.h>
#include <stdlib.h>
#include "brg_endian.h"
#define NEED_UINT_64T
#include "brg_types.h"
#ifndef grsnLENGTH
#define grsnLENGTH 512
#endif
/* some sizes (number of bytes) */
#define grsnROWS 8
#define grsnLENGTHFIELDLEN grsnROWS
#define grsnCOLS512 8
#define grsnCOLS1024 16
#define grsnSIZE512 (grsnROWS*grsnCOLS512)
#define grsnSIZE1024 (grsnROWS*grsnCOLS1024)
#define grsnROUNDS512 10
#define grsnROUNDS1024 14
#if grsnLENGTH<=256
#define grsnCOLS grsnCOLS512
#define grsnSIZE grsnSIZE512
#define grsnROUNDS grsnROUNDS512
#else
#define grsnCOLS grsnCOLS1024
#define grsnSIZE grsnSIZE1024
#define grsnROUNDS grsnROUNDS1024
#endif
#define ROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&li_64(ffffffffffffffff))
#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(7-(n)))))
#define U64BIG(a) (a)
#endif /* IS_BIG_ENDIAN */
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*n)))
#define U64BIG(a) \
((ROTL64(a, 8) & li_64(000000FF000000FF)) | \
(ROTL64(a,24) & li_64(0000FF000000FF00)) | \
(ROTL64(a,40) & li_64(00FF000000FF0000)) | \
(ROTL64(a,56) & li_64(FF000000FF000000)))
#endif /* IS_LITTLE_ENDIAN */
typedef enum { LONG, SHORT } Var;
/* NIST API begin */
typedef unsigned char BitSequence;
typedef unsigned long long DataLength;
typedef struct {
__attribute__ ((aligned (32))) u64 chaining[grsnSIZE/8]; /* actual state */
__attribute__ ((aligned (32))) BitSequence buffer[grsnSIZE]; /* data buffer */
u64 block_counter; /* message block counter */
int buf_ptr; /* data buffer pointer */
int bits_in_last_byte; /* no. of message bits in last byte of
data buffer */
int columns; /* no. of columns in state */
int statesize; /* total no. of bytes in state */
Var v; /* LONG or SHORT */
} grsnState;
void grsnInit(grsnState*);
void grsnUpdate(grsnState*, const BitSequence*, DataLength);
void grsnFinal(grsnState*, BitSequence*);
#endif /* __hash_h */

File diff suppressed because it is too large Load Diff

View File

@@ -1,10 +0,0 @@
#ifndef GRSOASM_H
#define GRSOASM_H
#include "grso.h"
void grsoP1024ASM (u64 *x) ;
void grsoQ1024ASM (u64 *x) ;
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -1,11 +0,0 @@
#ifndef GRSOASM_H
#define GRSOASM_H
/* really same as the mmx asm.h */
/* made just in case something must be changed */
#include "grso.h"
void grsoP1024ASM (u64 *x) ;
void grsoQ1024ASM (u64 *x) ;
#endif

View File

@@ -1,110 +0,0 @@
/* hash.c January 2011
*
* Groestl-512 implementation with inline assembly containing mmx and
* sse instructions. Optimized for Opteron.
* Authors: Krystian Matusiewicz and Soeren S. Thomsen
*
* This code is placed in the public domain
*/
//#include "grso.h"
//#include "grso-asm.h"
// #include "grsotab.h"
#define DECL_GRS
/* load initial constants */
#define GRS_I \
do { \
int i; \
/* set initial value */ \
for (i = 0; i < grsoCOLS-1; i++) sts_grs.grsstate[i] = 0; \
sts_grs.grsstate[grsoCOLS-1] = grsoU64BIG((u64)(8*grsoDIGESTSIZE)); \
\
/* set other variables */ \
sts_grs.grsbuf_ptr = 0; \
sts_grs.grsblock_counter = 0; \
} while (0); \
/* load hash */
/* GRS_U: absorb exactly 64 bytes from 'hash' into the Groestl-512
   state.  Expects 'sts_grs' (grsoState), 'hashbuf' (at least grsoSIZE
   bytes of scratch) and 'hash' in the enclosing scope.
   NOTE(review): contains a bare 'continue', so this macro only works
   when expanded inside a loop -- the continue abandons the current
   iteration when the internal buffer is still not full after copying
   the input.  Do not expand it at function scope. */
#define GRS_U \
do { \
unsigned char* in = hash; \
unsigned long long index = 0; \
\
/* if the buffer contains data that has not yet been digested, first \
add data to buffer until full */ \
if (sts_grs.grsbuf_ptr) { \
while (sts_grs.grsbuf_ptr < grsoSIZE && index < 64) { \
hashbuf[(int)sts_grs.grsbuf_ptr++] = in[index++]; \
} \
if (sts_grs.grsbuf_ptr < grsoSIZE) continue; \
\
/* digest buffer */ \
sts_grs.grsbuf_ptr = 0; \
grsoTransform(&sts_grs, hashbuf, grsoSIZE); \
} \
\
/* digest bulk of message */ \
grsoTransform(&sts_grs, in+index, 64-index); \
index += ((64-index)/grsoSIZE)*grsoSIZE; \
\
/* store remaining data in buffer */ \
while (index < 64) { \
hashbuf[(int)sts_grs.grsbuf_ptr++] = in[index++]; \
} \
\
} while (0);
/* groestl512 hash loaded */
/* hash = groestl512(loaded) */
/* GRS_C: finalise the Groestl-512 computation.  Appends the 0x80
   padding byte, zero-fills (flushing an extra block if the length
   field does not fit), writes the big-endian block counter as the
   length field, runs the final compression and the output
   transformation, copies the last grsoDIGESTSIZE bytes of the state
   into 'hash', and zeroises state and scratch buffer.
   Expects 'sts_grs' and 'hashbuf' in the enclosing scope; 'hash' is
   the digest destination. */
#define GRS_C \
do { \
char *out = hash; \
int i, j = 0; \
unsigned char *s = (unsigned char*)sts_grs.grsstate; \
\
hashbuf[sts_grs.grsbuf_ptr++] = 0x80; \
\
/* pad with '0'-bits */ \
if (sts_grs.grsbuf_ptr > grsoSIZE-grsoLENGTHFIELDLEN) { \
/* padding requires two blocks */ \
while (sts_grs.grsbuf_ptr < grsoSIZE) { \
hashbuf[sts_grs.grsbuf_ptr++] = 0; \
} \
/* digest first padding block */ \
grsoTransform(&sts_grs, hashbuf, grsoSIZE); \
sts_grs.grsbuf_ptr = 0; \
} \
while (sts_grs.grsbuf_ptr < grsoSIZE-grsoLENGTHFIELDLEN) { \
hashbuf[sts_grs.grsbuf_ptr++] = 0; \
} \
\
/* length padding */ \
sts_grs.grsblock_counter++; \
sts_grs.grsbuf_ptr = grsoSIZE; \
while (sts_grs.grsbuf_ptr > grsoSIZE-grsoLENGTHFIELDLEN) { \
hashbuf[--sts_grs.grsbuf_ptr] = (unsigned char)sts_grs.grsblock_counter; \
sts_grs.grsblock_counter >>= 8; \
} \
\
/* digest final padding block */ \
grsoTransform(&sts_grs, hashbuf, grsoSIZE); \
/* perform output transformation */ \
grsoOutputTransformation(&sts_grs); \
\
/* store hash result in output */ \
for (i = grsoSIZE-grsoDIGESTSIZE; i < grsoSIZE; i++,j++) { \
out[j] = s[i]; \
} \
\
/* zeroise relevant variables and deallocate memory */ \
for (i = 0; i < grsoCOLS; i++) { \
sts_grs.grsstate[i] = 0; \
} \
for (i = 0; i < grsoSIZE; i++) { \
hashbuf[i] = 0; \
} \
} while (0);

View File

@@ -1,57 +0,0 @@
/* hash.c January 2011
*
* Groestl-512 implementation with inline assembly containing mmx and
* sse instructions. Optimized for Opteron.
* Authors: Krystian Matusiewicz and Soeren S. Thomsen
*
* This code is placed in the public domain
*/
#include "algo/groestl/sse2/grso-asm.h"
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grsotab.h"
/* Digest up to 'len' bytes of 'in' (full grsoSIZE-byte blocks only).
 * For each block m: h' = h ^ Q(m) ^ P(h ^ m), where P and Q are the
 * assembly 1024-bit permutations.  Leftover bytes (< grsoSIZE) are
 * ignored; buffering partial blocks is the caller's job. */
void grsoTransform(grsoState *ctx,
		   const unsigned char *in,
		   unsigned long long len) {
  u64 q_buf[grsoCOLS+2] __attribute__ ((aligned (16)));
  u64 p_buf[grsoCOLS+2] __attribute__ ((aligned (16)));
  u64 *chain = (u64*)ctx->grsstate;
  int col;

  /* one increment per full block consumed below */
  ctx->grsblock_counter += len/grsoSIZE;

  while (len >= grsoSIZE) {
    const u64 *msg = (const u64*)in;
    for (col = 0; col < grsoCOLS; col++) {
      q_buf[col] = msg[col];           /* Q input: message block   */
      p_buf[col] = msg[col] ^ chain[col]; /* P input: h xor message */
    }
    grsoQ1024ASM(q_buf);
    grsoP1024ASM(p_buf);
    /* h' == h + Q(m) + P(h+m) */
    for (col = 0; col < grsoCOLS; col++) {
      chain[col] ^= p_buf[col] ^ q_buf[col];
    }
    in += grsoSIZE;
    len -= grsoSIZE;
  }
}
/* Output transformation: h <- P(h) ^ h, applied in place to the
 * chaining value before the digest bytes are extracted. */
void grsoOutputTransformation(grsoState *ctx) {
  u64 perm[grsoCOLS] __attribute__ ((aligned (16)));
  int col;

  /* run P on a scratch copy of the state */
  for (col = 0; col < grsoCOLS; col++) {
    perm[col] = ctx->grsstate[col];
  }
  grsoP1024ASM(perm);

  /* fold the permuted copy back into the state */
  for (col = 0; col < grsoCOLS; col++) {
    ctx->grsstate[col] ^= perm[col];
  }
}

View File

@@ -1,62 +0,0 @@
/* grso.h -- sizes, byte-order helpers, state type and prototypes for
 * the MMX/SSE-optimised Groestl-512 (grso) implementation.
 * Only little-endian platforms are supported (see #error below). */
#ifndef __hash_h
#define __hash_h

#include <stdio.h>
#include <stdlib.h>

#include "brg_endian.h"
#include "brg_types.h"

/* some sizes (number of bytes) */
#define grsoROWS 8
#define grsoLENGTHFIELDLEN grsoROWS
#define grsoCOLS 16
#define grsoSIZE (grsoROWS*grsoCOLS)
#define grsoDIGESTSIZE 64
#define grsoROUNDS 14

/* 64-bit rotate left */
#define grsoROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&((u64)0xffffffffffffffffULL))

#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
#error
#endif /* IS_BIG_ENDIAN */

#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
/* extract byte n (0 = least significant) of a 64-bit value.
   FIX: 'n' is now parenthesised -- the previous (8*n) mis-expanded
   for compound arguments such as EXT_BYTE(v, i+1). */
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(n))))
/* byte-swap a 64-bit value (host little-endian -> big-endian) */
#define grsoU64BIG(a) \
  ((grsoROTL64(a, 8) & ((u64)0x000000ff000000ffULL)) | \
   (grsoROTL64(a,24) & ((u64)0x0000ff000000ff00ULL)) | \
   (grsoROTL64(a,40) & ((u64)0x00ff000000ff0000ULL)) | \
   (grsoROTL64(a,56) & ((u64)0xff000000ff000000ULL)))
#endif /* IS_LITTLE_ENDIAN */

/* Groestl-512 streaming state (buffering is handled by the GRS_*
   macros, which keep the scratch buffer outside this struct). */
typedef struct {
  u64 grsstate[grsoCOLS];  /* actual state */
  u64 grsblock_counter;    /* message block counter */
  int grsbuf_ptr;          /* data buffer pointer */
} grsoState;

//extern int grsoInit(grsoState* ctx);
//extern int grsoUpdate(grsoState* ctx, const unsigned char* in,
//		unsigned long long len);
//extern int grsoUpdateq(grsoState* ctx, const unsigned char* in);
//extern int grsoFinal(grsoState* ctx,
//		unsigned char* out);
//
//extern int grsohash(unsigned char *out,
//		const unsigned char *in,
//		unsigned long long len);

/* digest up to len bytes of input (full blocks only) */
void grsoTransform( grsoState *ctx, const unsigned char *in,
                    unsigned long long len );
/* given state h, do h <- P(h)+h */
void grsoOutputTransformation( grsoState *ctx );

int grso_init ( grsoState* sts_grs );
int grso_update ( grsoState* sts_grs, char* hashbuf, char* hash );
int grso_close ( grsoState *sts_grs, char* hashbuf, char* hash );

#endif /* __hash_h */

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -1,45 +0,0 @@
/*
 * file : hash_api.h
 * version : 1.0.208
 * date : 14.12.2010
 *
 * Grostl multi-stream bitsliced implementation Hash API
 *
 * Cagdas Calik
 * ccalik@metu.edu.tr
 * Institute of Applied Mathematics, Middle East Technical University, Turkey.
 *
 */
#ifndef GRSS_API_H
#define GRSS_API_H
#include "sha3_common.h"
#include <tmmintrin.h>
/* Multi-stream bitsliced Grostl state: four independent hash states
   (state1..state4) plus per-context copies of the P/Q round constants
   and a shuffle table, all held in SSE (__m128i) registers/slots. */
typedef struct
{
__m128i state1[8];
__m128i state2[8];
__m128i state3[8];
__m128i state4[8];
__m128i _Pconst[14][8];
__m128i _Qconst[14][8];
__m128i _shiftconst[8];
unsigned int uHashLength; /* requested digest length (bits) */
unsigned int uBlockLength; /* message block length (bytes) */
BitSequence buffer[128]; /* buffered partial input block */
} grssState;
/* init / absorb / finalise */
void grssInit(grssState *state, int grssbitlen);
void grssUpdate(grssState *state, const BitSequence *data, DataLength databitlen);
void grssFinal(grssState *state, BitSequence *grssval);
#endif // GRSS_API_H

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -1,202 +0,0 @@
/* hash.c Aug 2011
*
* Groestl implementation for different versions.
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
*
* This code is placed in the public domain
*/
#include "grsv.h"
#include "grsv-asm.h"
/* digest up to len bytes of input (full blocks only) */
/* grsvTransform: compress full grsvSIZE-byte blocks from 'in' into
   ctx->grsvchaining and advance the block counter.  Any partial
   trailing block is left for the caller (grsvUpdate buffers it). */
void grsvTransform(grsvState *ctx,
const u8 *in,
unsigned long long len) {
/* increment block counter */
ctx->grsvblock_counter += len/grsvSIZE;
/* digest message, one block at a time */
/* NOTE: the loop body is the single preprocessor-selected call
   below -- this for-loop has no braces */
for (; len >= grsvSIZE; len -= grsvSIZE, in += grsvSIZE)
#if grsvLENGTH<=256
grsvTF512((u64*)ctx->grsvchaining, (u64*)in);
#else
grsvTF1024((u64*)ctx->grsvchaining, (u64*)in);
#endif
/* clear MMX/x87 state -- presumably the TF512/TF1024 asm routines
   use MMX registers; confirm against the grsv asm sources */
asm volatile ("emms");
}
/* given state h, do h <- P(h)+h */
/* grsvOutputTransformation: final output transform, applied in place
   to ctx->grsvchaining before the digest bytes are extracted. */
void grsvOutputTransformation(grsvState *ctx) {
/* determine variant */
#if (grsvLENGTH <= 256)
grsvOF512((u64*)ctx->grsvchaining);
#else
grsvOF1024((u64*)ctx->grsvchaining);
#endif
/* clear MMX/x87 state after the asm routine -- see grsvTransform */
asm volatile ("emms");
}
/* initialise context: select the variant, clear the chaining value
 * and buffer, plant the initial value (digest length in bits,
 * big-endian, in the last column) and reset all counters.
 * Must be called before grsvUpdate/grsvFinal. */
void grsvInit(grsvState* ctx) {
  int i;

  /* output size (in bits) must be a positive integer less than or
     equal to 512, and divisible by 8 */
  if (grsvLENGTH <= 0 || (grsvLENGTH%8) || grsvLENGTH > 512)
    return;

  /* set number of state columns and state size depending on
     variant */
  ctx->grsvcolumns = grsvCOLS;
  ctx->grsvstatesize = grsvSIZE;
#if (grsvLENGTH <= 256)
  ctx->grsvv = SHORT;
#else
  ctx->grsvv = LONG;
#endif
  SET_CONSTANTS();

  for (i = 0; i < grsvSIZE/8; i++)
    ctx->grsvchaining[i] = 0;
  for (i = 0; i < grsvSIZE; i++)
    ctx->grsvbuffer[i] = 0;

  /* FIX: the original code tested ctx->grsvchaining / ctx->grsvbuffer
     against NULL here; both are arrays embedded in the struct, so the
     condition was always false (dead code) and has been removed. */

  /* set initial value */
  ctx->grsvchaining[ctx->grsvcolumns-1] = U64BIG((u64)grsvLENGTH);
  grsvINIT(ctx->grsvchaining);

  /* set other variables */
  ctx->grsvbuf_ptr = 0;
  ctx->grsvblock_counter = 0;
  ctx->grsvbits_in_last_byte = 0;
}
/* update state with databitlen bits of input */
/* Absorb input into the hash state.  Whole blocks are compressed via
 * grsvTransform; a partial block is buffered in ctx->grsvbuffer.  A
 * non-byte-aligned tail (databitlen % 8 != 0) may only be supplied in
 * the final call -- once grsvbits_in_last_byte is set, further calls
 * are ignored. */
void grsvUpdate(grsvState* ctx,
                const grsvBitSequence* input,
                grsvDataLength databitlen) {
  int index = 0;
  int msglen = (int)(databitlen/8);
  int rem = (int)(databitlen%8);

  /* non-integral number of message bytes can only be supplied in the
     last call to this function */
  if (ctx->grsvbits_in_last_byte) return;

  /* if the buffer contains data that has not yet been digested, first
     add data to buffer until full */
  if (ctx->grsvbuf_ptr) {
    while (ctx->grsvbuf_ptr < ctx->grsvstatesize && index < msglen) {
      ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = input[index++];
    }
    if (ctx->grsvbuf_ptr < ctx->grsvstatesize) {
      /* buffer still not full, return */
      if (rem) {
        ctx->grsvbits_in_last_byte = rem;
        ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = input[index];
      }
      return;
    }
    /* digest buffer.
       FIX: removed a stray debug printf("error\n") that fired every
       time a previously-buffered block was flushed, polluting stdout
       on this hot path. */
    ctx->grsvbuf_ptr = 0;
    grsvTransform(ctx, ctx->grsvbuffer, ctx->grsvstatesize);
  }

  /* digest bulk of message */
  grsvTransform(ctx, input+index, msglen-index);
  index += ((msglen-index)/ctx->grsvstatesize)*ctx->grsvstatesize;

  /* store remaining data in buffer */
  while (index < msglen) {
    ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = input[index++];
  }

  /* if non-integral number of bytes have been supplied, store
     remaining bits in last byte, together with information about
     number of bits */
  if (rem) {
    ctx->grsvbits_in_last_byte = rem;
    ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = input[index];
  }
}
/* shorthand for the bits-in-last-byte counter kept in the context */
#define BILB ctx->grsvbits_in_last_byte
/* finalise: process remaining data (including padding), perform
output transformation, and write hash result to 'output' */
/* Padding layout: a single '1' bit immediately after the message,
   '0' bits up to the length field, then the big-endian block counter
   in the last grsvLENGTHFIELDLEN bytes.  Two compressions happen when
   the length field does not fit in the current block.  The state and
   buffer are zeroised before returning. */
void grsvFinal(grsvState* ctx,
grsvBitSequence* output) {
int i, j = 0, grsvbytelen = grsvLENGTH/8;
u8 *s = (grsvBitSequence*)ctx->grsvchaining;
/* pad with '1'-bit and first few '0'-bits */
if (BILB) {
/* mask off unused low bits of the partial byte, then set the '1'
   padding bit just below the last message bit */
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr-1] ^= 0x1<<(7-BILB);
BILB = 0;
}
else ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = 0x80;
/* pad with '0'-bits */
if (ctx->grsvbuf_ptr > ctx->grsvstatesize-grsvLENGTHFIELDLEN) {
/* padding requires two blocks */
while (ctx->grsvbuf_ptr < ctx->grsvstatesize) {
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = 0;
}
/* digest first padding block */
grsvTransform(ctx, ctx->grsvbuffer, ctx->grsvstatesize);
ctx->grsvbuf_ptr = 0;
}
while (ctx->grsvbuf_ptr < ctx->grsvstatesize-grsvLENGTHFIELDLEN) {
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = 0;
}
/* length padding: big-endian block counter, written backwards from
   the end of the block */
ctx->grsvblock_counter++;
ctx->grsvbuf_ptr = ctx->grsvstatesize;
while (ctx->grsvbuf_ptr > ctx->grsvstatesize-grsvLENGTHFIELDLEN) {
ctx->grsvbuffer[(int)--ctx->grsvbuf_ptr] = (u8)ctx->grsvblock_counter;
ctx->grsvblock_counter >>= 8;
}
/* digest final padding block */
grsvTransform(ctx, ctx->grsvbuffer, ctx->grsvstatesize);
/* perform output transformation */
grsvOutputTransformation(ctx);
/* store hash result in output: the digest is the trailing
   grsvbytelen bytes of the state */
for (i = ctx->grsvstatesize-grsvbytelen; i < ctx->grsvstatesize; i++,j++) {
output[j] = s[i];
}
/* zeroise relevant variables and deallocate memory */
for (i = 0; i < ctx->grsvcolumns; i++) {
ctx->grsvchaining[i] = 0;
}
for (i = 0; i < ctx->grsvstatesize; i++) {
ctx->grsvbuffer[i] = 0;
}
// free(ctx->grsvchaining);
// free(ctx->buffer);
return;
}

View File

@@ -1,77 +0,0 @@
/* hash.h Aug 2011
 *
 * Groestl implementation for different versions.
 * Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
 *
 * This code is placed in the public domain
 */
#ifndef __grsv_h
#define __grsv_h

#include <stdio.h>
#include <stdlib.h>

#include "brg_endian.h"
#define NEED_UINT_64T
#include "brg_types.h"

/* digest length in bits; 512 selects the 1024-bit-state variant via
   the #if below */
#define grsvLENGTH 512

/* some sizes (number of bytes) */
#define grsvROWS 8
#define grsvLENGTHFIELDLEN grsvROWS
#define grsvCOLS512 8
#define grsvCOLS1024 16
#define grsvSIZE512 (grsvROWS*grsvCOLS512)
#define grsvSIZE1024 (grsvROWS*grsvCOLS1024)
#define grsvROUNDS512 10
#define grsvROUNDS1024 14

#if grsvLENGTH<=256
#define grsvCOLS grsvCOLS512
#define grsvSIZE grsvSIZE512
#define grsvROUNDS grsvROUNDS512
#else
#define grsvCOLS grsvCOLS1024
#define grsvSIZE grsvSIZE1024
#define grsvROUNDS grsvROUNDS1024
#endif

/* 64-bit rotate left */
#define ROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&li_64(ffffffffffffffff))

#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(7-(n)))))
#define U64BIG(a) (a)
#endif /* IS_BIG_ENDIAN */

#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
/* extract byte n (0 = least significant) of a 64-bit value.
   FIX: 'n' is now parenthesised -- the previous (8*n) mis-expanded
   for compound arguments such as EXT_BYTE(v, i+1), matching the
   already-parenthesised big-endian branch above. */
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(n))))
/* byte-swap a 64-bit value (host little-endian -> big-endian) */
#define U64BIG(a) \
  ((ROTL64(a, 8) & li_64(000000FF000000FF)) | \
   (ROTL64(a,24) & li_64(0000FF000000FF00)) | \
   (ROTL64(a,40) & li_64(00FF000000FF0000)) | \
   (ROTL64(a,56) & li_64(FF000000FF000000)))
#endif /* IS_LITTLE_ENDIAN */

typedef enum { LONG, SHORT } grsvVar;
typedef unsigned char grsvBitSequence;
typedef unsigned long long grsvDataLength;

/* Streaming hash state; chaining value and buffer are 32-byte
   aligned, presumably for the asm back end -- confirm before
   changing. */
typedef struct {
  __attribute__ ((aligned (32))) u64 grsvchaining[grsvSIZE/8]; /* actual state */
  __attribute__ ((aligned (32))) grsvBitSequence grsvbuffer[grsvSIZE]; /* data buffer */
  u64 grsvblock_counter;     /* message block counter */
  int grsvbuf_ptr;           /* data buffer pointer */
  int grsvbits_in_last_byte; /* no. of message bits in last byte of
                                data buffer */
  int grsvcolumns;           /* no. of columns in state */
  int grsvstatesize;         /* total no. of bytes in state */
  grsvVar grsvv;             /* LONG or SHORT */
} grsvState;

/* init / absorb / finalise */
void grsvInit(grsvState*);
void grsvUpdate(grsvState*, const grsvBitSequence*, grsvDataLength);
void grsvFinal(grsvState*, grsvBitSequence*);

#endif /* __grsv_h */

View File

@@ -23,10 +23,7 @@
#include "algo/sha2/sph-sha2.h"
#include "algo/haval/sph-haval.h"
#ifdef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#ifndef NO_AES_NI
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h"
#endif
@@ -34,38 +31,31 @@
#include "algo/luffa/sse2/luffa_for_sse2.h"
#include "algo/cubehash/sse2/cubehash_sse2.h"
#include "algo/simd/sse2/nist.h"
//#include "algo/blake/sse2/blake.c"
//#include "algo/keccak/sse2/keccak.c"
//#include "algo/bmw/sse2/bmw.c"
//#include "algo/skein/sse2/skein.c"
#include "algo/jh/sse2/jh_sse2_opt64.h"
typedef struct {
sph_blake512_context blake1, blake2;
sph_bmw512_context bmw1, bmw2, bmw3;
sph_skein512_context skein1, skein2;
sph_jh512_context jh1, jh2;
sph_keccak512_context keccak1, keccak2;
// sph_luffa512_context luffa1, luffa2;
hashState_luffa luffa1, luffa2;
// sph_cubehash512_context cube1, cube2;
cubehashParam cube;
sph_shavite512_context shavite1, shavite2;
// sph_simd512_context simd1, simd2;
hashState_sd simd1, simd2;
sph_hamsi512_context hamsi1;
sph_fugue512_context fugue1, fugue2;
sph_shabal512_context shabal1;
sph_whirlpool_context whirlpool1, whirlpool2, whirlpool3, whirlpool4;
sph_sha512_context sha1, sha2;
sph_haval256_5_context haval1, haval2;
sph_blake512_context blake1, blake2;
sph_bmw512_context bmw1, bmw2, bmw3;
sph_skein512_context skein1, skein2;
sph_jh512_context jh1, jh2;
sph_keccak512_context keccak1, keccak2;
hashState_luffa luffa1, luffa2;
cubehashParam cube;
sph_shavite512_context shavite1, shavite2;
hashState_sd simd1, simd2;
sph_hamsi512_context hamsi1;
sph_fugue512_context fugue1, fugue2;
sph_shabal512_context shabal1;
sph_whirlpool_context whirlpool1, whirlpool2, whirlpool3, whirlpool4;
sph_sha512_context sha1, sha2;
sph_haval256_5_context haval1, haval2;
#ifdef NO_AES_NI
sph_groestl512_context groestl1, groestl2;
sph_echo512_context echo1, echo2;
sph_groestl512_context groestl1, groestl2;
sph_echo512_context echo1, echo2;
#else
hashState_echo echo1, echo2;
hashState_groestl groestl1, groestl2;
hashState_echo echo1, echo2;
hashState_groestl groestl1, groestl2;
#endif
} hmq1725_ctx_holder;
@@ -90,19 +80,14 @@ void init_hmq1725_ctx()
sph_keccak512_init(&hmq1725_ctx.keccak1);
sph_keccak512_init(&hmq1725_ctx.keccak2);
// sph_luffa512_init(&hmq1725_ctx.luffa1);
// sph_luffa512_init(&hmq1725_ctx.luffa2);
init_luffa( &hmq1725_ctx.luffa1, 512 );
init_luffa( &hmq1725_ctx.luffa2, 512 );
// sph_cubehash512_init(&hmq1725_ctx.cubehash1);
cubehashInit( &hmq1725_ctx.cube, 512, 16, 32 );
sph_shavite512_init(&hmq1725_ctx.shavite1);
sph_shavite512_init(&hmq1725_ctx.shavite2);
// sph_simd512_init(&hmq1725_ctx.simd1);
// sph_simd512_init(&hmq1725_ctx.simd2);
init_sd( &hmq1725_ctx.simd1, 512 );
init_sd( &hmq1725_ctx.simd2, 512 );
@@ -135,46 +120,18 @@ void init_hmq1725_ctx()
init_groestl( &hmq1725_ctx.groestl1 );
init_groestl( &hmq1725_ctx.groestl2 );
#endif
}
extern void hmq1725hash(void *state, const void *input)
{
hmq1725_ctx_holder ctx;
memcpy(&ctx, &hmq1725_ctx, sizeof(hmq1725_ctx));
size_t hashptr;
// DATA_ALIGNXY(sph_u64 hashctA,8);
// DATA_ALIGNXY(sph_u64 hashctB,8);
// DATA_ALIGNXY(unsigned char hash[128],16);
unsigned char hashbuf[128];
sph_u64 hashctA;
sph_u64 hashctB;
const uint32_t mask = 24;
uint32_t hashA[25], hashB[25];
hmq1725_ctx_holder ctx;
//these uint512 in the c++ source of the client are backed by an array of uint32
uint32_t hashA[25], hashB[25];
// unsigned char hash[128]; // uint32_t hashA[16], hashB[16];
// #define hashA hash
// #define hashB (hash+64)
memcpy(&ctx, &hmq1725_ctx, sizeof(hmq1725_ctx));
sph_bmw512 (&ctx.bmw1, input, 80); //0
sph_bmw512_close(&ctx.bmw1, hashA); //1
/*
DECL_BMW;
BMW_I;
BMW_U;
#define M(x) sph_dec64le_aligned(data + 8 * (x))
#define H(x) (h[x])
#define dH(x) (dh[x])
BMW_C;
#undef M
#undef H
#undef dH
*/
sph_whirlpool (&ctx.whirlpool1, hashA, 64); //0
sph_whirlpool_close(&ctx.whirlpool1, hashB); //1
@@ -182,8 +139,8 @@ extern void hmq1725hash(void *state, const void *input)
if ( hashB[0] & mask ) //1
{
#ifdef NO_AES_NI
sph_groestl512 (&ctx.groestl1, hashB, 64); //1
sph_groestl512_close(&ctx.groestl1, hashA); //2
sph_groestl512 (&ctx.groestl1, hashB, 64); //1
sph_groestl512_close(&ctx.groestl1, hashA); //2
#else
update_groestl( &ctx.groestl1, (char*)hashB, 512 );
final_groestl( &ctx.groestl1, (char*)hashA );
@@ -191,8 +148,8 @@ extern void hmq1725hash(void *state, const void *input)
}
else
{
sph_skein512 (&ctx.skein1, hashB, 64); //1
sph_skein512_close(&ctx.skein1, hashA); //2
sph_skein512 (&ctx.skein1, hashB, 64); //1
sph_skein512_close(&ctx.skein1, hashA); //2
}
sph_jh512 (&ctx.jh1, hashA, 64); //3
@@ -212,13 +169,9 @@ extern void hmq1725hash(void *state, const void *input)
sph_bmw512_close(&ctx.bmw2, hashB); //5
}
// sph_luffa512 (&ctx.luffa1, hashB, 64); //5
// sph_luffa512_close(&ctx.luffa1, hashA); //6
update_luffa( &ctx.luffa1, (BitSequence*)hashB, 512 );
final_luffa( &ctx.luffa1, (BitSequence*)hashA );
// sph_cubehash512 (&ctx.cubehash1, hashA, 64); //6
// sph_cubehash512_close(&ctx.cubehash1, hashB); //7
cubehashUpdate( &ctx.cube, (BitSequence *)hashA, 64 );
cubehashDigest( &ctx.cube, (BitSequence *)hashB );
@@ -233,14 +186,11 @@ extern void hmq1725hash(void *state, const void *input)
sph_jh512_close(&ctx.jh2, hashA); //8
}
sph_shavite512 (&ctx.shavite1, hashA, 64); //3
sph_shavite512_close(&ctx.shavite1, hashB); //4
// sph_simd512 (&ctx.simd1, hashB, 64); //2
// sph_simd512_close(&ctx.simd1, hashA); //3
update_sd( &ctx.simd1, (BitSequence *)hashB, 512 );
final_sd( &ctx.simd1, (BitSequence *)hashA );
update_sd( &ctx.simd1, (BitSequence *)hashB, 512 );
final_sd( &ctx.simd1, (BitSequence *)hashA );
if ( hashA[0] & mask ) //4
{
@@ -258,8 +208,8 @@ extern void hmq1725hash(void *state, const void *input)
sph_echo512 (&ctx.echo1, hashB, 64); //5
sph_echo512_close(&ctx.echo1, hashA); //6
#else
update_echo ( &ctx.echo1, (BitSequence *)hashB, 512 );
final_echo( &ctx.echo1, (BitSequence *)hashA );
update_echo ( &ctx.echo1, (BitSequence *)hashB, 512 );
final_echo( &ctx.echo1, (BitSequence *)hashA );
#endif
sph_blake512 (&ctx.blake2, hashA, 64); //6
@@ -272,8 +222,6 @@ extern void hmq1725hash(void *state, const void *input)
}
else
{
// sph_luffa512 (&ctx.luffa2, hashB, 64); //7
// sph_luffa512_close(&ctx.luffa2, hashA); //8
update_luffa( &ctx.luffa2, (BitSequence *)hashB, 512 );
final_luffa( &ctx.luffa2, (BitSequence *)hashA );
}
@@ -287,8 +235,8 @@ extern void hmq1725hash(void *state, const void *input)
if ( hashA[0] & mask ) //4
{
#ifdef NO_AES_NI
sph_echo512 (&ctx.echo2, hashA, 64); //
sph_echo512_close(&ctx.echo2, hashB); //5
sph_echo512 (&ctx.echo2, hashA, 64); //
sph_echo512_close(&ctx.echo2, hashB); //5
#else
update_echo ( &ctx.echo2, (BitSequence *)hashA, 512 );
final_echo( &ctx.echo2, (BitSequence *)hashB );
@@ -296,8 +244,6 @@ extern void hmq1725hash(void *state, const void *input)
}
else
{
// sph_simd512 (&ctx.simd2, hashA, 64); //4
// sph_simd512_close(&ctx.simd2, hashB); //5
update_sd( &ctx.simd2, (BitSequence *)hashA, 512 );
final_sd( &ctx.simd2, (BitSequence *)hashB );
}
@@ -323,8 +269,8 @@ extern void hmq1725hash(void *state, const void *input)
sph_groestl512 (&ctx.groestl2, hashA, 64); //3
sph_groestl512_close(&ctx.groestl2, hashB); //4
#else
update_groestl( &ctx.groestl2, (char*)hashA, 512 );
final_groestl( &ctx.groestl2, (char*)hashB );
update_groestl( &ctx.groestl2, (char*)hashA, 512 );
final_groestl( &ctx.groestl2, (char*)hashB );
#endif
sph_sha512 (&ctx.sha2, hashB, 64); //2

View File

@@ -7,6 +7,7 @@
#include <stdio.h>
#include "algo/blake/sph_blake.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/skein/sph_skein.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
@@ -16,15 +17,14 @@
#include "algo/skein/sse2/skein.c"
#include "algo/jh/sse2/jh_sse2_opt64.h"
#ifdef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#ifndef NO_AES_NI
#include "algo/groestl/aes_ni/hash-groestl.h"
#endif
typedef struct {
#ifndef NO_AES_NI
#ifdef NO_AES_NI
sph_groestl512_context groestl;
#else
hashState_groestl groestl;
#endif
} nist5_ctx_holder;
@@ -33,16 +33,15 @@ nist5_ctx_holder nist5_ctx;
void init_nist5_ctx()
{
#ifndef NO_AES_NI
#ifdef NO_AES_NI
sph_groestl512_init( &nist5_ctx.groestl );
#else
init_groestl( &nist5_ctx.groestl );
#endif
}
void nist5hash(void *output, const void *input)
{
#ifdef NO_AES_NI
grsoState sts_grs;
#endif
size_t hashptr;
unsigned char hashbuf[128];
sph_u64 hashctA;
@@ -54,16 +53,14 @@ void nist5hash(void *output, const void *input)
nist5_ctx_holder ctx;
memcpy( &ctx, &nist5_ctx, sizeof(nist5_ctx) );
DECL_BLK;
BLK_I;
BLK_W;
BLK_C;
#ifdef NO_AES_NI
GRS_I;
GRS_U;
GRS_C;
sph_groestl512 (&ctx.groestl, hash, 64);
sph_groestl512_close(&ctx.groestl, hash);
#else
update_groestl( &ctx.groestl, (char*)hash,512);
final_groestl( &ctx.groestl, (char*)hash);

View File

@@ -19,10 +19,7 @@
#include "algo/skein/sse2/skein.c"
#include "algo/jh/sse2/jh_sse2_opt64.h"
#ifdef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#ifndef NO_AES_NI
#include "algo/groestl/aes_ni/hash-groestl.h"
#endif
@@ -36,37 +33,36 @@
#define DATA_ALIGNXY(x,y) __declspec(align(y)) x
#endif
#ifndef NO_AES_NI
hashState_groestl quark_groestl_ctx;
#ifdef NO_AES_NI
sph_groestl512_context quark_ctx;
#else
hashState_groestl quark_ctx;
#endif
void init_quark_ctx()
{
#ifndef NO_AES_NI
init_groestl( &quark_groestl_ctx );
#ifdef NO_AES_NI
sph_groestl512_init( &quark_ctx );
#else
init_groestl( &quark_ctx );
#endif
}
inline static void quarkhash(void *state, const void *input)
{
#ifdef NO_AES_NI
grsoState sts_grs;
#else
hashState_groestl ctx;
memcpy(&ctx, &quark_groestl_ctx, sizeof(quark_groestl_ctx));
#endif
/* shared temp space */
/* hash is really just 64bytes but it used to hold both hash and final round constants passed 64 */
unsigned char hashbuf[128];
size_t hashptr;
sph_u64 hashctA;
sph_u64 hashctB;
int i;
unsigned char hash[128];
#ifdef NO_AES_NI
sph_groestl512_context ctx;
#else
hashState_groestl ctx;
#endif
memcpy( &ctx, &quark_ctx, sizeof(ctx) );
// Blake
DECL_BLK;
@@ -117,13 +113,13 @@ inline static void quarkhash(void *state, const void *input)
{
#ifdef NO_AES_NI
GRS_I;
GRS_U;
GRS_C;
sph_groestl512_init( &ctx );
sph_groestl512 ( &ctx, hash, 64 );
sph_groestl512_close( &ctx, hash );
#else
reinit_groestl( &ctx );
update_groestl(&ctx, (char*)hash,512);
final_groestl(&ctx, (char*)hash);
reinit_groestl( &ctx );
update_groestl( &ctx, (char*)hash, 512 );
final_groestl( &ctx, (char*)hash );
#endif
} while(0); continue;

View File

@@ -371,7 +371,6 @@ extern "C"{
#define DECL_SKN \
sph_u64 sknh0, sknh1, sknh2, sknh3, sknh4, sknh5, sknh6, sknh7; \
unsigned char sknbuf[64]; \
#define sknREAD_STATE_BIG(sc) do { \
sknh0 = (sc)->sknh0; \
@@ -424,7 +423,6 @@ do { \
do { \
unsigned char *buf; \
size_t ptr; \
unsigned first; \
size_t len = 64; \
const void *data = hash; \
buf = hashbuf; \
@@ -441,7 +439,6 @@ do { \
unsigned char *buf; \
size_t ptr; \
unsigned et; \
int i; \
\
buf = hashbuf; \
ptr = hashptr; \

View File

@@ -18,10 +18,7 @@
#include "algo/simd/sph_simd.h"
#include "algo/echo/sph_echo.h"
#ifdef NO_AES_NI
// #include "algo/echo/sph_echo.h"
// #include "algo/groestl/sph_groestl.h"
#else
#ifndef NO_AES_NI
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h"
#endif

View File

@@ -17,10 +17,7 @@
#include "algo/simd/sph_simd.h"
#include "algo/echo/sph_echo.h"
#ifdef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#ifndef NO_AES_NI
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h"
#endif
@@ -40,7 +37,7 @@ typedef struct {
hashState_sd simd;
sph_shavite512_context shavite;
#ifdef NO_AES_NI
// sph_groestl512_context groestl;
sph_groestl512_context groestl;
sph_echo512_context echo;
#else
hashState_echo echo;
@@ -57,7 +54,7 @@ void init_x11_ctx()
sph_shavite512_init( &x11_ctx.shavite );
init_sd( &x11_ctx.simd, 512 );
#ifdef NO_AES_NI
// sph_groestl512_init( &x11_ctx.groestl );
sph_groestl512_init( &x11_ctx.groestl );
sph_echo512_init( &x11_ctx.echo );
#else
init_echo( &x11_ctx.echo, 512 );
@@ -92,13 +89,8 @@ static void x11_hash( void *state, const void *input )
#undef dH
#ifdef NO_AES_NI
grsoState sts_grs;
GRS_I;
GRS_U;
GRS_C;
// sph_groestl512 (&ctx.groestl, hash, 64);
// sph_groestl512_close(&ctx.groestl, hash);
sph_groestl512 (&ctx.groestl, hash, 64);
sph_groestl512_close(&ctx.groestl, hash);
#else
update_groestl( &ctx.groestl, (char*)hash, 512 );
final_groestl( &ctx.groestl, (char*)hash );

View File

@@ -18,10 +18,7 @@
#include "algo/simd/sph_simd.h"
#include "algo/echo/sph_echo.h"
#ifdef NO_AES_NI
// #include "algo/groestl/sse2/grso.h"
// #include "algo/groestl/sse2/grso-macro.c"
#else
#ifndef NO_AES_NI
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h"
#endif

View File

@@ -6,6 +6,7 @@
#include <string.h>
#include <stdio.h>
#include "algo/groestl/sph_groestl.h"
#include "algo/gost/sph_gost.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/echo/sph_echo.h"
@@ -19,10 +20,7 @@
#include "algo/skein/sse2/skein.c"
#include "algo/jh/sse2/jh_sse2_opt64.h"
#ifdef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#ifndef NO_AES_NI
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h"
#endif
@@ -34,6 +32,7 @@ typedef struct {
cubehashParam cube;
hashState_sd simd;
#ifdef NO_AES_NI
sph_groestl512_context groestl;
sph_echo512_context echo;
#else
hashState_echo echo;
@@ -51,6 +50,7 @@ void init_sib_ctx()
cubehashInit( &sib_ctx.cube, 512, 16, 32 );
init_sd( &sib_ctx.simd, 512 );
#ifdef NO_AES_NI
sph_groestl512_init( &sib_ctx.groestl );
sph_echo512_init( &sib_ctx.echo );
#else
init_echo( &sib_ctx.echo, 512 );
@@ -59,17 +59,12 @@ void init_sib_ctx()
}
void sibhash(void *output, const void *input)
{
unsigned char hash[128]; // uint32_t hashA[16], hashB[16];
#define hashA hash
#define hashB hash+64
#ifdef NO_AES_NI
grsoState sts_grs;
#endif
size_t hashptr;
unsigned char hashbuf[128];
sph_u64 hashctA;
@@ -95,12 +90,11 @@ void sibhash(void *output, const void *input)
#undef dH
#ifdef NO_AES_NI
GRS_I;
GRS_U;
GRS_C;
sph_groestl512 (&ctx.groestl, hash, 64);
sph_groestl512_close(&ctx.groestl, hash);
#else
update_groestl( &ctx.groestl, (char*)hash,512);
final_groestl( &ctx.groestl, (char*)hash);
update_groestl( &ctx.groestl, (char*)hash,512);
final_groestl( &ctx.groestl, (char*)hash);
#endif
DECL_SKN;

View File

@@ -29,10 +29,7 @@
#include "algo/skein/sse2/skein.c"
#include "algo/jh/sse2/jh_sse2_opt64.h"
#ifdef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#ifndef NO_AES_NI
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h"
#endif
@@ -79,9 +76,6 @@ static void x13hash(void *output, const void *input)
x13_ctx_holder ctx;
memcpy( &ctx, &x13_ctx, sizeof(x13_ctx) );
#ifdef NO_AES_NI
grsoState sts_grs;
#endif
// X11 algos
@@ -116,12 +110,8 @@ static void x13hash(void *output, const void *input)
//---groetl----
#ifdef NO_AES_NI
// use GRS if possible
GRS_I;
GRS_U;
GRS_C;
// sph_groestl512 (&ctx.groestl, hash, 64);
// sph_groestl512_close(&ctx.groestl, hash);
sph_groestl512 (&ctx.groestl, hash, 64);
sph_groestl512_close(&ctx.groestl, hash);
#else
update_groestl( &ctx.groestl, (char*)hash,512);
final_groestl( &ctx.groestl, (char*)hash);

View File

@@ -31,10 +31,7 @@
#include "algo/skein/sse2/skein.c"
#include "algo/jh/sse2/jh_sse2_opt64.h"
#ifdef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#ifndef NO_AES_NI
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h"
#endif
@@ -84,10 +81,6 @@ static void x14hash(void *output, const void *input)
x14_ctx_holder ctx;
memcpy(&ctx, &x14_ctx, sizeof(x14_ctx));
#ifdef NO_AES_NI
grsoState sts_grs;
#endif
unsigned char hashbuf[128];
size_t hashptr;
sph_u64 hashctA;
@@ -119,12 +112,8 @@ static void x14hash(void *output, const void *input)
//---groestl----
#ifdef NO_AES_NI
// use SSE2 optimized GRS if possible
GRS_I;
GRS_U;
GRS_C;
// sph_groestl512 (&ctx.groestl, hash, 64);
// sph_groestl512_close(&ctx.groestl, hash);
sph_groestl512 (&ctx.groestl, hash, 64);
sph_groestl512_close(&ctx.groestl, hash);
#else
update_groestl( &ctx.groestl, (char*)hash,512);
final_groestl( &ctx.groestl, (char*)hash);

View File

@@ -31,10 +31,7 @@
#include "algo/skein/sse2/skein.c"
#include "algo/jh/sse2/jh_sse2_opt64.h"
#ifdef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#ifndef NO_AES_NI
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#endif
@@ -86,10 +83,6 @@ static void x15hash(void *output, const void *input)
x15_ctx_holder ctx;
memcpy( &ctx, &x15_ctx, sizeof(x15_ctx) );
#ifdef NO_AES_NI
grsoState sts_grs;
#endif
unsigned char hashbuf[128];
size_t hashptr;
sph_u64 hashctA;
@@ -120,14 +113,11 @@ static void x15hash(void *output, const void *input)
//---groestl----
#ifdef NO_AES_NI
GRS_I;
GRS_U;
GRS_C;
// sph_groestl512(&ctx.groestl, hash, 64);
// sph_groestl512_close(&ctx.groestl, hash);
sph_groestl512(&ctx.groestl, hash, 64);
sph_groestl512_close(&ctx.groestl, hash);
#else
update_groestl( &ctx.groestl, (char*)hash,512);
final_groestl( &ctx.groestl, (char*)hash);
update_groestl( &ctx.groestl, (char*)hash,512);
final_groestl( &ctx.groestl, (char*)hash);
#endif
//---skein4---

View File

@@ -33,10 +33,7 @@
#include "algo/skein/sse2/skein.c"
#include "algo/jh/sse2/jh_sse2_opt64.h"
#ifdef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#ifndef NO_AES_NI
#include "algo/echo/aes_ni/hash_api.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#endif
@@ -92,10 +89,6 @@ static void x17hash(void *output, const void *input)
x17_ctx_holder ctx;
memcpy( &ctx, &x17_ctx, sizeof(x17_ctx) );
#ifdef NO_AES_NI
grsoState sts_grs;
#endif
unsigned char hashbuf[128];
size_t hashptr;
sph_u64 hashctA;
@@ -126,14 +119,11 @@ static void x17hash(void *output, const void *input)
//---groestl----
#ifdef NO_AES_NI
// GRS_I;
// GRS_U;
// GRS_C;
sph_groestl512(&ctx.groestl, hash, 64);
sph_groestl512_close(&ctx.groestl, hash);
#else
update_groestl( &ctx.groestl, (char*)hash,512);
final_groestl( &ctx.groestl, (char*)hash);
update_groestl( &ctx.groestl, (char*)hash,512);
final_groestl( &ctx.groestl, (char*)hash);
#endif
//---skein4---

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,821 +0,0 @@
/*
* Copyright 2011-2012 pooler@litecoinpool.org
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "cpuminer-config.h"
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
#if defined(__i386__)
/* Copy the 16 words of a 64-byte block from \src+\so to \dest+\do while
 * applying a fixed word permutation (swaps 60<->12, 44<->28, 40<->8,
 * 48<->16, 20<->4, 52<->36; words at 0, 24, 32, 56 stay in place).
 * The permutation is its own inverse, so applying it twice restores the
 * original order; scrypt_core_sse2 uses it on entry and exit to convert
 * between the linear layout and the layout its SSE2 kernel expects.
 * Clobbers %eax, %ebx, %ecx, %edx. */
.macro scrypt_shuffle src, so, dest, do
movl \so+60(\src), %eax
movl \so+44(\src), %ebx
movl \so+28(\src), %ecx
movl \so+12(\src), %edx
movl %eax, \do+12(\dest)
movl %ebx, \do+28(\dest)
movl %ecx, \do+44(\dest)
movl %edx, \do+60(\dest)
movl \so+40(\src), %eax
movl \so+8(\src), %ebx
movl \so+48(\src), %ecx
movl \so+16(\src), %edx
movl %eax, \do+8(\dest)
movl %ebx, \do+40(\dest)
movl %ecx, \do+16(\dest)
movl %edx, \do+48(\dest)
movl \so+20(\src), %eax
movl \so+4(\src), %ebx
movl \so+52(\src), %ecx
movl \so+36(\src), %edx
movl %eax, \do+4(\dest)
movl %ebx, \do+20(\dest)
movl %ecx, \do+36(\dest)
movl %edx, \do+52(\dest)
movl \so+0(\src), %eax
movl \so+24(\src), %ebx
movl \so+32(\src), %ecx
movl \so+56(\src), %edx
movl %eax, \do+0(\dest)
movl %ebx, \do+24(\dest)
movl %ecx, \do+32(\dest)
movl %edx, \do+56(\dest)
.endm
/* One "quadround" (four Salsa20 double... i.e. 4 rounds) of the generic
 * x86 Salsa20 core.  The 16 words of state live in the stack slots
 * 4(%esp) .. 64(%esp); this macro reads and writes only those slots and
 * the general-purpose registers (clobbers %eax..%ebp except %esp).
 * The rotation amounts 7, 9, 13, 18 mark the quarter-round steps
 * (x ^= ROTL(a+b, r)).  The code is heavily software-pipelined and the
 * register/stack-slot assignment rotates from step to step, so the
 * instruction order must NOT be changed.  Two expansions of this macro
 * (see salsa8_core_gen) give the salsa20/8 core.
 * NOTE(review): which stack slot holds which logical state word at any
 * point is an internal detail of the scheduling — verify against the
 * reference xor_salsa8 before modifying. */
.macro salsa8_core_gen_quadround
movl 52(%esp), %ecx
movl 4(%esp), %edx
movl 20(%esp), %ebx
movl 8(%esp), %esi
leal (%ecx, %edx), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 4(%esp)
movl 36(%esp), %edi
leal (%edx, %ebx), %ebp
roll $9, %ebp
xorl %ebp, %edi
movl 24(%esp), %ebp
movl %edi, 8(%esp)
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 40(%esp), %ebx
movl %ecx, 20(%esp)
addl %edi, %ecx
roll $18, %ecx
leal (%esi, %ebp), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 24(%esp)
movl 56(%esp), %edi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %edi
movl %edi, 36(%esp)
movl 28(%esp), %ecx
movl %edx, 28(%esp)
movl 44(%esp), %edx
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %esi
movl 60(%esp), %ebx
movl %esi, 40(%esp)
addl %edi, %esi
roll $18, %esi
leal (%ecx, %edx), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 44(%esp)
movl 12(%esp), %edi
xorl %esi, %ebp
leal (%edx, %ebx), %esi
roll $9, %esi
xorl %esi, %edi
movl %edi, 12(%esp)
movl 48(%esp), %esi
movl %ebp, 48(%esp)
movl 64(%esp), %ebp
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 16(%esp), %ebx
movl %ecx, 16(%esp)
addl %edi, %ecx
roll $18, %ecx
leal (%esi, %ebp), %edi
roll $7, %edi
xorl %edi, %ebx
movl 32(%esp), %edi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %edi
movl %edi, 32(%esp)
movl %ebx, %ecx
movl %edx, 52(%esp)
movl 28(%esp), %edx
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %esi
movl 40(%esp), %ebx
movl %esi, 28(%esp)
addl %edi, %esi
roll $18, %esi
leal (%ecx, %edx), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 40(%esp)
movl 12(%esp), %edi
xorl %esi, %ebp
leal (%edx, %ebx), %esi
roll $9, %esi
xorl %esi, %edi
movl %edi, 12(%esp)
movl 4(%esp), %esi
movl %ebp, 4(%esp)
movl 48(%esp), %ebp
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 16(%esp), %ebx
movl %ecx, 16(%esp)
addl %edi, %ecx
roll $18, %ecx
leal (%esi, %ebp), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 48(%esp)
movl 32(%esp), %edi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %edi
movl %edi, 32(%esp)
movl 24(%esp), %ecx
movl %edx, 24(%esp)
movl 52(%esp), %edx
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %esi
movl 28(%esp), %ebx
movl %esi, 28(%esp)
addl %edi, %esi
roll $18, %esi
leal (%ecx, %edx), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 52(%esp)
movl 8(%esp), %edi
xorl %esi, %ebp
leal (%edx, %ebx), %esi
roll $9, %esi
xorl %esi, %edi
movl %edi, 8(%esp)
movl 44(%esp), %esi
movl %ebp, 44(%esp)
movl 4(%esp), %ebp
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 20(%esp), %ebx
movl %ecx, 4(%esp)
addl %edi, %ecx
roll $18, %ecx
leal (%esi, %ebp), %edi
roll $7, %edi
xorl %edi, %ebx
movl 36(%esp), %edi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %edi
movl %edi, 20(%esp)
movl %ebx, %ecx
movl %edx, 36(%esp)
movl 24(%esp), %edx
addl %edi, %ebx
roll $13, %ebx
xorl %ebx, %esi
movl 28(%esp), %ebx
movl %esi, 24(%esp)
addl %edi, %esi
roll $18, %esi
leal (%ecx, %edx), %edi
roll $7, %edi
xorl %edi, %ebx
movl %ebx, 28(%esp)
xorl %esi, %ebp
movl 8(%esp), %esi
leal (%edx, %ebx), %edi
roll $9, %edi
xorl %edi, %esi
movl 40(%esp), %edi
movl %ebp, 8(%esp)
movl 44(%esp), %ebp
movl %esi, 40(%esp)
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 4(%esp), %ebx
movl %ecx, 44(%esp)
addl %esi, %ecx
roll $18, %ecx
leal (%edi, %ebp), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 4(%esp)
movl 20(%esp), %esi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %esi
movl %esi, 56(%esp)
movl 48(%esp), %ecx
movl %edx, 20(%esp)
movl 36(%esp), %edx
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %edi
movl 24(%esp), %ebx
movl %edi, 24(%esp)
addl %esi, %edi
roll $18, %edi
leal (%ecx, %edx), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 60(%esp)
movl 12(%esp), %esi
xorl %edi, %ebp
leal (%edx, %ebx), %edi
roll $9, %edi
xorl %edi, %esi
movl %esi, 12(%esp)
movl 52(%esp), %edi
movl %ebp, 36(%esp)
movl 8(%esp), %ebp
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 16(%esp), %ebx
movl %ecx, 16(%esp)
addl %esi, %ecx
roll $18, %ecx
leal (%edi, %ebp), %esi
roll $7, %esi
xorl %esi, %ebx
movl 32(%esp), %esi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %esi
movl %esi, 32(%esp)
movl %ebx, %ecx
movl %edx, 48(%esp)
movl 20(%esp), %edx
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %edi
movl 24(%esp), %ebx
movl %edi, 20(%esp)
addl %esi, %edi
roll $18, %edi
leal (%ecx, %edx), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 8(%esp)
movl 12(%esp), %esi
xorl %edi, %ebp
leal (%edx, %ebx), %edi
roll $9, %edi
xorl %edi, %esi
movl %esi, 12(%esp)
movl 28(%esp), %edi
movl %ebp, 52(%esp)
movl 36(%esp), %ebp
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 16(%esp), %ebx
movl %ecx, 16(%esp)
addl %esi, %ecx
roll $18, %ecx
leal (%edi, %ebp), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 28(%esp)
movl 32(%esp), %esi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %esi
movl %esi, 32(%esp)
movl 4(%esp), %ecx
movl %edx, 4(%esp)
movl 48(%esp), %edx
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %edi
movl 20(%esp), %ebx
movl %edi, 20(%esp)
addl %esi, %edi
roll $18, %edi
leal (%ecx, %edx), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 48(%esp)
movl 40(%esp), %esi
xorl %edi, %ebp
leal (%edx, %ebx), %edi
roll $9, %edi
xorl %edi, %esi
movl %esi, 36(%esp)
movl 60(%esp), %edi
movl %ebp, 24(%esp)
movl 52(%esp), %ebp
addl %esi, %ebx
roll $13, %ebx
xorl %ebx, %ecx
movl 44(%esp), %ebx
movl %ecx, 40(%esp)
addl %esi, %ecx
roll $18, %ecx
leal (%edi, %ebp), %esi
roll $7, %esi
xorl %esi, %ebx
movl %ebx, 52(%esp)
movl 56(%esp), %esi
xorl %ecx, %edx
leal (%ebp, %ebx), %ecx
roll $9, %ecx
xorl %ecx, %esi
movl %esi, 56(%esp)
addl %esi, %ebx
movl %edx, 44(%esp)
roll $13, %ebx
xorl %ebx, %edi
movl %edi, 60(%esp)
addl %esi, %edi
roll $18, %edi
xorl %edi, %ebp
movl %ebp, 64(%esp)
.endm
.text
.p2align 5
/* salsa20/8 core for the generic path: two quadrounds = 8 rounds.
 * State is in stack slots 4..64(%esp) relative to the CALLER's frame
 * (this routine pushes only the return address). */
salsa8_core_gen:
salsa8_core_gen_quadround
salsa8_core_gen_quadround
ret
.text
.p2align 5
.globl scrypt_core
.globl _scrypt_core
/* void scrypt_core(uint32_t *X, uint32_t *V) - i386 cdecl entry point.
 * X is the 128-byte working block, V the 128 KiB scratchpad.  After the
 * four register pushes below, X is at 20(%esp) and V at 24(%esp).
 * Dispatches to the SSE2 kernel when CPUID reports SSE2 support. */
scrypt_core:
_scrypt_core:
pushl %ebx
pushl %ebp
pushl %edi
pushl %esi
/* Check for SSE2 availability */
movl $1, %eax
cpuid
/* EDX bit 26 of CPUID leaf 1 is the SSE2 feature flag. */
andl $0x04000000, %edx
jnz scrypt_core_sse2
/* Generic (non-SSE2) path: %edi = X, %esi = V, plus a 72-byte frame
 * holding the salsa state (slots 0..64) and two saved values (64/68).
 * After the subl, the original args are at 92(%esp) and 96(%esp). */
scrypt_core_gen:
movl 20(%esp), %edi
movl 24(%esp), %esi
subl $72, %esp
/* macro1a p,q: V[p]=X[p]; V[q]=X[q]; X[p] ^= X[q]; also stash the
 * XORed word in the stack salsa-state slot p.  Clobbers %eax, %edx. */
.macro scrypt_core_macro1a p, q
movl \p(%edi), %eax
movl \q(%edi), %edx
movl %eax, \p(%esi)
movl %edx, \q(%esi)
xorl %edx, %eax
movl %eax, \p(%edi)
movl %eax, \p(%esp)
.endm
/* macro1b p,q: like macro1a but first XORs in the scratchpad row at
 * byte offset %edx (second scrypt loop): X ^= V[j], then X[p] ^= X[q]. */
.macro scrypt_core_macro1b p, q
movl \p(%edi), %eax
xorl \p(%esi, %edx), %eax
movl \q(%edi), %ebx
xorl \q(%esi, %edx), %ebx
movl %ebx, \q(%edi)
xorl %ebx, %eax
movl %eax, \p(%edi)
movl %eax, \p(%esp)
.endm
/* macro2 p,q: feed-forward X[p] += salsa_state[p], then start the
 * second half-mix: X[q] ^= X[p]; keep X[p] in the state slot. */
.macro scrypt_core_macro2 p, q
movl \p(%esp), %eax
addl \p(%edi), %eax
movl %eax, \p(%edi)
xorl \q(%edi), %eax
movl %eax, \q(%edi)
movl %eax, \p(%esp)
.endm
/* macro3 p,q: final feed-forward of the second salsa: X[q] += state[p]. */
.macro scrypt_core_macro3 p, q
movl \p(%esp), %eax
addl \q(%edi), %eax
movl %eax, \q(%edi)
.endm
/* Loop 1: fill V.  End pointer = V + 131072 bytes; each iteration
 * stores one 128-byte row, so this runs N = 1024 times. */
leal 131072(%esi), %ecx
scrypt_core_gen_loop1:
movl %esi, 64(%esp)
movl %ecx, 68(%esp)
scrypt_core_macro1a 0, 64
scrypt_core_macro1a 4, 68
scrypt_core_macro1a 8, 72
scrypt_core_macro1a 12, 76
scrypt_core_macro1a 16, 80
scrypt_core_macro1a 20, 84
scrypt_core_macro1a 24, 88
scrypt_core_macro1a 28, 92
scrypt_core_macro1a 32, 96
scrypt_core_macro1a 36, 100
scrypt_core_macro1a 40, 104
scrypt_core_macro1a 44, 108
scrypt_core_macro1a 48, 112
scrypt_core_macro1a 52, 116
scrypt_core_macro1a 56, 120
scrypt_core_macro1a 60, 124
call salsa8_core_gen
/* Reload X (92(%esp) = original 20(%esp) arg after the 72-byte frame). */
movl 92(%esp), %edi
scrypt_core_macro2 0, 64
scrypt_core_macro2 4, 68
scrypt_core_macro2 8, 72
scrypt_core_macro2 12, 76
scrypt_core_macro2 16, 80
scrypt_core_macro2 20, 84
scrypt_core_macro2 24, 88
scrypt_core_macro2 28, 92
scrypt_core_macro2 32, 96
scrypt_core_macro2 36, 100
scrypt_core_macro2 40, 104
scrypt_core_macro2 44, 108
scrypt_core_macro2 48, 112
scrypt_core_macro2 52, 116
scrypt_core_macro2 56, 120
scrypt_core_macro2 60, 124
call salsa8_core_gen
movl 92(%esp), %edi
scrypt_core_macro3 0, 64
scrypt_core_macro3 4, 68
scrypt_core_macro3 8, 72
scrypt_core_macro3 12, 76
scrypt_core_macro3 16, 80
scrypt_core_macro3 20, 84
scrypt_core_macro3 24, 88
scrypt_core_macro3 28, 92
scrypt_core_macro3 32, 96
scrypt_core_macro3 36, 100
scrypt_core_macro3 40, 104
scrypt_core_macro3 44, 108
scrypt_core_macro3 48, 112
scrypt_core_macro3 52, 116
scrypt_core_macro3 56, 120
scrypt_core_macro3 60, 124
movl 64(%esp), %esi
movl 68(%esp), %ecx
addl $128, %esi
cmpl %ecx, %esi
jne scrypt_core_gen_loop1
/* Loop 2: 1024 random-read mix iterations.  The row index is
 * Integerify(X) = X[16] (byte offset 64) masked to 0..1023, scaled
 * by 128 bytes (shll $7). */
movl 96(%esp), %esi
movl $1024, %ecx
scrypt_core_gen_loop2:
movl %ecx, 68(%esp)
movl 64(%edi), %edx
andl $1023, %edx
shll $7, %edx
scrypt_core_macro1b 0, 64
scrypt_core_macro1b 4, 68
scrypt_core_macro1b 8, 72
scrypt_core_macro1b 12, 76
scrypt_core_macro1b 16, 80
scrypt_core_macro1b 20, 84
scrypt_core_macro1b 24, 88
scrypt_core_macro1b 28, 92
scrypt_core_macro1b 32, 96
scrypt_core_macro1b 36, 100
scrypt_core_macro1b 40, 104
scrypt_core_macro1b 44, 108
scrypt_core_macro1b 48, 112
scrypt_core_macro1b 52, 116
scrypt_core_macro1b 56, 120
scrypt_core_macro1b 60, 124
call salsa8_core_gen
movl 92(%esp), %edi
scrypt_core_macro2 0, 64
scrypt_core_macro2 4, 68
scrypt_core_macro2 8, 72
scrypt_core_macro2 12, 76
scrypt_core_macro2 16, 80
scrypt_core_macro2 20, 84
scrypt_core_macro2 24, 88
scrypt_core_macro2 28, 92
scrypt_core_macro2 32, 96
scrypt_core_macro2 36, 100
scrypt_core_macro2 40, 104
scrypt_core_macro2 44, 108
scrypt_core_macro2 48, 112
scrypt_core_macro2 52, 116
scrypt_core_macro2 56, 120
scrypt_core_macro2 60, 124
call salsa8_core_gen
movl 92(%esp), %edi
movl 96(%esp), %esi
scrypt_core_macro3 0, 64
scrypt_core_macro3 4, 68
scrypt_core_macro3 8, 72
scrypt_core_macro3 12, 76
scrypt_core_macro3 16, 80
scrypt_core_macro3 20, 84
scrypt_core_macro3 24, 88
scrypt_core_macro3 28, 92
scrypt_core_macro3 32, 96
scrypt_core_macro3 36, 100
scrypt_core_macro3 40, 104
scrypt_core_macro3 44, 108
scrypt_core_macro3 48, 112
scrypt_core_macro3 52, 116
scrypt_core_macro3 56, 120
scrypt_core_macro3 60, 124
movl 68(%esp), %ecx
subl $1, %ecx
ja scrypt_core_gen_loop2
addl $72, %esp
popl %esi
popl %edi
popl %ebp
popl %ebx
ret
/* One Salsa20 double-round on state held in %xmm0-%xmm3 (one 4-word
 * state row per register).  Rotates are built from pslld/psrld pairs
 * (7/25, 9/23, 13/19, 18/14); pshufd re-diagonalizes rows between the
 * column and row half-rounds.  Uses %xmm4/%xmm5 as temporaries. */
.macro salsa8_core_sse2_doubleround
movdqa %xmm1, %xmm4
paddd %xmm0, %xmm4
movdqa %xmm4, %xmm5
pslld $7, %xmm4
psrld $25, %xmm5
pxor %xmm4, %xmm3
movdqa %xmm0, %xmm4
pxor %xmm5, %xmm3
paddd %xmm3, %xmm4
movdqa %xmm4, %xmm5
pslld $9, %xmm4
psrld $23, %xmm5
pxor %xmm4, %xmm2
movdqa %xmm3, %xmm4
pxor %xmm5, %xmm2
pshufd $0x93, %xmm3, %xmm3
paddd %xmm2, %xmm4
movdqa %xmm4, %xmm5
pslld $13, %xmm4
psrld $19, %xmm5
pxor %xmm4, %xmm1
movdqa %xmm2, %xmm4
pxor %xmm5, %xmm1
pshufd $0x4e, %xmm2, %xmm2
paddd %xmm1, %xmm4
movdqa %xmm4, %xmm5
pslld $18, %xmm4
psrld $14, %xmm5
pxor %xmm4, %xmm0
movdqa %xmm3, %xmm4
pxor %xmm5, %xmm0
pshufd $0x39, %xmm1, %xmm1
paddd %xmm0, %xmm4
movdqa %xmm4, %xmm5
pslld $7, %xmm4
psrld $25, %xmm5
pxor %xmm4, %xmm1
movdqa %xmm0, %xmm4
pxor %xmm5, %xmm1
paddd %xmm1, %xmm4
movdqa %xmm4, %xmm5
pslld $9, %xmm4
psrld $23, %xmm5
pxor %xmm4, %xmm2
movdqa %xmm1, %xmm4
pxor %xmm5, %xmm2
pshufd $0x93, %xmm1, %xmm1
paddd %xmm2, %xmm4
movdqa %xmm4, %xmm5
pslld $13, %xmm4
psrld $19, %xmm5
pxor %xmm4, %xmm3
movdqa %xmm2, %xmm4
pxor %xmm5, %xmm3
pshufd $0x4e, %xmm2, %xmm2
paddd %xmm3, %xmm4
movdqa %xmm4, %xmm5
pslld $18, %xmm4
psrld $14, %xmm5
pxor %xmm4, %xmm0
pshufd $0x39, %xmm3, %xmm3
pxor %xmm5, %xmm0
.endm
/* salsa20/8 core: four double-rounds = 8 rounds. */
.macro salsa8_core_sse2
salsa8_core_sse2_doubleround
salsa8_core_sse2_doubleround
salsa8_core_sse2_doubleround
salsa8_core_sse2_doubleround
.endm
.p2align 5
/* SSE2 scrypt core.  Reached by a jump from scrypt_core after the four
 * register pushes, so the args are still at 20(%esp) (X) and
 * 24(%esp) (V).  Builds a 16-byte-aligned 128-byte frame for the
 * shuffled copy of X; the two high quarters (offsets 96/112) are kept
 * live in %xmm6/%xmm7 across the loops, and %xmm4/%xmm5 cache offsets
 * 64/80 in the second loop. */
scrypt_core_sse2:
movl 20(%esp), %edi
movl 24(%esp), %esi
movl %esp, %ebp
subl $128, %esp
andl $-16, %esp
/* Convert X into the kernel's word layout (self-inverse shuffle). */
scrypt_shuffle %edi, 0, %esp, 0
scrypt_shuffle %edi, 64, %esp, 64
movdqa 96(%esp), %xmm6
movdqa 112(%esp), %xmm7
movl %esi, %edx
/* Loop 1: store 1024 rows of 128 bytes into V (end = V + 131072). */
leal 131072(%esi), %ecx
scrypt_core_sse2_loop1:
movdqa 0(%esp), %xmm0
movdqa 16(%esp), %xmm1
movdqa 32(%esp), %xmm2
movdqa 48(%esp), %xmm3
movdqa 64(%esp), %xmm4
movdqa 80(%esp), %xmm5
pxor %xmm4, %xmm0
pxor %xmm5, %xmm1
movdqa %xmm0, 0(%edx)
movdqa %xmm1, 16(%edx)
pxor %xmm6, %xmm2
pxor %xmm7, %xmm3
movdqa %xmm2, 32(%edx)
movdqa %xmm3, 48(%edx)
movdqa %xmm4, 64(%edx)
movdqa %xmm5, 80(%edx)
movdqa %xmm6, 96(%edx)
movdqa %xmm7, 112(%edx)
salsa8_core_sse2
paddd 0(%edx), %xmm0
paddd 16(%edx), %xmm1
paddd 32(%edx), %xmm2
paddd 48(%edx), %xmm3
movdqa %xmm0, 0(%esp)
movdqa %xmm1, 16(%esp)
movdqa %xmm2, 32(%esp)
movdqa %xmm3, 48(%esp)
pxor 64(%esp), %xmm0
pxor 80(%esp), %xmm1
pxor %xmm6, %xmm2
pxor %xmm7, %xmm3
movdqa %xmm0, 64(%esp)
movdqa %xmm1, 80(%esp)
movdqa %xmm2, %xmm6
movdqa %xmm3, %xmm7
salsa8_core_sse2
paddd 64(%esp), %xmm0
paddd 80(%esp), %xmm1
paddd %xmm2, %xmm6
paddd %xmm3, %xmm7
movdqa %xmm0, 64(%esp)
movdqa %xmm1, 80(%esp)
addl $128, %edx
cmpl %ecx, %edx
jne scrypt_core_sse2_loop1
movdqa 64(%esp), %xmm4
movdqa 80(%esp), %xmm5
/* Loop 2: 1024 mix iterations; the V row index comes from the low
 * word of %xmm4 (Integerify), masked to 0..1023, scaled by 128. */
movl $1024, %ecx
scrypt_core_sse2_loop2:
movd %xmm4, %edx
movdqa 0(%esp), %xmm0
movdqa 16(%esp), %xmm1
movdqa 32(%esp), %xmm2
movdqa 48(%esp), %xmm3
andl $1023, %edx
shll $7, %edx
pxor 0(%esi, %edx), %xmm0
pxor 16(%esi, %edx), %xmm1
pxor 32(%esi, %edx), %xmm2
pxor 48(%esi, %edx), %xmm3
pxor %xmm4, %xmm0
pxor %xmm5, %xmm1
movdqa %xmm0, 0(%esp)
movdqa %xmm1, 16(%esp)
pxor %xmm6, %xmm2
pxor %xmm7, %xmm3
movdqa %xmm2, 32(%esp)
movdqa %xmm3, 48(%esp)
salsa8_core_sse2
paddd 0(%esp), %xmm0
paddd 16(%esp), %xmm1
paddd 32(%esp), %xmm2
paddd 48(%esp), %xmm3
movdqa %xmm0, 0(%esp)
movdqa %xmm1, 16(%esp)
movdqa %xmm2, 32(%esp)
movdqa %xmm3, 48(%esp)
pxor 64(%esi, %edx), %xmm0
pxor 80(%esi, %edx), %xmm1
pxor 96(%esi, %edx), %xmm2
pxor 112(%esi, %edx), %xmm3
pxor 64(%esp), %xmm0
pxor 80(%esp), %xmm1
pxor %xmm6, %xmm2
pxor %xmm7, %xmm3
movdqa %xmm0, 64(%esp)
movdqa %xmm1, 80(%esp)
movdqa %xmm2, %xmm6
movdqa %xmm3, %xmm7
salsa8_core_sse2
paddd 64(%esp), %xmm0
paddd 80(%esp), %xmm1
paddd %xmm2, %xmm6
paddd %xmm3, %xmm7
movdqa %xmm0, %xmm4
movdqa %xmm1, %xmm5
movdqa %xmm0, 64(%esp)
movdqa %xmm1, 80(%esp)
subl $1, %ecx
ja scrypt_core_sse2_loop2
movdqa %xmm6, 96(%esp)
movdqa %xmm7, 112(%esp)
/* Undo the shuffle and write the result back to X. */
scrypt_shuffle %esp, 0, %edi, 0
scrypt_shuffle %esp, 64, %edi, 64
movl %ebp, %esp
popl %esi
popl %edi
popl %ebp
popl %ebx
ret
#endif

View File

@@ -1,767 +0,0 @@
/*
* Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2013 pooler
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This file was originally written by Colin Percival as part of the Tarsnap
* online backup system.
*/
#include "../cpuminer-config.h"
#include "../miner.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
/* Precomputed big-endian SHA-256 padding tails: a leading 1 bit
 * (0x80000000), zero fill, and the total message length in bits in the
 * last word.  keypad completes an 80-byte message (0x280 = 640 bits). */
static const uint32_t keypad[12] = {
0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00000280
};
/* innerpad completes a 148-byte message (0x4a0 = 1184 bits). */
static const uint32_t innerpad[11] = {
0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x000004a0
};
/* outerpad completes a 96-byte message (0x300 = 768 bits). */
static const uint32_t outerpad[8] = {
0x80000000, 0, 0, 0, 0, 0, 0, 0x00000300
};
/* finalblk: full last block of the PBKDF2 inner hash - block index 1,
 * pad bit, and total length 0x620 = 1568 bits (196 bytes). */
static const uint32_t finalblk[16] = {
0x00000001, 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00000620
};
/*
 * Prepare HMAC-SHA256 inner and outer states for an 80-byte key.
 * Since the key exceeds the 64-byte SHA-256 block size, it is first
 * hashed (ihash = SHA256(key)) and the digest is used as the HMAC key,
 * XORed with the standard opad (0x5c) / ipad (0x36) bytes.
 * key:    80-byte block header as 20 words.
 * tstate: in - SHA-256 midstate of the first 64 key bytes;
 *         out - inner state, SHA256(ihash ^ ipad) after one block.
 * ostate: out - outer state, SHA256(ihash ^ opad) after one block.
 */
static inline void HMAC_SHA256_80_init(const uint32_t *key,
uint32_t *tstate, uint32_t *ostate)
{
uint32_t ihash[8];
uint32_t pad[16];
int i;
/* tstate is assumed to contain the midstate of key */
memcpy(pad, key + 16, 16);	/* last 16 bytes of the 80-byte key */
memcpy(pad + 4, keypad, 48);	/* padding for an 80-byte message */
sha256_transform(tstate, pad, 0);
memcpy(ihash, tstate, 32);
sha256_init(ostate);
for (i = 0; i < 8; i++)
pad[i] = ihash[i] ^ 0x5c5c5c5c;
for (; i < 16; i++)
pad[i] = 0x5c5c5c5c;	/* opad fill beyond the 32-byte key digest */
sha256_transform(ostate, pad, 0);
sha256_init(tstate);
for (i = 0; i < 8; i++)
pad[i] = ihash[i] ^ 0x36363636;
for (; i < 16; i++)
pad[i] = 0x36363636;	/* ipad fill */
sha256_transform(tstate, pad, 0);
}
/*
 * PBKDF2-HMAC-SHA256 with c=1 producing 128 bytes of output (4 blocks),
 * with the 80-byte header as both password and salt.
 * tstate/ostate: HMAC inner/outer states from HMAC_SHA256_80_init.
 * salt:   80-byte salt (20 words).
 * output: 32 words, byte-swapped to big-endian word order via swab32.
 */
static inline void PBKDF2_SHA256_80_128(const uint32_t *tstate,
const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
uint32_t istate[8], ostate2[8];
uint32_t ibuf[16], obuf[16];
int i, j;
memcpy(istate, tstate, 32);
sha256_transform(istate, salt, 0);	/* inner hash over first 64 salt bytes */
memcpy(ibuf, salt + 16, 16);	/* last 16 salt bytes */
memcpy(ibuf + 5, innerpad, 44);	/* word 4 (block index) filled per iteration */
memcpy(obuf + 8, outerpad, 32);
for (i = 0; i < 4; i++) {
memcpy(obuf, istate, 32);
ibuf[4] = i + 1;	/* PBKDF2 block index, 1-based */
sha256_transform(obuf, ibuf, 0);
memcpy(ostate2, ostate, 32);
sha256_transform(ostate2, obuf, 0);	/* outer hash */
for (j = 0; j < 8; j++)
output[8 * i + j] = swab32(ostate2[j]);
}
}
/*
 * Final PBKDF2-HMAC-SHA256 step: 128-byte salt (the mixed X block),
 * 32 bytes of output.  Consumes tstate/ostate (both are advanced).
 * output: 8 words, byte-swapped via swab32.
 */
static inline void PBKDF2_SHA256_128_32(uint32_t *tstate, uint32_t *ostate,
const uint32_t *salt, uint32_t *output)
{
uint32_t buf[16];
int i;
sha256_transform(tstate, salt, 1);	/* inner hash: two full salt blocks */
sha256_transform(tstate, salt + 16, 1);
sha256_transform(tstate, finalblk, 0);	/* block index + padding */
memcpy(buf, tstate, 32);
memcpy(buf + 8, outerpad, 32);
sha256_transform(ostate, buf, 0);	/* outer hash */
for (i = 0; i < 8; i++)
output[i] = swab32(ostate[i]);
}
#ifdef HAVE_SHA256_4WAY
/* 4-way interleaved copies of the SHA-256 padding tails above: each
 * scalar word is repeated once per lane (lane-major, 4 lanes). */
static const uint32_t keypad_4way[4 * 12] = {
0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000280, 0x00000280, 0x00000280, 0x00000280
};
static const uint32_t innerpad_4way[4 * 11] = {
0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x000004a0, 0x000004a0, 0x000004a0, 0x000004a0
};
static const uint32_t outerpad_4way[4 * 8] = {
0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000300, 0x00000300, 0x00000300, 0x00000300
};
/* Aligned for direct use as a sha256_transform_4way input block. */
static const uint32_t finalblk_4way[4 * 16] __attribute__((aligned(16))) = {
0x00000001, 0x00000001, 0x00000001, 0x00000001,
0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000620, 0x00000620, 0x00000620, 0x00000620
};
/*
 * 4-way interleaved variant of HMAC_SHA256_80_init: identical logic,
 * but every scalar word is a group of 4 lane words and the scalar
 * constants/transforms are replaced by their _4way counterparts.
 * key/tstate/ostate: lane-interleaved (word i of lane k at [4*i + k]).
 */
static inline void HMAC_SHA256_80_init_4way(const uint32_t *key,
uint32_t *tstate, uint32_t *ostate)
{
uint32_t ihash[4 * 8] __attribute__((aligned(16)));
uint32_t pad[4 * 16] __attribute__((aligned(16)));
int i;
/* tstate is assumed to contain the midstate of key */
memcpy(pad, key + 4 * 16, 4 * 16);	/* last 16 key bytes, per lane */
memcpy(pad + 4 * 4, keypad_4way, 4 * 48);
sha256_transform_4way(tstate, pad, 0);
memcpy(ihash, tstate, 4 * 32);
sha256_init_4way(ostate);
for (i = 0; i < 4 * 8; i++)
pad[i] = ihash[i] ^ 0x5c5c5c5c;	/* opad */
for (; i < 4 * 16; i++)
pad[i] = 0x5c5c5c5c;
sha256_transform_4way(ostate, pad, 0);
sha256_init_4way(tstate);
for (i = 0; i < 4 * 8; i++)
pad[i] = ihash[i] ^ 0x36363636;	/* ipad */
for (; i < 4 * 16; i++)
pad[i] = 0x36363636;
sha256_transform_4way(tstate, pad, 0);
}
/*
 * 4-way interleaved variant of PBKDF2_SHA256_80_128; all buffers are
 * lane-interleaved (word i of lane k at [4*i + k]).
 */
static inline void PBKDF2_SHA256_80_128_4way(const uint32_t *tstate,
const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
uint32_t istate[4 * 8] __attribute__((aligned(16)));
uint32_t ostate2[4 * 8] __attribute__((aligned(16)));
uint32_t ibuf[4 * 16] __attribute__((aligned(16)));
uint32_t obuf[4 * 16] __attribute__((aligned(16)));
int i, j;
memcpy(istate, tstate, 4 * 32);
sha256_transform_4way(istate, salt, 0);
memcpy(ibuf, salt + 4 * 16, 4 * 16);
memcpy(ibuf + 4 * 5, innerpad_4way, 4 * 44);
memcpy(obuf + 4 * 8, outerpad_4way, 4 * 32);
for (i = 0; i < 4; i++) {
memcpy(obuf, istate, 4 * 32);
/* PBKDF2 block index i+1, broadcast to all 4 lanes */
ibuf[4 * 4 + 0] = i + 1;
ibuf[4 * 4 + 1] = i + 1;
ibuf[4 * 4 + 2] = i + 1;
ibuf[4 * 4 + 3] = i + 1;
sha256_transform_4way(obuf, ibuf, 0);
memcpy(ostate2, ostate, 4 * 32);
sha256_transform_4way(ostate2, obuf, 0);
for (j = 0; j < 4 * 8; j++)
output[4 * 8 * i + j] = swab32(ostate2[j]);
}
}
/*
 * 4-way interleaved variant of PBKDF2_SHA256_128_32; consumes
 * tstate/ostate and writes 8 big-endian words per lane, interleaved.
 */
static inline void PBKDF2_SHA256_128_32_4way(uint32_t *tstate,
uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
uint32_t buf[4 * 16] __attribute__((aligned(16)));
int i;
sha256_transform_4way(tstate, salt, 1);
sha256_transform_4way(tstate, salt + 4 * 16, 1);
sha256_transform_4way(tstate, finalblk_4way, 0);
memcpy(buf, tstate, 4 * 32);
memcpy(buf + 4 * 8, outerpad_4way, 4 * 32);
sha256_transform_4way(ostate, buf, 0);
for (i = 0; i < 4 * 8; i++)
output[i] = swab32(ostate[i]);
}
#endif /* HAVE_SHA256_4WAY */
#ifdef HAVE_SHA256_8WAY
/* 8-way interleaved copy of finalblk (block index 1, pad bit, length
 * 0x620 bits); aligned for direct use by sha256_transform_8way. */
static const uint32_t finalblk_8way[8 * 16] __attribute__((aligned(32))) = {
0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620
};
/*
 * 8-way interleaved variant of HMAC_SHA256_80_init.  The padding tails
 * are built inline with loops/memset instead of static tables (there
 * are no keypad_8way constants); otherwise identical to the scalar
 * version with lane-interleaved buffers (word i of lane k at [8*i+k]).
 */
static inline void HMAC_SHA256_80_init_8way(const uint32_t *key,
uint32_t *tstate, uint32_t *ostate)
{
uint32_t ihash[8 * 8] __attribute__((aligned(32)));
uint32_t pad[8 * 16] __attribute__((aligned(32)));
int i;
/* tstate is assumed to contain the midstate of key */
memcpy(pad, key + 8 * 16, 8 * 16);	/* last 16 key bytes, per lane */
for (i = 0; i < 8; i++)
pad[8 * 4 + i] = 0x80000000;	/* pad bit */
memset(pad + 8 * 5, 0x00, 8 * 40);
for (i = 0; i < 8; i++)
pad[8 * 15 + i] = 0x00000280;	/* 640-bit message length */
sha256_transform_8way(tstate, pad, 0);
memcpy(ihash, tstate, 8 * 32);
sha256_init_8way(ostate);
for (i = 0; i < 8 * 8; i++)
pad[i] = ihash[i] ^ 0x5c5c5c5c;	/* opad */
for (; i < 8 * 16; i++)
pad[i] = 0x5c5c5c5c;
sha256_transform_8way(ostate, pad, 0);
sha256_init_8way(tstate);
for (i = 0; i < 8 * 8; i++)
pad[i] = ihash[i] ^ 0x36363636;	/* ipad */
for (; i < 8 * 16; i++)
pad[i] = 0x36363636;
sha256_transform_8way(tstate, pad, 0);
}
/*
 * 8-way interleaved variant of PBKDF2_SHA256_80_128; the inner/outer
 * padding tails are built inline per lane instead of from tables.
 */
static inline void PBKDF2_SHA256_80_128_8way(const uint32_t *tstate,
const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
uint32_t istate[8 * 8] __attribute__((aligned(32)));
uint32_t ostate2[8 * 8] __attribute__((aligned(32)));
uint32_t ibuf[8 * 16] __attribute__((aligned(32)));
uint32_t obuf[8 * 16] __attribute__((aligned(32)));
int i, j;
memcpy(istate, tstate, 8 * 32);
sha256_transform_8way(istate, salt, 0);
memcpy(ibuf, salt + 8 * 16, 8 * 16);
for (i = 0; i < 8; i++)
ibuf[8 * 5 + i] = 0x80000000;	/* innerpad: pad bit */
memset(ibuf + 8 * 6, 0x00, 8 * 36);
for (i = 0; i < 8; i++)
ibuf[8 * 15 + i] = 0x000004a0;	/* 1184-bit length */
for (i = 0; i < 8; i++)
obuf[8 * 8 + i] = 0x80000000;	/* outerpad: pad bit */
memset(obuf + 8 * 9, 0x00, 8 * 24);
for (i = 0; i < 8; i++)
obuf[8 * 15 + i] = 0x00000300;	/* 768-bit length */
for (i = 0; i < 4; i++) {
memcpy(obuf, istate, 8 * 32);
/* PBKDF2 block index i+1, broadcast to all 8 lanes */
ibuf[8 * 4 + 0] = i + 1;
ibuf[8 * 4 + 1] = i + 1;
ibuf[8 * 4 + 2] = i + 1;
ibuf[8 * 4 + 3] = i + 1;
ibuf[8 * 4 + 4] = i + 1;
ibuf[8 * 4 + 5] = i + 1;
ibuf[8 * 4 + 6] = i + 1;
ibuf[8 * 4 + 7] = i + 1;
sha256_transform_8way(obuf, ibuf, 0);
memcpy(ostate2, ostate, 8 * 32);
sha256_transform_8way(ostate2, obuf, 0);
for (j = 0; j < 8 * 8; j++)
output[8 * 8 * i + j] = swab32(ostate2[j]);
}
}
/*
 * 8-way interleaved variant of PBKDF2_SHA256_128_32; consumes
 * tstate/ostate and writes 8 big-endian words per lane, interleaved.
 */
static inline void PBKDF2_SHA256_128_32_8way(uint32_t *tstate,
uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
uint32_t buf[8 * 16] __attribute__((aligned(32)));
int i;
sha256_transform_8way(tstate, salt, 1);
sha256_transform_8way(tstate, salt + 8 * 16, 1);
sha256_transform_8way(tstate, finalblk_8way, 0);
memcpy(buf, tstate, 8 * 32);
for (i = 0; i < 8; i++)
buf[8 * 8 + i] = 0x80000000;	/* outerpad built inline */
memset(buf + 8 * 9, 0x00, 8 * 24);
for (i = 0; i < 8; i++)
buf[8 * 15 + i] = 0x00000300;
sha256_transform_8way(ostate, buf, 0);
for (i = 0; i < 8 * 8; i++)
output[i] = swab32(ostate[i]);
}
#endif /* HAVE_SHA256_8WAY */
/* Architecture dispatch: declare the external (assembly) scrypt_core
 * variants available on this target and define SCRYPT_MAX_WAYS, the
 * number of hashes processed per scrypt_1024_1_1_256_* call. */
#if defined(__x86_64__)
#define SCRYPT_MAX_WAYS 12
#define HAVE_SCRYPT_3WAY 1
int scrypt_best_throughput();
void scrypt_core(uint32_t *X, uint32_t *V);
void scrypt_core_3way(uint32_t *X, uint32_t *V);
#if defined(USE_AVX2)
#undef SCRYPT_MAX_WAYS
#define SCRYPT_MAX_WAYS 24
#define HAVE_SCRYPT_6WAY 1
void scrypt_core_6way(uint32_t *X, uint32_t *V);
#endif
#elif defined(__i386__)
#define SCRYPT_MAX_WAYS 4
#define scrypt_best_throughput() 1
void scrypt_core(uint32_t *X, uint32_t *V);
#elif defined(__arm__) && defined(__APCS_32__)
void scrypt_core(uint32_t *X, uint32_t *V);
#if defined(__ARM_NEON__)
#undef HAVE_SHA256_4WAY
#define SCRYPT_MAX_WAYS 3
#define HAVE_SCRYPT_3WAY 1
#define scrypt_best_throughput() 3
void scrypt_core_3way(uint32_t *X, uint32_t *V);
#endif
/* Otherwise fall back to the portable C implementation below. */
#else
/*
 * Salsa20/8 mixing step used by scrypt: XOR Bx into B, run eight
 * Salsa20 rounds over the result, then add (feed-forward) the round
 * output back into B.  B is updated in place; Bx is read-only.
 */
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
{
	uint32_t x[16];
	int i;

	/* B ^= Bx, and load the XORed block as the working state. */
	for (i = 0; i < 16; i++)
		x[i] = (B[i] ^= Bx[i]);
	/* Four double-rounds = eight Salsa20 rounds. */
	for (i = 0; i < 8; i += 2) {
#define R(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
		/* Operate on columns. */
		x[ 4] ^= R(x[ 0]+x[12], 7);	x[ 9] ^= R(x[ 5]+x[ 1], 7);
		x[14] ^= R(x[10]+x[ 6], 7);	x[ 3] ^= R(x[15]+x[11], 7);
		x[ 8] ^= R(x[ 4]+x[ 0], 9);	x[13] ^= R(x[ 9]+x[ 5], 9);
		x[ 2] ^= R(x[14]+x[10], 9);	x[ 7] ^= R(x[ 3]+x[15], 9);
		x[12] ^= R(x[ 8]+x[ 4],13);	x[ 1] ^= R(x[13]+x[ 9],13);
		x[ 6] ^= R(x[ 2]+x[14],13);	x[11] ^= R(x[ 7]+x[ 3],13);
		x[ 0] ^= R(x[12]+x[ 8],18);	x[ 5] ^= R(x[ 1]+x[13],18);
		x[10] ^= R(x[ 6]+x[ 2],18);	x[15] ^= R(x[11]+x[ 7],18);
		/* Operate on rows. */
		x[ 1] ^= R(x[ 0]+x[ 3], 7);	x[ 6] ^= R(x[ 5]+x[ 4], 7);
		x[11] ^= R(x[10]+x[ 9], 7);	x[12] ^= R(x[15]+x[14], 7);
		x[ 2] ^= R(x[ 1]+x[ 0], 9);	x[ 7] ^= R(x[ 6]+x[ 5], 9);
		x[ 8] ^= R(x[11]+x[10], 9);	x[13] ^= R(x[12]+x[15], 9);
		x[ 3] ^= R(x[ 2]+x[ 1],13);	x[ 4] ^= R(x[ 7]+x[ 6],13);
		x[ 9] ^= R(x[ 8]+x[11],13);	x[14] ^= R(x[13]+x[12],13);
		x[ 0] ^= R(x[ 3]+x[ 2],18);	x[ 5] ^= R(x[ 4]+x[ 7],18);
		x[10] ^= R(x[ 9]+x[ 8],18);	x[15] ^= R(x[14]+x[13],18);
#undef R
	}
	/* Feed-forward: add the round output back into B. */
	for (i = 0; i < 16; i++)
		B[i] += x[i];
}
/*
 * Portable C scrypt core for N=1024, r=1 (used when no assembly
 * implementation is available for this architecture).
 * X: 32-word (128-byte) working block, mixed in place.
 * V: scratchpad of 1024 * 32 words (128 KiB).
 */
static inline void scrypt_core(uint32_t *X, uint32_t *V)
{
uint32_t i, j, k;
/* Phase 1: fill V with successive salsa-mixed states of X. */
for (i = 0; i < 1024; i++) {
memcpy(&V[i * 32], X, 128);
xor_salsa8(&X[0], &X[16]);
xor_salsa8(&X[16], &X[0]);
}
/* Phase 2: 1024 data-dependent reads; j = Integerify(X) mod 1024. */
for (i = 0; i < 1024; i++) {
j = 32 * (X[16] & 1023);
for (k = 0; k < 32; k++)
X[k] ^= V[j + k];
xor_salsa8(&X[0], &X[16]);
xor_salsa8(&X[16], &X[0]);
}
}
#endif
#ifndef SCRYPT_MAX_WAYS
#define SCRYPT_MAX_WAYS 1
#define scrypt_best_throughput() 1
#endif
/* One scrypt lane needs 1024 * 128 = 131072 bytes of scratchpad; the
 * extra 63 bytes let consumers round the base up to a 64-byte boundary. */
#define SCRYPT_BUFFER_SIZE (SCRYPT_MAX_WAYS * 131072 + 63)
/*
 * Allocate the shared scrypt scratchpad for SCRYPT_MAX_WAYS lanes.
 * Returns NULL on allocation failure; the caller owns the buffer and
 * must free() it.  The pointer is NOT aligned - callers align it to 64
 * bytes themselves (see the V computation in scrypt_1024_1_1_256).
 * Fixed: K&R-style unprototyped "()" parameter list -> explicit (void).
 */
unsigned char *scrypt_buffer_alloc(void)
{
return malloc(SCRYPT_BUFFER_SIZE);
}
/* Scalar scrypt(1024,1,1) of one 80-byte header.
 * input: 20 words; output: 8 words; midstate: SHA-256 state after the
 * first 64 bytes; scratchpad: buffer from scrypt_buffer_alloc(). */
static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
    uint32_t *midstate, unsigned char *scratchpad)
{
    uint32_t tstate[8], ostate[8];
    uint32_t X[32];
    uintptr_t base = (uintptr_t)scratchpad;
    /* round the scratchpad up to the next 64-byte boundary */
    uint32_t *V = (uint32_t *)((base + 63) & ~(uintptr_t)63);

    memcpy(tstate, midstate, 32);
    HMAC_SHA256_80_init(input, tstate, ostate);
    PBKDF2_SHA256_80_128(tstate, ostate, input, X);
    scrypt_core(X, V);
    PBKDF2_SHA256_128_32(tstate, ostate, X, output);
}
#ifdef HAVE_SHA256_4WAY
/* Hash four 80-byte inputs at once using the 4-way SHA-256 code.
 * input: 4 x 20 words (one header per lane); output: 4 x 8 words.
 * The SHA stages want word-interleaved lanes; scrypt_core wants one
 * contiguous 32-word lane, hence the transposes below. */
static void scrypt_1024_1_1_256_4way(const uint32_t *input,
    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
    uint32_t tstate[4 * 8] __attribute__((aligned(128)));
    uint32_t ostate[4 * 8] __attribute__((aligned(128)));
    uint32_t W[4 * 32] __attribute__((aligned(128)));
    uint32_t X[4 * 32] __attribute__((aligned(128)));
    uint32_t *V;
    int i, k;

    /* 64-byte-align the scratchpad. */
    V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

    /* Interleave the four inputs word-by-word for the 4-way SHA code. */
    for (i = 0; i < 20; i++)
        for (k = 0; k < 4; k++)
            W[4 * i + k] = input[k * 20 + i];
    /* Broadcast the shared first-block midstate into all four lanes. */
    for (i = 0; i < 8; i++)
        for (k = 0; k < 4; k++)
            tstate[4 * i + k] = midstate[i];
    HMAC_SHA256_80_init_4way(W, tstate, ostate);
    PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);
    /* De-interleave into per-lane contiguous blocks for scrypt_core. */
    for (i = 0; i < 32; i++)
        for (k = 0; k < 4; k++)
            X[k * 32 + i] = W[4 * i + k];
    scrypt_core(X + 0 * 32, V);
    scrypt_core(X + 1 * 32, V);
    scrypt_core(X + 2 * 32, V);
    scrypt_core(X + 3 * 32, V);
    /* Re-interleave for the final 4-way PBKDF2. */
    for (i = 0; i < 32; i++)
        for (k = 0; k < 4; k++)
            W[4 * i + k] = X[k * 32 + i];
    PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);
    /* De-interleave the 8-word digest for each lane. */
    for (i = 0; i < 8; i++)
        for (k = 0; k < 4; k++)
            output[k * 8 + i] = W[4 * i + k];
}
#endif /* HAVE_SHA256_4WAY */
#ifdef HAVE_SCRYPT_3WAY
/* Hash three 80-byte inputs: three scalar PBKDF2 streams feeding one
 * 3-way scrypt core. input: 3 x 20 words; output: 3 x 8 words;
 * X holds three contiguous 32-word lanes. */
static void scrypt_1024_1_1_256_3way(const uint32_t *input,
    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
    uint32_t tstate[3 * 8], ostate[3 * 8];
    uint32_t X[3 * 32] __attribute__((aligned(64)));
    uint32_t *V;

    /* 64-byte-align the scratchpad. */
    V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

    /* All three lanes start from the same first-block midstate. */
    memcpy(tstate + 0, midstate, 32);
    memcpy(tstate + 8, midstate, 32);
    memcpy(tstate + 16, midstate, 32);
    HMAC_SHA256_80_init(input + 0, tstate + 0, ostate + 0);
    HMAC_SHA256_80_init(input + 20, tstate + 8, ostate + 8);
    HMAC_SHA256_80_init(input + 40, tstate + 16, ostate + 16);
    PBKDF2_SHA256_80_128(tstate + 0, ostate + 0, input + 0, X + 0);
    PBKDF2_SHA256_80_128(tstate + 8, ostate + 8, input + 20, X + 32);
    PBKDF2_SHA256_80_128(tstate + 16, ostate + 16, input + 40, X + 64);
    scrypt_core_3way(X, V);
    PBKDF2_SHA256_128_32(tstate + 0, ostate + 0, X + 0, output + 0);
    PBKDF2_SHA256_128_32(tstate + 8, ostate + 8, X + 32, output + 8);
    PBKDF2_SHA256_128_32(tstate + 16, ostate + 16, X + 64, output + 16);
}
#ifdef HAVE_SHA256_4WAY
/* Hash 12 inputs: three groups of 4-way SHA-256 (j indexes the group,
 * k the lane within a group) feeding four 3-way scrypt cores.
 * input: 12 x 20 words; output: 12 x 8 words. */
static void scrypt_1024_1_1_256_12way(const uint32_t *input,
    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
    uint32_t tstate[12 * 8] __attribute__((aligned(128)));
    uint32_t ostate[12 * 8] __attribute__((aligned(128)));
    uint32_t W[12 * 32] __attribute__((aligned(128)));
    uint32_t X[12 * 32] __attribute__((aligned(128)));
    uint32_t *V;
    int i, j, k;

    /* 64-byte-align the scratchpad. */
    V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

    /* Interleave each group of four inputs for the 4-way SHA code. */
    for (j = 0; j < 3; j++)
        for (i = 0; i < 20; i++)
            for (k = 0; k < 4; k++)
                W[128 * j + 4 * i + k] = input[80 * j + k * 20 + i];
    /* Broadcast the shared midstate into every lane of every group. */
    for (j = 0; j < 3; j++)
        for (i = 0; i < 8; i++)
            for (k = 0; k < 4; k++)
                tstate[32 * j + 4 * i + k] = midstate[i];
    HMAC_SHA256_80_init_4way(W + 0, tstate + 0, ostate + 0);
    HMAC_SHA256_80_init_4way(W + 128, tstate + 32, ostate + 32);
    HMAC_SHA256_80_init_4way(W + 256, tstate + 64, ostate + 64);
    PBKDF2_SHA256_80_128_4way(tstate + 0, ostate + 0, W + 0, W + 0);
    PBKDF2_SHA256_80_128_4way(tstate + 32, ostate + 32, W + 128, W + 128);
    PBKDF2_SHA256_80_128_4way(tstate + 64, ostate + 64, W + 256, W + 256);
    /* De-interleave into contiguous lanes for the 3-way scrypt cores. */
    for (j = 0; j < 3; j++)
        for (i = 0; i < 32; i++)
            for (k = 0; k < 4; k++)
                X[128 * j + k * 32 + i] = W[128 * j + 4 * i + k];
    /* 4 calls x 3 lanes each = 12 lanes total. */
    scrypt_core_3way(X + 0 * 96, V);
    scrypt_core_3way(X + 1 * 96, V);
    scrypt_core_3way(X + 2 * 96, V);
    scrypt_core_3way(X + 3 * 96, V);
    /* Re-interleave for the final 4-way PBKDF2. */
    for (j = 0; j < 3; j++)
        for (i = 0; i < 32; i++)
            for (k = 0; k < 4; k++)
                W[128 * j + 4 * i + k] = X[128 * j + k * 32 + i];
    PBKDF2_SHA256_128_32_4way(tstate + 0, ostate + 0, W + 0, W + 0);
    PBKDF2_SHA256_128_32_4way(tstate + 32, ostate + 32, W + 128, W + 128);
    PBKDF2_SHA256_128_32_4way(tstate + 64, ostate + 64, W + 256, W + 256);
    /* De-interleave the per-lane digests. */
    for (j = 0; j < 3; j++)
        for (i = 0; i < 8; i++)
            for (k = 0; k < 4; k++)
                output[32 * j + k * 8 + i] = W[128 * j + 4 * i + k];
}
#endif /* HAVE_SHA256_4WAY */
#endif /* HAVE_SCRYPT_3WAY */
#ifdef HAVE_SCRYPT_6WAY
/* Hash 24 inputs: three groups of 8-way SHA-256 (j indexes the group,
 * k the lane within a group) feeding four 6-way scrypt cores.
 * input: 24 x 20 words; output: 24 x 8 words. */
static void scrypt_1024_1_1_256_24way(const uint32_t *input,
    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
    uint32_t tstate[24 * 8] __attribute__((aligned(128)));
    uint32_t ostate[24 * 8] __attribute__((aligned(128)));
    uint32_t W[24 * 32] __attribute__((aligned(128)));
    uint32_t X[24 * 32] __attribute__((aligned(128)));
    uint32_t *V;
    int i, j, k;

    /* 64-byte-align the scratchpad. */
    V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

    /* Interleave each group of eight inputs for the 8-way SHA code. */
    for (j = 0; j < 3; j++)
        for (i = 0; i < 20; i++)
            for (k = 0; k < 8; k++)
                W[8 * 32 * j + 8 * i + k] = input[8 * 20 * j + k * 20 + i];
    /* Broadcast the shared midstate into every lane of every group. */
    for (j = 0; j < 3; j++)
        for (i = 0; i < 8; i++)
            for (k = 0; k < 8; k++)
                tstate[8 * 8 * j + 8 * i + k] = midstate[i];
    HMAC_SHA256_80_init_8way(W + 0, tstate + 0, ostate + 0);
    HMAC_SHA256_80_init_8way(W + 256, tstate + 64, ostate + 64);
    HMAC_SHA256_80_init_8way(W + 512, tstate + 128, ostate + 128);
    PBKDF2_SHA256_80_128_8way(tstate + 0, ostate + 0, W + 0, W + 0);
    PBKDF2_SHA256_80_128_8way(tstate + 64, ostate + 64, W + 256, W + 256);
    PBKDF2_SHA256_80_128_8way(tstate + 128, ostate + 128, W + 512, W + 512);
    /* De-interleave into contiguous lanes for the 6-way scrypt cores. */
    for (j = 0; j < 3; j++)
        for (i = 0; i < 32; i++)
            for (k = 0; k < 8; k++)
                X[8 * 32 * j + k * 32 + i] = W[8 * 32 * j + 8 * i + k];
    /* 4 calls x 6 lanes each = 24 lanes total. */
    scrypt_core_6way(X + 0 * 32, V);
    scrypt_core_6way(X + 6 * 32, V);
    scrypt_core_6way(X + 12 * 32, V);
    scrypt_core_6way(X + 18 * 32, V);
    /* Re-interleave for the final 8-way PBKDF2. */
    for (j = 0; j < 3; j++)
        for (i = 0; i < 32; i++)
            for (k = 0; k < 8; k++)
                W[8 * 32 * j + 8 * i + k] = X[8 * 32 * j + k * 32 + i];
    PBKDF2_SHA256_128_32_8way(tstate + 0, ostate + 0, W + 0, W + 0);
    PBKDF2_SHA256_128_32_8way(tstate + 64, ostate + 64, W + 256, W + 256);
    PBKDF2_SHA256_128_32_8way(tstate + 128, ostate + 128, W + 512, W + 512);
    /* De-interleave the per-lane digests. */
    for (j = 0; j < 3; j++)
        for (i = 0; i < 8; i++)
            for (k = 0; k < 8; k++)
                output[8 * 8 * j + k * 8 + i] = W[8 * 32 * j + 8 * i + k];
}
#endif /* HAVE_SCRYPT_6WAY */
/* Scan nonces for scrypt: dispatches to the widest compiled-in kernel
 * (scalar / 3-way / 4-way / 12-way / 24-way) based on runtime throughput.
 * Returns 1 with pdata[19] set to the winning nonce, 0 when exhausted.
 * *hashes_done receives the number of nonces tried. */
int scanhash_scrypt(int thr_id, uint32_t *pdata,
    unsigned char *scratchbuf, const uint32_t *ptarget,
    uint32_t max_nonce, unsigned long *hashes_done)
{
    uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
    uint32_t midstate[8];
    uint32_t n = pdata[19] - 1;    /* pre-decrement so ++n starts at pdata[19] */
    const uint32_t Htarg = ptarget[7];
    int throughput = scrypt_best_throughput();
    int i;

#ifdef HAVE_SHA256_4WAY
    if (sha256_use_4way())
        throughput *= 4;
#endif

    /* Replicate the 80-byte header into one slot per lane. */
    for (i = 0; i < throughput; i++)
        memcpy(data + i * 20, pdata, 80);

    /* Midstate covers the first 64 bytes, which never change per nonce. */
    sha256_init(midstate);
    sha256_transform(midstate, data, 0);

    do {
        /* Give each lane its own nonce (word 19 of the header). */
        for (i = 0; i < throughput; i++)
            data[i * 20 + 19] = ++n;

        /* Dispatch to the matching kernel; fall through to scalar. */
#if defined(HAVE_SHA256_4WAY)
        if (throughput == 4)
            scrypt_1024_1_1_256_4way(data, hash, midstate, scratchbuf);
        else
#endif
#if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY)
        if (throughput == 12)
            scrypt_1024_1_1_256_12way(data, hash, midstate, scratchbuf);
        else
#endif
#if defined(HAVE_SCRYPT_6WAY)
        if (throughput == 24)
            scrypt_1024_1_1_256_24way(data, hash, midstate, scratchbuf);
        else
#endif
#if defined(HAVE_SCRYPT_3WAY)
        if (throughput == 3)
            scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf);
        else
#endif
        scrypt_1024_1_1_256(data, hash, midstate, scratchbuf);

        /* Quick reject on the top word, then full target comparison. */
        for (i = 0; i < throughput; i++) {
            if (hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget)) {
                *hashes_done = n - pdata[19] + 1;
                pdata[19] = data[i * 20 + 19];
                return 1;
            }
        }
    } while (n < max_nonce && !work_restart[thr_id].restart);

    *hashes_done = n - pdata[19] + 1;
    pdata[19] = n;
    return 0;
}
/* Register scrypt's scanhash and hash entry points with the algo gate.
 * Always succeeds. */
bool register_scrypt_algo( algo_gate_t* gate )
{
  gate->scanhash = &scanhash_scrypt;
  gate->hash     = &scrypt_hash;
//  gate->get_max64 = scrypt_get_max64;
  return true;
}  /* fixed: stray ';' after function body removed */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,630 +0,0 @@
/*
* Copyright 2011 ArtForz
* Copyright 2011-2013 pooler
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version. See COPYING for more details.
*/
#include "../cpuminer-config.h"
#include "../miner.h"
#include <string.h>
#include <stdint.h>
#if defined(__arm__) && defined(__APCS_32__)
#define EXTERN_SHA256
#endif
/* SHA-256 initial hash value H(0) (FIPS 180-4, section 5.3.3). */
static const uint32_t sha256_h[8] = {
    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
};

/* SHA-256 round constants K (FIPS 180-4, section 4.2.2). */
static const uint32_t sha256_k[64] = {
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
/* Load the SHA-256 initial hash value into state[0..7]. */
void sha256_init(uint32_t *state)
{
    memcpy(state, sha256_h, sizeof(sha256_h));
}
/* Elementary functions used by SHA256 */
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ (x >> 3))
#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ (x >> 10))
/* SHA256 round function */
#define RND(a, b, c, d, e, f, g, h, k) \
do { \
t0 = h + S1(e) + Ch(e, f, g) + k; \
t1 = S0(a) + Maj(a, b, c); \
d += t0; \
h = t0 + t1; \
} while (0)
/* Adjusted round function for rotating state */
#define RNDr(S, W, i) \
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
S[(66 - i) % 8], S[(67 - i) % 8], \
S[(68 - i) % 8], S[(69 - i) % 8], \
S[(70 - i) % 8], S[(71 - i) % 8], \
W[i] + sha256_k[i])
#ifndef EXTERN_SHA256
/*
* SHA256 block compression function. The 256-bit state is transformed via
* the 512-bit input block to produce a new state.
*/
/* SHA-256 block compression: transform the 256-bit state with one
 * 512-bit block. swap != 0 byte-swaps the block words first (for
 * big-endian message data on a little-endian host). Rounds are fully
 * unrolled; RNDr rotates the working variables via its index. */
void sha256_transform(uint32_t *state, const uint32_t *block, int swap)
{
    uint32_t W[64];
    uint32_t S[8];
    uint32_t t0, t1;
    int i;

    /* 1. Prepare message schedule W. */
    if (swap) {
        for (i = 0; i < 16; i++)
            W[i] = swab32(block[i]);
    } else
        memcpy(W, block, 64);
    for (i = 16; i < 64; i += 2) {
        W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
        W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
    }

    /* 2. Initialize working variables. */
    memcpy(S, state, 32);

    /* 3. Mix (64 rounds, unrolled so RNDr's index math is constant). */
    RNDr(S, W, 0);
    RNDr(S, W, 1);
    RNDr(S, W, 2);
    RNDr(S, W, 3);
    RNDr(S, W, 4);
    RNDr(S, W, 5);
    RNDr(S, W, 6);
    RNDr(S, W, 7);
    RNDr(S, W, 8);
    RNDr(S, W, 9);
    RNDr(S, W, 10);
    RNDr(S, W, 11);
    RNDr(S, W, 12);
    RNDr(S, W, 13);
    RNDr(S, W, 14);
    RNDr(S, W, 15);
    RNDr(S, W, 16);
    RNDr(S, W, 17);
    RNDr(S, W, 18);
    RNDr(S, W, 19);
    RNDr(S, W, 20);
    RNDr(S, W, 21);
    RNDr(S, W, 22);
    RNDr(S, W, 23);
    RNDr(S, W, 24);
    RNDr(S, W, 25);
    RNDr(S, W, 26);
    RNDr(S, W, 27);
    RNDr(S, W, 28);
    RNDr(S, W, 29);
    RNDr(S, W, 30);
    RNDr(S, W, 31);
    RNDr(S, W, 32);
    RNDr(S, W, 33);
    RNDr(S, W, 34);
    RNDr(S, W, 35);
    RNDr(S, W, 36);
    RNDr(S, W, 37);
    RNDr(S, W, 38);
    RNDr(S, W, 39);
    RNDr(S, W, 40);
    RNDr(S, W, 41);
    RNDr(S, W, 42);
    RNDr(S, W, 43);
    RNDr(S, W, 44);
    RNDr(S, W, 45);
    RNDr(S, W, 46);
    RNDr(S, W, 47);
    RNDr(S, W, 48);
    RNDr(S, W, 49);
    RNDr(S, W, 50);
    RNDr(S, W, 51);
    RNDr(S, W, 52);
    RNDr(S, W, 53);
    RNDr(S, W, 54);
    RNDr(S, W, 55);
    RNDr(S, W, 56);
    RNDr(S, W, 57);
    RNDr(S, W, 58);
    RNDr(S, W, 59);
    RNDr(S, W, 60);
    RNDr(S, W, 61);
    RNDr(S, W, 62);
    RNDr(S, W, 63);

    /* 4. Mix local working variables into global state */
    for (i = 0; i < 8; i++)
        state[i] += S[i];
}
#endif /* EXTERN_SHA256 */
/* Precomputed second-hash padding block: a 32-byte digest occupies
 * words 0..7 (filled in at run time); words 8..15 hold the SHA-256
 * padding for a 32-byte (256-bit) message: 0x80 pad bit + length 0x100. */
static const uint32_t sha256d_hash1[16] = {
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x80000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000100
};
/* Double SHA-256 of an 80-byte header (two blocks), with the final
 * digest byte-swapped into big-endian word order. */
static void sha256d_80_swap(uint32_t *hash, const uint32_t *data)
{
    uint32_t state[16];
    int k;

    /* First hash: two 64-byte blocks of the header. */
    sha256_init(state);
    sha256_transform(state, data, 0);
    sha256_transform(state, data + 16, 0);
    /* Append the precomputed padding for a 32-byte message. */
    memcpy(state + 8, sha256d_hash1 + 8, 32);
    /* Second hash over the padded digest. */
    sha256_init(hash);
    sha256_transform(hash, state, 0);
    for (k = 0; k < 8; k++)
        hash[k] = swab32(hash[k]);
}
/* Double SHA-256 of an arbitrary-length message:
 * hash = SHA256(SHA256(data)), written as 32 big-endian bytes. */
void sha256d(unsigned char *hash, const unsigned char *data, int len)
{
    uint32_t S[16], T[16];
    int i, r;

    sha256_init(S);
    /* Consume 64-byte blocks; r tracks remaining bytes and goes negative
     * so the loop also emits the padding block(s): the 0x80 pad bit plus
     * the 8-byte bit length, hence the "r > -9" bound. */
    for (r = len; r > -9; r -= 64) {
        if (r < 64)
            memset(T, 0, 64);
        memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r));
        if (r >= 0 && r < 64)
            ((unsigned char *)T)[r] = 0x80;    /* append the pad bit */
        for (i = 0; i < 16; i++)
            T[i] = be32dec(T + i);
        if (r < 56)
            T[15] = 8 * len;    /* message length in bits */
        sha256_transform(S, T, 0);
    }
    /* Second hash: pad the 32-byte digest with the precomputed tail. */
    memcpy(S + 8, sha256d_hash1 + 8, 32);
    sha256_init(T);
    sha256_transform(T, S, 0);
    for (i = 0; i < 8; i++)
        be32enc((uint32_t *)hash + i, T[i]);
}
/* Partially extend the message schedule W[16..31] with every term that
 * does not depend on W[3] (the nonce word). sha256d_ms() adds the
 * missing nonce-dependent terms per attempt, so this runs once per work. */
static inline void sha256d_preextend(uint32_t *W)
{
    W[16] = s1(W[14]) + W[ 9] + s0(W[ 1]) + W[ 0];
    W[17] = s1(W[15]) + W[10] + s0(W[ 2]) + W[ 1];
    W[18] = s1(W[16]) + W[11] + W[ 2];
    W[19] = s1(W[17]) + W[12] + s0(W[ 4]);
    W[20] = W[13] + s0(W[ 5]) + W[ 4];
    W[21] = W[14] + s0(W[ 6]) + W[ 5];
    W[22] = W[15] + s0(W[ 7]) + W[ 6];
    W[23] = W[16] + s0(W[ 8]) + W[ 7];
    W[24] = W[17] + s0(W[ 9]) + W[ 8];
    W[25] = s0(W[10]) + W[ 9];
    W[26] = s0(W[11]) + W[10];
    W[27] = s0(W[12]) + W[11];
    W[28] = s0(W[13]) + W[12];
    W[29] = s0(W[14]) + W[13];
    W[30] = s0(W[15]) + W[14];
    W[31] = s0(W[16]) + W[15];
}
/* Run the first three SHA-256 rounds, which use only W[0..2] and so do
 * not depend on the nonce (W[3]); done once per work unit. */
static inline void sha256d_prehash(uint32_t *S, const uint32_t *W)
{
    uint32_t t0, t1;    /* scratch for the RNDr macro */
    RNDr(S, W, 0);
    RNDr(S, W, 1);
    RNDr(S, W, 2);
}
#ifdef EXTERN_SHA256
void sha256d_ms(uint32_t *hash, uint32_t *W,
const uint32_t *midstate, const uint32_t *prehash);
#else
/* Nonce-scanning double SHA-256 with midstate reuse.
 * W: preextended schedule (see sha256d_preextend) whose nonce word W[3]
 *    changes every call; midstate: state after the first 64-byte block;
 *    prehash: midstate advanced 3 rounds (see sha256d_prehash).
 * Only the final state words needed for target checks are fully
 * computed; hash[7] in particular is valid for the quick reject. */
static inline void sha256d_ms(uint32_t *hash, uint32_t *W,
    const uint32_t *midstate, const uint32_t *prehash)
{
    uint32_t S[64];
    uint32_t t0, t1;
    int i;

    /* Save the preextended schedule entries that the nonce-dependent
     * extension below overwrites, so they can be restored afterwards. */
    S[18] = W[18];
    S[19] = W[19];
    S[20] = W[20];
    S[22] = W[22];
    S[23] = W[23];
    S[24] = W[24];
    S[30] = W[30];
    S[31] = W[31];

    /* Complete the schedule with the terms that depend on W[3]. */
    W[18] += s0(W[3]);
    W[19] += W[3];
    W[20] += s1(W[18]);
    W[21] = s1(W[19]);
    W[22] += s1(W[20]);
    W[23] += s1(W[21]);
    W[24] += s1(W[22]);
    W[25] = s1(W[23]) + W[18];
    W[26] = s1(W[24]) + W[19];
    W[27] = s1(W[25]) + W[20];
    W[28] = s1(W[26]) + W[21];
    W[29] = s1(W[27]) + W[22];
    W[30] += s1(W[28]) + W[23];
    W[31] += s1(W[29]) + W[24];
    for (i = 32; i < 64; i += 2) {
        W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
        W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
    }

    /* Rounds 0-2 are precomputed in prehash; resume at round 3. */
    memcpy(S, prehash, 32);
    RNDr(S, W, 3);
    RNDr(S, W, 4);
    RNDr(S, W, 5);
    RNDr(S, W, 6);
    RNDr(S, W, 7);
    RNDr(S, W, 8);
    RNDr(S, W, 9);
    RNDr(S, W, 10);
    RNDr(S, W, 11);
    RNDr(S, W, 12);
    RNDr(S, W, 13);
    RNDr(S, W, 14);
    RNDr(S, W, 15);
    RNDr(S, W, 16);
    RNDr(S, W, 17);
    RNDr(S, W, 18);
    RNDr(S, W, 19);
    RNDr(S, W, 20);
    RNDr(S, W, 21);
    RNDr(S, W, 22);
    RNDr(S, W, 23);
    RNDr(S, W, 24);
    RNDr(S, W, 25);
    RNDr(S, W, 26);
    RNDr(S, W, 27);
    RNDr(S, W, 28);
    RNDr(S, W, 29);
    RNDr(S, W, 30);
    RNDr(S, W, 31);
    RNDr(S, W, 32);
    RNDr(S, W, 33);
    RNDr(S, W, 34);
    RNDr(S, W, 35);
    RNDr(S, W, 36);
    RNDr(S, W, 37);
    RNDr(S, W, 38);
    RNDr(S, W, 39);
    RNDr(S, W, 40);
    RNDr(S, W, 41);
    RNDr(S, W, 42);
    RNDr(S, W, 43);
    RNDr(S, W, 44);
    RNDr(S, W, 45);
    RNDr(S, W, 46);
    RNDr(S, W, 47);
    RNDr(S, W, 48);
    RNDr(S, W, 49);
    RNDr(S, W, 50);
    RNDr(S, W, 51);
    RNDr(S, W, 52);
    RNDr(S, W, 53);
    RNDr(S, W, 54);
    RNDr(S, W, 55);
    RNDr(S, W, 56);
    RNDr(S, W, 57);
    RNDr(S, W, 58);
    RNDr(S, W, 59);
    RNDr(S, W, 60);
    RNDr(S, W, 61);
    RNDr(S, W, 62);
    RNDr(S, W, 63);

    /* Feed-forward of the second block's input chaining value. */
    for (i = 0; i < 8; i++)
        S[i] += midstate[i];

    /* Restore the saved preextended entries for the next nonce. */
    W[18] = S[18];
    W[19] = S[19];
    W[20] = S[20];
    W[22] = S[22];
    W[23] = S[23];
    W[24] = S[24];
    W[30] = S[30];
    W[31] = S[31];

    /* Second hash: digest in S[0..7], precomputed padding in S[8..15];
     * extend the schedule, folding in the known padding constants. */
    memcpy(S + 8, sha256d_hash1 + 8, 32);
    S[16] = s1(sha256d_hash1[14]) + sha256d_hash1[ 9] + s0(S[ 1]) + S[ 0];
    S[17] = s1(sha256d_hash1[15]) + sha256d_hash1[10] + s0(S[ 2]) + S[ 1];
    S[18] = s1(S[16]) + sha256d_hash1[11] + s0(S[ 3]) + S[ 2];
    S[19] = s1(S[17]) + sha256d_hash1[12] + s0(S[ 4]) + S[ 3];
    S[20] = s1(S[18]) + sha256d_hash1[13] + s0(S[ 5]) + S[ 4];
    S[21] = s1(S[19]) + sha256d_hash1[14] + s0(S[ 6]) + S[ 5];
    S[22] = s1(S[20]) + sha256d_hash1[15] + s0(S[ 7]) + S[ 6];
    S[23] = s1(S[21]) + S[16] + s0(sha256d_hash1[ 8]) + S[ 7];
    S[24] = s1(S[22]) + S[17] + s0(sha256d_hash1[ 9]) + sha256d_hash1[ 8];
    S[25] = s1(S[23]) + S[18] + s0(sha256d_hash1[10]) + sha256d_hash1[ 9];
    S[26] = s1(S[24]) + S[19] + s0(sha256d_hash1[11]) + sha256d_hash1[10];
    S[27] = s1(S[25]) + S[20] + s0(sha256d_hash1[12]) + sha256d_hash1[11];
    S[28] = s1(S[26]) + S[21] + s0(sha256d_hash1[13]) + sha256d_hash1[12];
    S[29] = s1(S[27]) + S[22] + s0(sha256d_hash1[14]) + sha256d_hash1[13];
    S[30] = s1(S[28]) + S[23] + s0(sha256d_hash1[15]) + sha256d_hash1[14];
    S[31] = s1(S[29]) + S[24] + s0(S[16]) + sha256d_hash1[15];
    for (i = 32; i < 60; i += 2) {
        S[i] = s1(S[i - 2]) + S[i - 7] + s0(S[i - 15]) + S[i - 16];
        S[i+1] = s1(S[i - 1]) + S[i - 6] + s0(S[i - 14]) + S[i - 15];
    }
    /* Only schedule entries up to S[60] are needed below. */
    S[60] = s1(S[58]) + S[53] + s0(S[45]) + S[44];

    sha256_init(hash);
    RNDr(hash, S, 0);
    RNDr(hash, S, 1);
    RNDr(hash, S, 2);
    RNDr(hash, S, 3);
    RNDr(hash, S, 4);
    RNDr(hash, S, 5);
    RNDr(hash, S, 6);
    RNDr(hash, S, 7);
    RNDr(hash, S, 8);
    RNDr(hash, S, 9);
    RNDr(hash, S, 10);
    RNDr(hash, S, 11);
    RNDr(hash, S, 12);
    RNDr(hash, S, 13);
    RNDr(hash, S, 14);
    RNDr(hash, S, 15);
    RNDr(hash, S, 16);
    RNDr(hash, S, 17);
    RNDr(hash, S, 18);
    RNDr(hash, S, 19);
    RNDr(hash, S, 20);
    RNDr(hash, S, 21);
    RNDr(hash, S, 22);
    RNDr(hash, S, 23);
    RNDr(hash, S, 24);
    RNDr(hash, S, 25);
    RNDr(hash, S, 26);
    RNDr(hash, S, 27);
    RNDr(hash, S, 28);
    RNDr(hash, S, 29);
    RNDr(hash, S, 30);
    RNDr(hash, S, 31);
    RNDr(hash, S, 32);
    RNDr(hash, S, 33);
    RNDr(hash, S, 34);
    RNDr(hash, S, 35);
    RNDr(hash, S, 36);
    RNDr(hash, S, 37);
    RNDr(hash, S, 38);
    RNDr(hash, S, 39);
    RNDr(hash, S, 40);
    RNDr(hash, S, 41);
    RNDr(hash, S, 42);
    RNDr(hash, S, 43);
    RNDr(hash, S, 44);
    RNDr(hash, S, 45);
    RNDr(hash, S, 46);
    RNDr(hash, S, 47);
    RNDr(hash, S, 48);
    RNDr(hash, S, 49);
    RNDr(hash, S, 50);
    RNDr(hash, S, 51);
    RNDr(hash, S, 52);
    RNDr(hash, S, 53);
    RNDr(hash, S, 54);
    RNDr(hash, S, 55);
    RNDr(hash, S, 56);

    /* Rounds 57-60 expanded by hand: only the additions needed to make
     * the final state words (hash[7] with feed-forward) correct. */
    hash[2] += hash[6] + S1(hash[3]) + Ch(hash[3], hash[4], hash[5])
             + S[57] + sha256_k[57];
    hash[1] += hash[5] + S1(hash[2]) + Ch(hash[2], hash[3], hash[4])
             + S[58] + sha256_k[58];
    hash[0] += hash[4] + S1(hash[1]) + Ch(hash[1], hash[2], hash[3])
             + S[59] + sha256_k[59];
    hash[7] += hash[3] + S1(hash[0]) + Ch(hash[0], hash[1], hash[2])
             + S[60] + sha256_k[60]
             + sha256_h[7];
}
#endif /* EXTERN_SHA256 */
#ifdef HAVE_SHA256_4WAY
void sha256d_ms_4way(uint32_t *hash, uint32_t *data,
const uint32_t *midstate, const uint32_t *prehash);
/* 4-way SSE2 sha256d nonce scan. Returns 1 with pdata[19] set to the
 * winning nonce, 0 when exhausted; *hashes_done counts nonces tried. */
static inline int scanhash_sha256d_4way(int thr_id, uint32_t *pdata,
    const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done)
{
    uint32_t data[4 * 64] __attribute__((aligned(128)));
    uint32_t hash[4 * 8] __attribute__((aligned(32)));
    uint32_t midstate[4 * 8] __attribute__((aligned(32)));
    uint32_t prehash[4 * 8] __attribute__((aligned(32)));
    uint32_t n = pdata[19] - 1;    /* pre-decrement so ++n starts at pdata[19] */
    const uint32_t first_nonce = pdata[19];
    const uint32_t Htarg = ptarget[7];
    int i, j;

    /* Preextend the second-block schedule, then broadcast it into four
     * interleaved lanes (descending i so the in-place expansion is safe). */
    memcpy(data, pdata + 16, 64);
    sha256d_preextend(data);
    for (i = 31; i >= 0; i--)
        for (j = 0; j < 4; j++)
            data[i * 4 + j] = data[i];

    /* Midstate over the first block; prehash = midstate + 3 rounds. */
    sha256_init(midstate);
    sha256_transform(midstate, pdata, 0);
    memcpy(prehash, midstate, 32);
    sha256d_prehash(prehash, pdata + 16);
    /* Broadcast both into the four interleaved lanes. */
    for (i = 7; i >= 0; i--) {
        for (j = 0; j < 4; j++) {
            midstate[i * 4 + j] = midstate[i];
            prehash[i * 4 + j] = prehash[i];
        }
    }

    do {
        /* One consecutive nonce per lane (lane-interleaved word 3). */
        for (i = 0; i < 4; i++)
            data[4 * 3 + i] = ++n;

        sha256d_ms_4way(hash, data, midstate, prehash);

        for (i = 0; i < 4; i++) {
            /* Quick reject on the top word, then recompute the full
             * hash for an exact target comparison. */
            if (swab32(hash[4 * 7 + i]) <= Htarg) {
                pdata[19] = data[4 * 3 + i];
                sha256d_80_swap(hash, pdata);
                if (fulltest(hash, ptarget)) {
                    *hashes_done = n - first_nonce + 1;
                    return 1;
                }
            }
        }
    } while (n < max_nonce && !work_restart[thr_id].restart);

    *hashes_done = n - first_nonce + 1;
    pdata[19] = n;
    return 0;
}
#endif /* HAVE_SHA256_4WAY */
#ifdef HAVE_SHA256_8WAY
void sha256d_ms_8way(uint32_t *hash, uint32_t *data,
const uint32_t *midstate, const uint32_t *prehash);
/* 8-way AVX2 sha256d nonce scan; identical structure to the 4-way
 * variant with eight interleaved lanes. Returns 1 with pdata[19] set
 * to the winning nonce, 0 when exhausted. */
static inline int scanhash_sha256d_8way(int thr_id, uint32_t *pdata,
    const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done)
{
    uint32_t data[8 * 64] __attribute__((aligned(128)));
    uint32_t hash[8 * 8] __attribute__((aligned(32)));
    uint32_t midstate[8 * 8] __attribute__((aligned(32)));
    uint32_t prehash[8 * 8] __attribute__((aligned(32)));
    uint32_t n = pdata[19] - 1;    /* pre-decrement so ++n starts at pdata[19] */
    const uint32_t first_nonce = pdata[19];
    const uint32_t Htarg = ptarget[7];
    int i, j;

    /* Preextend the second-block schedule, then broadcast it into eight
     * interleaved lanes (descending i so the in-place expansion is safe). */
    memcpy(data, pdata + 16, 64);
    sha256d_preextend(data);
    for (i = 31; i >= 0; i--)
        for (j = 0; j < 8; j++)
            data[i * 8 + j] = data[i];

    /* Midstate over the first block; prehash = midstate + 3 rounds. */
    sha256_init(midstate);
    sha256_transform(midstate, pdata, 0);
    memcpy(prehash, midstate, 32);
    sha256d_prehash(prehash, pdata + 16);
    /* Broadcast both into the eight interleaved lanes. */
    for (i = 7; i >= 0; i--) {
        for (j = 0; j < 8; j++) {
            midstate[i * 8 + j] = midstate[i];
            prehash[i * 8 + j] = prehash[i];
        }
    }

    do {
        /* One consecutive nonce per lane (lane-interleaved word 3). */
        for (i = 0; i < 8; i++)
            data[8 * 3 + i] = ++n;

        sha256d_ms_8way(hash, data, midstate, prehash);

        for (i = 0; i < 8; i++) {
            /* Quick reject on the top word, then full recheck. */
            if (swab32(hash[8 * 7 + i]) <= Htarg) {
                pdata[19] = data[8 * 3 + i];
                sha256d_80_swap(hash, pdata);
                if (fulltest(hash, ptarget)) {
                    *hashes_done = n - first_nonce + 1;
                    return 1;
                }
            }
        }
    } while (n < max_nonce && !work_restart[thr_id].restart);

    *hashes_done = n - first_nonce + 1;
    pdata[19] = n;
    return 0;
}
#endif /* HAVE_SHA256_8WAY */
/* Scalar sha256d nonce scan; delegates to the 8-way/4-way variants when
 * available at run time. Returns 1 with pdata[19] set to the winning
 * nonce, 0 when exhausted; *hashes_done counts nonces tried. */
int scanhash_sha256d(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
    uint32_t max_nonce, unsigned long *hashes_done)
{
    uint32_t data[64] __attribute__((aligned(128)));
    uint32_t hash[8] __attribute__((aligned(32)));
    uint32_t midstate[8] __attribute__((aligned(32)));
    uint32_t prehash[8] __attribute__((aligned(32)));
    uint32_t n = pdata[19] - 1;    /* pre-decrement so ++n starts at pdata[19] */
    const uint32_t first_nonce = pdata[19];
    const uint32_t Htarg = ptarget[7];

#ifdef HAVE_SHA256_8WAY
    if (sha256_use_8way())
        return scanhash_sha256d_8way(thr_id, pdata, ptarget,
            max_nonce, hashes_done);
#endif
#ifdef HAVE_SHA256_4WAY
    if (sha256_use_4way())
        return scanhash_sha256d_4way(thr_id, pdata, ptarget,
            max_nonce, hashes_done);
#endif

    /* Precompute everything that does not depend on the nonce. */
    memcpy(data, pdata + 16, 64);
    sha256d_preextend(data);
    sha256_init(midstate);
    sha256_transform(midstate, pdata, 0);
    memcpy(prehash, midstate, 32);
    sha256d_prehash(prehash, pdata + 16);

    do {
        data[3] = ++n;    /* nonce is word 3 of the second block */
        sha256d_ms(hash, data, midstate, prehash);
        /* Quick reject on the top word, then full recheck. */
        if (swab32(hash[7]) <= Htarg) {
            pdata[19] = data[3];
            sha256d_80_swap(hash, pdata);
            if (fulltest(hash, ptarget)) {
                *hashes_done = n - first_nonce + 1;
                return 1;
            }
        }
    } while (n < max_nonce && !work_restart[thr_id].restart);

    *hashes_done = n - first_nonce + 1;
    pdata[19] = n;
    return 0;
}

View File

@@ -32,12 +32,10 @@
#include <string.h>
#include <stdint.h>
#include "algo/groestl/sph_groestl.h"
#include "algo/keccak/sph_keccak.h"
#ifdef NO_AES_NI
#include "algo/groestl/sse2/grso.h"
#include "algo/groestl/sse2/grso-macro.c"
#else
#ifndef NO_AES_NI
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/echo/aes_ni/hash_api.h"
#endif
@@ -61,17 +59,21 @@
#define POK_DATA_MASK 0xFFFF0000
typedef struct {
#ifndef NO_AES_NI
hashState_groestl groestl;
#ifdef NO_AES_NI
sph_groestl512_context groestl;
#else
hashState_groestl groestl;
#endif
sph_keccak512_context keccak;
sph_keccak512_context keccak;
} zr5_ctx_holder;
zr5_ctx_holder zr5_ctx;
void init_zr5_ctx()
{
#ifndef NO_AES_NI
#ifdef NO_AES_NI
sph_groestl512_init( &zr5_ctx.groestl );
#else
init_groestl( &zr5_ctx.groestl );
#endif
sph_keccak512_init(&zr5_ctx.keccak);
@@ -88,10 +90,6 @@ DATA_ALIGN16(sph_u64 hashctB);
//memset(hash, 0, 128);
#ifdef NO_AES_NI
grsoState sts_grs;
#endif
static const int arrOrder[][4] =
{
{ 0, 1, 2, 3 }, { 0, 1, 3, 2 }, { 0, 2, 1, 3 }, { 0, 2, 3, 1 },
@@ -123,9 +121,8 @@ static const int arrOrder[][4] =
break;
case 1:
#ifdef NO_AES_NI
{GRS_I;
GRS_U;
GRS_C; }
sph_groestl512 (&ctx.groestl, hash, 64);
sph_groestl512_close(&ctx.groestl, hash);
#else
update_groestl( &ctx.groestl, (char*)hash,512);
final_groestl( &ctx.groestl, (char*)hash);