mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.5.0
This commit is contained in:
@@ -12,40 +12,36 @@ void blakecoin_close(void *cc, void *dst);
|
||||
#include <memory.h>
|
||||
#include <openssl/sha.h>
|
||||
|
||||
/* Move init out of loop, so init once externally,
|
||||
* and then use one single memcpy */
|
||||
static sph_blake256_context blake_mid;
|
||||
static bool ctx_midstate_done = false;
|
||||
// context management is staged for efficiency.
|
||||
// 1. global initial ctx cached on startup
|
||||
// 2. per-thread midstate ctx cache refreshed every scan
|
||||
// 3. local ctx for final hash calculation
|
||||
|
||||
static void init_blake_hash(void)
|
||||
static sph_blake256_context blake_init_ctx;
|
||||
static __thread sph_blake256_context blake_mid_ctx;
|
||||
|
||||
static void blake_midstate_init( const void* input )
|
||||
{
|
||||
blakecoin_init(&blake_mid);
|
||||
ctx_midstate_done = true;
|
||||
// copy cached initial state
|
||||
memcpy( &blake_mid_ctx, &blake_init_ctx, sizeof blake_mid_ctx );
|
||||
blakecoin( &blake_mid_ctx, input, 64 );
|
||||
}
|
||||
|
||||
void blakecoinhash(void *state, const void *input)
|
||||
void blakecoinhash( void *state, const void *input )
|
||||
{
|
||||
sph_blake256_context ctx;
|
||||
|
||||
uint8_t hash[64];
|
||||
uint8_t *ending = (uint8_t*) input;
|
||||
ending += 64;
|
||||
uint8_t *ending = (uint8_t*) input + 64;
|
||||
|
||||
// do one memcopy to get a fresh context
|
||||
if (!ctx_midstate_done) {
|
||||
init_blake_hash();
|
||||
blakecoin(&blake_mid, input, 64);
|
||||
}
|
||||
memcpy(&ctx, &blake_mid, sizeof(blake_mid));
|
||||
|
||||
blakecoin(&ctx, ending, 16);
|
||||
blakecoin_close(&ctx, hash);
|
||||
|
||||
memcpy(state, hash, 32);
|
||||
// copy cached midstate
|
||||
memcpy( &ctx, &blake_mid_ctx, sizeof ctx );
|
||||
blakecoin( &ctx, ending, 16 );
|
||||
blakecoin_close( &ctx, hash );
|
||||
memcpy( state, hash, 32 );
|
||||
}
|
||||
|
||||
int scanhash_blakecoin(int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done)
|
||||
int scanhash_blakecoin( int thr_id, struct work *work, uint32_t max_nonce,
|
||||
uint64_t *hashes_done )
|
||||
{
|
||||
uint32_t *pdata = work->data;
|
||||
uint32_t *ptarget = work->target;
|
||||
@@ -57,16 +53,14 @@ int scanhash_blakecoin(int thr_id, struct work *work, uint32_t max_nonce,
|
||||
|
||||
uint32_t n = first_nonce;
|
||||
|
||||
ctx_midstate_done = false;
|
||||
|
||||
if (opt_benchmark)
|
||||
HTarget = 0x7f;
|
||||
|
||||
// we need big endian data...
|
||||
// be32enc_array( endiandata, pdata, 19 );
|
||||
for (int kk=0; kk < 19; kk++)
|
||||
be32enc(&endiandata[kk], ((uint32_t*)pdata)[kk]);
|
||||
|
||||
blake_midstate_init( endiandata );
|
||||
|
||||
#ifdef DEBUG_ALGO
|
||||
applog(LOG_DEBUG,"[%d] Target=%08x %08x", thr_id, ptarget[6], ptarget[7]);
|
||||
@@ -117,6 +111,7 @@ bool register_vanilla_algo( algo_gate_t* gate )
|
||||
gate->hash = (void*)&blakecoinhash;
|
||||
gate->hash_alt = (void*)&blakecoinhash;
|
||||
gate->get_max64 = (void*)&blakecoin_get_max64;
|
||||
blakecoin_init( &blake_init_ctx );
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -317,7 +317,6 @@ static const sph_u64 blkIV512[8] = {
|
||||
|
||||
|
||||
#define COMPRESS64 do { \
|
||||
int r; \
|
||||
int b=0; \
|
||||
sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; \
|
||||
sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; \
|
||||
|
||||
@@ -1,133 +0,0 @@
|
||||
/*
|
||||
---------------------------------------------------------------------------
|
||||
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
|
||||
|
||||
LICENSE TERMS
|
||||
|
||||
The redistribution and use of this software (with or without changes)
|
||||
is allowed without the payment of fees or royalties provided that:
|
||||
|
||||
1. source code distributions include the above copyright notice, this
|
||||
list of conditions and the following disclaimer;
|
||||
|
||||
2. binary distributions include the above copyright notice, this list
|
||||
of conditions and the following disclaimer in their documentation;
|
||||
|
||||
3. the name of the copyright holder is not used to endorse products
|
||||
built using this software without specific written permission.
|
||||
|
||||
DISCLAIMER
|
||||
|
||||
This software is provided 'as is' with no explicit or implied warranties
|
||||
in respect of its properties, including, but not limited to, correctness
|
||||
and/or fitness for purpose.
|
||||
---------------------------------------------------------------------------
|
||||
Issue Date: 20/12/2007
|
||||
*/
|
||||
|
||||
#ifndef _BRG_ENDIAN_H
|
||||
#define _BRG_ENDIAN_H
|
||||
|
||||
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
|
||||
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
|
||||
|
||||
/* Include files where endian defines and byteswap functions may reside */
|
||||
#if defined( __sun )
|
||||
# include <sys/isa_defs.h>
|
||||
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
|
||||
# include <sys/endian.h>
|
||||
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
|
||||
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
|
||||
# include <machine/endian.h>
|
||||
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
|
||||
# if !defined( __MINGW32__ ) && !defined( _AIX )
|
||||
# include <endian.h>
|
||||
# if !defined( __BEOS__ )
|
||||
# include <byteswap.h>
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Now attempt to set the define for platform byte order using any */
|
||||
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
|
||||
/* seem to encompass most endian symbol definitions */
|
||||
|
||||
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
|
||||
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
|
||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
||||
# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
|
||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
||||
# endif
|
||||
#elif defined( BIG_ENDIAN )
|
||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
||||
#elif defined( LITTLE_ENDIAN )
|
||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
|
||||
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
|
||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
||||
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
|
||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
||||
# endif
|
||||
#elif defined( _BIG_ENDIAN )
|
||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
||||
#elif defined( _LITTLE_ENDIAN )
|
||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
|
||||
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
|
||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
||||
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
||||
# endif
|
||||
#elif defined( __BIG_ENDIAN )
|
||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
||||
#elif defined( __LITTLE_ENDIAN )
|
||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
|
||||
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
|
||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
||||
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
|
||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
||||
# endif
|
||||
#elif defined( __BIG_ENDIAN__ )
|
||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
||||
#elif defined( __LITTLE_ENDIAN__ )
|
||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
/* if the platform byte order could not be determined, then try to */
|
||||
/* set this define using common machine defines */
|
||||
#if !defined(PLATFORM_BYTE_ORDER)
|
||||
|
||||
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
|
||||
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
|
||||
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
|
||||
defined( vax ) || defined( vms ) || defined( VMS ) || \
|
||||
defined( __VMS ) || defined( _M_X64 )
|
||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
||||
|
||||
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
|
||||
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
|
||||
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
|
||||
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
|
||||
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
|
||||
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
|
||||
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )
|
||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
||||
|
||||
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
|
||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
||||
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
|
||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
||||
#else
|
||||
# error Please edit lines 126 or 128 in brg_endian.h to set the platform byte order
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,231 +0,0 @@
|
||||
/*
|
||||
---------------------------------------------------------------------------
|
||||
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
|
||||
|
||||
(a few lines added by Soeren S. Thomsen, October 2008)
|
||||
|
||||
LICENSE TERMS
|
||||
|
||||
The redistribution and use of this software (with or without changes)
|
||||
is allowed without the payment of fees or royalties provided that:
|
||||
|
||||
1. source code distributions include the above copyright notice, this
|
||||
list of conditions and the following disclaimer;
|
||||
|
||||
2. binary distributions include the above copyright notice, this list
|
||||
of conditions and the following disclaimer in their documentation;
|
||||
|
||||
3. the name of the copyright holder is not used to endorse products
|
||||
built using this software without specific written permission.
|
||||
|
||||
DISCLAIMER
|
||||
|
||||
This software is provided 'as is' with no explicit or implied warranties
|
||||
in respect of its properties, including, but not limited to, correctness
|
||||
and/or fitness for purpose.
|
||||
---------------------------------------------------------------------------
|
||||
Issue Date: 20/12/2007
|
||||
|
||||
The unsigned integer types defined here are of the form uint_<nn>t where
|
||||
<nn> is the length of the type; for example, the unsigned 32-bit type is
|
||||
'uint_32t'. These are NOT the same as the 'C99 integer types' that are
|
||||
defined in the inttypes.h and stdint.h headers since attempts to use these
|
||||
types have shown that support for them is still highly variable. However,
|
||||
since the latter are of the form uint<nn>_t, a regular expression search
|
||||
and replace (in VC++ search on 'uint_{:z}t' and replace with 'uint\1_t')
|
||||
can be used to convert the types used here to the C99 standard types.
|
||||
*/
|
||||
|
||||
#ifndef _BRG_TYPES_H
|
||||
#define _BRG_TYPES_H
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#if defined( _MSC_VER ) && ( _MSC_VER >= 1300 )
|
||||
# include <stddef.h>
|
||||
# define ptrint_t intptr_t
|
||||
#elif defined( __GNUC__ ) && ( __GNUC__ >= 3 )
|
||||
# include <stdint.h>
|
||||
# define ptrint_t intptr_t
|
||||
#else
|
||||
# define ptrint_t int
|
||||
#endif
|
||||
|
||||
#ifndef BRG_UI8
|
||||
# define BRG_UI8
|
||||
# if UCHAR_MAX == 255u
|
||||
typedef unsigned char uint_8t;
|
||||
# else
|
||||
# error Please define uint_8t as an 8-bit unsigned integer type in brg_types.h
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef BRG_UI16
|
||||
# define BRG_UI16
|
||||
# if USHRT_MAX == 65535u
|
||||
typedef unsigned short uint_16t;
|
||||
# else
|
||||
# error Please define uint_16t as a 16-bit unsigned short type in brg_types.h
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef BRG_UI32
|
||||
# define BRG_UI32
|
||||
# if UINT_MAX == 4294967295u
|
||||
# define li_32(h) 0x##h##u
|
||||
typedef unsigned int uint_32t;
|
||||
# elif ULONG_MAX == 4294967295u
|
||||
# define li_32(h) 0x##h##ul
|
||||
typedef unsigned long uint_32t;
|
||||
# elif defined( _CRAY )
|
||||
# error This code needs 32-bit data types, which Cray machines do not provide
|
||||
# else
|
||||
# error Please define uint_32t as a 32-bit unsigned integer type in brg_types.h
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef BRG_UI64
|
||||
# if defined( __BORLANDC__ ) && !defined( __MSDOS__ )
|
||||
# define BRG_UI64
|
||||
# define li_64(h) 0x##h##ui64
|
||||
typedef unsigned __int64 uint_64t;
|
||||
# elif defined( _MSC_VER ) && ( _MSC_VER < 1300 ) /* 1300 == VC++ 7.0 */
|
||||
# define BRG_UI64
|
||||
# define li_64(h) 0x##h##ui64
|
||||
typedef unsigned __int64 uint_64t;
|
||||
# elif defined( __sun ) && defined( ULONG_MAX ) && ULONG_MAX == 0xfffffffful
|
||||
# define BRG_UI64
|
||||
# define li_64(h) 0x##h##ull
|
||||
typedef unsigned long long uint_64t;
|
||||
# elif defined( __MVS__ )
|
||||
# define BRG_UI64
|
||||
# define li_64(h) 0x##h##ull
|
||||
typedef unsigned int long long uint_64t;
|
||||
# elif defined( UINT_MAX ) && UINT_MAX > 4294967295u
|
||||
# if UINT_MAX == 18446744073709551615u
|
||||
# define BRG_UI64
|
||||
# define li_64(h) 0x##h##u
|
||||
typedef unsigned int uint_64t;
|
||||
# endif
|
||||
# elif defined( ULONG_MAX ) && ULONG_MAX > 4294967295u
|
||||
# if ULONG_MAX == 18446744073709551615ul
|
||||
# define BRG_UI64
|
||||
# define li_64(h) 0x##h##ul
|
||||
typedef unsigned long uint_64t;
|
||||
# endif
|
||||
# elif defined( ULLONG_MAX ) && ULLONG_MAX > 4294967295u
|
||||
# if ULLONG_MAX == 18446744073709551615ull
|
||||
# define BRG_UI64
|
||||
# define li_64(h) 0x##h##ull
|
||||
typedef unsigned long long uint_64t;
|
||||
# endif
|
||||
# elif defined( ULONG_LONG_MAX ) && ULONG_LONG_MAX > 4294967295u
|
||||
# if ULONG_LONG_MAX == 18446744073709551615ull
|
||||
# define BRG_UI64
|
||||
# define li_64(h) 0x##h##ull
|
||||
typedef unsigned long long uint_64t;
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if !defined( BRG_UI64 )
|
||||
# if defined( NEED_UINT_64T )
|
||||
# error Please define uint_64t as an unsigned 64 bit type in brg_types.h
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef RETURN_VALUES
|
||||
# define RETURN_VALUES
|
||||
# if defined( DLL_EXPORT )
|
||||
# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
|
||||
# define VOID_RETURN __declspec( dllexport ) void __stdcall
|
||||
# define INT_RETURN __declspec( dllexport ) int __stdcall
|
||||
# elif defined( __GNUC__ )
|
||||
# define VOID_RETURN __declspec( __dllexport__ ) void
|
||||
# define INT_RETURN __declspec( __dllexport__ ) int
|
||||
# else
|
||||
# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
|
||||
# endif
|
||||
# elif defined( DLL_IMPORT )
|
||||
# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
|
||||
# define VOID_RETURN __declspec( dllimport ) void __stdcall
|
||||
# define INT_RETURN __declspec( dllimport ) int __stdcall
|
||||
# elif defined( __GNUC__ )
|
||||
# define VOID_RETURN __declspec( __dllimport__ ) void
|
||||
# define INT_RETURN __declspec( __dllimport__ ) int
|
||||
# else
|
||||
# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
|
||||
# endif
|
||||
# elif defined( __WATCOMC__ )
|
||||
# define VOID_RETURN void __cdecl
|
||||
# define INT_RETURN int __cdecl
|
||||
# else
|
||||
# define VOID_RETURN void
|
||||
# define INT_RETURN int
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* These defines are used to detect and set the memory alignment of pointers.
|
||||
Note that offsets are in bytes.
|
||||
|
||||
ALIGN_OFFSET(x,n) return the positive or zero offset of
|
||||
the memory addressed by the pointer 'x'
|
||||
from an address that is aligned on an
|
||||
'n' byte boundary ('n' is a power of 2)
|
||||
|
||||
ALIGN_FLOOR(x,n) return a pointer that points to memory
|
||||
that is aligned on an 'n' byte boundary
|
||||
and is not higher than the memory address
|
||||
pointed to by 'x' ('n' is a power of 2)
|
||||
|
||||
ALIGN_CEIL(x,n) return a pointer that points to memory
|
||||
that is aligned on an 'n' byte boundary
|
||||
and is not lower than the memory address
|
||||
pointed to by 'x' ('n' is a power of 2)
|
||||
*/
|
||||
|
||||
#define ALIGN_OFFSET(x,n) (((ptrint_t)(x)) & ((n) - 1))
|
||||
#define ALIGN_FLOOR(x,n) ((uint_8t*)(x) - ( ((ptrint_t)(x)) & ((n) - 1)))
|
||||
#define ALIGN_CEIL(x,n) ((uint_8t*)(x) + (-((ptrint_t)(x)) & ((n) - 1)))
|
||||
|
||||
/* These defines are used to declare buffers in a way that allows
|
||||
faster operations on longer variables to be used. In all these
|
||||
defines 'size' must be a power of 2 and >= 8. NOTE that the
|
||||
buffer size is in bytes but the type length is in bits
|
||||
|
||||
UNIT_TYPEDEF(x,size) declares a variable 'x' of length
|
||||
'size' bits
|
||||
|
||||
BUFR_TYPEDEF(x,size,bsize) declares a buffer 'x' of length 'bsize'
|
||||
bytes defined as an array of variables
|
||||
each of 'size' bits (bsize must be a
|
||||
multiple of size / 8)
|
||||
|
||||
UNIT_CAST(x,size) casts a variable to a type of
|
||||
length 'size' bits
|
||||
|
||||
UPTR_CAST(x,size) casts a pointer to a pointer to a
|
||||
variable of length 'size' bits
|
||||
*/
|
||||
|
||||
/* Helpers that build the uint_<nn>t type names declared earlier in this
   header.  'size' is pasted directly into the type name, so it must be a
   literal bit count (8, 16, 32 or 64), never an expression. */
#define UI_TYPE(size)               uint_##size##t

/* Declare 'x' as an alias for the unsigned type that is 'size' bits wide. */
#define UNIT_TYPEDEF(x,size)        typedef UI_TYPE(size) x

/* Declare 'x' as an array type of 'bsize' bytes made of 'size'-bit units.
   'bsize' is parenthesised so that an expression argument such as 8 + 8 is
   divided as a whole instead of only its last operand. */
#define BUFR_TYPEDEF(x,size,bsize)  typedef UI_TYPE(size) x[(bsize) / ((size) >> 3)]

/* Cast a value, or a pointer, to the 'size'-bit unsigned type. */
#define UNIT_CAST(x,size)           ((UI_TYPE(size) )(x))
#define UPTR_CAST(x,size)           ((UI_TYPE(size)*)(x))
|
||||
|
||||
/* Added by Soeren S. Thomsen (begin) */
|
||||
#define u8 uint_8t
|
||||
#define u32 uint_32t
|
||||
#define u64 uint_64t
|
||||
/* (end) */
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,956 +0,0 @@
|
||||
/* groestl-intr-vperm.h Aug 2011
|
||||
*
|
||||
* Groestl implementation with intrinsics using ssse3 instructions.
|
||||
* Author: Günther A. Roland, Martin Schläffer
|
||||
*
|
||||
* Based on the vperm and aes_ni implementations of the hash function Groestl
|
||||
* by Cagdas Calik <ccalik@metu.edu.tr> http://www.metu.edu.tr/~ccalik/
|
||||
* Institute of Applied Mathematics, Middle East Technical University, Turkey
|
||||
*
|
||||
* This code is placed in the public domain
|
||||
*/
|
||||
|
||||
#include <tmmintrin.h>
|
||||
#include "grsi.h"
|
||||
|
||||
/*define data alignment for different C compilers*/
|
||||
#if defined(__GNUC__)
|
||||
#define DATA_ALIGN16(x) x __attribute__ ((aligned(16)))
|
||||
#else
|
||||
#define DATA_ALIGN16(x) __declspec(align(16)) x
|
||||
#endif
|
||||
|
||||
//#if defined(DECLARE_GLOBAL)
|
||||
#if 1
|
||||
#define GLOBAL
|
||||
#else
|
||||
#define GLOBAL extern
|
||||
#endif
|
||||
|
||||
//#if defined(DECLARE_IFUN)
|
||||
#if 1
|
||||
#define IFUN
|
||||
#else
|
||||
#define IFUN extern
|
||||
#endif
|
||||
|
||||
/* global constants */
|
||||
//GLOBAL __m128i grsiROUND_CONST_Lx;
|
||||
//GLOBAL __m128i grsiROUND_CONST_L0[grsiROUNDS512];
|
||||
//GLOBAL __m128i grsiROUND_CONST_L7[grsiROUNDS512];
|
||||
DATA_ALIGN16(int32_t grsiSUBSH_MASK_short[8*4]) = {
|
||||
0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c,
|
||||
0x04030201, 0x08070605, 0x0c0b0a09, 0x000f0e0d,
|
||||
0x05040302, 0x09080706, 0x0d0c0b0a, 0x01000f0e,
|
||||
0x06050403, 0x0a090807, 0x0e0d0c0b, 0x0201000f,
|
||||
0x07060504, 0x0b0a0908, 0x0f0e0d0c, 0x03020100,
|
||||
0x08070605, 0x0c0b0a09, 0x000f0e0d, 0x04030201,
|
||||
0x09080706, 0x0d0c0b0a, 0x01000f0e, 0x05040302,
|
||||
0x0e0d0c0b, 0x0201000f, 0x06050403, 0x0a090807
|
||||
};
|
||||
GLOBAL __m128i *grsiSUBSH_MASK = grsiSUBSH_MASK_short;
|
||||
GLOBAL __m128i grsiALL_0F = {0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f};
|
||||
GLOBAL __m128i grsiALL_1B = {0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b};
|
||||
GLOBAL __m128i grsiALL_FF = {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff};
|
||||
|
||||
/* global unknown */
|
||||
|
||||
|
||||
GLOBAL __m128i grsiVPERM_OPT[2];
|
||||
GLOBAL __m128i grsiVPERM_INV[2];
|
||||
GLOBAL __m128i grsiVPERM_SB1[2];
|
||||
GLOBAL __m128i grsiVPERM_SB2[2];
|
||||
GLOBAL __m128i grsiVPERM_SB4[2];
|
||||
GLOBAL __m128i grsiVPERM_SBO[2];
|
||||
|
||||
/* state vars */
|
||||
GLOBAL __m128i grsiTRANSP_MASK;
|
||||
GLOBAL __m128i grsiVPERM_IPT[2];
|
||||
GLOBAL __m128i grsiALL_15;
|
||||
GLOBAL __m128i grsiALL_63;
|
||||
GLOBAL __m128i grsiROUND_CONST_P[grsiROUNDS1024];
|
||||
GLOBAL __m128i grsiROUND_CONST_Q[grsiROUNDS1024];
|
||||
|
||||
#define grsitos(a) #a
|
||||
#define grsitostr(a) grsitos(a)
|
||||
|
||||
/*
|
||||
grsiALL_1B = _mm_set_epi32(0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b, 0x1b1b1b1b);\
|
||||
grsiALL_63 = _mm_set_epi32(0x63636363, 0x63636363, 0x63636363, 0x63636363);\
|
||||
*/
|
||||
|
||||
/* Load the constants that are initialised at run time: the transpose mask,
 * the 0x0f and 0x15 byte splats, and the IPT / OPT / INV / SB1 / SB2 / SB4
 * table pairs.  grsiVPERM_SBO, grsiALL_1B, grsiALL_63 and grsiALL_FF are not
 * written here. */
#define grsiSET_SHARED_CONSTANTS(){\
  /* masks */\
  grsiTRANSP_MASK  = _mm_set_epi32(0x0f070b03, 0x0e060a02, 0x0d050901, 0x0c040800);\
  grsiALL_0F       = _mm_set_epi32(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f);\
  grsiALL_15       = _mm_set_epi32(0x15151515, 0x15151515, 0x15151515, 0x15151515);\
  \
  /* table pairs: element [0] and element [1] of each table */\
  grsiVPERM_IPT[0] = _mm_set_epi32(0xCD80B1FC, 0xB0FDCC81, 0x4C01307D, 0x317C4D00);\
  grsiVPERM_IPT[1] = _mm_set_epi32(0xCABAE090, 0x52227808, 0xC2B2E898, 0x5A2A7000);\
  grsiVPERM_OPT[0] = _mm_set_epi32(0xE10D5DB1, 0xB05C0CE0, 0x01EDBD51, 0x50BCEC00);\
  grsiVPERM_OPT[1] = _mm_set_epi32(0xF7974121, 0xDEBE6808, 0xFF9F4929, 0xD6B66000);\
  grsiVPERM_INV[0] = _mm_set_epi32(0x030D0E0C, 0x02050809, 0x01040A06, 0x0F0B0780);\
  grsiVPERM_INV[1] = _mm_set_epi32(0x04070309, 0x0A0B0C02, 0x0E05060F, 0x0D080180);\
  grsiVPERM_SB1[0] = _mm_set_epi32(0x3BF7CCC1, 0x0D2ED9EF, 0x3618D415, 0xFAE22300);\
  grsiVPERM_SB1[1] = _mm_set_epi32(0xA5DF7A6E, 0x142AF544, 0xB19BE18F, 0xCB503E00);\
  grsiVPERM_SB2[0] = _mm_set_epi32(0xC2A163C8, 0xAB82234A, 0x69EB8840, 0x0AE12900);\
  grsiVPERM_SB2[1] = _mm_set_epi32(0x5EB7E955, 0xBC982FCD, 0xE27A93C6, 0x0B712400);\
  grsiVPERM_SB4[0] = _mm_set_epi32(0xBA44FE79, 0x876D2914, 0x3D50AED7, 0xC393EA00);\
  grsiVPERM_SB4[1] = _mm_set_epi32(0xA876DE97, 0x49087E9F, 0xE1E937A0, 0x3FD64100);\
}/**/
|
||||
|
||||
/* grsiVPERM
 * Transform without setting c*
 * Transforms 2 rows to/from "vperm mode" using constants that the caller has
 * already loaded (see grsiVPERM_Transform_Set_Const).
 * This function is derived from:
 *   vperm and aes_ni implementations of hash function Grostl
 *   by Cagdas CALIK
 * inputs:
 *   a0, a1     = 2 rows
 *   c0         = mask selecting the part of each byte looked up in c2
 *   c1, c2     = the two halves of the transformation table
 *   t0-t3      = clobbers
 * outputs:
 *   a0, a1     = 2 rows transformed with the table
 * All arguments must be distinct lvalues.
 * */
#define grsiVPERM_Transform_No_Const(a0, a1, t0, t1, t2, t3, c0, c1, c2){\
  /* bits outside the c0 mask, moved down by four */\
  t0 = _mm_srli_epi32(_mm_andnot_si128(c0, a0), 4);\
  t1 = _mm_srli_epi32(_mm_andnot_si128(c0, a1), 4);\
  /* bits inside the c0 mask */\
  a0 = _mm_and_si128(a0, c0);\
  a1 = _mm_and_si128(a1, c0);\
  /* masked part indexes c2 */\
  t2 = _mm_shuffle_epi8(c2, a0);\
  t3 = _mm_shuffle_epi8(c2, a1);\
  /* shifted part indexes c1; combine both lookups */\
  a0 = _mm_xor_si128(_mm_shuffle_epi8(c1, t0), t2);\
  a1 = _mm_xor_si128(_mm_shuffle_epi8(c1, t1), t3);\
}/**/
|
||||
|
||||
/* Load the constants used by grsiVPERM_Transform_No_Const:
 *   c0 = grsiALL_0F
 *   c1 = first  128-bit entry of 'table'
 *   c2 = second 128-bit entry of 'table'
 * 'table' is parenthesised before the cast so that an expression argument
 * (for example base + offset) is converted as a whole. */
#define grsiVPERM_Transform_Set_Const(table, c0, c1, c2){\
  c0 = grsiALL_0F;\
  c1 = ((__m128i*)(table))[0];\
  c2 = ((__m128i*)(table))[1];\
}/**/
|
||||
|
||||
/* grsiVPERM
 * Transform
 * Loads the constants for 'table' and then transforms 2 rows to/from
 * "vperm mode".
 * This function is derived from:
 *   vperm and aes_ni implementations of hash function Grostl
 *   by Cagdas CALIK
 * inputs:
 *   a0, a1 = 2 rows
 *   table  = transformation table to use
 *   t*, c* = clobbers
 * outputs:
 *   a0, a1 = 2 rows transformed with table
 * */
#define grsiVPERM_Transform(a0, a1, table, t0, t1, t2, t3, c0, c1, c2){\
  grsiVPERM_Transform_Set_Const(table, c0, c1, c2);\
  grsiVPERM_Transform_No_Const(a0, a1, t0, t1, t2, t3, c0, c1, c2);\
}/**/
|
||||
|
||||
/* grsiVPERM
 * Transform State
 * Loads the constants for 'table' once and applies the two-row transform to
 * (a0, a1) and then to (a2, a3).
 * inputs:
 *   a0-a3  = state
 *   table  = transformation table to use
 *   t*, c* = clobbers
 * outputs:
 *   a0-a3  = transformed state
 * */
#define grsiVPERM_Transform_State(a0, a1, a2, a3, table, t0, t1, t2, t3, c0, c1, c2){\
  grsiVPERM_Transform_Set_Const(table, c0, c1, c2);\
  grsiVPERM_Transform_No_Const(a0, a1, t0, t1, t2, t3, c0, c1, c2);\
  grsiVPERM_Transform_No_Const(a2, a3, t0, t1, t2, t3, c0, c1, c2);\
}/**/
|
||||
|
||||
/* grsiVPERM
 * Add Constant to State
 * XORs the same 128-bit constant into each of the eight rows.
 * inputs:
 *   a0-a7    = state
 *   constant = constant to add (read once, into t0)
 *   t0       = clobber; holds 'constant' on exit
 * outputs:
 *   a0-a7    = state + constant
 * */
#define grsiVPERM_Add_Constant(a0, a1, a2, a3, a4, a5, a6, a7, constant, t0){\
  t0 = constant;\
  a0 = _mm_xor_si128(a0, t0);  a1 = _mm_xor_si128(a1, t0);\
  a2 = _mm_xor_si128(a2, t0);  a3 = _mm_xor_si128(a3, t0);\
  a4 = _mm_xor_si128(a4, t0);  a5 = _mm_xor_si128(a5, t0);\
  a6 = _mm_xor_si128(a6, t0);  a7 = _mm_xor_si128(a7, t0);\
}/**/
|
||||
|
||||
/* grsiVPERM
 * Set Substitute Core Constants
 * Loads c0, c1, c2 from the grsiVPERM_INV table, ready for
 * grsiVPERM_Substitute_Core.
 * */
#define grsiVPERM_Substitute_Core_Set_Const(c0, c1, c2){\
  grsiVPERM_Transform_Set_Const(grsiVPERM_INV, c0, c1, c2);\
}/**/
|
||||
|
||||
/* grsiVPERM
 * Substitute Core
 * First part of the sbox inverse computation.
 * This function is derived from:
 *   vperm and aes_ni implementations of hash function Grostl
 *   by Cagdas CALIK
 * inputs:
 *   a0         = 1 row (overwritten)
 *   c0, c1, c2 = constants from grsiVPERM_Substitute_Core_Set_Const
 *   t0, t1     = clobbers
 * outputs:
 *   b0a, b0b   = inputs for the lookup step
 * All arguments must be distinct; c1 may be an expression, it is read once.
 * */
#define grsiVPERM_Substitute_Core(a0, b0a, b0b, t0, t1, c0, c1, c2){\
  /* split the row: t0 = part outside the c0 mask (shifted), a0 = part inside */\
  t0  = _mm_srli_epi32(_mm_andnot_si128(c0, a0), 4);\
  a0  = _mm_and_si128(a0, c0);\
  b0a = _mm_shuffle_epi8(c1, a0);\
  /* from here on a0 holds the XOR of the two parts */\
  a0  = _mm_xor_si128(a0, t0);\
  b0b = _mm_xor_si128(_mm_shuffle_epi8(c2, t0), b0a);\
  t1  = _mm_xor_si128(_mm_shuffle_epi8(c2, a0), b0a);\
  /* second round of c2 lookups produces the two outputs */\
  b0a = _mm_xor_si128(_mm_shuffle_epi8(c2, b0b), a0);\
  b0b = _mm_xor_si128(_mm_shuffle_epi8(c2, t1), t0);\
}/**/
|
||||
|
||||
/* grsiVPERM
 * Lookup
 * Second part of the sbox inverse computation.
 * This function is derived from:
 *   vperm and aes_ni implementations of hash function Grostl
 *   by Cagdas CALIK
 * inputs:
 *   a0a, a0b = output of Substitution Core
 *   table    = lookup table to use (*1 / *2 / *4); two 128-bit entries
 *   t0       = clobber
 * outputs:
 *   b0       = output of sbox + multiplication
 * 'table' is parenthesised before the cast so that an expression argument
 * is converted as a whole.
 * */
#define grsiVPERM_Lookup(a0a, a0b, table, b0, t0){\
  b0 = ((__m128i*)(table))[0];\
  t0 = ((__m128i*)(table))[1];\
  b0 = _mm_shuffle_epi8(b0, a0b);\
  t0 = _mm_shuffle_epi8(t0, a0a);\
  b0 = _mm_xor_si128(b0, t0);\
}/**/
|
||||
|
||||
/* Helper for grsiVPERM_SUB_MULTIPLY: process one row whose *1 and *2
 * lookups are both spilled to TEMP_MUL1[idx] / TEMP_MUL2[idx].
 *   row        = state row; replaced by its SB4 lookup
 *   ra, rb     = Substitute Core outputs
 *   m1, m2     = receive the SB1 / SB2 lookups (m2 is also core scratch)
 *   tmp        = clobber
 *   k0, k1, k2 = Substitute Core constants */
#define grsiVPERM_SUB_MULTIPLY_ROW(row, idx, ra, rb, m1, m2, tmp, k0, k1, k2){\
  grsiVPERM_Substitute_Core(row, ra, rb, m2, tmp, k0, k1, k2);\
  grsiVPERM_Lookup(ra, rb, grsiVPERM_SB1, m1, tmp);\
  TEMP_MUL1[idx] = m1;\
  grsiVPERM_Lookup(ra, rb, grsiVPERM_SB2, m2, tmp);\
  TEMP_MUL2[idx] = m2;\
  grsiVPERM_Lookup(ra, rb, grsiVPERM_SB4, row, tmp);\
}/**/

/* grsiVPERM
 * SubBytes and *2 / *4
 * This function is derived from:
 *   Constant-time SSSE3 AES core implementation
 *   by Mike Hamburg
 * and
 *   vperm and aes_ni implementations of hash function Grostl
 *   by Cagdas CALIK
 * inputs:
 *   a0-a7  = state
 *   t*, c* = clobbers
 * outputs:
 *   a0-a7  = SB4 lookup of each row (state * 4)
 *   c2     = SB2 lookup of row 0 (row0 * 2) -> b0, also in TEMP_MUL2[0]
 *   c1     = SB2 lookup of row 7 (row7 * 2) -> b3, not stored in TEMP_MUL2
 *   c0     = SB1 lookup of row 0 (row0 * 1) -> b4, not stored in TEMP_MUL1
 *   t2     = SB1 lookup of row 4 (row4 * 1) -> b7, not stored in TEMP_MUL1
 *   TEMP_MUL1[i] = row(i) * 1 for i = 1, 2, 3, 5, 6, 7
 *   TEMP_MUL2[i] = row(i) * 2 for i = 0 .. 6
 * Row order matters: row 7 overwrites c1, so rows 4 and 0 pass
 * grsiVPERM_INV[0] in its place, and row 0 runs last because its lookups
 * overwrite c0 and c2.
 *
 * call:grsiVPERM_SUB_MULTIPLY(a0, a1, a2, a3, a4, a5, a6, a7, b1, b2, b5, b6, b0, b3, b4, b7) */
#define grsiVPERM_SUB_MULTIPLY(a0, a1, a2, a3, a4, a5, a6, a7, t0, t1, t3, t4, c2, c1, c0, t2){\
  /* set Constants */\
  grsiVPERM_Substitute_Core_Set_Const(c0, c1, c2);\
  /* rows 1, 2, 3, 5, 6: both multiples go to the TEMP arrays */\
  grsiVPERM_SUB_MULTIPLY_ROW(a1, 1, t0, t1, t2, t3, t4, c0, c1, c2);\
  grsiVPERM_SUB_MULTIPLY_ROW(a2, 2, t0, t1, t2, t3, t4, c0, c1, c2);\
  grsiVPERM_SUB_MULTIPLY_ROW(a3, 3, t0, t1, t2, t3, t4, c0, c1, c2);\
  grsiVPERM_SUB_MULTIPLY_ROW(a5, 5, t0, t1, t2, t3, t4, c0, c1, c2);\
  grsiVPERM_SUB_MULTIPLY_ROW(a6, 6, t0, t1, t2, t3, t4, c0, c1, c2);\
  /* row 7: the *2 result stays in c1 */\
  grsiVPERM_Substitute_Core(a7, t0, t1, t3, t4, c0, c1, c2);\
  grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4);\
  TEMP_MUL1[7] = t2;\
  grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, c1, t4); /*c1 -> b3*/\
  grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a7, t4);\
  /* row 4: the *1 result stays in t2 */\
  grsiVPERM_Substitute_Core(a4, t0, t1, t3, t4, c0, (grsiVPERM_INV[0]), c2);\
  grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, t2, t4); /*t2 -> b7*/\
  grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, t3, t4);\
  TEMP_MUL2[4] = t3;\
  grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a4, t4);\
  /* row 0: the *1 result stays in c0, the *2 result in c2 */\
  grsiVPERM_Substitute_Core(a0, t0, t1, t3, t4, c0, (grsiVPERM_INV[0]), c2);\
  grsiVPERM_Lookup(t0, t1, grsiVPERM_SB1, c0, t4); /*c0 -> b4*/\
  grsiVPERM_Lookup(t0, t1, grsiVPERM_SB2, c2, t4); /*c2 -> b0*/\
  TEMP_MUL2[0] = c2;\
  grsiVPERM_Lookup(t0, t1, grsiVPERM_SB4, a0, t4);\
}/**/
|
||||
|
||||
|
||||
/* Optimized grsiMixBytes
|
||||
* inputs:
|
||||
* a0-a7 = (row0-row7) * 4
|
||||
* b0 = row0 * 2
|
||||
* b3 = row7 * 2
|
||||
* b4 = row7 * 1
|
||||
* b7 = row4 * 1
|
||||
* all *1 and *2 values must also be in TEMP_MUL1, TEMP_MUL2
|
||||
* output: b0-b7
|
||||
* */
|
||||
#define grsiMixBytes(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
  /* a3 is reused as scratch in steps 6, 9 and 12; keep its incoming value for step 13 */\
  TEMP_MUL4 = a3;\
  \
  /* step 1: build the shared b1/b2 term (b4 is read as a preloaded input) */\
  b1 = a0;\
  b1 = _mm_xor_si128(b1, a5);\
  b1 = _mm_xor_si128(b1, b4);\
  b1 = _mm_xor_si128(b1, (TEMP_MUL2[3]));\
  b2 = b1;\
  \
  /* step 2: build the shared b5/b6 term (b7 and b3 are read as preloaded inputs) */\
  b5 = a1;\
  b5 = _mm_xor_si128(b5, a4);\
  b5 = _mm_xor_si128(b5, b7);\
  b5 = _mm_xor_si128(b5, b3);\
  b6 = b5;\
  \
  /* step 4: extend b7; TEMP_MUL1[4] is not XORed in because b7 already carries it */\
  b7 = _mm_xor_si128(b7, a6);\
  b7 = _mm_xor_si128(b7, (TEMP_MUL1[6]));\
  b7 = _mm_xor_si128(b7, (TEMP_MUL2[1]));\
  b7 = _mm_xor_si128(b7, b3);\
  b2 = _mm_xor_si128(b2, b7);\
  \
  /* step 3: extend b0; TEMP_MUL2[0] is not XORed in because b0 already carries it */\
  b0 = _mm_xor_si128(b0, a7);\
  b0 = _mm_xor_si128(b0, (TEMP_MUL1[5]));\
  b0 = _mm_xor_si128(b0, (TEMP_MUL1[7]));\
  b0 = _mm_xor_si128(b0, (TEMP_MUL2[2]));\
  b3 = b0;\
  b1 = _mm_xor_si128(b1, b0);\
  /* this XOR with b7 belongs to step 4 but needs the b0 value computed above */\
  b0 = _mm_xor_si128(b0, b7);\
  \
  /* step 5: extend b4; TEMP_MUL1[0] is not XORed in because b4 already carries it */\
  b4 = _mm_xor_si128(b4, a2);\
  b4 = _mm_xor_si128(b4, (TEMP_MUL1[2]));\
  b4 = _mm_xor_si128(b4, (TEMP_MUL2[3]));\
  b4 = _mm_xor_si128(b4, (TEMP_MUL2[5]));\
  b3 = _mm_xor_si128(b3, b4);\
  b6 = _mm_xor_si128(b6, b4);\
  \
  /* step 6: a3 becomes a term shared by b4, b5 and b7 */\
  a3 = _mm_xor_si128(a3, (TEMP_MUL1[1]));\
  a3 = _mm_xor_si128(a3, (TEMP_MUL1[3]));\
  a3 = _mm_xor_si128(a3, (TEMP_MUL2[4]));\
  a3 = _mm_xor_si128(a3, (TEMP_MUL2[6]));\
  b4 = _mm_xor_si128(b4, a3);\
  b5 = _mm_xor_si128(b5, a3);\
  b7 = _mm_xor_si128(b7, a3);\
  \
  /* step 7: a1 becomes a term shared by b2 and b3 */\
  a1 = _mm_xor_si128(a1, (TEMP_MUL1[1]));\
  a1 = _mm_xor_si128(a1, (TEMP_MUL2[4]));\
  b2 = _mm_xor_si128(b2, a1);\
  b3 = _mm_xor_si128(b3, a1);\
  \
  /* step 8: a5 becomes a term shared by b6 and b7 */\
  a5 = _mm_xor_si128(a5, (TEMP_MUL1[5]));\
  a5 = _mm_xor_si128(a5, (TEMP_MUL2[0]));\
  b6 = _mm_xor_si128(b6, a5);\
  b7 = _mm_xor_si128(b7, a5);\
  \
  /* step 9: a3 is reloaded as scratch for b0 and b5 */\
  a3 = TEMP_MUL1[2];\
  a3 = _mm_xor_si128(a3, (TEMP_MUL2[5]));\
  b0 = _mm_xor_si128(b0, a3);\
  b5 = _mm_xor_si128(b5, a3);\
  \
  /* step 10: a1 is reloaded as scratch for b1 and b4 */\
  a1 = TEMP_MUL1[6];\
  a1 = _mm_xor_si128(a1, (TEMP_MUL2[1]));\
  b1 = _mm_xor_si128(b1, a1);\
  b4 = _mm_xor_si128(b4, a1);\
  \
  /* step 11: a5 is reloaded as scratch for b1 and b6 */\
  a5 = TEMP_MUL1[3];\
  a5 = _mm_xor_si128(a5, (TEMP_MUL2[6]));\
  b1 = _mm_xor_si128(b1, a5);\
  b6 = _mm_xor_si128(b6, a5);\
  \
  /* step 12: a3 is reloaded as scratch for b2 and b5 */\
  a3 = TEMP_MUL1[7];\
  a3 = _mm_xor_si128(a3, (TEMP_MUL2[2]));\
  b2 = _mm_xor_si128(b2, a3);\
  b5 = _mm_xor_si128(b5, a3);\
  \
  /* step 13: fold in the remaining a-inputs, using the saved a3 for b0 */\
  b0 = _mm_xor_si128(b0, (TEMP_MUL4));\
  b0 = _mm_xor_si128(b0, a4);\
  b1 = _mm_xor_si128(b1, a4);\
  b3 = _mm_xor_si128(b3, a6);\
  b4 = _mm_xor_si128(b4, a0);\
  b4 = _mm_xor_si128(b4, a7);\
  b5 = _mm_xor_si128(b5, a0);\
  b7 = _mm_xor_si128(b7, a2);\
}/**/
|
||||
|
||||
/*
|
||||
grsiSUBSH_MASK[0] = _mm_set_epi32(0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100);\
|
||||
grsiSUBSH_MASK[1] = _mm_set_epi32(0x000f0e0d, 0x0c0b0a09, 0x08070605, 0x04030201);\
|
||||
grsiSUBSH_MASK[2] = _mm_set_epi32(0x01000f0e, 0x0d0c0b0a, 0x09080706, 0x05040302);\
|
||||
grsiSUBSH_MASK[3] = _mm_set_epi32(0x0201000f, 0x0e0d0c0b, 0x0a090807, 0x06050403);\
|
||||
grsiSUBSH_MASK[4] = _mm_set_epi32(0x03020100, 0x0f0e0d0c, 0x0b0a0908, 0x07060504);\
|
||||
grsiSUBSH_MASK[5] = _mm_set_epi32(0x04030201, 0x000f0e0d, 0x0c0b0a09, 0x08070605);\
|
||||
grsiSUBSH_MASK[6] = _mm_set_epi32(0x05040302, 0x01000f0e, 0x0d0c0b0a, 0x09080706);\
|
||||
grsiSUBSH_MASK[7] = _mm_set_epi32(0x0a090807, 0x06050403, 0x0201000f, 0x0e0d0c0b);\
|
||||
*/
|
||||
|
||||
/* Fill grsiALL_FF and the per-round constant tables grsiROUND_CONST_P/Q.
 * Uses a loop variable named `i` that the expanding scope must declare. */
#define grsiSET_CONSTANTS(){\
  grsiSET_SHARED_CONSTANTS();\
  grsiALL_FF = _mm_set_epi32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff);\
  for(i = 0; i < grsiROUNDS1024; i++)\
  {\
    /* every byte of the base pattern is XORed with the round index i */\
    grsiROUND_CONST_P[i] = _mm_set_epi32(\
        0xf0e0d0c0 ^ (i * 0x01010101),\
        0xb0a09080 ^ (i * 0x01010101),\
        0x70605040 ^ (i * 0x01010101),\
        0x30201000 ^ (i * 0x01010101));\
    grsiROUND_CONST_Q[i] = _mm_set_epi32(\
        0x0f1f2f3f ^ (i * 0x01010101),\
        0x4f5f6f7f ^ (i * 0x01010101),\
        0x8f9fafbf ^ (i * 0x01010101),\
        0xcfdfefff ^ (i * 0x01010101));\
  }\
}/**/
|
||||
|
||||
/* SubBytes + MixBytes for one round.
 *   a0-a7 : input rows (also modified by the two stages)
 *   b0-b7 : output rows
 * The b-registers are handed to grsiVPERM_SUB_MULTIPLY in the order
 * b1, b2, b5, b6, b0, b3, b4, b7, so that b0, b3, b4 and b7 are already
 * loaded with the values grsiMixBytes expects on entry.
 */
#define grsiSUBMIX(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
  /* stage 1: SubBytes + multiplication */\
  grsiVPERM_SUB_MULTIPLY(a0, a1, a2, a3, a4, a5, a6, a7, b1, b2, b5, b6, b0, b3, b4, b7);\
  /* stage 2: MixBytes */\
  grsiMixBytes(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7);\
}/**/
|
||||
|
||||
/* One round of P1024.
 *   rc    : index into grsiROUND_CONST_P
 *   a0-a7 : input rows
 *   b0-b7 : output rows
 * a0 is also passed as the last argument of grsiVPERM_Add_Constant.
 */
#define grsiROUND_P_STEP(rc, a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
  /* AddRoundConstant P1024: only the first row register is touched */\
  a0 = _mm_xor_si128(a0, (grsiROUND_CONST_P[rc]));\
  /* ShiftBytes P1024 + pre-AESENCLAST: row k is permuted with mask k */\
  a0 = _mm_shuffle_epi8(a0, (grsiSUBSH_MASK[0]));\
  a1 = _mm_shuffle_epi8(a1, (grsiSUBSH_MASK[1]));\
  a2 = _mm_shuffle_epi8(a2, (grsiSUBSH_MASK[2]));\
  a3 = _mm_shuffle_epi8(a3, (grsiSUBSH_MASK[3]));\
  a4 = _mm_shuffle_epi8(a4, (grsiSUBSH_MASK[4]));\
  a5 = _mm_shuffle_epi8(a5, (grsiSUBSH_MASK[5]));\
  a6 = _mm_shuffle_epi8(a6, (grsiSUBSH_MASK[6]));\
  a7 = _mm_shuffle_epi8(a7, (grsiSUBSH_MASK[7]));\
  /* SubBytes + grsiMixBytes */\
  grsiSUBMIX(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7);\
  grsiVPERM_Add_Constant(b0, b1, b2, b3, b4, b5, b6, b7, grsiALL_15, a0);\
}/**/

/* Permutation P1024 applied to the state in xmm8-xmm15 (result left in
 * xmm8-xmm15). Rounds are processed in pairs so the state ping-pongs
 * between xmm8-15 and xmm0-7 without extra copies. */
#define grsiROUNDS_P(){\
  u32 round_counter;\
  for(round_counter = 0; round_counter < 14; round_counter+=2) {\
    /* even round: xmm8-15 -> xmm0-7 */\
    grsiROUND_P_STEP(round_counter, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
    /* odd round: xmm0-7 -> xmm8-15 */\
    grsiROUND_P_STEP(round_counter+1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
  }\
}/**/
|
||||
|
||||
/* One round of Q1024.
 *   rc    : index into grsiROUND_CONST_Q
 *   a0-a7 : input rows
 *   b0-b7 : output rows; b1 is first used as scratch to hold grsiALL_FF
 */
#define grsiROUND_Q_STEP(rc, a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7){\
  /* AddRoundConstant Q1024: rows 0-6 get grsiALL_FF, row 7 gets the round constant */\
  b1 = grsiALL_FF;\
  a0 = _mm_xor_si128(a0, b1);\
  a1 = _mm_xor_si128(a1, b1);\
  a2 = _mm_xor_si128(a2, b1);\
  a3 = _mm_xor_si128(a3, b1);\
  a4 = _mm_xor_si128(a4, b1);\
  a5 = _mm_xor_si128(a5, b1);\
  a6 = _mm_xor_si128(a6, b1);\
  a7 = _mm_xor_si128(a7, (grsiROUND_CONST_Q[rc]));\
  /* ShiftBytes Q1024 + pre-AESENCLAST: mask order is 1,3,5,7,0,2,4,6 */\
  a0 = _mm_shuffle_epi8(a0, (grsiSUBSH_MASK[1]));\
  a1 = _mm_shuffle_epi8(a1, (grsiSUBSH_MASK[3]));\
  a2 = _mm_shuffle_epi8(a2, (grsiSUBSH_MASK[5]));\
  a3 = _mm_shuffle_epi8(a3, (grsiSUBSH_MASK[7]));\
  a4 = _mm_shuffle_epi8(a4, (grsiSUBSH_MASK[0]));\
  a5 = _mm_shuffle_epi8(a5, (grsiSUBSH_MASK[2]));\
  a6 = _mm_shuffle_epi8(a6, (grsiSUBSH_MASK[4]));\
  a7 = _mm_shuffle_epi8(a7, (grsiSUBSH_MASK[6]));\
  /* SubBytes + grsiMixBytes */\
  grsiSUBMIX(a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7);\
}/**/

/* Permutation Q1024 applied to the state in xmm8-xmm15 (result left in
 * xmm8-xmm15). grsiVPERM_Add_Constant is applied once before and once
 * after the round loop, not inside each round. */
#define grsiROUNDS_Q(){\
  grsiVPERM_Add_Constant(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, grsiALL_15, xmm1);\
  u32 round_counter = 0;\
  for(round_counter = 0; round_counter < 14; round_counter+=2) {\
    /* even round: xmm8-15 -> xmm0-7 */\
    grsiROUND_Q_STEP(round_counter, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);\
    /* odd round: xmm0-7 -> xmm8-15 */\
    grsiROUND_Q_STEP(round_counter+1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15);\
  }\
  grsiVPERM_Add_Constant(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, grsiALL_15, xmm1);\
}/**/
|
||||
|
||||
|
||||
/* Matrix Transpose
 *
 * Converts a 1024-bit state stored with two columns per xmm register
 * into the layout used by the round macros.
 *
 *   in/out  : i0-i7 (transposed in place)
 *   clobbers: t0-t7
 *
 * The statement order interleaves independent operations and must be
 * kept: several temporaries are copies taken before their source is
 * overwritten.
 */
#define grsiMatrix_Transpose(i0, i1, i2, i3, i4, i5, i6, i7, t0, t1, t2, t3, t4, t5, t6, t7){\
  /* phase 1: byte-permute every input with the transpose mask (held in t0) */\
  t0 = grsiTRANSP_MASK;\
  \
  i6 = _mm_shuffle_epi8(i6, t0);\
  i0 = _mm_shuffle_epi8(i0, t0);\
  i1 = _mm_shuffle_epi8(i1, t0);\
  i2 = _mm_shuffle_epi8(i2, t0);\
  i3 = _mm_shuffle_epi8(i3, t0);\
  t1 = i2;\
  i4 = _mm_shuffle_epi8(i4, t0);\
  i5 = _mm_shuffle_epi8(i5, t0);\
  t2 = i4;\
  t3 = i6;\
  i7 = _mm_shuffle_epi8(i7, t0);\
  \
  /* phase 2: 16-bit interleave; t0-t3 take the high halves, i0/i2/i4/i6 the low halves */\
  t0 = i0;\
  t2 = _mm_unpackhi_epi16(t2, i5);\
  i4 = _mm_unpacklo_epi16(i4, i5);\
  t3 = _mm_unpackhi_epi16(t3, i7);\
  i6 = _mm_unpacklo_epi16(i6, i7);\
  t0 = _mm_unpackhi_epi16(t0, i1);\
  t1 = _mm_unpackhi_epi16(t1, i3);\
  i2 = _mm_unpacklo_epi16(i2, i3);\
  i0 = _mm_unpacklo_epi16(i0, i1);\
  \
  /* phase 3: reorder 32-bit lanes with immediate 216 */\
  t0 = _mm_shuffle_epi32(t0, 216);\
  t1 = _mm_shuffle_epi32(t1, 216);\
  t2 = _mm_shuffle_epi32(t2, 216);\
  t3 = _mm_shuffle_epi32(t3, 216);\
  i0 = _mm_shuffle_epi32(i0, 216);\
  i2 = _mm_shuffle_epi32(i2, 216);\
  i4 = _mm_shuffle_epi32(i4, 216);\
  i6 = _mm_shuffle_epi32(i6, 216);\
  \
  /* phase 4: 32-bit interleave */\
  t4 = i0;\
  i0 = _mm_unpacklo_epi32(i0, i2);\
  t4 = _mm_unpackhi_epi32(t4, i2);\
  t5 = t0;\
  t0 = _mm_unpacklo_epi32(t0, t1);\
  t5 = _mm_unpackhi_epi32(t5, t1);\
  t6 = i4;\
  i4 = _mm_unpacklo_epi32(i4, i6);\
  t7 = t2;\
  t6 = _mm_unpackhi_epi32(t6, i6);\
  i2 = t0;\
  t2 = _mm_unpacklo_epi32(t2, t3);\
  i3 = t0;\
  t7 = _mm_unpackhi_epi32(t7, t3);\
  \
  /* phase 5: 64-bit interleave; at this point there are 2 rows in each xmm,\
   * and the unpack leaves 1 row of CV in each xmm */\
  i1 = i0;\
  i1 = _mm_unpackhi_epi64(i1, i4);\
  i0 = _mm_unpacklo_epi64(i0, i4);\
  i4 = t4;\
  i3 = _mm_unpackhi_epi64(i3, t2);\
  i5 = t4;\
  i2 = _mm_unpacklo_epi64(i2, t2);\
  i6 = t5;\
  i5 = _mm_unpackhi_epi64(i5, t6);\
  i7 = t5;\
  i4 = _mm_unpacklo_epi64(i4, t6);\
  i7 = _mm_unpackhi_epi64(i7, t7);\
  i6 = _mm_unpacklo_epi64(i6, t7);\
  /* transpose done */\
}/**/
|
||||
|
||||
/* Matrix Transpose Inverse
 *
 * Converts a 1024-bit state from the row layout used by the round
 * macros back to two columns per xmm register.
 *
 *   inputs  : i0-i7
 *   outputs : (i0, o0, i1, i3, o1, o2, i5, i7), in that order
 *   clobbers: t0-t4 (i2, i4 and i6 are overwritten as well)
 *
 * The statement order must be kept: o0 first carries the transpose mask
 * and is only later reused as an output register.
 */
#define grsiMatrix_Transpose_INV(i0, i1, i2, i3, i4, i5, i6, i7, o0, o1, o2, t0, t1, t2, t3, t4){\
  /* phase 1: 64-bit interleave of neighbouring inputs */\
  o1 = i0;\
  i0 = _mm_unpacklo_epi64(i0, i1);\
  o1 = _mm_unpackhi_epi64(o1, i1);\
  t0 = i2;\
  i2 = _mm_unpacklo_epi64(i2, i3);\
  t0 = _mm_unpackhi_epi64(t0, i3);\
  t1 = i4;\
  i4 = _mm_unpacklo_epi64(i4, i5);\
  t1 = _mm_unpackhi_epi64(t1, i5);\
  t2 = i6;\
  /* keep the transpose mask in o0: it is used by the 8 shuffles below */\
  o0 = grsiTRANSP_MASK;\
  i6 = _mm_unpacklo_epi64(i6, i7);\
  t2 = _mm_unpackhi_epi64(t2, i7);\
  \
  /* phase 2: byte-permute all eight intermediate values */\
  i0 = _mm_shuffle_epi8(i0, o0);\
  i2 = _mm_shuffle_epi8(i2, o0);\
  i4 = _mm_shuffle_epi8(i4, o0);\
  i6 = _mm_shuffle_epi8(i6, o0);\
  o1 = _mm_shuffle_epi8(o1, o0);\
  t0 = _mm_shuffle_epi8(t0, o0);\
  t1 = _mm_shuffle_epi8(t1, o0);\
  t2 = _mm_shuffle_epi8(t2, o0);\
  \
  /* phase 3: 16-bit interleave; copies are taken first because both halves are needed */\
  t3 = i4;\
  o2 = o1;\
  o0 = i0;\
  t4 = t1;\
  \
  t3 = _mm_unpackhi_epi16(t3, i6);\
  i4 = _mm_unpacklo_epi16(i4, i6);\
  o0 = _mm_unpackhi_epi16(o0, i2);\
  i0 = _mm_unpacklo_epi16(i0, i2);\
  o2 = _mm_unpackhi_epi16(o2, t0);\
  o1 = _mm_unpacklo_epi16(o1, t0);\
  t4 = _mm_unpackhi_epi16(t4, t2);\
  t1 = _mm_unpacklo_epi16(t1, t2);\
  \
  /* phase 4: reorder 32-bit lanes with immediate 216 */\
  i4 = _mm_shuffle_epi32(i4, 216);\
  t3 = _mm_shuffle_epi32(t3, 216);\
  o1 = _mm_shuffle_epi32(o1, 216);\
  o2 = _mm_shuffle_epi32(o2, 216);\
  i0 = _mm_shuffle_epi32(i0, 216);\
  o0 = _mm_shuffle_epi32(o0, 216);\
  t1 = _mm_shuffle_epi32(t1, 216);\
  t4 = _mm_shuffle_epi32(t4, 216);\
  \
  /* phase 5: 32-bit interleave into the final output registers */\
  i1 = i0;\
  i3 = o0;\
  i5 = o1;\
  i7 = o2;\
  i0 = _mm_unpacklo_epi32(i0, i4);\
  i1 = _mm_unpackhi_epi32(i1, i4);\
  o0 = _mm_unpacklo_epi32(o0, t3);\
  i3 = _mm_unpackhi_epi32(i3, t3);\
  o1 = _mm_unpacklo_epi32(o1, t1);\
  i5 = _mm_unpackhi_epi32(i5, t1);\
  o2 = _mm_unpacklo_epi32(o2, t4);\
  i7 = _mm_unpackhi_epi32(i7, t4);\
  /* transpose done */\
}/**/
|
||||
|
||||
/* Transform the P and Q round constants of rounds i and j into grsiVPERM
 * mode, in place. Works on xmm0-xmm3 and passes xmm4-xmm10 to
 * grsiVPERM_Transform_State; the Q constants are additionally XORed with
 * grsiALL_15 before being written back. */
#define grsiVPERM_Transform_RoundConst_CNT2(i, j){\
  /* fetch: P[i], P[j], Q[i], Q[j] */\
  xmm0 = grsiROUND_CONST_P[i];\
  xmm1 = grsiROUND_CONST_P[j];\
  xmm2 = grsiROUND_CONST_Q[i];\
  xmm3 = grsiROUND_CONST_Q[j];\
  /* transform all four at once */\
  grsiVPERM_Transform_State(xmm0, xmm1, xmm2, xmm3, grsiVPERM_IPT, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10);\
  /* extra constant applied to the Q values only */\
  xmm2 = _mm_xor_si128(xmm2, (grsiALL_15));\
  xmm3 = _mm_xor_si128(xmm3, (grsiALL_15));\
  /* write back */\
  grsiROUND_CONST_P[i] = xmm0;\
  grsiROUND_CONST_P[j] = xmm1;\
  grsiROUND_CONST_Q[i] = xmm2;\
  grsiROUND_CONST_Q[j] = xmm3;\
}/**/
|
||||
|
||||
/* Transform all 14 round constants (P and Q) and grsiALL_FF into
 * grsiVPERM mode. Operates on the xmm0-xmm10 variables of the expanding
 * scope. */
#define grsiVPERM_Transform_RoundConst(){\
  int grsi_rc_pair;\
  /* constants are handled two rounds at a time: (0,1), (2,3), ... (12,13) */\
  for(grsi_rc_pair = 0; grsi_rc_pair < 14; grsi_rc_pair += 2) {\
    grsiVPERM_Transform_RoundConst_CNT2(grsi_rc_pair, grsi_rc_pair + 1);\
  }\
  /* grsiALL_FF gets the same treatment as the Q constants */\
  xmm0 = grsiALL_FF;\
  grsiVPERM_Transform(xmm0, xmm1, grsiVPERM_IPT, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10);\
  xmm0 = _mm_xor_si128(xmm0, (grsiALL_15));\
  grsiALL_FF = xmm0;\
}/**/
|
||||
|
||||
|
||||
/* Prepare the round constants and convert the initial chaining value in h
 * (eight 128-bit words) into the internal layout, in place.
 * Only a prototype is emitted unless DECLARE_IFUN is defined. */
IFUN void grsiINIT(u64* h)
#if !defined(DECLARE_IFUN)
;
#else
{
  __m128i* const cv = (__m128i*) h;
  /* working variables referenced by name from the macros below */
  static __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
  static __m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;

  /* round constants -> grsiVPERM mode */
  grsiVPERM_Transform_RoundConst();

  /* IV -> xmm8..xmm15 */
  xmm8  = cv[0];
  xmm9  = cv[1];
  xmm10 = cv[2];
  xmm11 = cv[3];
  xmm12 = cv[4];
  xmm13 = cv[5];
  xmm14 = cv[6];
  xmm15 = cv[7];

  /* change of basis on both halves, then column ordering -> row ordering */
  grsiVPERM_Transform_State(xmm8, xmm9, xmm10, xmm11, grsiVPERM_IPT, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
  grsiVPERM_Transform_State(xmm12, xmm13, xmm14, xmm15, grsiVPERM_IPT, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
  grsiMatrix_Transpose(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);

  /* write the transposed IV back over the caller's buffer */
  cv[0] = xmm8;
  cv[1] = xmm9;
  cv[2] = xmm10;
  cv[3] = xmm11;
  cv[4] = xmm12;
  cv[5] = xmm13;
  cv[6] = xmm14;
  cv[7] = xmm15;
}
#endif
|
||||
|
||||
/* Compression function: h <- P(h+m) + Q(m) + h for one 1024-bit block.
 *   h : chaining value, eight 128-bit words, updated in place
 *   m : message block, eight 128-bit words, read only
 * Only a prototype is emitted unless DECLARE_IFUN is defined. */
IFUN void grsiTF1024(u64* h, u64* m)
#if !defined(DECLARE_IFUN)
;
#else
{
  __m128i* const cv = (__m128i*) h;
  __m128i* const msg = (__m128i*) m;
  /* working variables referenced by name from the round macros */
  static __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
  static __m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
  static __m128i TEMP_MUL1[8];
  static __m128i TEMP_MUL2[8];
  static __m128i TEMP_MUL4;
  /* transformed message, parked while P runs so it can be fed to Q */
  static __m128i q_input[8];

/* Local shorthands for the 8-register runs on xmm8..xmm15; removed again
 * at the end of the function. */
#define GRSI_TF_LOAD(src) do {\
    xmm8  = (src)[0];\
    xmm9  = (src)[1];\
    xmm10 = (src)[2];\
    xmm11 = (src)[3];\
    xmm12 = (src)[4];\
    xmm13 = (src)[5];\
    xmm14 = (src)[6];\
    xmm15 = (src)[7];\
  } while (0)
#define GRSI_TF_STORE(dst) do {\
    (dst)[0] = xmm8;\
    (dst)[1] = xmm9;\
    (dst)[2] = xmm10;\
    (dst)[3] = xmm11;\
    (dst)[4] = xmm12;\
    (dst)[5] = xmm13;\
    (dst)[6] = xmm14;\
    (dst)[7] = xmm15;\
  } while (0)
#define GRSI_TF_XOR(src) do {\
    xmm8  = _mm_xor_si128(xmm8,  ((src)[0]));\
    xmm9  = _mm_xor_si128(xmm9,  ((src)[1]));\
    xmm10 = _mm_xor_si128(xmm10, ((src)[2]));\
    xmm11 = _mm_xor_si128(xmm11, ((src)[3]));\
    xmm12 = _mm_xor_si128(xmm12, ((src)[4]));\
    xmm13 = _mm_xor_si128(xmm13, ((src)[5]));\
    xmm14 = _mm_xor_si128(xmm14, ((src)[6]));\
    xmm15 = _mm_xor_si128(xmm15, ((src)[7]));\
  } while (0)

  /* message M -> xmm8..xmm15, then column ordering -> row ordering */
  GRSI_TF_LOAD(msg);
  grsiVPERM_Transform_State(xmm8, xmm9, xmm10, xmm11, grsiVPERM_IPT, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
  grsiVPERM_Transform_State(xmm12, xmm13, xmm14, xmm15, grsiVPERM_IPT, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
  grsiMatrix_Transpose(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);

  /* keep M for the Q branch */
  GRSI_TF_STORE(q_input);

  /* P branch: P(CV+M) */
  GRSI_TF_XOR(cv);
  grsiROUNDS_P();

  /* feed-forward: P(CV+M)+CV, stored back as the running value */
  GRSI_TF_XOR(cv);
  GRSI_TF_STORE(cv);

  /* Q branch: Q(M) */
  GRSI_TF_LOAD(q_input);
  grsiROUNDS_Q();

  /* combine: P(CV+M)+CV+Q(M) becomes the new chaining value */
  GRSI_TF_XOR(cv);
  GRSI_TF_STORE(cv);

#undef GRSI_TF_LOAD
#undef GRSI_TF_STORE
#undef GRSI_TF_XOR

  return;
}
#endif
|
||||
|
||||
/* Output transformation: computes P(h)+h and writes the converted result
 * into the upper half of h (128-bit words 4..7). Words 0..3 are left
 * untouched.
 * Only a prototype is emitted unless DECLARE_IFUN is defined. */
IFUN void grsiOF1024(u64* h)
#if !defined(DECLARE_IFUN)
;
#else
{
  __m128i* const cv = (__m128i*) h;
  /* working variables referenced by name from the round macros */
  static __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
  static __m128i xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
  static __m128i TEMP_MUL1[8];
  static __m128i TEMP_MUL2[8];
  static __m128i TEMP_MUL4;

  /* CV -> xmm8..xmm15 */
  xmm8  = cv[0];
  xmm9  = cv[1];
  xmm10 = cv[2];
  xmm11 = cv[3];
  xmm12 = cv[4];
  xmm13 = cv[5];
  xmm14 = cv[6];
  xmm15 = cv[7];

  /* P(CV) */
  grsiROUNDS_P();

  /* feed-forward: P(CV)+CV */
  xmm8  = _mm_xor_si128(xmm8,  (cv[0]));
  xmm9  = _mm_xor_si128(xmm9,  (cv[1]));
  xmm10 = _mm_xor_si128(xmm10, (cv[2]));
  xmm11 = _mm_xor_si128(xmm11, (cv[3]));
  xmm12 = _mm_xor_si128(xmm12, (cv[4]));
  xmm13 = _mm_xor_si128(xmm13, (cv[5]));
  xmm14 = _mm_xor_si128(xmm14, (cv[6]));
  xmm15 = _mm_xor_si128(xmm15, (cv[7]));

  /* row ordering -> column ordering; the four words that form the hash
   * end up in xmm0, xmm6, xmm13, xmm15 */
  grsiMatrix_Transpose_INV(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm4, xmm0, xmm6, xmm1, xmm2, xmm3, xmm5, xmm7);
  grsiVPERM_Transform_State(xmm0, xmm6, xmm13, xmm15, grsiVPERM_OPT, xmm1, xmm2, xmm3, xmm5, xmm7, xmm10, xmm12);

  /* only the truncated half of the state is returned */
  cv[4] = xmm0;
  cv[5] = xmm6;
  cv[6] = xmm13;
  cv[7] = xmm15;

  return;
}
#endif
|
||||
|
||||
@@ -1,273 +0,0 @@
|
||||
/* hash.c Aug 2011
|
||||
*
|
||||
* Groestl implementation for different versions.
|
||||
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
|
||||
*
|
||||
* This code is placed in the public domain
|
||||
*/
|
||||
|
||||
#include "grsi.h"
|
||||
#include "grsi-asm.h"
|
||||
|
||||
/* Macro form of grsiInit(): initialises the context object named sts_grs,
 * which must be visible where the macro is expanded.
 *
 * NOTE: the `return;` below leaves the *enclosing* function, so the macro
 * can only be expanded inside a function returning void.
 *
 * The storage check now runs before the memset calls; previously the
 * buffers were written first and tested for NULL afterwards. */
#define GRS_I \
do { \
  grsiState *ctx = &sts_grs; \
  u8 i = 0; /* loop variable required by grsiSET_CONSTANTS */ \
  \
  /* set number of state columns and state size depending on \
     variant */ \
  ctx->grsicolumns = grsiCOLS; \
  ctx->grsistatesize = grsiSIZE; \
  ctx->grsiv = LONG; \
  \
  grsiSET_CONSTANTS(); \
  \
  /* refuse to touch missing state storage */ \
  if (ctx->grsichaining == NULL || ctx->grsibuffer == NULL) \
    return; \
  \
  memset(ctx->grsichaining, 0, sizeof(u64)*grsiSIZE/8); \
  memset(ctx->grsibuffer, 0, sizeof(grsiBitSequence)*grsiSIZE); \
  \
  /* set initial value: output length in the last chaining word */ \
  ctx->grsichaining[ctx->grsicolumns-1] = grsiU64BIG((u64)grsiLENGTH); \
  \
  grsiINIT(ctx->grsichaining); \
  \
  /* set other variables */ \
  ctx->grsibuf_ptr = 0; \
  ctx->grsiblock_counter = 0; \
  ctx->grsibits_in_last_byte = 0; \
  \
} while (0)
|
||||
|
||||
/* digest up to len bytes of input (full blocks only) */
|
||||
void grsiTransform(grsiState *ctx,
|
||||
const u8 *in,
|
||||
unsigned long long len) {
|
||||
|
||||
/* increment block counter */
|
||||
ctx->grsiblock_counter += len/grsiSIZE;
|
||||
|
||||
/* digest message, one block at a time */
|
||||
for (; len >= grsiSIZE; len -= grsiSIZE, in += grsiSIZE)
|
||||
grsiTF1024((u64*)ctx->grsichaining, (u64*)in);
|
||||
|
||||
asm volatile ("emms");
|
||||
}
|
||||
|
||||
/* given state h, do h <- P(h)+h */
|
||||
void grsiOutputTransformation(grsiState *ctx) {
|
||||
|
||||
/* determine variant */
|
||||
grsiOF1024((u64*)ctx->grsichaining);
|
||||
|
||||
asm volatile ("emms");
|
||||
}
|
||||
|
||||
/* initialise context */
|
||||
void grsiInit(grsiState* ctx) {
|
||||
u8 i = 0;
|
||||
|
||||
/* output size (in bits) must be a positive integer less than or
|
||||
equal to 512, and divisible by 8 */
|
||||
if (grsiLENGTH <= 0 || (grsiLENGTH%8) || grsiLENGTH > 512)
|
||||
return;
|
||||
|
||||
/* set number of state columns and state size depending on
|
||||
variant */
|
||||
ctx->grsicolumns = grsiCOLS;
|
||||
ctx->grsistatesize = grsiSIZE;
|
||||
ctx->grsiv = LONG;
|
||||
|
||||
grsiSET_CONSTANTS();
|
||||
|
||||
for (i=0; i<grsiSIZE/8; i++)
|
||||
ctx->grsichaining[i] = 0;
|
||||
for (i=0; i<grsiSIZE; i++)
|
||||
ctx->grsibuffer[i] = 0;
|
||||
|
||||
if (ctx->grsichaining == NULL || ctx->grsibuffer == NULL)
|
||||
return;
|
||||
|
||||
/* set initial value */
|
||||
ctx->grsichaining[ctx->grsicolumns-1] = grsiU64BIG((u64)grsiLENGTH);
|
||||
|
||||
grsiINIT(ctx->grsichaining);
|
||||
|
||||
/* set other variables */
|
||||
ctx->grsibuf_ptr = 0;
|
||||
ctx->grsiblock_counter = 0;
|
||||
ctx->grsibits_in_last_byte = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* update state with databitlen bits of input */
|
||||
void grsiUpdate(grsiState* ctx,
|
||||
const grsiBitSequence* input,
|
||||
grsiDataLength databitlen) {
|
||||
int index = 0;
|
||||
int msglen = (int)(databitlen/8);
|
||||
int rem = (int)(databitlen%8);
|
||||
|
||||
/* non-integral number of message bytes can only be supplied in the
|
||||
last call to this function */
|
||||
if (ctx->grsibits_in_last_byte) return;
|
||||
|
||||
/* if the buffer contains data that has not yet been digested, first
|
||||
add data to buffer until full */
|
||||
if (ctx->grsibuf_ptr) {
|
||||
while (ctx->grsibuf_ptr < ctx->grsistatesize && index < msglen) {
|
||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index++];
|
||||
}
|
||||
if (ctx->grsibuf_ptr < ctx->grsistatesize) {
|
||||
/* buffer still not full, return */
|
||||
if (rem) {
|
||||
ctx->grsibits_in_last_byte = rem;
|
||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* digest buffer */
|
||||
ctx->grsibuf_ptr = 0;
|
||||
printf("error\n");
|
||||
grsiTransform(ctx, ctx->grsibuffer, ctx->grsistatesize);
|
||||
}
|
||||
|
||||
/* digest bulk of message */
|
||||
grsiTransform(ctx, input+index, msglen-index);
|
||||
index += ((msglen-index)/ctx->grsistatesize)*ctx->grsistatesize;
|
||||
|
||||
/* store remaining data in buffer */
|
||||
while (index < msglen) {
|
||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index++];
|
||||
}
|
||||
|
||||
/* if non-integral number of bytes have been supplied, store
|
||||
remaining bits in last byte, together with information about
|
||||
number of bits */
|
||||
if (rem) {
|
||||
ctx->grsibits_in_last_byte = rem;
|
||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* update state with databitlen bits of input */
|
||||
void grsiUpdateq(grsiState* ctx, const grsiBitSequence* input)
|
||||
{
|
||||
grsiDataLength databitlen= 64*8;
|
||||
int index = 0;
|
||||
int msglen = (int)(databitlen/8);
|
||||
int rem = (int)(databitlen%8);
|
||||
|
||||
/* non-integral number of message bytes can only be supplied in the
|
||||
last call to this function */
|
||||
if (ctx->grsibits_in_last_byte) return;
|
||||
|
||||
/* if the buffer contains data that has not yet been digested, first
|
||||
add data to buffer until full */
|
||||
if (ctx->grsibuf_ptr) {
|
||||
while (ctx->grsibuf_ptr < ctx->grsistatesize && index < msglen) {
|
||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index++];
|
||||
}
|
||||
if (ctx->grsibuf_ptr < ctx->grsistatesize) {
|
||||
/* buffer still not full, return */
|
||||
if (rem) {
|
||||
ctx->grsibits_in_last_byte = rem;
|
||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* digest buffer */
|
||||
ctx->grsibuf_ptr = 0;
|
||||
printf("error\n");
|
||||
grsiTransform(ctx, ctx->grsibuffer, ctx->grsistatesize);
|
||||
}
|
||||
|
||||
/* digest bulk of message */
|
||||
grsiTransform(ctx, input+index, msglen-index);
|
||||
index += ((msglen-index)/ctx->grsistatesize)*ctx->grsistatesize;
|
||||
|
||||
/* store remaining data in buffer */
|
||||
while (index < msglen) {
|
||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index++];
|
||||
}
|
||||
|
||||
/* if non-integral number of bytes have been supplied, store
|
||||
remaining bits in last byte, together with information about
|
||||
number of bits */
|
||||
if (rem) {
|
||||
ctx->grsibits_in_last_byte = rem;
|
||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = input[index];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
#define BILB ctx->grsibits_in_last_byte
|
||||
|
||||
/* finalise: process remaining data (including padding), perform
|
||||
output transformation, and write hash result to 'output' */
|
||||
void grsiFinal(grsiState* ctx,
|
||||
grsiBitSequence* output) {
|
||||
int i, j = 0, grsibytelen = grsiLENGTH/8;
|
||||
u8 *s = (grsiBitSequence*)ctx->grsichaining;
|
||||
|
||||
/* pad with '1'-bit and first few '0'-bits */
|
||||
if (BILB) {
|
||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
|
||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr-1] ^= 0x1<<(7-BILB);
|
||||
BILB = 0;
|
||||
}
|
||||
else ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = 0x80;
|
||||
|
||||
/* pad with '0'-bits */
|
||||
if (ctx->grsibuf_ptr > ctx->grsistatesize-grsiLENGTHFIELDLEN) {
|
||||
/* padding requires two blocks */
|
||||
while (ctx->grsibuf_ptr < ctx->grsistatesize) {
|
||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = 0;
|
||||
}
|
||||
/* digest first padding block */
|
||||
grsiTransform(ctx, ctx->grsibuffer, ctx->grsistatesize);
|
||||
ctx->grsibuf_ptr = 0;
|
||||
}
|
||||
while (ctx->grsibuf_ptr < ctx->grsistatesize-grsiLENGTHFIELDLEN) {
|
||||
ctx->grsibuffer[(int)ctx->grsibuf_ptr++] = 0;
|
||||
}
|
||||
|
||||
/* length padding */
|
||||
ctx->grsiblock_counter++;
|
||||
ctx->grsibuf_ptr = ctx->grsistatesize;
|
||||
while (ctx->grsibuf_ptr > ctx->grsistatesize-grsiLENGTHFIELDLEN) {
|
||||
ctx->grsibuffer[(int)--ctx->grsibuf_ptr] = (u8)ctx->grsiblock_counter;
|
||||
ctx->grsiblock_counter >>= 8;
|
||||
}
|
||||
|
||||
/* digest final padding block */
|
||||
grsiTransform(ctx, ctx->grsibuffer, ctx->grsistatesize);
|
||||
/* perform output transformation */
|
||||
grsiOutputTransformation(ctx);
|
||||
|
||||
/* store hash result in output */
|
||||
for (i = ctx->grsistatesize-grsibytelen; i < ctx->grsistatesize; i++,j++) {
|
||||
output[j] = s[i];
|
||||
}
|
||||
|
||||
/* zeroise relevant variables and deallocate memory */
|
||||
|
||||
for (i = 0; i < ctx->grsicolumns; i++) {
|
||||
ctx->grsichaining[i] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < ctx->grsistatesize; i++) {
|
||||
ctx->grsibuffer[i] = 0;
|
||||
}
|
||||
// free(ctx->grsichaining);
|
||||
// free(ctx->grsibuffer);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1,79 +0,0 @@
|
||||
/* hash.h Aug 2011
|
||||
*
|
||||
* Groestl implementation for different versions.
|
||||
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
|
||||
*
|
||||
* This code is placed in the public domain
|
||||
*/
|
||||
|
||||
#ifndef __grsi_h
#define __grsi_h

#include <stdio.h>
#include <stdlib.h>

#include "brg_endian.h"
#define NEED_UINT_64T
#include "brg_types.h"

/* digest length in bits; values above 256 select the 1024-bit state below */
#define grsiLENGTH 512

/* some sizes (number of bytes) */
#define grsiROWS 8
#define grsiLENGTHFIELDLEN grsiROWS
#define grsiCOLS512 8
#define grsiCOLS1024 16
#define grsiSIZE512 (grsiROWS*grsiCOLS512)
#define grsiSIZE1024 (grsiROWS*grsiCOLS1024)
#define grsiROUNDS512 10
#define grsiROUNDS1024 14

/* pick the state geometry that matches the digest length */
#if grsiLENGTH<=256
#define grsiCOLS grsiCOLS512
#define grsiSIZE grsiSIZE512
#define grsiROUNDS grsiROUNDS512
#else
#define grsiCOLS grsiCOLS1024
#define grsiSIZE grsiSIZE1024
#define grsiROUNDS grsiROUNDS1024
#endif

/* 64-bit left rotation; li_64 is expected to come from brg_types.h */
#define ROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&li_64(ffffffffffffffff))

#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
#define grsiEXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(7-(n)))))
#define grsiU64BIG(a) (a)
#endif /* IS_BIG_ENDIAN */

#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
/* n is parenthesised so that expression arguments such as i+1 expand
   correctly */
#define grsiEXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(n))))
/* reverse the byte order of a 64-bit value */
#define grsiU64BIG(a) \
  ((ROTL64(a, 8) & li_64(000000FF000000FF)) | \
   (ROTL64(a,24) & li_64(0000FF000000FF00)) | \
   (ROTL64(a,40) & li_64(00FF000000FF0000)) | \
   (ROTL64(a,56) & li_64(FF000000FF000000)))
#endif /* IS_LITTLE_ENDIAN */

typedef enum { LONG, SHORT } grsiVar;

/* NIST API begin */
typedef unsigned char grsiBitSequence;
typedef unsigned long long grsiDataLength;
typedef struct {
  __attribute__ ((aligned (32))) u64 grsichaining[grsiSIZE/8];         /* actual state */
  __attribute__ ((aligned (32))) grsiBitSequence grsibuffer[grsiSIZE]; /* data buffer */
  u64 grsiblock_counter;        /* message block counter */
  int grsibuf_ptr;              /* data buffer pointer */
  int grsibits_in_last_byte;    /* no. of message bits in last byte of
                                   data buffer */
  int grsicolumns;              /* no. of columns in state */
  int grsistatesize;            /* total no. of bytes in state */
  grsiVar grsiv;                /* LONG or SHORT */
} grsiState;

void grsiInit(grsiState*);
void grsiUpdate(grsiState*, const grsiBitSequence*, grsiDataLength);
void grsiFinal(grsiState*, grsiBitSequence*);
/* NIST API end */

#endif /* __grsi_h */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,247 +0,0 @@
|
||||
/* hash.c Aug 2011
|
||||
*
|
||||
* Groestl implementation for different versions.
|
||||
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
|
||||
*
|
||||
* This code is placed in the public domain
|
||||
*/
|
||||
|
||||
#include "grsn-asm.h"
|
||||
|
||||
/* digest up to len bytes of input (full blocks only) */
|
||||
void grsnTransform(grsnState *ctx,
|
||||
const u8 *in,
|
||||
unsigned long long len) {
|
||||
|
||||
/* increment block counter */
|
||||
ctx->block_counter += len/grsnSIZE;
|
||||
|
||||
/* digest message, one block at a time */
|
||||
for (; len >= grsnSIZE; len -= grsnSIZE, in += grsnSIZE)
|
||||
#if grsnLENGTH<=256
|
||||
TF512((u64*)ctx->chaining, (u64*)in);
|
||||
#else
|
||||
TF1024((u64*)ctx->chaining, (u64*)in);
|
||||
#endif
|
||||
|
||||
asm volatile ("emms");
|
||||
}
|
||||
|
||||
/* given state h, do h <- P(h)+h */
|
||||
void grsnOutputTransformation(grsnState *ctx) {
|
||||
|
||||
/* determine variant */
|
||||
#if (grsnLENGTH <= 256)
|
||||
OF512((u64*)ctx->chaining);
|
||||
#else
|
||||
OF1024((u64*)ctx->chaining);
|
||||
#endif
|
||||
|
||||
asm volatile ("emms");
|
||||
}
|
||||
|
||||
/* initialise context */
|
||||
void grsnInit(grsnState* ctx) {
|
||||
u8 i = 0;
|
||||
|
||||
/* output size (in bits) must be a positive integer less than or
|
||||
equal to 512, and divisible by 8 */
|
||||
if (grsnLENGTH <= 0 || (grsnLENGTH%8) || grsnLENGTH > 512)
|
||||
return;
|
||||
|
||||
/* set number of state columns and state size depending on
|
||||
variant */
|
||||
ctx->columns = grsnCOLS;
|
||||
ctx->statesize = grsnSIZE;
|
||||
#if (grsnLENGTH <= 256)
|
||||
ctx->v = SHORT;
|
||||
#else
|
||||
ctx->v = LONG;
|
||||
#endif
|
||||
|
||||
SET_CONSTANTS();
|
||||
|
||||
for (i=0; i<grsnSIZE/8; i++)
|
||||
ctx->chaining[i] = 0;
|
||||
for (i=0; i<grsnSIZE; i++)
|
||||
ctx->buffer[i] = 0;
|
||||
|
||||
if (ctx->chaining == NULL || ctx->buffer == NULL)
|
||||
return;
|
||||
|
||||
/* set initial value */
|
||||
ctx->chaining[ctx->columns-1] = U64BIG((u64)grsnLENGTH);
|
||||
|
||||
INIT(ctx->chaining);
|
||||
|
||||
/* set other variables */
|
||||
ctx->buf_ptr = 0;
|
||||
ctx->block_counter = 0;
|
||||
ctx->bits_in_last_byte = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* update state with databitlen bits of input */
|
||||
void grsnUpdate(grsnState* ctx,
|
||||
const BitSequence* input,
|
||||
DataLength databitlen) {
|
||||
int index = 0;
|
||||
int msglen = (int)(databitlen/8);
|
||||
int rem = (int)(databitlen%8);
|
||||
|
||||
/* non-integral number of message bytes can only be supplied in the
|
||||
last call to this function */
|
||||
if (ctx->bits_in_last_byte) return;
|
||||
|
||||
/* if the buffer contains data that has not yet been digested, first
|
||||
add data to buffer until full */
|
||||
if (ctx->buf_ptr) {
|
||||
while (ctx->buf_ptr < ctx->statesize && index < msglen) {
|
||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
|
||||
}
|
||||
if (ctx->buf_ptr < ctx->statesize) {
|
||||
/* buffer still not full, return */
|
||||
if (rem) {
|
||||
ctx->bits_in_last_byte = rem;
|
||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* digest buffer */
|
||||
ctx->buf_ptr = 0;
|
||||
printf("error\n");
|
||||
grsnTransform(ctx, ctx->buffer, ctx->statesize);
|
||||
}
|
||||
|
||||
/* digest bulk of message */
|
||||
grsnTransform(ctx, input+index, msglen-index);
|
||||
index += ((msglen-index)/ctx->statesize)*ctx->statesize;
|
||||
|
||||
/* store remaining data in buffer */
|
||||
while (index < msglen) {
|
||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
|
||||
}
|
||||
|
||||
/* if non-integral number of bytes have been supplied, store
|
||||
remaining bits in last byte, together with information about
|
||||
number of bits */
|
||||
if (rem) {
|
||||
ctx->bits_in_last_byte = rem;
|
||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* update state with databitlen bits of input */
|
||||
void grsnUpdateq(grsnState* ctx, const BitSequence* input)
|
||||
{
|
||||
int index = 0;
|
||||
int msglen = (int)((64*8)/8);
|
||||
int rem = (int)((64*8)%8);
|
||||
|
||||
/* if the buffer contains data that has not yet been digested, first
|
||||
add data to buffer until full */
|
||||
if (ctx->buf_ptr) {
|
||||
while (ctx->buf_ptr < ctx->statesize && index < msglen) {
|
||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
|
||||
}
|
||||
if (ctx->buf_ptr < ctx->statesize) {
|
||||
/* buffer still not full, return */
|
||||
if (rem) {
|
||||
ctx->bits_in_last_byte = rem;
|
||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* digest buffer */
|
||||
ctx->buf_ptr = 0;
|
||||
printf("error\n");
|
||||
grsnTransform(ctx, ctx->buffer, ctx->statesize);
|
||||
}
|
||||
|
||||
/* digest bulk of message */
|
||||
grsnTransform(ctx, input+index, msglen-index);
|
||||
index += ((msglen-index)/ctx->statesize)*ctx->statesize;
|
||||
|
||||
/* store remaining data in buffer */
|
||||
while (index < msglen) {
|
||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
|
||||
}
|
||||
|
||||
/* if non-integral number of bytes have been supplied, store
|
||||
remaining bits in last byte, together with information about
|
||||
number of bits */
|
||||
if (rem) {
|
||||
ctx->bits_in_last_byte = rem;
|
||||
ctx->buffer[(int)ctx->buf_ptr++] = input[index];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
#define BILB ctx->bits_in_last_byte
|
||||
|
||||
/* finalise: process remaining data (including padding), perform
|
||||
output transformation, and write hash result to 'output' */
|
||||
void grsnFinal(grsnState* ctx,
|
||||
BitSequence* output) {
|
||||
int i, j = 0, grsnbytelen = grsnLENGTH/8;
|
||||
u8 *s = (BitSequence*)ctx->chaining;
|
||||
|
||||
/* pad with '1'-bit and first few '0'-bits */
|
||||
if (BILB) {
|
||||
ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
|
||||
ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-BILB);
|
||||
BILB = 0;
|
||||
}
|
||||
else ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
|
||||
|
||||
/* pad with '0'-bits */
|
||||
if (ctx->buf_ptr > ctx->statesize-grsnLENGTHFIELDLEN) {
|
||||
/* padding requires two blocks */
|
||||
while (ctx->buf_ptr < ctx->statesize) {
|
||||
ctx->buffer[(int)ctx->buf_ptr++] = 0;
|
||||
}
|
||||
/* digest first padding block */
|
||||
grsnTransform(ctx, ctx->buffer, ctx->statesize);
|
||||
ctx->buf_ptr = 0;
|
||||
}
|
||||
while (ctx->buf_ptr < ctx->statesize-grsnLENGTHFIELDLEN) {
|
||||
ctx->buffer[(int)ctx->buf_ptr++] = 0;
|
||||
}
|
||||
|
||||
/* length padding */
|
||||
ctx->block_counter++;
|
||||
ctx->buf_ptr = ctx->statesize;
|
||||
while (ctx->buf_ptr > ctx->statesize-grsnLENGTHFIELDLEN) {
|
||||
ctx->buffer[(int)--ctx->buf_ptr] = (u8)ctx->block_counter;
|
||||
ctx->block_counter >>= 8;
|
||||
}
|
||||
|
||||
/* digest final padding block */
|
||||
grsnTransform(ctx, ctx->buffer, ctx->statesize);
|
||||
/* perform output transformation */
|
||||
grsnOutputTransformation(ctx);
|
||||
|
||||
/* store hash result in output */
|
||||
for (i = ctx->statesize-grsnbytelen; i < ctx->statesize; i++,j++) {
|
||||
output[j] = s[i];
|
||||
}
|
||||
|
||||
/* zeroise relevant variables and deallocate memory */
|
||||
|
||||
for (i = 0; i < ctx->columns; i++) {
|
||||
ctx->chaining[i] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < ctx->statesize; i++) {
|
||||
ctx->buffer[i] = 0;
|
||||
}
|
||||
// free(ctx->chaining);
|
||||
// free(ctx->buffer);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1,80 +0,0 @@
|
||||
/* hash.h Aug 2011
|
||||
*
|
||||
* Groestl implementation for different versions.
|
||||
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
|
||||
*
|
||||
* This code is placed in the public domain
|
||||
*/
|
||||
|
||||
#ifndef __grsn_h
#define __grsn_h

#include <stdio.h>
#include <stdlib.h>

#include "brg_endian.h"
#define NEED_UINT_64T
#include "brg_types.h"

/* digest length in bits; may be overridden before including this header */
#ifndef grsnLENGTH
#define grsnLENGTH 512
#endif

/* some sizes (number of bytes) */
#define grsnROWS 8
#define grsnLENGTHFIELDLEN grsnROWS
#define grsnCOLS512 8
#define grsnCOLS1024 16
#define grsnSIZE512 (grsnROWS*grsnCOLS512)
#define grsnSIZE1024 (grsnROWS*grsnCOLS1024)
#define grsnROUNDS512 10
#define grsnROUNDS1024 14

/* pick the state geometry that matches the digest length */
#if grsnLENGTH<=256
#define grsnCOLS grsnCOLS512
#define grsnSIZE grsnSIZE512
#define grsnROUNDS grsnROUNDS512
#else
#define grsnCOLS grsnCOLS1024
#define grsnSIZE grsnSIZE1024
#define grsnROUNDS grsnROUNDS1024
#endif

/* 64-bit left rotation; li_64 is expected to come from brg_types.h */
#define ROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&li_64(ffffffffffffffff))

#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(7-(n)))))
#define U64BIG(a) (a)
#endif /* IS_BIG_ENDIAN */

#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
/* NOTE(review): n is not parenthesised here, so pass only simple
   expressions.  The definition is kept token-identical because other
   Groestl headers define EXT_BYTE with the same text. */
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*n)))
/* reverse the byte order of a 64-bit value */
#define U64BIG(a) \
  ((ROTL64(a, 8) & li_64(000000FF000000FF)) | \
   (ROTL64(a,24) & li_64(0000FF000000FF00)) | \
   (ROTL64(a,40) & li_64(00FF000000FF0000)) | \
   (ROTL64(a,56) & li_64(FF000000FF000000)))
#endif /* IS_LITTLE_ENDIAN */

typedef enum { LONG, SHORT } Var;

/* NIST API begin */
typedef unsigned char BitSequence;
typedef unsigned long long DataLength;
typedef struct {
  __attribute__ ((aligned (32))) u64 chaining[grsnSIZE/8];     /* actual state */
  __attribute__ ((aligned (32))) BitSequence buffer[grsnSIZE]; /* data buffer */
  u64 block_counter;        /* message block counter */
  int buf_ptr;              /* data buffer pointer */
  int bits_in_last_byte;    /* no. of message bits in last byte of
                               data buffer */
  int columns;              /* no. of columns in state */
  int statesize;            /* total no. of bytes in state */
  Var v;                    /* LONG or SHORT */
} grsnState;

void grsnInit(grsnState*);
void grsnUpdate(grsnState*, const BitSequence*, DataLength);
void grsnFinal(grsnState*, BitSequence*);

#endif /* __grsn_h */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,10 +0,0 @@
|
||||
#ifndef GRSOASM_H
#define GRSOASM_H

#include "grso.h"

/* Permutations of the 1024-bit Groestl state, implemented in assembly.
   Each one transforms the array x in place. */
void grsoP1024ASM(u64 *x);
void grsoQ1024ASM(u64 *x);

#endif /* GRSOASM_H */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
#ifndef GRSOASM_H
#define GRSOASM_H
/* really same as the mmx asm.h */
/* made just in case something must be changed */

#include "grso.h"

/* Permutations of the 1024-bit Groestl state, implemented in assembly.
   Each one transforms the array x in place. */
void grsoP1024ASM(u64 *x);
void grsoQ1024ASM(u64 *x);

#endif /* GRSOASM_H */
|
||||
@@ -1,110 +0,0 @@
|
||||
/* hash.c January 2011
|
||||
*
|
||||
* Groestl-512 implementation with inline assembly containing mmx and
|
||||
* sse instructions. Optimized for Opteron.
|
||||
* Authors: Krystian Matusiewicz and Soeren S. Thomsen
|
||||
*
|
||||
* This code is placed in the public domain
|
||||
*/
|
||||
|
||||
//#include "grso.h"
|
||||
//#include "grso-asm.h"
|
||||
// #include "grsotab.h"
|
||||
|
||||
#define DECL_GRS
|
||||
|
||||
/* load initial constants
 *
 * Resets the context named `sts_grs` in the expanding scope: the state is
 * all zero except the last column, which holds the digest size in bits
 * (converted with grsoU64BIG); the buffer pointer and block counter are
 * cleared.  The expansion already ends in ';'. */
#define GRS_I \
do { \
    int i; \
    /* set initial value */ \
    sts_grs.grsstate[grsoCOLS-1] = grsoU64BIG((u64)(8*grsoDIGESTSIZE)); \
    for (i = grsoCOLS-2; i >= 0; i--) sts_grs.grsstate[i] = 0; \
    \
    /* set other variables */ \
    sts_grs.grsblock_counter = 0; \
    sts_grs.grsbuf_ptr = 0; \
} while (0);
|
||||
|
||||
/* load hash
 *
 * Absorbs the 64 bytes at `hash` into the context `sts_grs`, using
 * `hashbuf` as the block buffer; all three names must exist in the
 * expanding scope.  Whole blocks are digested, leftovers stay in hashbuf.
 * The expansion already ends in ';'. */
#define GRS_U \
do { \
    unsigned char* grs_src = hash; \
    unsigned long long grs_pos = 0; \
    \
    /* if the buffer contains data that has not yet been digested, first \
       add data to buffer until full */ \
    if (sts_grs.grsbuf_ptr) { \
        for (; sts_grs.grsbuf_ptr < grsoSIZE && grs_pos < 64; grs_pos++) { \
            hashbuf[(int)sts_grs.grsbuf_ptr++] = grs_src[grs_pos]; \
        } \
        /* buffer still not full: nothing to digest yet, leave the macro */ \
        if (sts_grs.grsbuf_ptr < grsoSIZE) break; \
        \
        /* digest buffer */ \
        sts_grs.grsbuf_ptr = 0; \
        grsoTransform(&sts_grs, hashbuf, grsoSIZE); \
    } \
    \
    /* digest bulk of message */ \
    grsoTransform(&sts_grs, grs_src+grs_pos, 64-grs_pos); \
    grs_pos += ((64-grs_pos)/grsoSIZE)*grsoSIZE; \
    \
    /* store remaining data in buffer */ \
    while (grs_pos < 64) { \
        hashbuf[(int)sts_grs.grsbuf_ptr++] = grs_src[grs_pos++]; \
    } \
    \
} while (0);
|
||||
|
||||
/* groestl512 hash loaded */
/* hash = groestl512(loaded)
 *
 * Pads the data buffered in `hashbuf`, digests the final block(s), applies
 * the output transformation, writes the grsoDIGESTSIZE-byte digest to
 * `hash`, and clears the state and buffer.  `sts_grs`, `hashbuf` and `hash`
 * must exist in the expanding scope.  The expansion already ends in ';'. */
#define GRS_C \
do { \
    char *out = hash; \
    unsigned char *s = (unsigned char*)sts_grs.grsstate; \
    int i; \
    \
    /* pad with a '1'-bit followed by '0'-bits */ \
    hashbuf[sts_grs.grsbuf_ptr++] = 0x80; \
    \
    if (sts_grs.grsbuf_ptr > grsoSIZE-grsoLENGTHFIELDLEN) { \
        /* no room left for the length field: padding requires two blocks */ \
        for (; sts_grs.grsbuf_ptr < grsoSIZE; sts_grs.grsbuf_ptr++) { \
            hashbuf[sts_grs.grsbuf_ptr] = 0; \
        } \
        /* digest first padding block */ \
        grsoTransform(&sts_grs, hashbuf, grsoSIZE); \
        sts_grs.grsbuf_ptr = 0; \
    } \
    for (; sts_grs.grsbuf_ptr < grsoSIZE-grsoLENGTHFIELDLEN; \
           sts_grs.grsbuf_ptr++) { \
        hashbuf[sts_grs.grsbuf_ptr] = 0; \
    } \
    \
    /* length padding: block count written backwards from the buffer end */ \
    sts_grs.grsblock_counter++; \
    for (sts_grs.grsbuf_ptr = grsoSIZE; \
         sts_grs.grsbuf_ptr > grsoSIZE-grsoLENGTHFIELDLEN; ) { \
        hashbuf[--sts_grs.grsbuf_ptr] = (unsigned char)sts_grs.grsblock_counter; \
        sts_grs.grsblock_counter >>= 8; \
    } \
    \
    /* digest final padding block */ \
    grsoTransform(&sts_grs, hashbuf, grsoSIZE); \
    /* perform output transformation */ \
    grsoOutputTransformation(&sts_grs); \
    \
    /* store hash result in output: the last grsoDIGESTSIZE state bytes */ \
    for (i = 0; i < grsoDIGESTSIZE; i++) { \
        out[i] = s[grsoSIZE-grsoDIGESTSIZE+i]; \
    } \
    \
    /* zeroise state and buffer */ \
    for (i = 0; i < grsoCOLS; i++) { \
        sts_grs.grsstate[i] = 0; \
    } \
    for (i = 0; i < grsoSIZE; i++) { \
        hashbuf[i] = 0; \
    } \
} while (0);
|
||||
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
/* hash.c January 2011
|
||||
*
|
||||
* Groestl-512 implementation with inline assembly containing mmx and
|
||||
* sse instructions. Optimized for Opteron.
|
||||
* Authors: Krystian Matusiewicz and Soeren S. Thomsen
|
||||
*
|
||||
* This code is placed in the public domain
|
||||
*/
|
||||
|
||||
#include "algo/groestl/sse2/grso-asm.h"
|
||||
#include "algo/groestl/sse2/grso.h"
|
||||
#include "algo/groestl/sse2/grsotab.h"
|
||||
|
||||
/* digest up to len bytes of input (full blocks only) */
|
||||
void grsoTransform(grsoState *ctx,
|
||||
const unsigned char *in,
|
||||
unsigned long long len) {
|
||||
u64 y[grsoCOLS+2] __attribute__ ((aligned (16)));
|
||||
u64 z[grsoCOLS+2] __attribute__ ((aligned (16)));
|
||||
u64 *m, *h = (u64*)ctx->grsstate;
|
||||
int i;
|
||||
|
||||
/* increment block counter */
|
||||
ctx->grsblock_counter += len/grsoSIZE;
|
||||
|
||||
/* digest message, one block at a time */
|
||||
for (; len >= grsoSIZE; len -= grsoSIZE, in += grsoSIZE) {
|
||||
m = (u64*)in;
|
||||
for (i = 0; i < grsoCOLS; i++) {
|
||||
y[i] = m[i];
|
||||
z[i] = m[i] ^ h[i];
|
||||
}
|
||||
|
||||
grsoQ1024ASM(y);
|
||||
grsoP1024ASM(z);
|
||||
|
||||
/* h' == h + Q(m) + P(h+m) */
|
||||
for (i = 0; i < grsoCOLS; i++) {
|
||||
h[i] ^= z[i] ^ y[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* given state h, do h <- P(h)+h */
|
||||
void grsoOutputTransformation(grsoState *ctx) {
|
||||
u64 z[grsoCOLS] __attribute__ ((aligned (16)));
|
||||
int j;
|
||||
|
||||
for (j = 0; j < grsoCOLS; j++) {
|
||||
z[j] = ctx->grsstate[j];
|
||||
}
|
||||
grsoP1024ASM(z);
|
||||
for (j = 0; j < grsoCOLS; j++) {
|
||||
ctx->grsstate[j] ^= z[j];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
#ifndef __hash_h
#define __hash_h

#include <stdio.h>
#include <stdlib.h>
#include "brg_endian.h"
#include "brg_types.h"

/* some sizes (number of bytes) */
#define grsoROWS 8
#define grsoLENGTHFIELDLEN grsoROWS
#define grsoCOLS 16
#define grsoSIZE (grsoROWS*grsoCOLS)
#define grsoDIGESTSIZE 64

#define grsoROUNDS 14

/* 64-bit left rotation */
#define grsoROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&((u64)0xffffffffffffffffULL))

#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
#error "grso: this Groestl implementation supports little-endian platforms only"
#endif /* IS_BIG_ENDIAN */

#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
/* NOTE(review): n is not parenthesised here, so pass only simple
   expressions.  The definition is kept token-identical because other
   Groestl headers define EXT_BYTE with the same text. */
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*n)))
/* reverse the byte order of a 64-bit value */
#define grsoU64BIG(a) \
  ((grsoROTL64(a, 8) & ((u64)0x000000ff000000ffULL)) | \
   (grsoROTL64(a,24) & ((u64)0x0000ff000000ff00ULL)) | \
   (grsoROTL64(a,40) & ((u64)0x00ff000000ff0000ULL)) | \
   (grsoROTL64(a,56) & ((u64)0xff000000ff000000ULL)))
#endif /* IS_LITTLE_ENDIAN */

typedef struct {
  u64 grsstate[grsoCOLS];       /* actual state */
  u64 grsblock_counter;         /* message block counter */
  int grsbuf_ptr;               /* data buffer pointer */
} grsoState;

//extern int grsoInit(grsoState* ctx);
//extern int grsoUpdate(grsoState* ctx, const unsigned char* in,
//                      unsigned long long len);
//extern int grsoUpdateq(grsoState* ctx, const unsigned char* in);
//extern int grsoFinal(grsoState* ctx,
//                     unsigned char* out);
//
//extern int grsohash(unsigned char *out,
//                    const unsigned char *in,
//                    unsigned long long len);

/* digest up to len bytes of input (full blocks only) */
void grsoTransform( grsoState *ctx, const unsigned char *in,
                    unsigned long long len );

/* given state h, do h <- P(h)+h */
void grsoOutputTransformation( grsoState *ctx );

int grso_init  ( grsoState* sts_grs );
int grso_update ( grsoState* sts_grs, char* hashbuf, char* hash );
int grso_close ( grsoState *sts_grs, char* hashbuf, char* hash );

#endif /* __hash_h */
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
@@ -1,45 +0,0 @@
|
||||
/*
|
||||
* file : hash_api.h
|
||||
* version : 1.0.208
|
||||
* date : 14.12.2010
|
||||
*
|
||||
* Grostl multi-stream bitsliced implementation Hash API
|
||||
*
|
||||
* Cagdas Calik
|
||||
* ccalik@metu.edu.tr
|
||||
* Institute of Applied Mathematics, Middle East Technical University, Turkey.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef GRSS_API_H
#define GRSS_API_H

#include "sha3_common.h"
#include <tmmintrin.h>

/* Hash context of the multi-stream bitsliced implementation.
   NOTE(review): field roles below are inferred from their names and sizes
   only - confirm against the implementation file. */
typedef struct
{
	/* four state banks of eight 128-bit words each */
	__m128i state1[8];
	__m128i state2[8];
	__m128i state3[8];
	__m128i state4[8];

	/* presumably per-round constants for P and Q, plus shift constants */
	__m128i _Pconst[14][8];
	__m128i _Qconst[14][8];
	__m128i _shiftconst[8];

	unsigned int uHashLength;
	unsigned int uBlockLength;

	/* 128-byte data buffer */
	BitSequence buffer[128];

} grssState;

void grssInit(grssState *state, int grssbitlen);

void grssUpdate(grssState *state, const BitSequence *data, DataLength databitlen);

void grssFinal(grssState *state, BitSequence *grssval);

#endif // GRSS_API_H
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
@@ -1,202 +0,0 @@
|
||||
/* hash.c Aug 2011
|
||||
*
|
||||
* Groestl implementation for different versions.
|
||||
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
|
||||
*
|
||||
* This code is placed in the public domain
|
||||
*/
|
||||
|
||||
|
||||
#include "grsv.h"
|
||||
#include "grsv-asm.h"
|
||||
|
||||
/* digest up to len bytes of input (full blocks only) */
|
||||
void grsvTransform(grsvState *ctx,
|
||||
const u8 *in,
|
||||
unsigned long long len) {
|
||||
|
||||
/* increment block counter */
|
||||
ctx->grsvblock_counter += len/grsvSIZE;
|
||||
|
||||
/* digest message, one block at a time */
|
||||
for (; len >= grsvSIZE; len -= grsvSIZE, in += grsvSIZE)
|
||||
#if grsvLENGTH<=256
|
||||
grsvTF512((u64*)ctx->grsvchaining, (u64*)in);
|
||||
#else
|
||||
grsvTF1024((u64*)ctx->grsvchaining, (u64*)in);
|
||||
#endif
|
||||
|
||||
asm volatile ("emms");
|
||||
}
|
||||
|
||||
/* given state h, do h <- P(h)+h */
|
||||
void grsvOutputTransformation(grsvState *ctx) {
|
||||
|
||||
/* determine variant */
|
||||
#if (grsvLENGTH <= 256)
|
||||
grsvOF512((u64*)ctx->grsvchaining);
|
||||
#else
|
||||
grsvOF1024((u64*)ctx->grsvchaining);
|
||||
#endif
|
||||
|
||||
asm volatile ("emms");
|
||||
}
|
||||
|
||||
/* initialise context */
|
||||
void grsvInit(grsvState* ctx) {
|
||||
u8 i = 0;
|
||||
|
||||
/* output size (in bits) must be a positive integer less than or
|
||||
equal to 512, and divisible by 8 */
|
||||
if (grsvLENGTH <= 0 || (grsvLENGTH%8) || grsvLENGTH > 512)
|
||||
return;
|
||||
|
||||
/* set number of state columns and state size depending on
|
||||
variant */
|
||||
ctx->grsvcolumns = grsvCOLS;
|
||||
ctx->grsvstatesize = grsvSIZE;
|
||||
#if (grsvLENGTH <= 256)
|
||||
ctx->grsvv = SHORT;
|
||||
#else
|
||||
ctx->grsvv = LONG;
|
||||
#endif
|
||||
|
||||
SET_CONSTANTS();
|
||||
|
||||
for (i=0; i<grsvSIZE/8; i++)
|
||||
ctx->grsvchaining[i] = 0;
|
||||
for (i=0; i<grsvSIZE; i++)
|
||||
ctx->grsvbuffer[i] = 0;
|
||||
|
||||
if (ctx->grsvchaining == NULL || ctx->grsvbuffer == NULL)
|
||||
return;
|
||||
|
||||
/* set initial value */
|
||||
ctx->grsvchaining[ctx->grsvcolumns-1] = U64BIG((u64)grsvLENGTH);
|
||||
|
||||
grsvINIT(ctx->grsvchaining);
|
||||
|
||||
/* set other variables */
|
||||
ctx->grsvbuf_ptr = 0;
|
||||
ctx->grsvblock_counter = 0;
|
||||
ctx->grsvbits_in_last_byte = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* update state with databitlen bits of input */
|
||||
void grsvUpdate(grsvState* ctx,
|
||||
const grsvBitSequence* input,
|
||||
grsvDataLength databitlen) {
|
||||
int index = 0;
|
||||
int msglen = (int)(databitlen/8);
|
||||
int rem = (int)(databitlen%8);
|
||||
|
||||
/* non-integral number of message bytes can only be supplied in the
|
||||
last call to this function */
|
||||
if (ctx->grsvbits_in_last_byte) return;
|
||||
|
||||
/* if the buffer contains data that has not yet been digested, first
|
||||
add data to buffer until full */
|
||||
if (ctx->grsvbuf_ptr) {
|
||||
while (ctx->grsvbuf_ptr < ctx->grsvstatesize && index < msglen) {
|
||||
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = input[index++];
|
||||
}
|
||||
if (ctx->grsvbuf_ptr < ctx->grsvstatesize) {
|
||||
/* buffer still not full, return */
|
||||
if (rem) {
|
||||
ctx->grsvbits_in_last_byte = rem;
|
||||
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = input[index];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* digest buffer */
|
||||
ctx->grsvbuf_ptr = 0;
|
||||
printf("error\n");
|
||||
grsvTransform(ctx, ctx->grsvbuffer, ctx->grsvstatesize);
|
||||
}
|
||||
|
||||
/* digest bulk of message */
|
||||
grsvTransform(ctx, input+index, msglen-index);
|
||||
index += ((msglen-index)/ctx->grsvstatesize)*ctx->grsvstatesize;
|
||||
|
||||
/* store remaining data in buffer */
|
||||
while (index < msglen) {
|
||||
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = input[index++];
|
||||
}
|
||||
|
||||
/* if non-integral number of bytes have been supplied, store
|
||||
remaining bits in last byte, together with information about
|
||||
number of bits */
|
||||
if (rem) {
|
||||
ctx->grsvbits_in_last_byte = rem;
|
||||
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = input[index];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
#define BILB ctx->grsvbits_in_last_byte
|
||||
|
||||
/* finalise: process remaining data (including padding), perform
|
||||
output transformation, and write hash result to 'output' */
|
||||
void grsvFinal(grsvState* ctx,
|
||||
grsvBitSequence* output) {
|
||||
int i, j = 0, grsvbytelen = grsvLENGTH/8;
|
||||
u8 *s = (grsvBitSequence*)ctx->grsvchaining;
|
||||
|
||||
/* pad with '1'-bit and first few '0'-bits */
|
||||
if (BILB) {
|
||||
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
|
||||
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr-1] ^= 0x1<<(7-BILB);
|
||||
BILB = 0;
|
||||
}
|
||||
else ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = 0x80;
|
||||
|
||||
/* pad with '0'-bits */
|
||||
if (ctx->grsvbuf_ptr > ctx->grsvstatesize-grsvLENGTHFIELDLEN) {
|
||||
/* padding requires two blocks */
|
||||
while (ctx->grsvbuf_ptr < ctx->grsvstatesize) {
|
||||
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = 0;
|
||||
}
|
||||
/* digest first padding block */
|
||||
grsvTransform(ctx, ctx->grsvbuffer, ctx->grsvstatesize);
|
||||
ctx->grsvbuf_ptr = 0;
|
||||
}
|
||||
while (ctx->grsvbuf_ptr < ctx->grsvstatesize-grsvLENGTHFIELDLEN) {
|
||||
ctx->grsvbuffer[(int)ctx->grsvbuf_ptr++] = 0;
|
||||
}
|
||||
|
||||
/* length padding */
|
||||
ctx->grsvblock_counter++;
|
||||
ctx->grsvbuf_ptr = ctx->grsvstatesize;
|
||||
while (ctx->grsvbuf_ptr > ctx->grsvstatesize-grsvLENGTHFIELDLEN) {
|
||||
ctx->grsvbuffer[(int)--ctx->grsvbuf_ptr] = (u8)ctx->grsvblock_counter;
|
||||
ctx->grsvblock_counter >>= 8;
|
||||
}
|
||||
|
||||
/* digest final padding block */
|
||||
grsvTransform(ctx, ctx->grsvbuffer, ctx->grsvstatesize);
|
||||
/* perform output transformation */
|
||||
grsvOutputTransformation(ctx);
|
||||
|
||||
/* store hash result in output */
|
||||
for (i = ctx->grsvstatesize-grsvbytelen; i < ctx->grsvstatesize; i++,j++) {
|
||||
output[j] = s[i];
|
||||
}
|
||||
|
||||
/* zeroise relevant variables and deallocate memory */
|
||||
|
||||
for (i = 0; i < ctx->grsvcolumns; i++) {
|
||||
ctx->grsvchaining[i] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < ctx->grsvstatesize; i++) {
|
||||
ctx->grsvbuffer[i] = 0;
|
||||
}
|
||||
// free(ctx->grsvchaining);
|
||||
// free(ctx->buffer);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1,77 +0,0 @@
|
||||
/* hash.h Aug 2011
|
||||
*
|
||||
* Groestl implementation for different versions.
|
||||
* Author: Krystian Matusiewicz, Günther A. Roland, Martin Schläffer
|
||||
*
|
||||
* This code is placed in the public domain
|
||||
*/
|
||||
|
||||
#ifndef __grsv_h
#define __grsv_h

#include <stdio.h>
#include <stdlib.h>

#include "brg_endian.h"
#define NEED_UINT_64T
#include "brg_types.h"

/* digest length in bits; values above 256 select the 1024-bit state below */
#define grsvLENGTH 512

/* some sizes (number of bytes) */
#define grsvROWS 8
#define grsvLENGTHFIELDLEN grsvROWS
#define grsvCOLS512 8
#define grsvCOLS1024 16
#define grsvSIZE512 (grsvROWS*grsvCOLS512)
#define grsvSIZE1024 (grsvROWS*grsvCOLS1024)
#define grsvROUNDS512 10
#define grsvROUNDS1024 14

/* pick the state geometry that matches the digest length */
#if grsvLENGTH<=256
#define grsvCOLS grsvCOLS512
#define grsvSIZE grsvSIZE512
#define grsvROUNDS grsvROUNDS512
#else
#define grsvCOLS grsvCOLS1024
#define grsvSIZE grsvSIZE1024
#define grsvROUNDS grsvROUNDS1024
#endif

/* 64-bit left rotation; li_64 is expected to come from brg_types.h */
#define ROTL64(a,n) ((((a)<<(n))|((a)>>(64-(n))))&li_64(ffffffffffffffff))

#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*(7-(n)))))
#define U64BIG(a) (a)
#endif /* IS_BIG_ENDIAN */

#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
/* NOTE(review): n is not parenthesised here, so pass only simple
   expressions.  The definition is kept token-identical because other
   Groestl headers define EXT_BYTE with the same text. */
#define EXT_BYTE(var,n) ((u8)((u64)(var) >> (8*n)))
/* reverse the byte order of a 64-bit value */
#define U64BIG(a) \
  ((ROTL64(a, 8) & li_64(000000FF000000FF)) | \
   (ROTL64(a,24) & li_64(0000FF000000FF00)) | \
   (ROTL64(a,40) & li_64(00FF000000FF0000)) | \
   (ROTL64(a,56) & li_64(FF000000FF000000)))
#endif /* IS_LITTLE_ENDIAN */

typedef enum { LONG, SHORT } grsvVar;

typedef unsigned char grsvBitSequence;
typedef unsigned long long grsvDataLength;
typedef struct {
  __attribute__ ((aligned (32))) u64 grsvchaining[grsvSIZE/8];         /* actual state */
  __attribute__ ((aligned (32))) grsvBitSequence grsvbuffer[grsvSIZE]; /* data buffer */
  u64 grsvblock_counter;        /* message block counter */
  int grsvbuf_ptr;              /* data buffer pointer */
  int grsvbits_in_last_byte;    /* no. of message bits in last byte of
                                   data buffer */
  int grsvcolumns;              /* no. of columns in state */
  int grsvstatesize;            /* total no. of bytes in state */
  grsvVar grsvv;                /* LONG or SHORT */
} grsvState;

void grsvInit(grsvState*);
void grsvUpdate(grsvState*, const grsvBitSequence*, grsvDataLength);
void grsvFinal(grsvState*, grsvBitSequence*);

#endif /* __grsv_h */
|
||||
124
algo/hmq1725.c
124
algo/hmq1725.c
@@ -23,10 +23,7 @@
|
||||
#include "algo/sha2/sph-sha2.h"
|
||||
#include "algo/haval/sph-haval.h"
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
#include "algo/groestl/sse2/grso.h"
|
||||
#include "algo/groestl/sse2/grso-macro.c"
|
||||
#else
|
||||
#ifndef NO_AES_NI
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#endif
|
||||
@@ -34,38 +31,31 @@
|
||||
#include "algo/luffa/sse2/luffa_for_sse2.h"
|
||||
#include "algo/cubehash/sse2/cubehash_sse2.h"
|
||||
#include "algo/simd/sse2/nist.h"
|
||||
//#include "algo/blake/sse2/blake.c"
|
||||
//#include "algo/keccak/sse2/keccak.c"
|
||||
//#include "algo/bmw/sse2/bmw.c"
|
||||
//#include "algo/skein/sse2/skein.c"
|
||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||
|
||||
typedef struct {
|
||||
sph_blake512_context blake1, blake2;
|
||||
sph_bmw512_context bmw1, bmw2, bmw3;
|
||||
sph_skein512_context skein1, skein2;
|
||||
sph_jh512_context jh1, jh2;
|
||||
sph_keccak512_context keccak1, keccak2;
|
||||
// sph_luffa512_context luffa1, luffa2;
|
||||
hashState_luffa luffa1, luffa2;
|
||||
// sph_cubehash512_context cube1, cube2;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite1, shavite2;
|
||||
// sph_simd512_context simd1, simd2;
|
||||
hashState_sd simd1, simd2;
|
||||
sph_hamsi512_context hamsi1;
|
||||
sph_fugue512_context fugue1, fugue2;
|
||||
sph_shabal512_context shabal1;
|
||||
sph_whirlpool_context whirlpool1, whirlpool2, whirlpool3, whirlpool4;
|
||||
sph_sha512_context sha1, sha2;
|
||||
sph_haval256_5_context haval1, haval2;
|
||||
sph_blake512_context blake1, blake2;
|
||||
sph_bmw512_context bmw1, bmw2, bmw3;
|
||||
sph_skein512_context skein1, skein2;
|
||||
sph_jh512_context jh1, jh2;
|
||||
sph_keccak512_context keccak1, keccak2;
|
||||
hashState_luffa luffa1, luffa2;
|
||||
cubehashParam cube;
|
||||
sph_shavite512_context shavite1, shavite2;
|
||||
hashState_sd simd1, simd2;
|
||||
sph_hamsi512_context hamsi1;
|
||||
sph_fugue512_context fugue1, fugue2;
|
||||
sph_shabal512_context shabal1;
|
||||
sph_whirlpool_context whirlpool1, whirlpool2, whirlpool3, whirlpool4;
|
||||
sph_sha512_context sha1, sha2;
|
||||
sph_haval256_5_context haval1, haval2;
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_context groestl1, groestl2;
|
||||
sph_echo512_context echo1, echo2;
|
||||
sph_groestl512_context groestl1, groestl2;
|
||||
sph_echo512_context echo1, echo2;
|
||||
#else
|
||||
hashState_echo echo1, echo2;
|
||||
hashState_groestl groestl1, groestl2;
|
||||
hashState_echo echo1, echo2;
|
||||
hashState_groestl groestl1, groestl2;
|
||||
#endif
|
||||
|
||||
} hmq1725_ctx_holder;
|
||||
@@ -90,19 +80,14 @@ void init_hmq1725_ctx()
|
||||
sph_keccak512_init(&hmq1725_ctx.keccak1);
|
||||
sph_keccak512_init(&hmq1725_ctx.keccak2);
|
||||
|
||||
// sph_luffa512_init(&hmq1725_ctx.luffa1);
|
||||
// sph_luffa512_init(&hmq1725_ctx.luffa2);
|
||||
init_luffa( &hmq1725_ctx.luffa1, 512 );
|
||||
init_luffa( &hmq1725_ctx.luffa2, 512 );
|
||||
|
||||
// sph_cubehash512_init(&hmq1725_ctx.cubehash1);
|
||||
cubehashInit( &hmq1725_ctx.cube, 512, 16, 32 );
|
||||
|
||||
sph_shavite512_init(&hmq1725_ctx.shavite1);
|
||||
sph_shavite512_init(&hmq1725_ctx.shavite2);
|
||||
|
||||
// sph_simd512_init(&hmq1725_ctx.simd1);
|
||||
// sph_simd512_init(&hmq1725_ctx.simd2);
|
||||
init_sd( &hmq1725_ctx.simd1, 512 );
|
||||
init_sd( &hmq1725_ctx.simd2, 512 );
|
||||
|
||||
@@ -135,46 +120,18 @@ void init_hmq1725_ctx()
|
||||
init_groestl( &hmq1725_ctx.groestl1 );
|
||||
init_groestl( &hmq1725_ctx.groestl2 );
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
extern void hmq1725hash(void *state, const void *input)
|
||||
{
|
||||
hmq1725_ctx_holder ctx;
|
||||
memcpy(&ctx, &hmq1725_ctx, sizeof(hmq1725_ctx));
|
||||
|
||||
size_t hashptr;
|
||||
// DATA_ALIGNXY(sph_u64 hashctA,8);
|
||||
// DATA_ALIGNXY(sph_u64 hashctB,8);
|
||||
|
||||
// DATA_ALIGNXY(unsigned char hash[128],16);
|
||||
unsigned char hashbuf[128];
|
||||
sph_u64 hashctA;
|
||||
sph_u64 hashctB;
|
||||
|
||||
const uint32_t mask = 24;
|
||||
uint32_t hashA[25], hashB[25];
|
||||
hmq1725_ctx_holder ctx;
|
||||
|
||||
//these uint512 in the c++ source of the client are backed by an array of uint32
|
||||
uint32_t hashA[25], hashB[25];
|
||||
|
||||
// unsigned char hash[128]; // uint32_t hashA[16], hashB[16];
|
||||
// #define hashA hash
|
||||
// #define hashB (hash+64)
|
||||
memcpy(&ctx, &hmq1725_ctx, sizeof(hmq1725_ctx));
|
||||
|
||||
sph_bmw512 (&ctx.bmw1, input, 80); //0
|
||||
sph_bmw512_close(&ctx.bmw1, hashA); //1
|
||||
/*
|
||||
DECL_BMW;
|
||||
BMW_I;
|
||||
BMW_U;
|
||||
#define M(x) sph_dec64le_aligned(data + 8 * (x))
|
||||
#define H(x) (h[x])
|
||||
#define dH(x) (dh[x])
|
||||
BMW_C;
|
||||
#undef M
|
||||
#undef H
|
||||
#undef dH
|
||||
*/
|
||||
|
||||
sph_whirlpool (&ctx.whirlpool1, hashA, 64); //0
|
||||
sph_whirlpool_close(&ctx.whirlpool1, hashB); //1
|
||||
@@ -182,8 +139,8 @@ extern void hmq1725hash(void *state, const void *input)
|
||||
if ( hashB[0] & mask ) //1
|
||||
{
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512 (&ctx.groestl1, hashB, 64); //1
|
||||
sph_groestl512_close(&ctx.groestl1, hashA); //2
|
||||
sph_groestl512 (&ctx.groestl1, hashB, 64); //1
|
||||
sph_groestl512_close(&ctx.groestl1, hashA); //2
|
||||
#else
|
||||
update_groestl( &ctx.groestl1, (char*)hashB, 512 );
|
||||
final_groestl( &ctx.groestl1, (char*)hashA );
|
||||
@@ -191,8 +148,8 @@ extern void hmq1725hash(void *state, const void *input)
|
||||
}
|
||||
else
|
||||
{
|
||||
sph_skein512 (&ctx.skein1, hashB, 64); //1
|
||||
sph_skein512_close(&ctx.skein1, hashA); //2
|
||||
sph_skein512 (&ctx.skein1, hashB, 64); //1
|
||||
sph_skein512_close(&ctx.skein1, hashA); //2
|
||||
}
|
||||
|
||||
sph_jh512 (&ctx.jh1, hashA, 64); //3
|
||||
@@ -212,13 +169,9 @@ extern void hmq1725hash(void *state, const void *input)
|
||||
sph_bmw512_close(&ctx.bmw2, hashB); //5
|
||||
}
|
||||
|
||||
// sph_luffa512 (&ctx.luffa1, hashB, 64); //5
|
||||
// sph_luffa512_close(&ctx.luffa1, hashA); //6
|
||||
update_luffa( &ctx.luffa1, (BitSequence*)hashB, 512 );
|
||||
final_luffa( &ctx.luffa1, (BitSequence*)hashA );
|
||||
|
||||
// sph_cubehash512 (&ctx.cubehash1, hashA, 64); //6
|
||||
// sph_cubehash512_close(&ctx.cubehash1, hashB); //7
|
||||
cubehashUpdate( &ctx.cube, (BitSequence *)hashA, 64 );
|
||||
cubehashDigest( &ctx.cube, (BitSequence *)hashB );
|
||||
|
||||
@@ -233,14 +186,11 @@ extern void hmq1725hash(void *state, const void *input)
|
||||
sph_jh512_close(&ctx.jh2, hashA); //8
|
||||
}
|
||||
|
||||
|
||||
sph_shavite512 (&ctx.shavite1, hashA, 64); //3
|
||||
sph_shavite512_close(&ctx.shavite1, hashB); //4
|
||||
|
||||
// sph_simd512 (&ctx.simd1, hashB, 64); //2
|
||||
// sph_simd512_close(&ctx.simd1, hashA); //3
|
||||
update_sd( &ctx.simd1, (BitSequence *)hashB, 512 );
|
||||
final_sd( &ctx.simd1, (BitSequence *)hashA );
|
||||
update_sd( &ctx.simd1, (BitSequence *)hashB, 512 );
|
||||
final_sd( &ctx.simd1, (BitSequence *)hashA );
|
||||
|
||||
if ( hashA[0] & mask ) //4
|
||||
{
|
||||
@@ -258,8 +208,8 @@ extern void hmq1725hash(void *state, const void *input)
|
||||
sph_echo512 (&ctx.echo1, hashB, 64); //5
|
||||
sph_echo512_close(&ctx.echo1, hashA); //6
|
||||
#else
|
||||
update_echo ( &ctx.echo1, (BitSequence *)hashB, 512 );
|
||||
final_echo( &ctx.echo1, (BitSequence *)hashA );
|
||||
update_echo ( &ctx.echo1, (BitSequence *)hashB, 512 );
|
||||
final_echo( &ctx.echo1, (BitSequence *)hashA );
|
||||
#endif
|
||||
|
||||
sph_blake512 (&ctx.blake2, hashA, 64); //6
|
||||
@@ -272,8 +222,6 @@ extern void hmq1725hash(void *state, const void *input)
|
||||
}
|
||||
else
|
||||
{
|
||||
// sph_luffa512 (&ctx.luffa2, hashB, 64); //7
|
||||
// sph_luffa512_close(&ctx.luffa2, hashA); //8
|
||||
update_luffa( &ctx.luffa2, (BitSequence *)hashB, 512 );
|
||||
final_luffa( &ctx.luffa2, (BitSequence *)hashA );
|
||||
}
|
||||
@@ -287,8 +235,8 @@ extern void hmq1725hash(void *state, const void *input)
|
||||
if ( hashA[0] & mask ) //4
|
||||
{
|
||||
#ifdef NO_AES_NI
|
||||
sph_echo512 (&ctx.echo2, hashA, 64); //
|
||||
sph_echo512_close(&ctx.echo2, hashB); //5
|
||||
sph_echo512 (&ctx.echo2, hashA, 64); //
|
||||
sph_echo512_close(&ctx.echo2, hashB); //5
|
||||
#else
|
||||
update_echo ( &ctx.echo2, (BitSequence *)hashA, 512 );
|
||||
final_echo( &ctx.echo2, (BitSequence *)hashB );
|
||||
@@ -296,8 +244,6 @@ extern void hmq1725hash(void *state, const void *input)
|
||||
}
|
||||
else
|
||||
{
|
||||
// sph_simd512 (&ctx.simd2, hashA, 64); //4
|
||||
// sph_simd512_close(&ctx.simd2, hashB); //5
|
||||
update_sd( &ctx.simd2, (BitSequence *)hashA, 512 );
|
||||
final_sd( &ctx.simd2, (BitSequence *)hashB );
|
||||
}
|
||||
@@ -323,8 +269,8 @@ extern void hmq1725hash(void *state, const void *input)
|
||||
sph_groestl512 (&ctx.groestl2, hashA, 64); //3
|
||||
sph_groestl512_close(&ctx.groestl2, hashB); //4
|
||||
#else
|
||||
update_groestl( &ctx.groestl2, (char*)hashA, 512 );
|
||||
final_groestl( &ctx.groestl2, (char*)hashB );
|
||||
update_groestl( &ctx.groestl2, (char*)hashA, 512 );
|
||||
final_groestl( &ctx.groestl2, (char*)hashB );
|
||||
#endif
|
||||
|
||||
sph_sha512 (&ctx.sha2, hashB, 64); //2
|
||||
|
||||
23
algo/nist5.c
23
algo/nist5.c
@@ -7,6 +7,7 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#include "algo/blake/sph_blake.h"
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/skein/sph_skein.h"
|
||||
#include "algo/jh/sph_jh.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
@@ -16,15 +17,14 @@
|
||||
#include "algo/skein/sse2/skein.c"
|
||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
#include "algo/groestl/sse2/grso.h"
|
||||
#include "algo/groestl/sse2/grso-macro.c"
|
||||
#else
|
||||
#ifndef NO_AES_NI
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
#ifndef NO_AES_NI
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_context groestl;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
#endif
|
||||
} nist5_ctx_holder;
|
||||
@@ -33,16 +33,15 @@ nist5_ctx_holder nist5_ctx;
|
||||
|
||||
void init_nist5_ctx()
|
||||
{
|
||||
#ifndef NO_AES_NI
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_init( &nist5_ctx.groestl );
|
||||
#else
|
||||
init_groestl( &nist5_ctx.groestl );
|
||||
#endif
|
||||
}
|
||||
|
||||
void nist5hash(void *output, const void *input)
|
||||
{
|
||||
#ifdef NO_AES_NI
|
||||
grsoState sts_grs;
|
||||
#endif
|
||||
size_t hashptr;
|
||||
unsigned char hashbuf[128];
|
||||
sph_u64 hashctA;
|
||||
@@ -54,16 +53,14 @@ void nist5hash(void *output, const void *input)
|
||||
nist5_ctx_holder ctx;
|
||||
memcpy( &ctx, &nist5_ctx, sizeof(nist5_ctx) );
|
||||
|
||||
|
||||
DECL_BLK;
|
||||
BLK_I;
|
||||
BLK_W;
|
||||
BLK_C;
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
GRS_I;
|
||||
GRS_U;
|
||||
GRS_C;
|
||||
sph_groestl512 (&ctx.groestl, hash, 64);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#else
|
||||
update_groestl( &ctx.groestl, (char*)hash,512);
|
||||
final_groestl( &ctx.groestl, (char*)hash);
|
||||
|
||||
@@ -19,10 +19,7 @@
|
||||
#include "algo/skein/sse2/skein.c"
|
||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
#include "algo/groestl/sse2/grso.h"
|
||||
#include "algo/groestl/sse2/grso-macro.c"
|
||||
#else
|
||||
#ifndef NO_AES_NI
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#endif
|
||||
|
||||
@@ -36,37 +33,36 @@
|
||||
#define DATA_ALIGNXY(x,y) __declspec(align(y)) x
|
||||
#endif
|
||||
|
||||
#ifndef NO_AES_NI
|
||||
hashState_groestl quark_groestl_ctx;
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_context quark_ctx;
|
||||
#else
|
||||
hashState_groestl quark_ctx;
|
||||
#endif
|
||||
|
||||
void init_quark_ctx()
|
||||
{
|
||||
#ifndef NO_AES_NI
|
||||
init_groestl( &quark_groestl_ctx );
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_init( &quark_ctx );
|
||||
#else
|
||||
init_groestl( &quark_ctx );
|
||||
#endif
|
||||
}
|
||||
|
||||
inline static void quarkhash(void *state, const void *input)
|
||||
{
|
||||
#ifdef NO_AES_NI
|
||||
grsoState sts_grs;
|
||||
#else
|
||||
hashState_groestl ctx;
|
||||
memcpy(&ctx, &quark_groestl_ctx, sizeof(quark_groestl_ctx));
|
||||
#endif
|
||||
|
||||
/* shared temp space */
|
||||
/* hash is really just 64bytes but it used to hold both hash and final round constants passed 64 */
|
||||
|
||||
unsigned char hashbuf[128];
|
||||
size_t hashptr;
|
||||
sph_u64 hashctA;
|
||||
sph_u64 hashctB;
|
||||
|
||||
int i;
|
||||
|
||||
unsigned char hash[128];
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_context ctx;
|
||||
#else
|
||||
hashState_groestl ctx;
|
||||
#endif
|
||||
|
||||
memcpy( &ctx, &quark_ctx, sizeof(ctx) );
|
||||
|
||||
// Blake
|
||||
DECL_BLK;
|
||||
@@ -117,13 +113,13 @@ inline static void quarkhash(void *state, const void *input)
|
||||
{
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
GRS_I;
|
||||
GRS_U;
|
||||
GRS_C;
|
||||
sph_groestl512_init( &ctx );
|
||||
sph_groestl512 ( &ctx, hash, 64 );
|
||||
sph_groestl512_close( &ctx, hash );
|
||||
#else
|
||||
reinit_groestl( &ctx );
|
||||
update_groestl(&ctx, (char*)hash,512);
|
||||
final_groestl(&ctx, (char*)hash);
|
||||
reinit_groestl( &ctx );
|
||||
update_groestl( &ctx, (char*)hash, 512 );
|
||||
final_groestl( &ctx, (char*)hash );
|
||||
#endif
|
||||
|
||||
} while(0); continue;
|
||||
|
||||
@@ -371,7 +371,6 @@ extern "C"{
|
||||
|
||||
#define DECL_SKN \
|
||||
sph_u64 sknh0, sknh1, sknh2, sknh3, sknh4, sknh5, sknh6, sknh7; \
|
||||
unsigned char sknbuf[64]; \
|
||||
|
||||
#define sknREAD_STATE_BIG(sc) do { \
|
||||
sknh0 = (sc)->sknh0; \
|
||||
@@ -424,7 +423,6 @@ do { \
|
||||
do { \
|
||||
unsigned char *buf; \
|
||||
size_t ptr; \
|
||||
unsigned first; \
|
||||
size_t len = 64; \
|
||||
const void *data = hash; \
|
||||
buf = hashbuf; \
|
||||
@@ -441,7 +439,6 @@ do { \
|
||||
unsigned char *buf; \
|
||||
size_t ptr; \
|
||||
unsigned et; \
|
||||
int i; \
|
||||
\
|
||||
buf = hashbuf; \
|
||||
ptr = hashptr; \
|
||||
|
||||
@@ -18,10 +18,7 @@
|
||||
#include "algo/simd/sph_simd.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
// #include "algo/echo/sph_echo.h"
|
||||
// #include "algo/groestl/sph_groestl.h"
|
||||
#else
|
||||
#ifndef NO_AES_NI
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#endif
|
||||
|
||||
@@ -17,10 +17,7 @@
|
||||
#include "algo/simd/sph_simd.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
#include "algo/groestl/sse2/grso.h"
|
||||
#include "algo/groestl/sse2/grso-macro.c"
|
||||
#else
|
||||
#ifndef NO_AES_NI
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#endif
|
||||
@@ -40,7 +37,7 @@ typedef struct {
|
||||
hashState_sd simd;
|
||||
sph_shavite512_context shavite;
|
||||
#ifdef NO_AES_NI
|
||||
// sph_groestl512_context groestl;
|
||||
sph_groestl512_context groestl;
|
||||
sph_echo512_context echo;
|
||||
#else
|
||||
hashState_echo echo;
|
||||
@@ -57,7 +54,7 @@ void init_x11_ctx()
|
||||
sph_shavite512_init( &x11_ctx.shavite );
|
||||
init_sd( &x11_ctx.simd, 512 );
|
||||
#ifdef NO_AES_NI
|
||||
// sph_groestl512_init( &x11_ctx.groestl );
|
||||
sph_groestl512_init( &x11_ctx.groestl );
|
||||
sph_echo512_init( &x11_ctx.echo );
|
||||
#else
|
||||
init_echo( &x11_ctx.echo, 512 );
|
||||
@@ -92,13 +89,8 @@ static void x11_hash( void *state, const void *input )
|
||||
#undef dH
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
grsoState sts_grs;
|
||||
GRS_I;
|
||||
GRS_U;
|
||||
GRS_C;
|
||||
|
||||
// sph_groestl512 (&ctx.groestl, hash, 64);
|
||||
// sph_groestl512_close(&ctx.groestl, hash);
|
||||
sph_groestl512 (&ctx.groestl, hash, 64);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#else
|
||||
update_groestl( &ctx.groestl, (char*)hash, 512 );
|
||||
final_groestl( &ctx.groestl, (char*)hash );
|
||||
|
||||
@@ -18,10 +18,7 @@
|
||||
#include "algo/simd/sph_simd.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
// #include "algo/groestl/sse2/grso.h"
|
||||
// #include "algo/groestl/sse2/grso-macro.c"
|
||||
#else
|
||||
#ifndef NO_AES_NI
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#endif
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/gost/sph_gost.h"
|
||||
#include "algo/shavite/sph_shavite.h"
|
||||
#include "algo/echo/sph_echo.h"
|
||||
@@ -19,10 +20,7 @@
|
||||
#include "algo/skein/sse2/skein.c"
|
||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
#include "algo/groestl/sse2/grso.h"
|
||||
#include "algo/groestl/sse2/grso-macro.c"
|
||||
#else
|
||||
#ifndef NO_AES_NI
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#endif
|
||||
@@ -34,6 +32,7 @@ typedef struct {
|
||||
cubehashParam cube;
|
||||
hashState_sd simd;
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_context groestl;
|
||||
sph_echo512_context echo;
|
||||
#else
|
||||
hashState_echo echo;
|
||||
@@ -51,6 +50,7 @@ void init_sib_ctx()
|
||||
cubehashInit( &sib_ctx.cube, 512, 16, 32 );
|
||||
init_sd( &sib_ctx.simd, 512 );
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_init( &sib_ctx.groestl );
|
||||
sph_echo512_init( &sib_ctx.echo );
|
||||
#else
|
||||
init_echo( &sib_ctx.echo, 512 );
|
||||
@@ -59,17 +59,12 @@ void init_sib_ctx()
|
||||
|
||||
}
|
||||
|
||||
|
||||
void sibhash(void *output, const void *input)
|
||||
{
|
||||
unsigned char hash[128]; // uint32_t hashA[16], hashB[16];
|
||||
#define hashA hash
|
||||
#define hashB hash+64
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
grsoState sts_grs;
|
||||
#endif
|
||||
|
||||
size_t hashptr;
|
||||
unsigned char hashbuf[128];
|
||||
sph_u64 hashctA;
|
||||
@@ -95,12 +90,11 @@ void sibhash(void *output, const void *input)
|
||||
#undef dH
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
GRS_I;
|
||||
GRS_U;
|
||||
GRS_C;
|
||||
sph_groestl512 (&ctx.groestl, hash, 64);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#else
|
||||
update_groestl( &ctx.groestl, (char*)hash,512);
|
||||
final_groestl( &ctx.groestl, (char*)hash);
|
||||
update_groestl( &ctx.groestl, (char*)hash,512);
|
||||
final_groestl( &ctx.groestl, (char*)hash);
|
||||
#endif
|
||||
|
||||
DECL_SKN;
|
||||
|
||||
@@ -29,10 +29,7 @@
|
||||
#include "algo/skein/sse2/skein.c"
|
||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
#include "algo/groestl/sse2/grso.h"
|
||||
#include "algo/groestl/sse2/grso-macro.c"
|
||||
#else
|
||||
#ifndef NO_AES_NI
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#endif
|
||||
@@ -79,9 +76,6 @@ static void x13hash(void *output, const void *input)
|
||||
|
||||
x13_ctx_holder ctx;
|
||||
memcpy( &ctx, &x13_ctx, sizeof(x13_ctx) );
|
||||
#ifdef NO_AES_NI
|
||||
grsoState sts_grs;
|
||||
#endif
|
||||
|
||||
// X11 algos
|
||||
|
||||
@@ -116,12 +110,8 @@ static void x13hash(void *output, const void *input)
|
||||
//---groetl----
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
// use GRS if possible
|
||||
GRS_I;
|
||||
GRS_U;
|
||||
GRS_C;
|
||||
// sph_groestl512 (&ctx.groestl, hash, 64);
|
||||
// sph_groestl512_close(&ctx.groestl, hash);
|
||||
sph_groestl512 (&ctx.groestl, hash, 64);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#else
|
||||
update_groestl( &ctx.groestl, (char*)hash,512);
|
||||
final_groestl( &ctx.groestl, (char*)hash);
|
||||
|
||||
@@ -31,10 +31,7 @@
|
||||
#include "algo/skein/sse2/skein.c"
|
||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
#include "algo/groestl/sse2/grso.h"
|
||||
#include "algo/groestl/sse2/grso-macro.c"
|
||||
#else
|
||||
#ifndef NO_AES_NI
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#endif
|
||||
@@ -84,10 +81,6 @@ static void x14hash(void *output, const void *input)
|
||||
x14_ctx_holder ctx;
|
||||
memcpy(&ctx, &x14_ctx, sizeof(x14_ctx));
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
grsoState sts_grs;
|
||||
#endif
|
||||
|
||||
unsigned char hashbuf[128];
|
||||
size_t hashptr;
|
||||
sph_u64 hashctA;
|
||||
@@ -119,12 +112,8 @@ static void x14hash(void *output, const void *input)
|
||||
//---groestl----
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
// use SSE2 optimized GRS if possible
|
||||
GRS_I;
|
||||
GRS_U;
|
||||
GRS_C;
|
||||
// sph_groestl512 (&ctx.groestl, hash, 64);
|
||||
// sph_groestl512_close(&ctx.groestl, hash);
|
||||
sph_groestl512 (&ctx.groestl, hash, 64);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#else
|
||||
update_groestl( &ctx.groestl, (char*)hash,512);
|
||||
final_groestl( &ctx.groestl, (char*)hash);
|
||||
|
||||
@@ -31,10 +31,7 @@
|
||||
#include "algo/skein/sse2/skein.c"
|
||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
#include "algo/groestl/sse2/grso.h"
|
||||
#include "algo/groestl/sse2/grso-macro.c"
|
||||
#else
|
||||
#ifndef NO_AES_NI
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#endif
|
||||
@@ -86,10 +83,6 @@ static void x15hash(void *output, const void *input)
|
||||
x15_ctx_holder ctx;
|
||||
memcpy( &ctx, &x15_ctx, sizeof(x15_ctx) );
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
grsoState sts_grs;
|
||||
#endif
|
||||
|
||||
unsigned char hashbuf[128];
|
||||
size_t hashptr;
|
||||
sph_u64 hashctA;
|
||||
@@ -120,14 +113,11 @@ static void x15hash(void *output, const void *input)
|
||||
//---groestl----
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
GRS_I;
|
||||
GRS_U;
|
||||
GRS_C;
|
||||
// sph_groestl512(&ctx.groestl, hash, 64);
|
||||
// sph_groestl512_close(&ctx.groestl, hash);
|
||||
sph_groestl512(&ctx.groestl, hash, 64);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#else
|
||||
update_groestl( &ctx.groestl, (char*)hash,512);
|
||||
final_groestl( &ctx.groestl, (char*)hash);
|
||||
update_groestl( &ctx.groestl, (char*)hash,512);
|
||||
final_groestl( &ctx.groestl, (char*)hash);
|
||||
#endif
|
||||
|
||||
//---skein4---
|
||||
|
||||
@@ -33,10 +33,7 @@
|
||||
#include "algo/skein/sse2/skein.c"
|
||||
#include "algo/jh/sse2/jh_sse2_opt64.h"
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
#include "algo/groestl/sse2/grso.h"
|
||||
#include "algo/groestl/sse2/grso-macro.c"
|
||||
#else
|
||||
#ifndef NO_AES_NI
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#endif
|
||||
@@ -92,10 +89,6 @@ static void x17hash(void *output, const void *input)
|
||||
x17_ctx_holder ctx;
|
||||
memcpy( &ctx, &x17_ctx, sizeof(x17_ctx) );
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
grsoState sts_grs;
|
||||
#endif
|
||||
|
||||
unsigned char hashbuf[128];
|
||||
size_t hashptr;
|
||||
sph_u64 hashctA;
|
||||
@@ -126,14 +119,11 @@ static void x17hash(void *output, const void *input)
|
||||
//---groestl----
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
// GRS_I;
|
||||
// GRS_U;
|
||||
// GRS_C;
|
||||
sph_groestl512(&ctx.groestl, hash, 64);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#else
|
||||
update_groestl( &ctx.groestl, (char*)hash,512);
|
||||
final_groestl( &ctx.groestl, (char*)hash);
|
||||
update_groestl( &ctx.groestl, (char*)hash,512);
|
||||
final_groestl( &ctx.groestl, (char*)hash);
|
||||
#endif
|
||||
|
||||
//---skein4---
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,821 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011-2012 pooler@litecoinpool.org
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "cpuminer-config.h"
|
||||
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
||||
|
||||
#if defined(__i386__)
|
||||
|
||||
/*
 * scrypt_shuffle src, so, dest, do
 *
 * Copies sixteen 32-bit words from the block at \so(\src) to the block at
 * \do(\dest), permuting their positions on the way. The byte offsets are
 * exchanged in pairs: 12<->60, 28<->44, 8<->40, 16<->48, 4<->20, 36<->52;
 * the words at offsets 0, 24, 32 and 56 keep their position.
 *
 * Words are moved four at a time through %eax, %ebx, %ecx and %edx, so all
 * four registers are overwritten. Each group is fully loaded before it is
 * stored.
 */
.macro scrypt_shuffle src, so, dest, do
|
||||
/* group 1: offsets 60, 44, 28, 12 are written back in reverse order */
movl \so+60(\src), %eax
|
||||
movl \so+44(\src), %ebx
|
||||
movl \so+28(\src), %ecx
|
||||
movl \so+12(\src), %edx
|
||||
movl %eax, \do+12(\dest)
|
||||
movl %ebx, \do+28(\dest)
|
||||
movl %ecx, \do+44(\dest)
|
||||
movl %edx, \do+60(\dest)
|
||||
/* group 2: swap 40<->8 and 48<->16 */
movl \so+40(\src), %eax
|
||||
movl \so+8(\src), %ebx
|
||||
movl \so+48(\src), %ecx
|
||||
movl \so+16(\src), %edx
|
||||
movl %eax, \do+8(\dest)
|
||||
movl %ebx, \do+40(\dest)
|
||||
movl %ecx, \do+16(\dest)
|
||||
movl %edx, \do+48(\dest)
|
||||
/* group 3: swap 20<->4 and 52<->36 */
movl \so+20(\src), %eax
|
||||
movl \so+4(\src), %ebx
|
||||
movl \so+52(\src), %ecx
|
||||
movl \so+36(\src), %edx
|
||||
movl %eax, \do+4(\dest)
|
||||
movl %ebx, \do+20(\dest)
|
||||
movl %ecx, \do+36(\dest)
|
||||
movl %edx, \do+52(\dest)
|
||||
/* group 4: offsets 0, 24, 32, 56 are copied to the same offsets */
movl \so+0(\src), %eax
|
||||
movl \so+24(\src), %ebx
|
||||
movl \so+32(\src), %ecx
|
||||
movl \so+56(\src), %edx
|
||||
movl %eax, \do+0(\dest)
|
||||
movl %ebx, \do+24(\dest)
|
||||
movl %ecx, \do+32(\dest)
|
||||
movl %edx, \do+56(\dest)
|
||||
.endm
|
||||
|
||||
.macro salsa8_core_gen_quadround
|
||||
movl 52(%esp), %ecx
|
||||
movl 4(%esp), %edx
|
||||
movl 20(%esp), %ebx
|
||||
movl 8(%esp), %esi
|
||||
leal (%ecx, %edx), %edi
|
||||
roll $7, %edi
|
||||
xorl %edi, %ebx
|
||||
movl %ebx, 4(%esp)
|
||||
movl 36(%esp), %edi
|
||||
leal (%edx, %ebx), %ebp
|
||||
roll $9, %ebp
|
||||
xorl %ebp, %edi
|
||||
movl 24(%esp), %ebp
|
||||
movl %edi, 8(%esp)
|
||||
addl %edi, %ebx
|
||||
roll $13, %ebx
|
||||
xorl %ebx, %ecx
|
||||
movl 40(%esp), %ebx
|
||||
movl %ecx, 20(%esp)
|
||||
addl %edi, %ecx
|
||||
roll $18, %ecx
|
||||
leal (%esi, %ebp), %edi
|
||||
roll $7, %edi
|
||||
xorl %edi, %ebx
|
||||
movl %ebx, 24(%esp)
|
||||
movl 56(%esp), %edi
|
||||
xorl %ecx, %edx
|
||||
leal (%ebp, %ebx), %ecx
|
||||
roll $9, %ecx
|
||||
xorl %ecx, %edi
|
||||
movl %edi, 36(%esp)
|
||||
movl 28(%esp), %ecx
|
||||
movl %edx, 28(%esp)
|
||||
movl 44(%esp), %edx
|
||||
addl %edi, %ebx
|
||||
roll $13, %ebx
|
||||
xorl %ebx, %esi
|
||||
movl 60(%esp), %ebx
|
||||
movl %esi, 40(%esp)
|
||||
addl %edi, %esi
|
||||
roll $18, %esi
|
||||
leal (%ecx, %edx), %edi
|
||||
roll $7, %edi
|
||||
xorl %edi, %ebx
|
||||
movl %ebx, 44(%esp)
|
||||
movl 12(%esp), %edi
|
||||
xorl %esi, %ebp
|
||||
leal (%edx, %ebx), %esi
|
||||
roll $9, %esi
|
||||
xorl %esi, %edi
|
||||
movl %edi, 12(%esp)
|
||||
movl 48(%esp), %esi
|
||||
movl %ebp, 48(%esp)
|
||||
movl 64(%esp), %ebp
|
||||
addl %edi, %ebx
|
||||
roll $13, %ebx
|
||||
xorl %ebx, %ecx
|
||||
movl 16(%esp), %ebx
|
||||
movl %ecx, 16(%esp)
|
||||
addl %edi, %ecx
|
||||
roll $18, %ecx
|
||||
leal (%esi, %ebp), %edi
|
||||
roll $7, %edi
|
||||
xorl %edi, %ebx
|
||||
movl 32(%esp), %edi
|
||||
xorl %ecx, %edx
|
||||
leal (%ebp, %ebx), %ecx
|
||||
roll $9, %ecx
|
||||
xorl %ecx, %edi
|
||||
movl %edi, 32(%esp)
|
||||
movl %ebx, %ecx
|
||||
movl %edx, 52(%esp)
|
||||
movl 28(%esp), %edx
|
||||
addl %edi, %ebx
|
||||
roll $13, %ebx
|
||||
xorl %ebx, %esi
|
||||
movl 40(%esp), %ebx
|
||||
movl %esi, 28(%esp)
|
||||
addl %edi, %esi
|
||||
roll $18, %esi
|
||||
leal (%ecx, %edx), %edi
|
||||
roll $7, %edi
|
||||
xorl %edi, %ebx
|
||||
movl %ebx, 40(%esp)
|
||||
movl 12(%esp), %edi
|
||||
xorl %esi, %ebp
|
||||
leal (%edx, %ebx), %esi
|
||||
roll $9, %esi
|
||||
xorl %esi, %edi
|
||||
movl %edi, 12(%esp)
|
||||
movl 4(%esp), %esi
|
||||
movl %ebp, 4(%esp)
|
||||
movl 48(%esp), %ebp
|
||||
addl %edi, %ebx
|
||||
roll $13, %ebx
|
||||
xorl %ebx, %ecx
|
||||
movl 16(%esp), %ebx
|
||||
movl %ecx, 16(%esp)
|
||||
addl %edi, %ecx
|
||||
roll $18, %ecx
|
||||
leal (%esi, %ebp), %edi
|
||||
roll $7, %edi
|
||||
xorl %edi, %ebx
|
||||
movl %ebx, 48(%esp)
|
||||
movl 32(%esp), %edi
|
||||
xorl %ecx, %edx
|
||||
leal (%ebp, %ebx), %ecx
|
||||
roll $9, %ecx
|
||||
xorl %ecx, %edi
|
||||
movl %edi, 32(%esp)
|
||||
movl 24(%esp), %ecx
|
||||
movl %edx, 24(%esp)
|
||||
movl 52(%esp), %edx
|
||||
addl %edi, %ebx
|
||||
roll $13, %ebx
|
||||
xorl %ebx, %esi
|
||||
movl 28(%esp), %ebx
|
||||
movl %esi, 28(%esp)
|
||||
addl %edi, %esi
|
||||
roll $18, %esi
|
||||
leal (%ecx, %edx), %edi
|
||||
roll $7, %edi
|
||||
xorl %edi, %ebx
|
||||
movl %ebx, 52(%esp)
|
||||
movl 8(%esp), %edi
|
||||
xorl %esi, %ebp
|
||||
leal (%edx, %ebx), %esi
|
||||
roll $9, %esi
|
||||
xorl %esi, %edi
|
||||
movl %edi, 8(%esp)
|
||||
movl 44(%esp), %esi
|
||||
movl %ebp, 44(%esp)
|
||||
movl 4(%esp), %ebp
|
||||
addl %edi, %ebx
|
||||
roll $13, %ebx
|
||||
xorl %ebx, %ecx
|
||||
movl 20(%esp), %ebx
|
||||
movl %ecx, 4(%esp)
|
||||
addl %edi, %ecx
|
||||
roll $18, %ecx
|
||||
leal (%esi, %ebp), %edi
|
||||
roll $7, %edi
|
||||
xorl %edi, %ebx
|
||||
movl 36(%esp), %edi
|
||||
xorl %ecx, %edx
|
||||
leal (%ebp, %ebx), %ecx
|
||||
roll $9, %ecx
|
||||
xorl %ecx, %edi
|
||||
movl %edi, 20(%esp)
|
||||
movl %ebx, %ecx
|
||||
movl %edx, 36(%esp)
|
||||
movl 24(%esp), %edx
|
||||
addl %edi, %ebx
|
||||
roll $13, %ebx
|
||||
xorl %ebx, %esi
|
||||
movl 28(%esp), %ebx
|
||||
movl %esi, 24(%esp)
|
||||
addl %edi, %esi
|
||||
roll $18, %esi
|
||||
leal (%ecx, %edx), %edi
|
||||
roll $7, %edi
|
||||
xorl %edi, %ebx
|
||||
movl %ebx, 28(%esp)
|
||||
xorl %esi, %ebp
|
||||
movl 8(%esp), %esi
|
||||
leal (%edx, %ebx), %edi
|
||||
roll $9, %edi
|
||||
xorl %edi, %esi
|
||||
movl 40(%esp), %edi
|
||||
movl %ebp, 8(%esp)
|
||||
movl 44(%esp), %ebp
|
||||
movl %esi, 40(%esp)
|
||||
addl %esi, %ebx
|
||||
roll $13, %ebx
|
||||
xorl %ebx, %ecx
|
||||
movl 4(%esp), %ebx
|
||||
movl %ecx, 44(%esp)
|
||||
addl %esi, %ecx
|
||||
roll $18, %ecx
|
||||
leal (%edi, %ebp), %esi
|
||||
roll $7, %esi
|
||||
xorl %esi, %ebx
|
||||
movl %ebx, 4(%esp)
|
||||
movl 20(%esp), %esi
|
||||
xorl %ecx, %edx
|
||||
leal (%ebp, %ebx), %ecx
|
||||
roll $9, %ecx
|
||||
xorl %ecx, %esi
|
||||
movl %esi, 56(%esp)
|
||||
movl 48(%esp), %ecx
|
||||
movl %edx, 20(%esp)
|
||||
movl 36(%esp), %edx
|
||||
addl %esi, %ebx
|
||||
roll $13, %ebx
|
||||
xorl %ebx, %edi
|
||||
movl 24(%esp), %ebx
|
||||
movl %edi, 24(%esp)
|
||||
addl %esi, %edi
|
||||
roll $18, %edi
|
||||
leal (%ecx, %edx), %esi
|
||||
roll $7, %esi
|
||||
xorl %esi, %ebx
|
||||
movl %ebx, 60(%esp)
|
||||
movl 12(%esp), %esi
|
||||
xorl %edi, %ebp
|
||||
leal (%edx, %ebx), %edi
|
||||
roll $9, %edi
|
||||
xorl %edi, %esi
|
||||
movl %esi, 12(%esp)
|
||||
movl 52(%esp), %edi
|
||||
movl %ebp, 36(%esp)
|
||||
movl 8(%esp), %ebp
|
||||
addl %esi, %ebx
|
||||
roll $13, %ebx
|
||||
xorl %ebx, %ecx
|
||||
movl 16(%esp), %ebx
|
||||
movl %ecx, 16(%esp)
|
||||
addl %esi, %ecx
|
||||
roll $18, %ecx
|
||||
leal (%edi, %ebp), %esi
|
||||
roll $7, %esi
|
||||
xorl %esi, %ebx
|
||||
movl 32(%esp), %esi
|
||||
xorl %ecx, %edx
|
||||
leal (%ebp, %ebx), %ecx
|
||||
roll $9, %ecx
|
||||
xorl %ecx, %esi
|
||||
movl %esi, 32(%esp)
|
||||
movl %ebx, %ecx
|
||||
movl %edx, 48(%esp)
|
||||
movl 20(%esp), %edx
|
||||
addl %esi, %ebx
|
||||
roll $13, %ebx
|
||||
xorl %ebx, %edi
|
||||
movl 24(%esp), %ebx
|
||||
movl %edi, 20(%esp)
|
||||
addl %esi, %edi
|
||||
roll $18, %edi
|
||||
leal (%ecx, %edx), %esi
|
||||
roll $7, %esi
|
||||
xorl %esi, %ebx
|
||||
movl %ebx, 8(%esp)
|
||||
movl 12(%esp), %esi
|
||||
xorl %edi, %ebp
|
||||
leal (%edx, %ebx), %edi
|
||||
roll $9, %edi
|
||||
xorl %edi, %esi
|
||||
movl %esi, 12(%esp)
|
||||
movl 28(%esp), %edi
|
||||
movl %ebp, 52(%esp)
|
||||
movl 36(%esp), %ebp
|
||||
addl %esi, %ebx
|
||||
roll $13, %ebx
|
||||
xorl %ebx, %ecx
|
||||
movl 16(%esp), %ebx
|
||||
movl %ecx, 16(%esp)
|
||||
addl %esi, %ecx
|
||||
roll $18, %ecx
|
||||
leal (%edi, %ebp), %esi
|
||||
roll $7, %esi
|
||||
xorl %esi, %ebx
|
||||
movl %ebx, 28(%esp)
|
||||
movl 32(%esp), %esi
|
||||
xorl %ecx, %edx
|
||||
leal (%ebp, %ebx), %ecx
|
||||
roll $9, %ecx
|
||||
xorl %ecx, %esi
|
||||
movl %esi, 32(%esp)
|
||||
movl 4(%esp), %ecx
|
||||
movl %edx, 4(%esp)
|
||||
movl 48(%esp), %edx
|
||||
addl %esi, %ebx
|
||||
roll $13, %ebx
|
||||
xorl %ebx, %edi
|
||||
movl 20(%esp), %ebx
|
||||
movl %edi, 20(%esp)
|
||||
addl %esi, %edi
|
||||
roll $18, %edi
|
||||
leal (%ecx, %edx), %esi
|
||||
roll $7, %esi
|
||||
xorl %esi, %ebx
|
||||
movl %ebx, 48(%esp)
|
||||
movl 40(%esp), %esi
|
||||
xorl %edi, %ebp
|
||||
leal (%edx, %ebx), %edi
|
||||
roll $9, %edi
|
||||
xorl %edi, %esi
|
||||
movl %esi, 36(%esp)
|
||||
movl 60(%esp), %edi
|
||||
movl %ebp, 24(%esp)
|
||||
movl 52(%esp), %ebp
|
||||
addl %esi, %ebx
|
||||
roll $13, %ebx
|
||||
xorl %ebx, %ecx
|
||||
movl 44(%esp), %ebx
|
||||
movl %ecx, 40(%esp)
|
||||
addl %esi, %ecx
|
||||
roll $18, %ecx
|
||||
leal (%edi, %ebp), %esi
|
||||
roll $7, %esi
|
||||
xorl %esi, %ebx
|
||||
movl %ebx, 52(%esp)
|
||||
movl 56(%esp), %esi
|
||||
xorl %ecx, %edx
|
||||
leal (%ebp, %ebx), %ecx
|
||||
roll $9, %ecx
|
||||
xorl %ecx, %esi
|
||||
movl %esi, 56(%esp)
|
||||
addl %esi, %ebx
|
||||
movl %edx, 44(%esp)
|
||||
roll $13, %ebx
|
||||
xorl %ebx, %edi
|
||||
movl %edi, 60(%esp)
|
||||
addl %esi, %edi
|
||||
roll $18, %edi
|
||||
xorl %edi, %ebp
|
||||
movl %ebp, 64(%esp)
|
||||
.endm
|
||||
|
||||
/*
 * salsa8_core_gen: general-purpose-register Salsa20/8 core used by the
 * non-SSE2 path of scrypt_core.
 *
 * The body is two expansions of the salsa8_core_gen_quadround macro
 * (defined above), followed by a plain return.  The macro addresses its
 * working words relative to %esp, so the caller must have laid them out on
 * the stack before the call.
 */
	.text
	.p2align 5
salsa8_core_gen:
	salsa8_core_gen_quadround
	salsa8_core_gen_quadround
	ret
|
||||
|
||||
|
||||
/*
 * scrypt_core(X, V) -- i386 entry point and generic (non-SSE2) path.
 *
 * Both arguments are read from the stack.  After the four register pushes
 * below, the first argument sits at 20(%esp) and the second at 24(%esp);
 * they are kept in %edi (X) and %esi (V) respectively.
 *
 * If CPUID leaf 1 reports bit 26 of %edx set, control is transferred to
 * scrypt_core_sse2; otherwise execution falls through to scrypt_core_gen.
 */
	.text
	.p2align 5
	.globl scrypt_core
	.globl _scrypt_core
scrypt_core:
_scrypt_core:
	/* Save the registers this routine uses; restored before each ret. */
	pushl %ebx
	pushl %ebp
	pushl %edi
	pushl %esi

	/* Check for SSE2 availability */
	movl $1, %eax
	cpuid
	andl $0x04000000, %edx
	jnz scrypt_core_sse2

scrypt_core_gen:
	movl 20(%esp), %edi
	movl 24(%esp), %esi
	/*
	 * 72 bytes of locals: 0..63 hold the 16 words handed to
	 * salsa8_core_gen, 64 and 68 hold spilled loop variables.
	 * From here on the arguments are at 92(%esp) and 96(%esp).
	 */
	subl $72, %esp

/*
 * macro1a: copy words \p and \q of the block at %edi to the slot at %esi,
 * then store (word \p XOR word \q) back to \p(%edi) and to \p(%esp).
 */
.macro scrypt_core_macro1a p, q
	movl \p(%edi), %eax
	movl \q(%edi), %edx
	movl %eax, \p(%esi)
	movl %edx, \q(%esi)
	xorl %edx, %eax
	movl %eax, \p(%edi)
	movl %eax, \p(%esp)
.endm

/*
 * macro1b: XOR words \p and \q of the block at %edi with the same words of
 * the slot at %esi + %edx, store the new \q word back, and store
 * (new \p XOR new \q) to \p(%edi) and to \p(%esp).
 */
.macro scrypt_core_macro1b p, q
	movl \p(%edi), %eax
	xorl \p(%esi, %edx), %eax
	movl \q(%edi), %ebx
	xorl \q(%esi, %edx), %ebx
	movl %ebx, \q(%edi)
	xorl %ebx, %eax
	movl %eax, \p(%edi)
	movl %eax, \p(%esp)
.endm

/*
 * macro2: add the stack word \p into \p(%edi), then XOR that sum into
 * \q(%edi); the XOR result also replaces the stack word \p.
 */
.macro scrypt_core_macro2 p, q
	movl \p(%esp), %eax
	addl \p(%edi), %eax
	movl %eax, \p(%edi)
	xorl \q(%edi), %eax
	movl %eax, \q(%edi)
	movl %eax, \p(%esp)
.endm

/* macro3: add the stack word \p into \q(%edi). */
.macro scrypt_core_macro3 p, q
	movl \p(%esp), %eax
	addl \q(%edi), %eax
	movl %eax, \q(%edi)
.endm

	/* Loop 1 ends when %esi reaches V + 131072 (1024 slots of 128 bytes). */
	leal 131072(%esi), %ecx
scrypt_core_gen_loop1:
	/* Spill the slot pointer and end pointer across the calls. */
	movl %esi, 64(%esp)
	movl %ecx, 68(%esp)

	scrypt_core_macro1a 0, 64
	scrypt_core_macro1a 4, 68
	scrypt_core_macro1a 8, 72
	scrypt_core_macro1a 12, 76
	scrypt_core_macro1a 16, 80
	scrypt_core_macro1a 20, 84
	scrypt_core_macro1a 24, 88
	scrypt_core_macro1a 28, 92
	scrypt_core_macro1a 32, 96
	scrypt_core_macro1a 36, 100
	scrypt_core_macro1a 40, 104
	scrypt_core_macro1a 44, 108
	scrypt_core_macro1a 48, 112
	scrypt_core_macro1a 52, 116
	scrypt_core_macro1a 56, 120
	scrypt_core_macro1a 60, 124

	call salsa8_core_gen

	/* Reload X from the first argument after the call. */
	movl 92(%esp), %edi
	scrypt_core_macro2 0, 64
	scrypt_core_macro2 4, 68
	scrypt_core_macro2 8, 72
	scrypt_core_macro2 12, 76
	scrypt_core_macro2 16, 80
	scrypt_core_macro2 20, 84
	scrypt_core_macro2 24, 88
	scrypt_core_macro2 28, 92
	scrypt_core_macro2 32, 96
	scrypt_core_macro2 36, 100
	scrypt_core_macro2 40, 104
	scrypt_core_macro2 44, 108
	scrypt_core_macro2 48, 112
	scrypt_core_macro2 52, 116
	scrypt_core_macro2 56, 120
	scrypt_core_macro2 60, 124

	call salsa8_core_gen

	movl 92(%esp), %edi
	scrypt_core_macro3 0, 64
	scrypt_core_macro3 4, 68
	scrypt_core_macro3 8, 72
	scrypt_core_macro3 12, 76
	scrypt_core_macro3 16, 80
	scrypt_core_macro3 20, 84
	scrypt_core_macro3 24, 88
	scrypt_core_macro3 28, 92
	scrypt_core_macro3 32, 96
	scrypt_core_macro3 36, 100
	scrypt_core_macro3 40, 104
	scrypt_core_macro3 44, 108
	scrypt_core_macro3 48, 112
	scrypt_core_macro3 52, 116
	scrypt_core_macro3 56, 120
	scrypt_core_macro3 60, 124

	/* Advance to the next 128-byte slot of V. */
	movl 64(%esp), %esi
	movl 68(%esp), %ecx
	addl $128, %esi
	cmpl %ecx, %esi
	jne scrypt_core_gen_loop1

	/* Loop 2: 1024 iterations, %esi reset to the base of V. */
	movl 96(%esp), %esi
	movl $1024, %ecx
scrypt_core_gen_loop2:
	movl %ecx, 68(%esp)

	/* Slot offset = (word at byte 64 of X, masked to 0..1023) * 128. */
	movl 64(%edi), %edx
	andl $1023, %edx
	shll $7, %edx

	scrypt_core_macro1b 0, 64
	scrypt_core_macro1b 4, 68
	scrypt_core_macro1b 8, 72
	scrypt_core_macro1b 12, 76
	scrypt_core_macro1b 16, 80
	scrypt_core_macro1b 20, 84
	scrypt_core_macro1b 24, 88
	scrypt_core_macro1b 28, 92
	scrypt_core_macro1b 32, 96
	scrypt_core_macro1b 36, 100
	scrypt_core_macro1b 40, 104
	scrypt_core_macro1b 44, 108
	scrypt_core_macro1b 48, 112
	scrypt_core_macro1b 52, 116
	scrypt_core_macro1b 56, 120
	scrypt_core_macro1b 60, 124

	call salsa8_core_gen

	movl 92(%esp), %edi
	scrypt_core_macro2 0, 64
	scrypt_core_macro2 4, 68
	scrypt_core_macro2 8, 72
	scrypt_core_macro2 12, 76
	scrypt_core_macro2 16, 80
	scrypt_core_macro2 20, 84
	scrypt_core_macro2 24, 88
	scrypt_core_macro2 28, 92
	scrypt_core_macro2 32, 96
	scrypt_core_macro2 36, 100
	scrypt_core_macro2 40, 104
	scrypt_core_macro2 44, 108
	scrypt_core_macro2 48, 112
	scrypt_core_macro2 52, 116
	scrypt_core_macro2 56, 120
	scrypt_core_macro2 60, 124

	call salsa8_core_gen

	/* Reload both arguments for the next iteration. */
	movl 92(%esp), %edi
	movl 96(%esp), %esi
	scrypt_core_macro3 0, 64
	scrypt_core_macro3 4, 68
	scrypt_core_macro3 8, 72
	scrypt_core_macro3 12, 76
	scrypt_core_macro3 16, 80
	scrypt_core_macro3 20, 84
	scrypt_core_macro3 24, 88
	scrypt_core_macro3 28, 92
	scrypt_core_macro3 32, 96
	scrypt_core_macro3 36, 100
	scrypt_core_macro3 40, 104
	scrypt_core_macro3 44, 108
	scrypt_core_macro3 48, 112
	scrypt_core_macro3 52, 116
	scrypt_core_macro3 56, 120
	scrypt_core_macro3 60, 124

	/* Count down; the loop repeats while the counter is still non-zero. */
	movl 68(%esp), %ecx
	subl $1, %ecx
	ja scrypt_core_gen_loop2

	/* Drop the locals and restore the saved registers. */
	addl $72, %esp
	popl %esi
	popl %edi
	popl %ebp
	popl %ebx
	ret
|
||||
|
||||
|
||||
/*
 * salsa8_core_sse2_doubleround: eight add / rotate / xor steps on the four
 * state registers %xmm0..%xmm3, using %xmm4 and %xmm5 as scratch.
 *
 * Every step forms a sum in %xmm4, copies it to %xmm5, shifts one copy left
 * by r and the other right by 32 - r, and XORs both halves into the target
 * register, i.e. target ^= rotl32(sum, r) per lane, with r = 7, 9, 13, 18.
 * The pshufd instructions permute the 32-bit lanes of a register between
 * steps.
 */
.macro salsa8_core_sse2_doubleround
	/* xmm3 ^= rotl(xmm1 + xmm0, 7) */
	movdqa %xmm1, %xmm4
	paddd %xmm0, %xmm4
	movdqa %xmm4, %xmm5
	pslld $7, %xmm4
	psrld $25, %xmm5
	pxor %xmm4, %xmm3
	movdqa %xmm0, %xmm4
	pxor %xmm5, %xmm3

	/* xmm2 ^= rotl(xmm0 + xmm3, 9) */
	paddd %xmm3, %xmm4
	movdqa %xmm4, %xmm5
	pslld $9, %xmm4
	psrld $23, %xmm5
	pxor %xmm4, %xmm2
	movdqa %xmm3, %xmm4
	pxor %xmm5, %xmm2
	pshufd $0x93, %xmm3, %xmm3

	/* xmm1 ^= rotl(xmm3 + xmm2, 13), xmm3 taken before its shuffle */
	paddd %xmm2, %xmm4
	movdqa %xmm4, %xmm5
	pslld $13, %xmm4
	psrld $19, %xmm5
	pxor %xmm4, %xmm1
	movdqa %xmm2, %xmm4
	pxor %xmm5, %xmm1
	pshufd $0x4e, %xmm2, %xmm2

	/* xmm0 ^= rotl(xmm2 + xmm1, 18), xmm2 taken before its shuffle */
	paddd %xmm1, %xmm4
	movdqa %xmm4, %xmm5
	pslld $18, %xmm4
	psrld $14, %xmm5
	pxor %xmm4, %xmm0
	movdqa %xmm3, %xmm4
	pxor %xmm5, %xmm0
	pshufd $0x39, %xmm1, %xmm1

	/* Second half: same pattern with the roles of xmm1 and xmm3 swapped. */
	/* xmm1 ^= rotl(xmm3 + xmm0, 7) */
	paddd %xmm0, %xmm4
	movdqa %xmm4, %xmm5
	pslld $7, %xmm4
	psrld $25, %xmm5
	pxor %xmm4, %xmm1
	movdqa %xmm0, %xmm4
	pxor %xmm5, %xmm1

	/* xmm2 ^= rotl(xmm0 + xmm1, 9) */
	paddd %xmm1, %xmm4
	movdqa %xmm4, %xmm5
	pslld $9, %xmm4
	psrld $23, %xmm5
	pxor %xmm4, %xmm2
	movdqa %xmm1, %xmm4
	pxor %xmm5, %xmm2
	pshufd $0x93, %xmm1, %xmm1

	/* xmm3 ^= rotl(xmm1 + xmm2, 13), xmm1 taken before its shuffle */
	paddd %xmm2, %xmm4
	movdqa %xmm4, %xmm5
	pslld $13, %xmm4
	psrld $19, %xmm5
	pxor %xmm4, %xmm3
	movdqa %xmm2, %xmm4
	pxor %xmm5, %xmm3
	pshufd $0x4e, %xmm2, %xmm2

	/* xmm0 ^= rotl(xmm2 + xmm3, 18), xmm2 taken before its shuffle */
	paddd %xmm3, %xmm4
	movdqa %xmm4, %xmm5
	pslld $18, %xmm4
	psrld $14, %xmm5
	pxor %xmm4, %xmm0
	pshufd $0x39, %xmm3, %xmm3
	pxor %xmm5, %xmm0
.endm
|
||||
|
||||
/*
 * salsa8_core_sse2: four back-to-back expansions of
 * salsa8_core_sse2_doubleround, operating in place on %xmm0..%xmm3
 * (with %xmm4/%xmm5 clobbered as scratch by the inner macro).
 */
.macro salsa8_core_sse2
	salsa8_core_sse2_doubleround
	salsa8_core_sse2_doubleround
	salsa8_core_sse2_doubleround
	salsa8_core_sse2_doubleround
.endm
|
||||
|
||||
/*
 * scrypt_core_sse2: SSE2 path of scrypt_core.
 *
 * Reached by a jump from scrypt_core after its four register pushes, so the
 * two arguments are still at 20(%esp) and 24(%esp).  %edi keeps the first
 * argument (X), %esi the second (V).
 *
 * A 128-byte, 16-byte-aligned work area is carved out below the saved stack
 * pointer (kept in %ebp).  Bytes 0..95 of the working block live in that
 * area; bytes 96..127 are carried in %xmm6/%xmm7 across loop iterations.
 *
 * scrypt_shuffle is a macro defined outside this excerpt; it is used here
 * to copy X into the work area on entry and back out on exit
 * (presumably re-ordering words for the SSE2 rounds -- confirm against its
 * definition).
 */
	.p2align 5
scrypt_core_sse2:
	movl 20(%esp), %edi
	movl 24(%esp), %esi
	movl %esp, %ebp
	subl $128, %esp
	andl $-16, %esp

	scrypt_shuffle %edi, 0, %esp, 0
	scrypt_shuffle %edi, 64, %esp, 64

	movdqa 96(%esp), %xmm6
	movdqa 112(%esp), %xmm7

	/* Loop 1 walks %edx over V in 128-byte steps up to V + 131072. */
	movl %esi, %edx
	leal 131072(%esi), %ecx
scrypt_core_sse2_loop1:
	movdqa 0(%esp), %xmm0
	movdqa 16(%esp), %xmm1
	movdqa 32(%esp), %xmm2
	movdqa 48(%esp), %xmm3
	movdqa 64(%esp), %xmm4
	movdqa 80(%esp), %xmm5
	/* First half XOR second half; the result is stored as the slot's
	 * first 64 bytes, the unmodified second half as its last 64 bytes. */
	pxor %xmm4, %xmm0
	pxor %xmm5, %xmm1
	movdqa %xmm0, 0(%edx)
	movdqa %xmm1, 16(%edx)
	pxor %xmm6, %xmm2
	pxor %xmm7, %xmm3
	movdqa %xmm2, 32(%edx)
	movdqa %xmm3, 48(%edx)
	movdqa %xmm4, 64(%edx)
	movdqa %xmm5, 80(%edx)
	movdqa %xmm6, 96(%edx)
	movdqa %xmm7, 112(%edx)

	salsa8_core_sse2
	/* Add the pre-round input (just saved in the slot) back in. */
	paddd 0(%edx), %xmm0
	paddd 16(%edx), %xmm1
	paddd 32(%edx), %xmm2
	paddd 48(%edx), %xmm3
	movdqa %xmm0, 0(%esp)
	movdqa %xmm1, 16(%esp)
	movdqa %xmm2, 32(%esp)
	movdqa %xmm3, 48(%esp)

	/* Second half ^= new first half, then run the rounds on it. */
	pxor 64(%esp), %xmm0
	pxor 80(%esp), %xmm1
	pxor %xmm6, %xmm2
	pxor %xmm7, %xmm3
	movdqa %xmm0, 64(%esp)
	movdqa %xmm1, 80(%esp)
	movdqa %xmm2, %xmm6
	movdqa %xmm3, %xmm7
	salsa8_core_sse2
	paddd 64(%esp), %xmm0
	paddd 80(%esp), %xmm1
	paddd %xmm2, %xmm6
	paddd %xmm3, %xmm7
	movdqa %xmm0, 64(%esp)
	movdqa %xmm1, 80(%esp)

	addl $128, %edx
	cmpl %ecx, %edx
	jne scrypt_core_sse2_loop1

	/* Loop 2 keeps bytes 64..95 mirrored in %xmm4/%xmm5. */
	movdqa 64(%esp), %xmm4
	movdqa 80(%esp), %xmm5

	movl $1024, %ecx
scrypt_core_sse2_loop2:
	/* Slot offset = (low dword of %xmm4, masked to 0..1023) * 128. */
	movd %xmm4, %edx
	movdqa 0(%esp), %xmm0
	movdqa 16(%esp), %xmm1
	movdqa 32(%esp), %xmm2
	movdqa 48(%esp), %xmm3
	andl $1023, %edx
	shll $7, %edx
	pxor 0(%esi, %edx), %xmm0
	pxor 16(%esi, %edx), %xmm1
	pxor 32(%esi, %edx), %xmm2
	pxor 48(%esi, %edx), %xmm3

	pxor %xmm4, %xmm0
	pxor %xmm5, %xmm1
	movdqa %xmm0, 0(%esp)
	movdqa %xmm1, 16(%esp)
	pxor %xmm6, %xmm2
	pxor %xmm7, %xmm3
	movdqa %xmm2, 32(%esp)
	movdqa %xmm3, 48(%esp)
	salsa8_core_sse2
	paddd 0(%esp), %xmm0
	paddd 16(%esp), %xmm1
	paddd 32(%esp), %xmm2
	paddd 48(%esp), %xmm3
	movdqa %xmm0, 0(%esp)
	movdqa %xmm1, 16(%esp)
	movdqa %xmm2, 32(%esp)
	movdqa %xmm3, 48(%esp)

	/* Mix in the second half of the selected slot and of the block. */
	pxor 64(%esi, %edx), %xmm0
	pxor 80(%esi, %edx), %xmm1
	pxor 96(%esi, %edx), %xmm2
	pxor 112(%esi, %edx), %xmm3
	pxor 64(%esp), %xmm0
	pxor 80(%esp), %xmm1
	pxor %xmm6, %xmm2
	pxor %xmm7, %xmm3
	movdqa %xmm0, 64(%esp)
	movdqa %xmm1, 80(%esp)
	movdqa %xmm2, %xmm6
	movdqa %xmm3, %xmm7
	salsa8_core_sse2
	paddd 64(%esp), %xmm0
	paddd 80(%esp), %xmm1
	paddd %xmm2, %xmm6
	paddd %xmm3, %xmm7
	movdqa %xmm0, %xmm4
	movdqa %xmm1, %xmm5
	movdqa %xmm0, 64(%esp)
	movdqa %xmm1, 80(%esp)

	/* Count down; the loop repeats while the counter is still non-zero. */
	subl $1, %ecx
	ja scrypt_core_sse2_loop2

	/* Flush the register-resident tail back to the work area. */
	movdqa %xmm6, 96(%esp)
	movdqa %xmm7, 112(%esp)

	scrypt_shuffle %esp, 0, %edi, 0
	scrypt_shuffle %esp, 64, %edi, 64

	/* Undo the aligned frame and restore the registers pushed on entry. */
	movl %ebp, %esp
	popl %esi
	popl %edi
	popl %ebp
	popl %ebx
	ret
|
||||
|
||||
#endif
|
||||
@@ -1,767 +0,0 @@
|
||||
/*
|
||||
* Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2013 pooler
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* This file was originally written by Colin Percival as part of the Tarsnap
|
||||
* online backup system.
|
||||
*/
|
||||
|
||||
#include "../cpuminer-config.h"
|
||||
#include "../miner.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Constant tail words that the HMAC/PBKDF2 helpers below copy behind short
 * inputs before calling sha256_transform().  keypad, innerpad and outerpad
 * each start with the marker word 0x80000000, are zero in the middle, and
 * end with one non-zero word (presumably the SHA-256 bit-length field).
 * Elements not named by a designator are zero-initialized.
 */
static const uint32_t keypad[12] = {
	[0]  = 0x80000000,
	[11] = 0x00000280
};
static const uint32_t innerpad[11] = {
	[0]  = 0x80000000,
	[10] = 0x000004a0
};
static const uint32_t outerpad[8] = {
	[0] = 0x80000000,
	[7] = 0x00000300
};
/* A full 16-word block: the word 1, the marker word, zeros, then 0x620. */
static const uint32_t finalblk[16] = {
	[0]  = 0x00000001,
	[1]  = 0x80000000,
	[15] = 0x00000620
};
|
||||
|
||||
static inline void HMAC_SHA256_80_init(const uint32_t *key,
|
||||
uint32_t *tstate, uint32_t *ostate)
|
||||
{
|
||||
uint32_t ihash[8];
|
||||
uint32_t pad[16];
|
||||
int i;
|
||||
|
||||
/* tstate is assumed to contain the midstate of key */
|
||||
memcpy(pad, key + 16, 16);
|
||||
memcpy(pad + 4, keypad, 48);
|
||||
sha256_transform(tstate, pad, 0);
|
||||
memcpy(ihash, tstate, 32);
|
||||
|
||||
sha256_init(ostate);
|
||||
for (i = 0; i < 8; i++)
|
||||
pad[i] = ihash[i] ^ 0x5c5c5c5c;
|
||||
for (; i < 16; i++)
|
||||
pad[i] = 0x5c5c5c5c;
|
||||
sha256_transform(ostate, pad, 0);
|
||||
|
||||
sha256_init(tstate);
|
||||
for (i = 0; i < 8; i++)
|
||||
pad[i] = ihash[i] ^ 0x36363636;
|
||||
for (; i < 16; i++)
|
||||
pad[i] = 0x36363636;
|
||||
sha256_transform(tstate, pad, 0);
|
||||
}
|
||||
|
||||
static inline void PBKDF2_SHA256_80_128(const uint32_t *tstate,
|
||||
const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
|
||||
{
|
||||
uint32_t istate[8], ostate2[8];
|
||||
uint32_t ibuf[16], obuf[16];
|
||||
int i, j;
|
||||
|
||||
memcpy(istate, tstate, 32);
|
||||
sha256_transform(istate, salt, 0);
|
||||
|
||||
memcpy(ibuf, salt + 16, 16);
|
||||
memcpy(ibuf + 5, innerpad, 44);
|
||||
memcpy(obuf + 8, outerpad, 32);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
memcpy(obuf, istate, 32);
|
||||
ibuf[4] = i + 1;
|
||||
sha256_transform(obuf, ibuf, 0);
|
||||
|
||||
memcpy(ostate2, ostate, 32);
|
||||
sha256_transform(ostate2, obuf, 0);
|
||||
for (j = 0; j < 8; j++)
|
||||
output[8 * i + j] = swab32(ostate2[j]);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void PBKDF2_SHA256_128_32(uint32_t *tstate, uint32_t *ostate,
|
||||
const uint32_t *salt, uint32_t *output)
|
||||
{
|
||||
uint32_t buf[16];
|
||||
int i;
|
||||
|
||||
sha256_transform(tstate, salt, 1);
|
||||
sha256_transform(tstate, salt + 16, 1);
|
||||
sha256_transform(tstate, finalblk, 0);
|
||||
memcpy(buf, tstate, 32);
|
||||
memcpy(buf + 8, outerpad, 32);
|
||||
|
||||
sha256_transform(ostate, buf, 0);
|
||||
for (i = 0; i < 8; i++)
|
||||
output[i] = swab32(ostate[i]);
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAVE_SHA256_4WAY
|
||||
|
||||
/*
 * 4-lane versions of the constant tail tables: every logical word is
 * repeated four times in a row (index = 4 * word + lane).  A designator
 * positions the first lane and the three following initializers fill the
 * remaining lanes; everything not listed is zero-initialized.
 */
#define SCRYPT_LANES4(w) (w), (w), (w), (w)

static const uint32_t keypad_4way[4 * 12] = {
	[4 * 0]  = SCRYPT_LANES4(0x80000000),
	[4 * 11] = SCRYPT_LANES4(0x00000280)
};
static const uint32_t innerpad_4way[4 * 11] = {
	[4 * 0]  = SCRYPT_LANES4(0x80000000),
	[4 * 10] = SCRYPT_LANES4(0x000004a0)
};
static const uint32_t outerpad_4way[4 * 8] = {
	[4 * 0] = SCRYPT_LANES4(0x80000000),
	[4 * 7] = SCRYPT_LANES4(0x00000300)
};
static const uint32_t finalblk_4way[4 * 16] __attribute__((aligned(16))) = {
	[4 * 0]  = SCRYPT_LANES4(0x00000001),
	[4 * 1]  = SCRYPT_LANES4(0x80000000),
	[4 * 15] = SCRYPT_LANES4(0x00000620)
};

#undef SCRYPT_LANES4
|
||||
|
||||
static inline void HMAC_SHA256_80_init_4way(const uint32_t *key,
|
||||
uint32_t *tstate, uint32_t *ostate)
|
||||
{
|
||||
uint32_t ihash[4 * 8] __attribute__((aligned(16)));
|
||||
uint32_t pad[4 * 16] __attribute__((aligned(16)));
|
||||
int i;
|
||||
|
||||
/* tstate is assumed to contain the midstate of key */
|
||||
memcpy(pad, key + 4 * 16, 4 * 16);
|
||||
memcpy(pad + 4 * 4, keypad_4way, 4 * 48);
|
||||
sha256_transform_4way(tstate, pad, 0);
|
||||
memcpy(ihash, tstate, 4 * 32);
|
||||
|
||||
sha256_init_4way(ostate);
|
||||
for (i = 0; i < 4 * 8; i++)
|
||||
pad[i] = ihash[i] ^ 0x5c5c5c5c;
|
||||
for (; i < 4 * 16; i++)
|
||||
pad[i] = 0x5c5c5c5c;
|
||||
sha256_transform_4way(ostate, pad, 0);
|
||||
|
||||
sha256_init_4way(tstate);
|
||||
for (i = 0; i < 4 * 8; i++)
|
||||
pad[i] = ihash[i] ^ 0x36363636;
|
||||
for (; i < 4 * 16; i++)
|
||||
pad[i] = 0x36363636;
|
||||
sha256_transform_4way(tstate, pad, 0);
|
||||
}
|
||||
|
||||
static inline void PBKDF2_SHA256_80_128_4way(const uint32_t *tstate,
|
||||
const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
|
||||
{
|
||||
uint32_t istate[4 * 8] __attribute__((aligned(16)));
|
||||
uint32_t ostate2[4 * 8] __attribute__((aligned(16)));
|
||||
uint32_t ibuf[4 * 16] __attribute__((aligned(16)));
|
||||
uint32_t obuf[4 * 16] __attribute__((aligned(16)));
|
||||
int i, j;
|
||||
|
||||
memcpy(istate, tstate, 4 * 32);
|
||||
sha256_transform_4way(istate, salt, 0);
|
||||
|
||||
memcpy(ibuf, salt + 4 * 16, 4 * 16);
|
||||
memcpy(ibuf + 4 * 5, innerpad_4way, 4 * 44);
|
||||
memcpy(obuf + 4 * 8, outerpad_4way, 4 * 32);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
memcpy(obuf, istate, 4 * 32);
|
||||
ibuf[4 * 4 + 0] = i + 1;
|
||||
ibuf[4 * 4 + 1] = i + 1;
|
||||
ibuf[4 * 4 + 2] = i + 1;
|
||||
ibuf[4 * 4 + 3] = i + 1;
|
||||
sha256_transform_4way(obuf, ibuf, 0);
|
||||
|
||||
memcpy(ostate2, ostate, 4 * 32);
|
||||
sha256_transform_4way(ostate2, obuf, 0);
|
||||
for (j = 0; j < 4 * 8; j++)
|
||||
output[4 * 8 * i + j] = swab32(ostate2[j]);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void PBKDF2_SHA256_128_32_4way(uint32_t *tstate,
|
||||
uint32_t *ostate, const uint32_t *salt, uint32_t *output)
|
||||
{
|
||||
uint32_t buf[4 * 16] __attribute__((aligned(16)));
|
||||
int i;
|
||||
|
||||
sha256_transform_4way(tstate, salt, 1);
|
||||
sha256_transform_4way(tstate, salt + 4 * 16, 1);
|
||||
sha256_transform_4way(tstate, finalblk_4way, 0);
|
||||
memcpy(buf, tstate, 4 * 32);
|
||||
memcpy(buf + 4 * 8, outerpad_4way, 4 * 32);
|
||||
|
||||
sha256_transform_4way(ostate, buf, 0);
|
||||
for (i = 0; i < 4 * 8; i++)
|
||||
output[i] = swab32(ostate[i]);
|
||||
}
|
||||
|
||||
#endif /* HAVE_SHA256_4WAY */
|
||||
|
||||
|
||||
#ifdef HAVE_SHA256_8WAY
|
||||
|
||||
/*
 * 8-lane constant block: logical word 0 is 1, word 1 is 0x80000000,
 * word 15 is 0x620, all other words are zero.  Each logical word is
 * repeated eight times in a row (index = 8 * word + lane); elements not
 * listed are zero-initialized.
 */
#define SCRYPT_LANES8(w) (w), (w), (w), (w), (w), (w), (w), (w)

static const uint32_t finalblk_8way[8 * 16] __attribute__((aligned(32))) = {
	[8 * 0]  = SCRYPT_LANES8(0x00000001),
	[8 * 1]  = SCRYPT_LANES8(0x80000000),
	[8 * 15] = SCRYPT_LANES8(0x00000620)
};

#undef SCRYPT_LANES8
|
||||
|
||||
/*
 * 8-lane counterpart of HMAC_SHA256_80_init(): every logical word occupies
 * eight consecutive array elements, one per lane.  The constant tail of the
 * first block is written inline instead of being copied from a table.
 *
 * key     8 * 20 words; only logical words 16..19 are read here.
 * tstate  in: state after the first 64 key bytes of each lane;
 *         out: fresh state advanced over the 0x36-padded block.
 * ostate  out: fresh state advanced over the 0x5c-padded block.
 */
static inline void HMAC_SHA256_80_init_8way(const uint32_t *key,
	uint32_t *tstate, uint32_t *ostate)
{
	uint32_t key_digest[8 * 8] __attribute__((aligned(32)));
	uint32_t block[8 * 16] __attribute__((aligned(32)));
	int lane, w;

	/* tstate is assumed to contain the midstate of key */
	memcpy(block, key + 8 * 16, 8 * 16);
	/* Logical word 4 = marker, words 5..14 = 0, word 15 = 0x280. */
	for (lane = 0; lane < 8; lane++) {
		block[8 * 4 + lane] = 0x80000000;
		block[8 * 15 + lane] = 0x00000280;
	}
	memset(block + 8 * 5, 0x00, 8 * 40);
	sha256_transform_8way(tstate, block, 0);
	memcpy(key_digest, tstate, 8 * 32);

	/* Outer state: digest words XOR 0x5c.., remaining words plain 0x5c.. */
	sha256_init_8way(ostate);
	for (w = 0; w < 8 * 16; w++)
		block[w] = (w < 8 * 8) ? (key_digest[w] ^ 0x5c5c5c5c) : 0x5c5c5c5c;
	sha256_transform_8way(ostate, block, 0);

	/* Inner state: same layout with 0x36.. */
	sha256_init_8way(tstate);
	for (w = 0; w < 8 * 16; w++)
		block[w] = (w < 8 * 8) ? (key_digest[w] ^ 0x36363636) : 0x36363636;
	sha256_transform_8way(tstate, block, 0);
}
|
||||
|
||||
/*
 * 8-lane counterpart of PBKDF2_SHA256_80_128(): produces 4 groups of
 * 8 * 8 output words from a lane-interleaved 80-byte salt.  The constant
 * block tails are written inline instead of being copied from tables.
 *
 * tstate  inner state, 8 * 8 words (read only).
 * ostate  outer state, 8 * 8 words (read only).
 * salt    8 * 20 words.
 * output  receives 8 * 32 words, each passed through swab32().
 */
static inline void PBKDF2_SHA256_80_128_8way(const uint32_t *tstate,
	const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
	uint32_t inner_mid[8 * 8] __attribute__((aligned(32)));
	uint32_t outer_work[8 * 8] __attribute__((aligned(32)));
	uint32_t inner_blk[8 * 16] __attribute__((aligned(32)));
	uint32_t outer_blk[8 * 16] __attribute__((aligned(32)));
	uint32_t *dst = output;
	int counter, lane, w;

	/* Inner state advanced over the first 64 salt bytes, shared by all groups. */
	memcpy(inner_mid, tstate, 8 * 32);
	sha256_transform_8way(inner_mid, salt, 0);

	/*
	 * inner_blk: salt words 16..19, (counter in word 4, set per group),
	 * marker in word 5, zeros in 6..14, 0x4a0 in word 15.
	 * outer_blk: marker in word 8, zeros in 9..14, 0x300 in word 15;
	 * words 0..7 are filled per group.
	 */
	memcpy(inner_blk, salt + 8 * 16, 8 * 16);
	for (lane = 0; lane < 8; lane++) {
		inner_blk[8 * 5 + lane] = 0x80000000;
		inner_blk[8 * 15 + lane] = 0x000004a0;
		outer_blk[8 * 8 + lane] = 0x80000000;
		outer_blk[8 * 15 + lane] = 0x00000300;
	}
	memset(inner_blk + 8 * 6, 0x00, 8 * 36);
	memset(outer_blk + 8 * 9, 0x00, 8 * 24);

	for (counter = 1; counter <= 4; counter++) {
		/* Same group counter in every lane. */
		for (lane = 0; lane < 8; lane++)
			inner_blk[8 * 4 + lane] = counter;
		memcpy(outer_blk, inner_mid, 8 * 32);
		sha256_transform_8way(outer_blk, inner_blk, 0);

		memcpy(outer_work, ostate, 8 * 32);
		sha256_transform_8way(outer_work, outer_blk, 0);
		for (w = 0; w < 8 * 8; w++)
			*dst++ = swab32(outer_work[w]);
	}
}
|
||||
|
||||
static inline void PBKDF2_SHA256_128_32_8way(uint32_t *tstate,
|
||||
uint32_t *ostate, const uint32_t *salt, uint32_t *output)
|
||||
{
|
||||
uint32_t buf[8 * 16] __attribute__((aligned(32)));
|
||||
int i;
|
||||
|
||||
sha256_transform_8way(tstate, salt, 1);
|
||||
sha256_transform_8way(tstate, salt + 8 * 16, 1);
|
||||
sha256_transform_8way(tstate, finalblk_8way, 0);
|
||||
|
||||
memcpy(buf, tstate, 8 * 32);
|
||||
for (i = 0; i < 8; i++)
|
||||
buf[8 * 8 + i] = 0x80000000;
|
||||
memset(buf + 8 * 9, 0x00, 8 * 24);
|
||||
for (i = 0; i < 8; i++)
|
||||
buf[8 * 15 + i] = 0x00000300;
|
||||
sha256_transform_8way(ostate, buf, 0);
|
||||
|
||||
for (i = 0; i < 8 * 8; i++)
|
||||
output[i] = swab32(ostate[i]);
|
||||
}
|
||||
|
||||
#endif /* HAVE_SHA256_8WAY */
|
||||
|
||||
|
||||
#if defined(__x86_64__)
|
||||
|
||||
#define SCRYPT_MAX_WAYS 12
|
||||
#define HAVE_SCRYPT_3WAY 1
|
||||
int scrypt_best_throughput();
|
||||
void scrypt_core(uint32_t *X, uint32_t *V);
|
||||
void scrypt_core_3way(uint32_t *X, uint32_t *V);
|
||||
#if defined(USE_AVX2)
|
||||
#undef SCRYPT_MAX_WAYS
|
||||
#define SCRYPT_MAX_WAYS 24
|
||||
#define HAVE_SCRYPT_6WAY 1
|
||||
void scrypt_core_6way(uint32_t *X, uint32_t *V);
|
||||
#endif
|
||||
|
||||
#elif defined(__i386__)
|
||||
|
||||
#define SCRYPT_MAX_WAYS 4
|
||||
#define scrypt_best_throughput() 1
|
||||
void scrypt_core(uint32_t *X, uint32_t *V);
|
||||
|
||||
#elif defined(__arm__) && defined(__APCS_32__)
|
||||
|
||||
void scrypt_core(uint32_t *X, uint32_t *V);
|
||||
#if defined(__ARM_NEON__)
|
||||
#undef HAVE_SHA256_4WAY
|
||||
#define SCRYPT_MAX_WAYS 3
|
||||
#define HAVE_SCRYPT_3WAY 1
|
||||
#define scrypt_best_throughput() 3
|
||||
void scrypt_core_3way(uint32_t *X, uint32_t *V);
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
/*
 * B ^= Bx, then B += P(B), where P is four iterations of a column half-round
 * followed by a row half-round on the 16-word state (8 half-rounds total).
 *
 * B   16 words, updated in place.
 * Bx  16 words, read only.
 */
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
{
	uint32_t x[16];
	int w, pass;

	/* Mix the second block into B and take a working copy. */
	for (w = 0; w < 16; w++) {
		B[w] ^= Bx[w];
		x[w] = B[w];
	}

#define XS8_ROTL(v, c) (((v) << (c)) | ((v) >> (32 - (c))))
/* One quarter-round: updates x[b], x[c], x[d], x[a] in that order. */
#define XS8_QUARTER(a, b, c, d) do { \
		x[b] ^= XS8_ROTL(x[a] + x[d], 7); \
		x[c] ^= XS8_ROTL(x[b] + x[a], 9); \
		x[d] ^= XS8_ROTL(x[c] + x[b], 13); \
		x[a] ^= XS8_ROTL(x[d] + x[c], 18); \
	} while (0)

	for (pass = 0; pass < 4; pass++) {
		/*
		 * Operate on columns.  The four quarter-rounds touch disjoint
		 * words, so running them one after another gives the same
		 * result as interleaving their steps.
		 */
		XS8_QUARTER( 0,  4,  8, 12);
		XS8_QUARTER( 5,  9, 13,  1);
		XS8_QUARTER(10, 14,  2,  6);
		XS8_QUARTER(15,  3,  7, 11);

		/* Operate on rows (again four disjoint groups). */
		XS8_QUARTER( 0,  1,  2,  3);
		XS8_QUARTER( 5,  6,  7,  4);
		XS8_QUARTER(10, 11,  8,  9);
		XS8_QUARTER(15, 12, 13, 14);
	}

#undef XS8_QUARTER
#undef XS8_ROTL

	/* Feed-forward: add the permuted copy back into B. */
	for (w = 0; w < 16; w++)
		B[w] += x[w];
}
|
||||
|
||||
/*
 * Scalar scrypt mixing core.
 *
 * X is a 32-word working block that is updated in place.
 * V is the scratch area: the first loop stores 1024 consecutive 32-word
 * snapshots of X into it, the second loop reads them back.
 */
static inline void scrypt_core(uint32_t *X, uint32_t *V)
{
    uint32_t *slot = V;
    uint32_t step, word;

    /* Fill phase: snapshot X into the next slot, then mix both halves. */
    for (step = 0; step < 1024; step++, slot += 32) {
        memcpy(slot, X, 32 * sizeof(uint32_t));
        xor_salsa8(X, X + 16);
        xor_salsa8(X + 16, X);
    }

    /* Mix phase: the low 10 bits of X[16] pick which snapshot to XOR in. */
    for (step = 0; step < 1024; step++) {
        const uint32_t *entry = V + 32 * (X[16] & 1023);

        for (word = 0; word < 32; word++)
            X[word] ^= entry[word];
        xor_salsa8(X, X + 16);
        xor_salsa8(X + 16, X);
    }
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Fall back to a single way when no multi-way scrypt core defined these. */
#ifndef SCRYPT_MAX_WAYS
#define SCRYPT_MAX_WAYS 1
#define scrypt_best_throughput() 1
#endif

/*
 * 131072 bytes of scratch per way, plus 63 bytes of slack: the hash
 * routines in this file round the buffer address up to a multiple of 64.
 */
#define SCRYPT_BUFFER_SIZE (SCRYPT_MAX_WAYS * 131072 + 63)

/*
 * Allocate one scrypt scratch buffer of SCRYPT_BUFFER_SIZE bytes.
 *
 * Returns the malloc() result as-is, i.e. NULL when the allocation fails.
 * Nothing in this function releases the buffer.
 */
unsigned char *scrypt_buffer_alloc(void)
{
    return malloc(SCRYPT_BUFFER_SIZE);
}
|
||||
|
||||
/*
 * One-lane scrypt hash.
 *
 * input      - header words handed to the HMAC/PBKDF2 helpers
 * output     - receives the result of the final PBKDF2 step
 * midstate   - 8-word SHA-256 state copied in as the starting inner state
 * scratchpad - raw scratch memory; its address is rounded up to a
 *              multiple of 64 before being used as the V array
 */
static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
    uint32_t *midstate, unsigned char *scratchpad)
{
    uint32_t inner[8], outer[8];
    uint32_t block[32];
    const uintptr_t rounded = ((uintptr_t)scratchpad + 63) & ~(uintptr_t)63;
    uint32_t *V = (uint32_t *)rounded;

    memcpy(inner, midstate, sizeof inner);
    HMAC_SHA256_80_init(input, inner, outer);
    PBKDF2_SHA256_80_128(inner, outer, input, block);

    scrypt_core(block, V);

    PBKDF2_SHA256_128_32(inner, outer, block, output);
}
|
||||
|
||||
#ifdef HAVE_SHA256_4WAY
|
||||
/*
 * Four-lane scrypt hash.
 *
 * The 4-way SHA helpers work on word-interleaved data (word w of lane l
 * lives at index 4*w + l), while scrypt_core() wants each lane's 32 words
 * contiguous, so the data is transposed before and after the core.
 * input holds 4 x 20 words, output receives 4 x 8 words; the same 8-word
 * midstate is replicated into every lane.
 */
static void scrypt_1024_1_1_256_4way(const uint32_t *input,
    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
    uint32_t tstate[4 * 8] __attribute__((aligned(128)));
    uint32_t ostate[4 * 8] __attribute__((aligned(128)));
    uint32_t W[4 * 32] __attribute__((aligned(128)));
    uint32_t X[4 * 32] __attribute__((aligned(128)));
    /* Round the scratch address up to a multiple of 64. */
    uint32_t *V = (uint32_t *)(((uintptr_t)scratchpad + 63) & ~(uintptr_t)63);
    int lane, w;

    /* Interleave the inputs and replicate the midstate across lanes. */
    for (lane = 0; lane < 4; lane++) {
        for (w = 0; w < 20; w++)
            W[4 * w + lane] = input[lane * 20 + w];
        for (w = 0; w < 8; w++)
            tstate[4 * w + lane] = midstate[w];
    }

    HMAC_SHA256_80_init_4way(W, tstate, ostate);
    PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);

    /* De-interleave: one contiguous 32-word block per lane. */
    for (lane = 0; lane < 4; lane++)
        for (w = 0; w < 32; w++)
            X[lane * 32 + w] = W[4 * w + lane];

    /* The lanes are processed one after another, all using the same V. */
    for (lane = 0; lane < 4; lane++)
        scrypt_core(X + lane * 32, V);

    /* Re-interleave for the final 4-way PBKDF2 step. */
    for (lane = 0; lane < 4; lane++)
        for (w = 0; w < 32; w++)
            W[4 * w + lane] = X[lane * 32 + w];

    PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);

    for (lane = 0; lane < 4; lane++)
        for (w = 0; w < 8; w++)
            output[lane * 8 + w] = W[4 * w + lane];
}
|
||||
#endif /* HAVE_SHA256_4WAY */
|
||||
|
||||
#ifdef HAVE_SCRYPT_3WAY
|
||||
|
||||
/*
 * Three-lane scrypt hash.
 *
 * Each lane uses 20 input words, 8 words of inner/outer state, a 32-word
 * block and 8 output words, all stored lane after lane. The SHA steps run
 * lane by lane; the three blocks then go through scrypt_core_3way() in a
 * single call.
 */
static void scrypt_1024_1_1_256_3way(const uint32_t *input,
    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
    uint32_t tstate[3 * 8], ostate[3 * 8];
    uint32_t X[3 * 32] __attribute__((aligned(64)));
    /* Round the scratch address up to a multiple of 64. */
    uint32_t *V = (uint32_t *)(((uintptr_t)scratchpad + 63) & ~(uintptr_t)63);
    int lane;

    for (lane = 0; lane < 3; lane++)
        memcpy(tstate + 8 * lane, midstate, 8 * sizeof(uint32_t));

    for (lane = 0; lane < 3; lane++)
        HMAC_SHA256_80_init(input + 20 * lane,
            tstate + 8 * lane, ostate + 8 * lane);

    for (lane = 0; lane < 3; lane++)
        PBKDF2_SHA256_80_128(tstate + 8 * lane, ostate + 8 * lane,
            input + 20 * lane, X + 32 * lane);

    scrypt_core_3way(X, V);

    for (lane = 0; lane < 3; lane++)
        PBKDF2_SHA256_128_32(tstate + 8 * lane, ostate + 8 * lane,
            X + 32 * lane, output + 8 * lane);
}
|
||||
|
||||
#ifdef HAVE_SHA256_4WAY
|
||||
/*
 * Twelve-lane scrypt hash.
 *
 * The SHA steps treat the lanes as 3 groups of 4 (4-way helpers, word w of
 * lane l inside a group at index 4*w + l). After de-interleaving, the 12
 * contiguous 32-word blocks are fed to scrypt_core_3way() in 4 calls of
 * 96 words each. input holds 12 x 20 words, output receives 12 x 8 words.
 */
static void scrypt_1024_1_1_256_12way(const uint32_t *input,
    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
    uint32_t tstate[12 * 8] __attribute__((aligned(128)));
    uint32_t ostate[12 * 8] __attribute__((aligned(128)));
    uint32_t W[12 * 32] __attribute__((aligned(128)));
    uint32_t X[12 * 32] __attribute__((aligned(128)));
    /* Round the scratch address up to a multiple of 64. */
    uint32_t *V = (uint32_t *)(((uintptr_t)scratchpad + 63) & ~(uintptr_t)63);
    int grp, lane, w;

    /* Interleave inputs and replicate the midstate, group by group. */
    for (grp = 0; grp < 3; grp++) {
        uint32_t *wv = W + 128 * grp;
        uint32_t *ts = tstate + 32 * grp;
        const uint32_t *in = input + 80 * grp;

        for (lane = 0; lane < 4; lane++) {
            for (w = 0; w < 20; w++)
                wv[4 * w + lane] = in[lane * 20 + w];
            for (w = 0; w < 8; w++)
                ts[4 * w + lane] = midstate[w];
        }
    }

    for (grp = 0; grp < 3; grp++)
        HMAC_SHA256_80_init_4way(W + 128 * grp,
            tstate + 32 * grp, ostate + 32 * grp);

    for (grp = 0; grp < 3; grp++)
        PBKDF2_SHA256_80_128_4way(tstate + 32 * grp, ostate + 32 * grp,
            W + 128 * grp, W + 128 * grp);

    /* De-interleave: lane blocks become contiguous in X. */
    for (grp = 0; grp < 3; grp++)
        for (lane = 0; lane < 4; lane++)
            for (w = 0; w < 32; w++)
                X[128 * grp + lane * 32 + w] = W[128 * grp + 4 * w + lane];

    /* 12 blocks = 4 calls of 3 blocks (96 words) each, same V every time. */
    for (grp = 0; grp < 4; grp++)
        scrypt_core_3way(X + grp * 96, V);

    /* Re-interleave for the final 4-way PBKDF2 step. */
    for (grp = 0; grp < 3; grp++)
        for (lane = 0; lane < 4; lane++)
            for (w = 0; w < 32; w++)
                W[128 * grp + 4 * w + lane] = X[128 * grp + lane * 32 + w];

    for (grp = 0; grp < 3; grp++)
        PBKDF2_SHA256_128_32_4way(tstate + 32 * grp, ostate + 32 * grp,
            W + 128 * grp, W + 128 * grp);

    for (grp = 0; grp < 3; grp++)
        for (lane = 0; lane < 4; lane++)
            for (w = 0; w < 8; w++)
                output[32 * grp + lane * 8 + w] = W[128 * grp + 4 * w + lane];
}
|
||||
#endif /* HAVE_SHA256_4WAY */
|
||||
|
||||
#endif /* HAVE_SCRYPT_3WAY */
|
||||
|
||||
#ifdef HAVE_SCRYPT_6WAY
|
||||
/*
 * Twenty-four-lane scrypt hash.
 *
 * The SHA steps treat the lanes as 3 groups of 8 (8-way helpers, word w of
 * lane l inside a group at index 8*w + l). After de-interleaving, the 24
 * contiguous 32-word blocks are fed to scrypt_core_6way() in 4 calls of
 * 6 blocks each. input holds 24 x 20 words, output receives 24 x 8 words.
 */
static void scrypt_1024_1_1_256_24way(const uint32_t *input,
    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
    uint32_t tstate[24 * 8] __attribute__((aligned(128)));
    uint32_t ostate[24 * 8] __attribute__((aligned(128)));
    uint32_t W[24 * 32] __attribute__((aligned(128)));
    uint32_t X[24 * 32] __attribute__((aligned(128)));
    /* Round the scratch address up to a multiple of 64. */
    uint32_t *V = (uint32_t *)(((uintptr_t)scratchpad + 63) & ~(uintptr_t)63);
    int grp, lane, w;

    /* Interleave inputs and replicate the midstate, group by group. */
    for (grp = 0; grp < 3; grp++) {
        uint32_t *wv = W + 8 * 32 * grp;
        uint32_t *ts = tstate + 8 * 8 * grp;
        const uint32_t *in = input + 8 * 20 * grp;

        for (lane = 0; lane < 8; lane++) {
            for (w = 0; w < 20; w++)
                wv[8 * w + lane] = in[lane * 20 + w];
            for (w = 0; w < 8; w++)
                ts[8 * w + lane] = midstate[w];
        }
    }

    for (grp = 0; grp < 3; grp++)
        HMAC_SHA256_80_init_8way(W + 256 * grp,
            tstate + 64 * grp, ostate + 64 * grp);

    for (grp = 0; grp < 3; grp++)
        PBKDF2_SHA256_80_128_8way(tstate + 64 * grp, ostate + 64 * grp,
            W + 256 * grp, W + 256 * grp);

    /* De-interleave: lane blocks become contiguous in X. */
    for (grp = 0; grp < 3; grp++)
        for (lane = 0; lane < 8; lane++)
            for (w = 0; w < 32; w++)
                X[8 * 32 * grp + lane * 32 + w] = W[8 * 32 * grp + 8 * w + lane];

    /* 24 blocks = 4 calls of 6 blocks each, same V every time. */
    for (grp = 0; grp < 4; grp++)
        scrypt_core_6way(X + grp * 6 * 32, V);

    /* Re-interleave for the final 8-way PBKDF2 step. */
    for (grp = 0; grp < 3; grp++)
        for (lane = 0; lane < 8; lane++)
            for (w = 0; w < 32; w++)
                W[8 * 32 * grp + 8 * w + lane] = X[8 * 32 * grp + lane * 32 + w];

    for (grp = 0; grp < 3; grp++)
        PBKDF2_SHA256_128_32_8way(tstate + 64 * grp, ostate + 64 * grp,
            W + 256 * grp, W + 256 * grp);

    for (grp = 0; grp < 3; grp++)
        for (lane = 0; lane < 8; lane++)
            for (w = 0; w < 8; w++)
                output[8 * 8 * grp + lane * 8 + w] = W[8 * 32 * grp + 8 * w + lane];
}
|
||||
#endif /* HAVE_SCRYPT_6WAY */
|
||||
|
||||
int scanhash_scrypt(int thr_id, uint32_t *pdata,
|
||||
unsigned char *scratchbuf, const uint32_t *ptarget,
|
||||
uint32_t max_nonce, unsigned long *hashes_done)
|
||||
{
|
||||
uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
|
||||
uint32_t midstate[8];
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
int throughput = scrypt_best_throughput();
|
||||
int i;
|
||||
|
||||
#ifdef HAVE_SHA256_4WAY
|
||||
if (sha256_use_4way())
|
||||
throughput *= 4;
|
||||
#endif
|
||||
|
||||
for (i = 0; i < throughput; i++)
|
||||
memcpy(data + i * 20, pdata, 80);
|
||||
|
||||
sha256_init(midstate);
|
||||
sha256_transform(midstate, data, 0);
|
||||
|
||||
do {
|
||||
for (i = 0; i < throughput; i++)
|
||||
data[i * 20 + 19] = ++n;
|
||||
|
||||
#if defined(HAVE_SHA256_4WAY)
|
||||
if (throughput == 4)
|
||||
scrypt_1024_1_1_256_4way(data, hash, midstate, scratchbuf);
|
||||
else
|
||||
#endif
|
||||
#if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY)
|
||||
if (throughput == 12)
|
||||
scrypt_1024_1_1_256_12way(data, hash, midstate, scratchbuf);
|
||||
else
|
||||
#endif
|
||||
#if defined(HAVE_SCRYPT_6WAY)
|
||||
if (throughput == 24)
|
||||
scrypt_1024_1_1_256_24way(data, hash, midstate, scratchbuf);
|
||||
else
|
||||
#endif
|
||||
#if defined(HAVE_SCRYPT_3WAY)
|
||||
if (throughput == 3)
|
||||
scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf);
|
||||
else
|
||||
#endif
|
||||
scrypt_1024_1_1_256(data, hash, midstate, scratchbuf);
|
||||
|
||||
for (i = 0; i < throughput; i++) {
|
||||
if (hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget)) {
|
||||
*hashes_done = n - pdata[19] + 1;
|
||||
pdata[19] = data[i * 20 + 19];
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - pdata[19] + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Install the scrypt entry points into the algorithm gate.
 *
 * Sets gate->scanhash and gate->hash; no other gate member is touched.
 * Always returns true.
 */
bool register_scrypt_algo( algo_gate_t* gate )
{
    gate->scanhash = &scanhash_scrypt;
    gate->hash     = &scrypt_hash;
    // gate->get_max64 = scrypt_get_max64;   (assignment left disabled)
    return true;
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,630 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 ArtForz
|
||||
* Copyright 2011-2013 pooler
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version. See COPYING for more details.
|
||||
*/
|
||||
|
||||
#include "../cpuminer-config.h"
|
||||
#include "../miner.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__arm__) && defined(__APCS_32__)
|
||||
#define EXTERN_SHA256
|
||||
#endif
|
||||
|
||||
/* Initial SHA-256 state words; sha256_init() copies these into a state. */
static const uint32_t sha256_h[8] = {
    0x6a09e667u, 0xbb67ae85u,
    0x3c6ef372u, 0xa54ff53au,
    0x510e527fu, 0x9b05688cu,
    0x1f83d9abu, 0x5be0cd19u
};
|
||||
|
||||
/* Per-round additive constants; round i adds sha256_k[i] (see RNDr). */
static const uint32_t sha256_k[64] = {
    /*  0 */ 0x428a2f98u, 0x71374491u, 0xb5c0fbcfu, 0xe9b5dba5u,
    /*  4 */ 0x3956c25bu, 0x59f111f1u, 0x923f82a4u, 0xab1c5ed5u,
    /*  8 */ 0xd807aa98u, 0x12835b01u, 0x243185beu, 0x550c7dc3u,
    /* 12 */ 0x72be5d74u, 0x80deb1feu, 0x9bdc06a7u, 0xc19bf174u,
    /* 16 */ 0xe49b69c1u, 0xefbe4786u, 0x0fc19dc6u, 0x240ca1ccu,
    /* 20 */ 0x2de92c6fu, 0x4a7484aau, 0x5cb0a9dcu, 0x76f988dau,
    /* 24 */ 0x983e5152u, 0xa831c66du, 0xb00327c8u, 0xbf597fc7u,
    /* 28 */ 0xc6e00bf3u, 0xd5a79147u, 0x06ca6351u, 0x14292967u,
    /* 32 */ 0x27b70a85u, 0x2e1b2138u, 0x4d2c6dfcu, 0x53380d13u,
    /* 36 */ 0x650a7354u, 0x766a0abbu, 0x81c2c92eu, 0x92722c85u,
    /* 40 */ 0xa2bfe8a1u, 0xa81a664bu, 0xc24b8b70u, 0xc76c51a3u,
    /* 44 */ 0xd192e819u, 0xd6990624u, 0xf40e3585u, 0x106aa070u,
    /* 48 */ 0x19a4c116u, 0x1e376c08u, 0x2748774cu, 0x34b0bcb5u,
    /* 52 */ 0x391c0cb3u, 0x4ed8aa4au, 0x5b9cca4fu, 0x682e6ff3u,
    /* 56 */ 0x748f82eeu, 0x78a5636fu, 0x84c87814u, 0x8cc70208u,
    /* 60 */ 0x90befffau, 0xa4506cebu, 0xbef9a3f7u, 0xc67178f2u
};
|
||||
|
||||
void sha256_init(uint32_t *state)
|
||||
{
|
||||
memcpy(state, sha256_h, 32);
|
||||
}
|
||||
|
||||
/*
 * Elementary functions used by SHA256.
 *
 * Every macro parameter is parenthesized so that compound arguments
 * (e.g. ROTR(a | b, n)) expand with the intended precedence. Arguments
 * are still evaluated more than once: do not pass expressions with side
 * effects. ROTR assumes a 32-bit unsigned operand and 0 < n < 32.
 */
#define Ch(x, y, z)     (((x) & ((y) ^ (z))) ^ (z))
#define Maj(x, y, z)    (((x) & ((y) | (z))) | ((y) & (z)))
#define ROTR(x, n)      (((x) >> (n)) | ((x) << (32 - (n))))
#define S0(x)           (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x)           (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x)           (ROTR(x, 7) ^ ROTR(x, 18) ^ ((x) >> 3))
#define s1(x)           (ROTR(x, 17) ^ ROTR(x, 19) ^ ((x) >> 10))

/*
 * SHA256 round function.
 * Requires uint32_t temporaries named t0 and t1 in the calling scope;
 * updates d and h in place.
 */
#define RND(a, b, c, d, e, f, g, h, k) \
    do { \
        t0 = (h) + S1(e) + Ch(e, f, g) + (k); \
        t1 = S0(a) + Maj(a, b, c); \
        (d) += t0; \
        (h) = t0 + t1; \
    } while (0)

/*
 * Adjusted round function for rotating state: instead of shuffling the
 * eight state words after each round, round i addresses them through
 * indices rotated by i (mod 8).
 */
#define RNDr(S, W, i) \
    RND((S)[(64 - (i)) % 8], (S)[(65 - (i)) % 8], \
        (S)[(66 - (i)) % 8], (S)[(67 - (i)) % 8], \
        (S)[(68 - (i)) % 8], (S)[(69 - (i)) % 8], \
        (S)[(70 - (i)) % 8], (S)[(71 - (i)) % 8], \
        (W)[(i)] + sha256_k[(i)])
|
||||
|
||||
#ifndef EXTERN_SHA256
|
||||
|
||||
/*
 * SHA256 block compression function.
 *
 * state - 8-word (256-bit) state, updated in place
 * block - 16-word (512-bit) input block
 * swap  - when non-zero each block word is passed through swab32() before
 *         use; when zero the block is used as given
 */
void sha256_transform(uint32_t *state, const uint32_t *block, int swap)
{
    uint32_t W[64];
    uint32_t S[8];
    uint32_t t0, t1;    /* scratch used by the RND macro */
    int i;

    /* 1. Message schedule: 16 words from the block, 48 derived words. */
    if (!swap) {
        memcpy(W, block, 16 * sizeof(uint32_t));
    } else {
        for (i = 0; i < 16; i++)
            W[i] = swab32(block[i]);
    }
    for (i = 16; i < 64; i++)
        W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];

    /* 2. Work on a private copy of the state. */
    memcpy(S, state, sizeof S);

    /* 3. The 64 rounds, kept unrolled so RNDr sees constant indices. */
    RNDr(S, W,  0); RNDr(S, W,  1); RNDr(S, W,  2); RNDr(S, W,  3);
    RNDr(S, W,  4); RNDr(S, W,  5); RNDr(S, W,  6); RNDr(S, W,  7);
    RNDr(S, W,  8); RNDr(S, W,  9); RNDr(S, W, 10); RNDr(S, W, 11);
    RNDr(S, W, 12); RNDr(S, W, 13); RNDr(S, W, 14); RNDr(S, W, 15);
    RNDr(S, W, 16); RNDr(S, W, 17); RNDr(S, W, 18); RNDr(S, W, 19);
    RNDr(S, W, 20); RNDr(S, W, 21); RNDr(S, W, 22); RNDr(S, W, 23);
    RNDr(S, W, 24); RNDr(S, W, 25); RNDr(S, W, 26); RNDr(S, W, 27);
    RNDr(S, W, 28); RNDr(S, W, 29); RNDr(S, W, 30); RNDr(S, W, 31);
    RNDr(S, W, 32); RNDr(S, W, 33); RNDr(S, W, 34); RNDr(S, W, 35);
    RNDr(S, W, 36); RNDr(S, W, 37); RNDr(S, W, 38); RNDr(S, W, 39);
    RNDr(S, W, 40); RNDr(S, W, 41); RNDr(S, W, 42); RNDr(S, W, 43);
    RNDr(S, W, 44); RNDr(S, W, 45); RNDr(S, W, 46); RNDr(S, W, 47);
    RNDr(S, W, 48); RNDr(S, W, 49); RNDr(S, W, 50); RNDr(S, W, 51);
    RNDr(S, W, 52); RNDr(S, W, 53); RNDr(S, W, 54); RNDr(S, W, 55);
    RNDr(S, W, 56); RNDr(S, W, 57); RNDr(S, W, 58); RNDr(S, W, 59);
    RNDr(S, W, 60); RNDr(S, W, 61); RNDr(S, W, 62); RNDr(S, W, 63);

    /* 4. Add the working variables back into the caller's state. */
    for (i = 0; i < 8; i++)
        state[i] += S[i];
}
|
||||
|
||||
#endif /* EXTERN_SHA256 */
|
||||
|
||||
|
||||
/*
 * Block template for the second SHA-256 pass of sha256d. Words 8..15 are
 * copied behind an 8-word digest: word 8 is 0x80000000, word 15 is
 * 0x00000100 (256), every other word is zero.
 */
static const uint32_t sha256d_hash1[16] = {
    [8]  = 0x80000000u,
    [15] = 0x00000100u
};
|
||||
|
||||
static void sha256d_80_swap(uint32_t *hash, const uint32_t *data)
|
||||
{
|
||||
uint32_t S[16];
|
||||
int i;
|
||||
|
||||
sha256_init(S);
|
||||
sha256_transform(S, data, 0);
|
||||
sha256_transform(S, data + 16, 0);
|
||||
memcpy(S + 8, sha256d_hash1 + 8, 32);
|
||||
sha256_init(hash);
|
||||
sha256_transform(hash, S, 0);
|
||||
for (i = 0; i < 8; i++)
|
||||
hash[i] = swab32(hash[i]);
|
||||
}
|
||||
|
||||
void sha256d(unsigned char *hash, const unsigned char *data, int len)
|
||||
{
|
||||
uint32_t S[16], T[16];
|
||||
int i, r;
|
||||
|
||||
sha256_init(S);
|
||||
for (r = len; r > -9; r -= 64) {
|
||||
if (r < 64)
|
||||
memset(T, 0, 64);
|
||||
memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r));
|
||||
if (r >= 0 && r < 64)
|
||||
((unsigned char *)T)[r] = 0x80;
|
||||
for (i = 0; i < 16; i++)
|
||||
T[i] = be32dec(T + i);
|
||||
if (r < 56)
|
||||
T[15] = 8 * len;
|
||||
sha256_transform(S, T, 0);
|
||||
}
|
||||
memcpy(S + 8, sha256d_hash1 + 8, 32);
|
||||
sha256_init(T);
|
||||
sha256_transform(T, S, 0);
|
||||
for (i = 0; i < 8; i++)
|
||||
be32enc((uint32_t *)hash + i, T[i]);
|
||||
}
|
||||
|
||||
/*
 * Precompute partial message-schedule words W[16..31] from W[0..15].
 *
 * Compared with the full schedule recurrence
 *     W[i] = s1(W[i-2]) + W[i-7] + s0(W[i-15]) + W[i-16]
 * several terms are left out here; in particular nothing computed in this
 * function reads W[3]. sha256d_ms() adds the W[3]-dependent terms later.
 */
static inline void sha256d_preextend(uint32_t *W)
{
    int i;

    W[16] = s1(W[14]) + W[ 9] + s0(W[ 1]) + W[ 0];
    W[17] = s1(W[15]) + W[10] + s0(W[ 2]) + W[ 1];
    W[18] = s1(W[16]) + W[11]             + W[ 2];  /* no s0(W[3]) term  */
    W[19] = s1(W[17]) + W[12] + s0(W[ 4]);          /* no W[3] term      */

    /* W[20..24]: recurrence without the s1() term. */
    for (i = 20; i <= 24; i++)
        W[i] = W[i - 7] + s0(W[i - 15]) + W[i - 16];

    /* W[25..31]: only the s0() term and W[i-16]. */
    for (i = 25; i <= 31; i++)
        W[i] = s0(W[i - 15]) + W[i - 16];
}
|
||||
|
||||
/*
 * Run SHA-256 rounds 0, 1 and 2 on state S using schedule words W[0..2].
 * S is updated in place; sha256d_ms() continues from round 3.
 */
static inline void sha256d_prehash(uint32_t *S, const uint32_t *W)
{
    uint32_t t0, t1;    /* scratch used by the RND macro */
    int round;

    for (round = 0; round < 3; round++)
        RNDr(S, W, round);
}
|
||||
|
||||
#ifdef EXTERN_SHA256
|
||||
|
||||
void sha256d_ms(uint32_t *hash, uint32_t *W,
|
||||
const uint32_t *midstate, const uint32_t *prehash);
|
||||
|
||||
#else
|
||||
|
||||
static inline void sha256d_ms(uint32_t *hash, uint32_t *W,
|
||||
const uint32_t *midstate, const uint32_t *prehash)
|
||||
{
|
||||
uint32_t S[64];
|
||||
uint32_t t0, t1;
|
||||
int i;
|
||||
|
||||
S[18] = W[18];
|
||||
S[19] = W[19];
|
||||
S[20] = W[20];
|
||||
S[22] = W[22];
|
||||
S[23] = W[23];
|
||||
S[24] = W[24];
|
||||
S[30] = W[30];
|
||||
S[31] = W[31];
|
||||
|
||||
W[18] += s0(W[3]);
|
||||
W[19] += W[3];
|
||||
W[20] += s1(W[18]);
|
||||
W[21] = s1(W[19]);
|
||||
W[22] += s1(W[20]);
|
||||
W[23] += s1(W[21]);
|
||||
W[24] += s1(W[22]);
|
||||
W[25] = s1(W[23]) + W[18];
|
||||
W[26] = s1(W[24]) + W[19];
|
||||
W[27] = s1(W[25]) + W[20];
|
||||
W[28] = s1(W[26]) + W[21];
|
||||
W[29] = s1(W[27]) + W[22];
|
||||
W[30] += s1(W[28]) + W[23];
|
||||
W[31] += s1(W[29]) + W[24];
|
||||
for (i = 32; i < 64; i += 2) {
|
||||
W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
|
||||
W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
|
||||
}
|
||||
|
||||
memcpy(S, prehash, 32);
|
||||
|
||||
RNDr(S, W, 3);
|
||||
RNDr(S, W, 4);
|
||||
RNDr(S, W, 5);
|
||||
RNDr(S, W, 6);
|
||||
RNDr(S, W, 7);
|
||||
RNDr(S, W, 8);
|
||||
RNDr(S, W, 9);
|
||||
RNDr(S, W, 10);
|
||||
RNDr(S, W, 11);
|
||||
RNDr(S, W, 12);
|
||||
RNDr(S, W, 13);
|
||||
RNDr(S, W, 14);
|
||||
RNDr(S, W, 15);
|
||||
RNDr(S, W, 16);
|
||||
RNDr(S, W, 17);
|
||||
RNDr(S, W, 18);
|
||||
RNDr(S, W, 19);
|
||||
RNDr(S, W, 20);
|
||||
RNDr(S, W, 21);
|
||||
RNDr(S, W, 22);
|
||||
RNDr(S, W, 23);
|
||||
RNDr(S, W, 24);
|
||||
RNDr(S, W, 25);
|
||||
RNDr(S, W, 26);
|
||||
RNDr(S, W, 27);
|
||||
RNDr(S, W, 28);
|
||||
RNDr(S, W, 29);
|
||||
RNDr(S, W, 30);
|
||||
RNDr(S, W, 31);
|
||||
RNDr(S, W, 32);
|
||||
RNDr(S, W, 33);
|
||||
RNDr(S, W, 34);
|
||||
RNDr(S, W, 35);
|
||||
RNDr(S, W, 36);
|
||||
RNDr(S, W, 37);
|
||||
RNDr(S, W, 38);
|
||||
RNDr(S, W, 39);
|
||||
RNDr(S, W, 40);
|
||||
RNDr(S, W, 41);
|
||||
RNDr(S, W, 42);
|
||||
RNDr(S, W, 43);
|
||||
RNDr(S, W, 44);
|
||||
RNDr(S, W, 45);
|
||||
RNDr(S, W, 46);
|
||||
RNDr(S, W, 47);
|
||||
RNDr(S, W, 48);
|
||||
RNDr(S, W, 49);
|
||||
RNDr(S, W, 50);
|
||||
RNDr(S, W, 51);
|
||||
RNDr(S, W, 52);
|
||||
RNDr(S, W, 53);
|
||||
RNDr(S, W, 54);
|
||||
RNDr(S, W, 55);
|
||||
RNDr(S, W, 56);
|
||||
RNDr(S, W, 57);
|
||||
RNDr(S, W, 58);
|
||||
RNDr(S, W, 59);
|
||||
RNDr(S, W, 60);
|
||||
RNDr(S, W, 61);
|
||||
RNDr(S, W, 62);
|
||||
RNDr(S, W, 63);
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
S[i] += midstate[i];
|
||||
|
||||
W[18] = S[18];
|
||||
W[19] = S[19];
|
||||
W[20] = S[20];
|
||||
W[22] = S[22];
|
||||
W[23] = S[23];
|
||||
W[24] = S[24];
|
||||
W[30] = S[30];
|
||||
W[31] = S[31];
|
||||
|
||||
memcpy(S + 8, sha256d_hash1 + 8, 32);
|
||||
S[16] = s1(sha256d_hash1[14]) + sha256d_hash1[ 9] + s0(S[ 1]) + S[ 0];
|
||||
S[17] = s1(sha256d_hash1[15]) + sha256d_hash1[10] + s0(S[ 2]) + S[ 1];
|
||||
S[18] = s1(S[16]) + sha256d_hash1[11] + s0(S[ 3]) + S[ 2];
|
||||
S[19] = s1(S[17]) + sha256d_hash1[12] + s0(S[ 4]) + S[ 3];
|
||||
S[20] = s1(S[18]) + sha256d_hash1[13] + s0(S[ 5]) + S[ 4];
|
||||
S[21] = s1(S[19]) + sha256d_hash1[14] + s0(S[ 6]) + S[ 5];
|
||||
S[22] = s1(S[20]) + sha256d_hash1[15] + s0(S[ 7]) + S[ 6];
|
||||
S[23] = s1(S[21]) + S[16] + s0(sha256d_hash1[ 8]) + S[ 7];
|
||||
S[24] = s1(S[22]) + S[17] + s0(sha256d_hash1[ 9]) + sha256d_hash1[ 8];
|
||||
S[25] = s1(S[23]) + S[18] + s0(sha256d_hash1[10]) + sha256d_hash1[ 9];
|
||||
S[26] = s1(S[24]) + S[19] + s0(sha256d_hash1[11]) + sha256d_hash1[10];
|
||||
S[27] = s1(S[25]) + S[20] + s0(sha256d_hash1[12]) + sha256d_hash1[11];
|
||||
S[28] = s1(S[26]) + S[21] + s0(sha256d_hash1[13]) + sha256d_hash1[12];
|
||||
S[29] = s1(S[27]) + S[22] + s0(sha256d_hash1[14]) + sha256d_hash1[13];
|
||||
S[30] = s1(S[28]) + S[23] + s0(sha256d_hash1[15]) + sha256d_hash1[14];
|
||||
S[31] = s1(S[29]) + S[24] + s0(S[16]) + sha256d_hash1[15];
|
||||
for (i = 32; i < 60; i += 2) {
|
||||
S[i] = s1(S[i - 2]) + S[i - 7] + s0(S[i - 15]) + S[i - 16];
|
||||
S[i+1] = s1(S[i - 1]) + S[i - 6] + s0(S[i - 14]) + S[i - 15];
|
||||
}
|
||||
S[60] = s1(S[58]) + S[53] + s0(S[45]) + S[44];
|
||||
|
||||
sha256_init(hash);
|
||||
|
||||
RNDr(hash, S, 0);
|
||||
RNDr(hash, S, 1);
|
||||
RNDr(hash, S, 2);
|
||||
RNDr(hash, S, 3);
|
||||
RNDr(hash, S, 4);
|
||||
RNDr(hash, S, 5);
|
||||
RNDr(hash, S, 6);
|
||||
RNDr(hash, S, 7);
|
||||
RNDr(hash, S, 8);
|
||||
RNDr(hash, S, 9);
|
||||
RNDr(hash, S, 10);
|
||||
RNDr(hash, S, 11);
|
||||
RNDr(hash, S, 12);
|
||||
RNDr(hash, S, 13);
|
||||
RNDr(hash, S, 14);
|
||||
RNDr(hash, S, 15);
|
||||
RNDr(hash, S, 16);
|
||||
RNDr(hash, S, 17);
|
||||
RNDr(hash, S, 18);
|
||||
RNDr(hash, S, 19);
|
||||
RNDr(hash, S, 20);
|
||||
RNDr(hash, S, 21);
|
||||
RNDr(hash, S, 22);
|
||||
RNDr(hash, S, 23);
|
||||
RNDr(hash, S, 24);
|
||||
RNDr(hash, S, 25);
|
||||
RNDr(hash, S, 26);
|
||||
RNDr(hash, S, 27);
|
||||
RNDr(hash, S, 28);
|
||||
RNDr(hash, S, 29);
|
||||
RNDr(hash, S, 30);
|
||||
RNDr(hash, S, 31);
|
||||
RNDr(hash, S, 32);
|
||||
RNDr(hash, S, 33);
|
||||
RNDr(hash, S, 34);
|
||||
RNDr(hash, S, 35);
|
||||
RNDr(hash, S, 36);
|
||||
RNDr(hash, S, 37);
|
||||
RNDr(hash, S, 38);
|
||||
RNDr(hash, S, 39);
|
||||
RNDr(hash, S, 40);
|
||||
RNDr(hash, S, 41);
|
||||
RNDr(hash, S, 42);
|
||||
RNDr(hash, S, 43);
|
||||
RNDr(hash, S, 44);
|
||||
RNDr(hash, S, 45);
|
||||
RNDr(hash, S, 46);
|
||||
RNDr(hash, S, 47);
|
||||
RNDr(hash, S, 48);
|
||||
RNDr(hash, S, 49);
|
||||
RNDr(hash, S, 50);
|
||||
RNDr(hash, S, 51);
|
||||
RNDr(hash, S, 52);
|
||||
RNDr(hash, S, 53);
|
||||
RNDr(hash, S, 54);
|
||||
RNDr(hash, S, 55);
|
||||
RNDr(hash, S, 56);
|
||||
|
||||
hash[2] += hash[6] + S1(hash[3]) + Ch(hash[3], hash[4], hash[5])
|
||||
+ S[57] + sha256_k[57];
|
||||
hash[1] += hash[5] + S1(hash[2]) + Ch(hash[2], hash[3], hash[4])
|
||||
+ S[58] + sha256_k[58];
|
||||
hash[0] += hash[4] + S1(hash[1]) + Ch(hash[1], hash[2], hash[3])
|
||||
+ S[59] + sha256_k[59];
|
||||
hash[7] += hash[3] + S1(hash[0]) + Ch(hash[0], hash[1], hash[2])
|
||||
+ S[60] + sha256_k[60]
|
||||
+ sha256_h[7];
|
||||
}
|
||||
|
||||
#endif /* EXTERN_SHA256 */
|
||||
|
||||
#ifdef HAVE_SHA256_4WAY
|
||||
|
||||
void sha256d_ms_4way(uint32_t *hash, uint32_t *data,
|
||||
const uint32_t *midstate, const uint32_t *prehash);
|
||||
|
||||
static inline int scanhash_sha256d_4way(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done)
|
||||
{
|
||||
uint32_t data[4 * 64] __attribute__((aligned(128)));
|
||||
uint32_t hash[4 * 8] __attribute__((aligned(32)));
|
||||
uint32_t midstate[4 * 8] __attribute__((aligned(32)));
|
||||
uint32_t prehash[4 * 8] __attribute__((aligned(32)));
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
int i, j;
|
||||
|
||||
memcpy(data, pdata + 16, 64);
|
||||
sha256d_preextend(data);
|
||||
for (i = 31; i >= 0; i--)
|
||||
for (j = 0; j < 4; j++)
|
||||
data[i * 4 + j] = data[i];
|
||||
|
||||
sha256_init(midstate);
|
||||
sha256_transform(midstate, pdata, 0);
|
||||
memcpy(prehash, midstate, 32);
|
||||
sha256d_prehash(prehash, pdata + 16);
|
||||
for (i = 7; i >= 0; i--) {
|
||||
for (j = 0; j < 4; j++) {
|
||||
midstate[i * 4 + j] = midstate[i];
|
||||
prehash[i * 4 + j] = prehash[i];
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
for (i = 0; i < 4; i++)
|
||||
data[4 * 3 + i] = ++n;
|
||||
|
||||
sha256d_ms_4way(hash, data, midstate, prehash);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (swab32(hash[4 * 7 + i]) <= Htarg) {
|
||||
pdata[19] = data[4 * 3 + i];
|
||||
sha256d_80_swap(hash, pdata);
|
||||
if (fulltest(hash, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* HAVE_SHA256_4WAY */
|
||||
|
||||
#ifdef HAVE_SHA256_8WAY
|
||||
|
||||
void sha256d_ms_8way(uint32_t *hash, uint32_t *data,
|
||||
const uint32_t *midstate, const uint32_t *prehash);
|
||||
|
||||
static inline int scanhash_sha256d_8way(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done)
|
||||
{
|
||||
uint32_t data[8 * 64] __attribute__((aligned(128)));
|
||||
uint32_t hash[8 * 8] __attribute__((aligned(32)));
|
||||
uint32_t midstate[8 * 8] __attribute__((aligned(32)));
|
||||
uint32_t prehash[8 * 8] __attribute__((aligned(32)));
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
int i, j;
|
||||
|
||||
memcpy(data, pdata + 16, 64);
|
||||
sha256d_preextend(data);
|
||||
for (i = 31; i >= 0; i--)
|
||||
for (j = 0; j < 8; j++)
|
||||
data[i * 8 + j] = data[i];
|
||||
|
||||
sha256_init(midstate);
|
||||
sha256_transform(midstate, pdata, 0);
|
||||
memcpy(prehash, midstate, 32);
|
||||
sha256d_prehash(prehash, pdata + 16);
|
||||
for (i = 7; i >= 0; i--) {
|
||||
for (j = 0; j < 8; j++) {
|
||||
midstate[i * 8 + j] = midstate[i];
|
||||
prehash[i * 8 + j] = prehash[i];
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
for (i = 0; i < 8; i++)
|
||||
data[8 * 3 + i] = ++n;
|
||||
|
||||
sha256d_ms_8way(hash, data, midstate, prehash);
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
if (swab32(hash[8 * 7 + i]) <= Htarg) {
|
||||
pdata[19] = data[8 * 3 + i];
|
||||
sha256d_80_swap(hash, pdata);
|
||||
if (fulltest(hash, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* HAVE_SHA256_8WAY */
|
||||
|
||||
int scanhash_sha256d(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
|
||||
uint32_t max_nonce, unsigned long *hashes_done)
|
||||
{
|
||||
uint32_t data[64] __attribute__((aligned(128)));
|
||||
uint32_t hash[8] __attribute__((aligned(32)));
|
||||
uint32_t midstate[8] __attribute__((aligned(32)));
|
||||
uint32_t prehash[8] __attribute__((aligned(32)));
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
|
||||
#ifdef HAVE_SHA256_8WAY
|
||||
if (sha256_use_8way())
|
||||
return scanhash_sha256d_8way(thr_id, pdata, ptarget,
|
||||
max_nonce, hashes_done);
|
||||
#endif
|
||||
#ifdef HAVE_SHA256_4WAY
|
||||
if (sha256_use_4way())
|
||||
return scanhash_sha256d_4way(thr_id, pdata, ptarget,
|
||||
max_nonce, hashes_done);
|
||||
#endif
|
||||
|
||||
memcpy(data, pdata + 16, 64);
|
||||
sha256d_preextend(data);
|
||||
|
||||
sha256_init(midstate);
|
||||
sha256_transform(midstate, pdata, 0);
|
||||
memcpy(prehash, midstate, 32);
|
||||
sha256d_prehash(prehash, pdata + 16);
|
||||
|
||||
do {
|
||||
data[3] = ++n;
|
||||
sha256d_ms(hash, data, midstate, prehash);
|
||||
if (swab32(hash[7]) <= Htarg) {
|
||||
pdata[19] = data[3];
|
||||
sha256d_80_swap(hash, pdata);
|
||||
if (fulltest(hash, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
27
algo/zr5.c
27
algo/zr5.c
@@ -32,12 +32,10 @@
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "algo/groestl/sph_groestl.h"
|
||||
#include "algo/keccak/sph_keccak.h"
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
#include "algo/groestl/sse2/grso.h"
|
||||
#include "algo/groestl/sse2/grso-macro.c"
|
||||
#else
|
||||
#ifndef NO_AES_NI
|
||||
#include "algo/groestl/aes_ni/hash-groestl.h"
|
||||
#include "algo/echo/aes_ni/hash_api.h"
|
||||
#endif
|
||||
@@ -61,17 +59,21 @@
|
||||
#define POK_DATA_MASK 0xFFFF0000
|
||||
|
||||
typedef struct {
|
||||
#ifndef NO_AES_NI
|
||||
hashState_groestl groestl;
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_context groestl;
|
||||
#else
|
||||
hashState_groestl groestl;
|
||||
#endif
|
||||
sph_keccak512_context keccak;
|
||||
sph_keccak512_context keccak;
|
||||
} zr5_ctx_holder;
|
||||
|
||||
zr5_ctx_holder zr5_ctx;
|
||||
|
||||
void init_zr5_ctx()
|
||||
{
|
||||
#ifndef NO_AES_NI
|
||||
#ifdef NO_AES_NI
|
||||
sph_groestl512_init( &zr5_ctx.groestl );
|
||||
#else
|
||||
init_groestl( &zr5_ctx.groestl );
|
||||
#endif
|
||||
sph_keccak512_init(&zr5_ctx.keccak);
|
||||
@@ -88,10 +90,6 @@ DATA_ALIGN16(sph_u64 hashctB);
|
||||
|
||||
//memset(hash, 0, 128);
|
||||
|
||||
#ifdef NO_AES_NI
|
||||
grsoState sts_grs;
|
||||
#endif
|
||||
|
||||
static const int arrOrder[][4] =
|
||||
{
|
||||
{ 0, 1, 2, 3 }, { 0, 1, 3, 2 }, { 0, 2, 1, 3 }, { 0, 2, 3, 1 },
|
||||
@@ -123,9 +121,8 @@ static const int arrOrder[][4] =
|
||||
break;
|
||||
case 1:
|
||||
#ifdef NO_AES_NI
|
||||
{GRS_I;
|
||||
GRS_U;
|
||||
GRS_C; }
|
||||
sph_groestl512 (&ctx.groestl, hash, 64);
|
||||
sph_groestl512_close(&ctx.groestl, hash);
|
||||
#else
|
||||
update_groestl( &ctx.groestl, (char*)hash,512);
|
||||
final_groestl( &ctx.groestl, (char*)hash);
|
||||
|
||||
Reference in New Issue
Block a user