v3.4.8 release

This commit is contained in:
Jay D Dee
2016-10-17 14:16:28 -04:00
parent 4ecf9555da
commit 6f28a67f9c
22 changed files with 1117 additions and 656 deletions

View File

@@ -44,171 +44,344 @@
*
* @return 0 if the key is generated correctly; -1 if there is an error (usually due to lack of memory for allocation)
*/
int LYRA2(void *K, int64_t kLen, const void *pwd, int32_t pwdlen, const void *salt, int32_t saltlen, int64_t timeCost, const int16_t nRows, const int16_t nCols)
// Lyra2RE & Lyra2REv2, nRows must be a power of 2
int LYRA2( void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen,
const void *salt, uint64_t saltlen, uint64_t timeCost,
const uint64_t nRows, const uint64_t nCols )
{
//============================= Basic variables ============================//
int64_t row = 2; //index of row to be processed
int64_t prev = 1; //index of prev (last row ever computed/modified)
int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering)
int64_t tau; //Time Loop iterator
int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
int64_t i; //auxiliary iteration counter
int64_t v64; // 64bit var for memcpy
//==========================================================================/
//====================== Basic variables ============================//
uint64_t _ALIGN(256) state[16];
int64_t row = 2; //index of row to be processed
int64_t prev = 1; //index of prev (last row ever computed/modified)
int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering)
int64_t tau; //Time Loop iterator
int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
int64_t i; //auxiliary iteration counter
int64_t v64; // 64bit var for memcpy
//====================================================================/
//========== Initializing the Memory Matrix and pointers to it =============//
//Tries to allocate enough space for the whole memory matrix
//=== Initializing the Memory Matrix and pointers to it =============//
//Tries to allocate enough space for the whole memory matrix
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
// for Lyra2REv2, nCols = 4, v1 was using 8
const int64_t BLOCK_LEN = (nCols == 4) ? BLOCK_LEN_BLAKE2_SAFE_INT64 : BLOCK_LEN_BLAKE2_SAFE_BYTES;
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
// for Lyra2REv2, nCols = 4, v1 was using 8
const int64_t BLOCK_LEN = (nCols == 4) ? BLOCK_LEN_BLAKE2_SAFE_INT64
: BLOCK_LEN_BLAKE2_SAFE_BYTES;
i = (int64_t)ROW_LEN_BYTES * nRows;
uint64_t *wholeMatrix = malloc(i);
if (wholeMatrix == NULL) {
return -1;
}
memset(wholeMatrix, 0, i);
i = (int64_t)ROW_LEN_BYTES * nRows;
uint64_t *wholeMatrix = malloc(i);
if (wholeMatrix == NULL)
return -1;
//Allocates pointers to each row of the matrix
uint64_t **memMatrix = malloc(sizeof(uint64_t*) * nRows);
if (memMatrix == NULL) {
return -1;
}
//Places the pointers in the correct positions
uint64_t *ptrWord = wholeMatrix;
for (i = 0; i < nRows; i++) {
memMatrix[i] = ptrWord;
ptrWord += ROW_LEN_INT64;
}
//==========================================================================/
memset(wholeMatrix, 0, i);
//============= Getting the password + salt + basil padded with 10*1 ===============//
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
//but this ensures that the password copied locally will be overwritten as soon as possible
//Allocates pointers to each row of the matrix
uint64_t **memMatrix = malloc(sizeof(uint64_t*) * nRows);
if (memMatrix == NULL)
return -1;
//First, we clean enough blocks for the password, salt, basil and padding
int64_t nBlocksInput = ((saltlen + pwdlen + 6 * sizeof(uint64_t)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1;
//Places the pointers in the correct positions
uint64_t *ptrWord = wholeMatrix;
for (i = 0; i < nRows; i++)
{
memMatrix[i] = ptrWord;
ptrWord += ROW_LEN_INT64;
}
byte *ptrByte = (byte*) wholeMatrix;
//=== Getting the password + salt + basil padded with 10*1 ==========//
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
//but this ensures that the password copied locally will be overwritten as soon as possible
//Prepends the password
memcpy(ptrByte, pwd, pwdlen);
ptrByte += pwdlen;
//First, we clean enough blocks for the password, salt, basil and padding
int64_t nBlocksInput = ( ( saltlen + pwdlen + 6 * sizeof(uint64_t) )
/ BLOCK_LEN_BLAKE2_SAFE_BYTES ) + 1;
//Concatenates the salt
memcpy(ptrByte, salt, saltlen);
ptrByte += saltlen;
byte *ptrByte = (byte*) wholeMatrix;
memset(ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - (saltlen + pwdlen));
//Prepends the password
memcpy(ptrByte, pwd, pwdlen);
ptrByte += pwdlen;
//Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
memcpy(ptrByte, &kLen, sizeof(int64_t));
ptrByte += sizeof(uint64_t);
v64 = pwdlen;
memcpy(ptrByte, &v64, sizeof(int64_t));
ptrByte += sizeof(uint64_t);
v64 = saltlen;
memcpy(ptrByte, &v64, sizeof(int64_t));
ptrByte += sizeof(uint64_t);
v64 = timeCost;
memcpy(ptrByte, &v64, sizeof(int64_t));
ptrByte += sizeof(uint64_t);
v64 = nRows;
memcpy(ptrByte, &v64, sizeof(int64_t));
ptrByte += sizeof(uint64_t);
v64 = nCols;
memcpy(ptrByte, &v64, sizeof(int64_t));
ptrByte += sizeof(uint64_t);
//Concatenates the salt
memcpy(ptrByte, salt, saltlen);
ptrByte += saltlen;
//Now comes the padding
*ptrByte = 0x80; //first byte of padding: right after the password
ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
*ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block
//==========================================================================/
memset( ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES
- (saltlen + pwdlen) );
//======================= Initializing the Sponge State ====================//
//Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
uint64_t _ALIGN(256) state[16];
initState(state);
//==========================================================================/
//Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
memcpy(ptrByte, &kLen, sizeof(int64_t));
ptrByte += sizeof(uint64_t);
v64 = pwdlen;
memcpy(ptrByte, &v64, sizeof(int64_t));
ptrByte += sizeof(uint64_t);
v64 = saltlen;
memcpy(ptrByte, &v64, sizeof(int64_t));
ptrByte += sizeof(uint64_t);
v64 = timeCost;
memcpy(ptrByte, &v64, sizeof(int64_t));
ptrByte += sizeof(uint64_t);
v64 = nRows;
memcpy(ptrByte, &v64, sizeof(int64_t));
ptrByte += sizeof(uint64_t);
v64 = nCols;
memcpy(ptrByte, &v64, sizeof(int64_t));
ptrByte += sizeof(uint64_t);
//================================ Setup Phase =============================//
//Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
ptrWord = wholeMatrix;
for (i = 0; i < nBlocksInput; i++) {
absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil)
ptrWord += BLOCK_LEN; //goes to next block of pad(pwd || salt || basil)
}
//Now comes the padding
*ptrByte = 0x80; //first byte of padding: right after the password
ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
*ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block
//Initializes M[0] and M[1]
reducedSqueezeRow0(state, memMatrix[0], nCols); //The locally copied password is most likely overwritten here
//================= Initializing the Sponge State ====================//
//Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
initState(state);
reducedDuplexRow1(state, memMatrix[0], memMatrix[1], nCols);
//========================= Setup Phase =============================//
//Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
ptrWord = wholeMatrix;
for (i = 0; i < nBlocksInput; i++)
{
absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil)
ptrWord += BLOCK_LEN; //goes to next block of pad(pwd || salt || basil)
}
do {
//M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
//Initializes M[0] and M[1]
reducedSqueezeRow0(state, memMatrix[0], nCols); //The locally copied password is most likely overwritten here
reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
reducedDuplexRow1(state, memMatrix[0], memMatrix[1], nCols);
//updates the value of row* (deterministically picked during Setup))
rowa = (rowa + step) & (window - 1);
//update prev: it now points to the last row ever computed
prev = row;
//updates row: goes to the next row to be computed
row++;
do
{
//M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
//Checks if all rows in the window where visited.
if (rowa == 0) {
step = window + gap; //changes the step: approximately doubles its value
window *= 2; //doubles the size of the re-visitation window
gap = -gap; //inverts the modifier to the step
}
reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
} while (row < nRows);
//==========================================================================/
//updates the value of row* (deterministically picked during Setup))
rowa = (rowa + step) & (window - 1);
//update prev: it now points to the last row ever computed
prev = row;
//updates row: goes to the next row to be computed
row++;
//============================ Wandering Phase =============================//
row = 0; //Resets the visitation to the first row of the memory matrix
for (tau = 1; tau <= timeCost; tau++) {
//Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1
step = (tau % 2 == 0) ? -1 : nRows / 2 - 1;
do {
//Selects a pseudorandom index row*
//------------------------------------------------------------------------------------------
rowa = state[0] & (unsigned int)(nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
//rowa = state[0] % nRows; //(USE THIS FOR THE "GENERIC" CASE)
//------------------------------------------------------------------------------------------
//Checks if all rows in the window where visited.
if (rowa == 0)
{
step = window + gap; //changes the step: approximately doubles its value
window *= 2; //doubles the size of the re-visitation window
gap = -gap; //inverts the modifier to the step
}
//Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
} while (row < nRows);
//update prev: it now points to the last row ever computed
prev = row;
//===================== Wandering Phase =============================//
row = 0; //Resets the visitation to the first row of the memory matrix
for (tau = 1; tau <= timeCost; tau++)
{
//Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1
step = (tau % 2 == 0) ? -1 : nRows / 2 - 1;
do
{
//Selects a pseudorandom index row*
//-----------------------------------------------
rowa = state[0] & (unsigned int)(nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
//rowa = state[0] % nRows; //(USE THIS FOR THE "GENERIC" CASE)
//-------------------------------------------
//updates row: goes to the next row to be computed
//------------------------------------------------------------------------------------------
row = (row + step) & (unsigned int)(nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
//row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
//------------------------------------------------------------------------------------------
//Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
} while (row != 0);
}
//update prev: it now points to the last row ever computed
prev = row;
//============================ Wrap-up Phase ===============================//
//Absorbs the last block of the memory matrix
absorbBlock(state, memMatrix[rowa]);
//updates row: goes to the next row to be computed
//----------------------------------------------------
row = (row + step) & (unsigned int)(nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
//row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
//----------------------------------------------------
//Squeezes the key
squeeze(state, K, (unsigned int) kLen);
} while (row != 0);
}
//========================= Freeing the memory =============================//
free(memMatrix);
free(wholeMatrix);
//===================== Wrap-up Phase ===============================//
//Absorbs the last block of the memory matrix
absorbBlock(state, memMatrix[rowa]);
return 0;
//Squeezes the key
squeeze(state, K, (unsigned int) kLen);
//================== Freeing the memory =============================//
free(memMatrix);
free(wholeMatrix);
return 0;
}
// Zcoin, nRows may be any value
int LYRA2Z( void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen,
const void *salt, uint64_t saltlen, uint64_t timeCost,
uint64_t nRows, uint64_t nCols )
{
//========================== Basic variables ============================//
uint64_t _ALIGN(256) state[16];
int64_t row = 2; //index of row to be processed
int64_t prev = 1; //index of prev (last row ever computed/modified)
int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering)
int64_t tau; //Time Loop iterator
int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
int64_t i; //auxiliary iteration counter
//=======================================================================/
//======= Initializing the Memory Matrix and pointers to it =============//
//Tries to allocate enough space for the whole memory matrix
const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES);
uint64_t *wholeMatrix = malloc(i);
if (wholeMatrix == NULL)
return -1;
memset(wholeMatrix, 0, i);
//Allocates pointers to each row of the matrix
uint64_t **memMatrix = malloc(nRows * sizeof (uint64_t*));
if (memMatrix == NULL)
return -1;
//Places the pointers in the correct positions
uint64_t *ptrWord = wholeMatrix;
for (i = 0; i < nRows; i++)
{
memMatrix[i] = ptrWord;
ptrWord += ROW_LEN_INT64;
}
//==== Getting the password + salt + basil padded with 10*1 ============//
//OBS.:The memory matrix will temporarily hold the password: not for saving memory,
//but this ensures that the password copied locally will be overwritten as soon as possible
//First, we clean enough blocks for the password, salt, basil and padding
uint64_t nBlocksInput = ( ( saltlen + pwdlen + 6 * sizeof (uint64_t) )
/ BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1;
byte *ptrByte = (byte*) wholeMatrix;
memset( ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES );
//Prepends the password
memcpy(ptrByte, pwd, pwdlen);
ptrByte += pwdlen;
//Concatenates the salt
memcpy(ptrByte, salt, saltlen);
ptrByte += saltlen;
//Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
memcpy(ptrByte, &kLen, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
memcpy(ptrByte, &pwdlen, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
memcpy(ptrByte, &saltlen, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
memcpy(ptrByte, &timeCost, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
memcpy(ptrByte, &nRows, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
memcpy(ptrByte, &nCols, sizeof (uint64_t));
ptrByte += sizeof (uint64_t);
//Now comes the padding
*ptrByte = 0x80; //first byte of padding: right after the password
ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
*ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block
//=================== Initializing the Sponge State ====================//
//Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
initState( state );
//============================== Setup Phase =============================//
//Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
ptrWord = wholeMatrix;
for ( i = 0; i < nBlocksInput; i++ )
{
absorbBlockBlake2Safe( state, ptrWord ); //absorbs each block of pad(pwd || salt || basil)
ptrWord += BLOCK_LEN_BLAKE2_SAFE_INT64; //goes to next block of pad(pwd || salt || basil)
}
//Initializes M[0] and M[1]
reducedSqueezeRow0( state, memMatrix[0], nCols ); //The locally copied password is most likely overwritten here
reducedDuplexRow1( state, memMatrix[0], memMatrix[1], nCols );
do
{
//M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
reducedDuplexRowSetup( state, memMatrix[prev], memMatrix[rowa],
memMatrix[row], nCols );
//updates the value of row* (deterministically picked during Setup))
rowa = (rowa + step) & (window - 1);
//update prev: it now points to the last row ever computed
prev = row;
//updates row: goes to the next row to be computed
row++;
//Checks if all rows in the window where visited.
if (rowa == 0)
{
step = window + gap; //changes the step: approximately doubles its value
window *= 2; //doubles the size of the re-visitation window
gap = -gap; //inverts the modifier to the step
}
} while (row < nRows);
//======================== Wandering Phase =============================//
row = 0; //Resets the visitation to the first row of the memory matrix
for ( tau = 1; tau <= timeCost; tau++ )
{
//Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1
step = (tau % 2 == 0) ? -1 : nRows / 2 - 1;
do {
//Selects a pseudorandom index row*
//----------------------------------------------------------------------
//rowa = ((unsigned int)state[0]) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
rowa = ((uint64_t) (state[0])) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
//-----------------------------------------------------------------
//Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
reducedDuplexRow( state, memMatrix[prev], memMatrix[rowa],
memMatrix[row], nCols );
//update prev: it now points to the last row ever computed
prev = row;
//updates row: goes to the next row to be computed
//---------------------------------------------------------------
//row = (row + step) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
//--------------------------------------------------------------------
} while (row != 0);
}
//========================= Wrap-up Phase ===============================//
//Absorbs the last block of the memory matrix
absorbBlock( state, memMatrix[rowa] );
//Squeezes the key
squeeze( state, K, kLen );
//====================== Freeing the memory =============================//
free( memMatrix );
free( wholeMatrix );
return 0;
}

View File

@@ -37,6 +37,10 @@ typedef unsigned char byte;
#define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8) //Block length, in bytes
#endif
int LYRA2(void *K, int64_t kLen, const void *pwd, int32_t pwdlen, const void *salt, int32_t saltlen, int64_t timeCost, const int16_t nRows, const int16_t nCols);
int LYRA2( void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen,
const void *salt, uint64_t saltlen, uint64_t timeCost,
uint64_t nRows, uint64_t nCols );
int LYRA2Z( void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen,
const void *salt, uint64_t saltlen, uint64_t timeCost,
uint64_t nRows, uint64_t nCols );
#endif /* LYRA2_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -23,42 +23,34 @@
#define SPONGE_H_
#include <stdint.h>
#include "avxdefs.h"
/* Blake2b IV Array */
#if defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#else
#define ALIGN
#endif
/*Blake2b IV Array*/
static const uint64_t blake2b_IV[8] =
{
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
/* Blake2b's rotation */
static __inline uint64_t rotr64(const uint64_t w, const unsigned c) {
#ifdef _MSC_VER
return _rotr64(w, c);
#else
return ( w >> c ) | ( w << ( 64 - c ) );
#endif
/*Blake2b's rotation*/
static inline uint64_t rotr64( const uint64_t w, const unsigned c ){
return ( w >> c ) | ( w << ( 64 - c ) );
}
#if defined __AVX2__
// only available with avx2
// rotate each uint64 c bits
// returns _m256i
#define mm256_rotr_64(w,c) _mm256_or_si256(_mm256_srli_epi64(w, c), \
_mm256_slli_epi64(w, 64 - c))
// Rotate 4 uint64 (256 bits) by one uint64 (64 bits)
// returns __m256i
#define mm256_rotl256_1x64(s) _mm256_permute4x64_epi64( s, 0x39 )
#define mm256_rotr256_1x64(s) _mm256_permute4x64_epi64( s, 0x93 )
// swap hi and lo 128 bits in 256 bit vector
// returns _m256i
#define mm256_swap128(s) _mm256_permute2f128_si256( s, s, 1 )
// init vectors from memory
// returns void, updates defines and inits implicit args a, b, c, d
#define LYRA_INIT_AVX2 \
@@ -88,15 +80,29 @@ static __inline uint64_t rotr64(const uint64_t w, const unsigned c) {
c = _mm256_add_epi64( c, d ); \
b = mm256_rotr_64( _mm256_xor_si256( b, c ), 63 );
#define LYRA_ROUND_AVX2 \
G_4X64( a[0], a[1], a[2], a[3] ); \
a[1] = mm256_rotl256_1x64( a[1]); \
a[2] = mm256_swap128( a[2] ); \
a[3] = mm256_rotr256_1x64( a[3] ); \
G_4X64( a[0], a[1], a[2], a[3] ); \
a[1] = mm256_rotr256_1x64( a[1] ); \
a[2] = mm256_swap128( a[2] ); \
a[3] = mm256_rotl256_1x64( a[3] );
#define LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
G_4X64( s0, s1, s2, s3 ); \
s1 = mm256_rotl256_1x64( s1); \
s2 = mm256_swap128( s2 ); \
s3 = mm256_rotr256_1x64( s3 ); \
G_4X64( s0, s1, s2, s3 ); \
s1 = mm256_rotr256_1x64( s1 ); \
s2 = mm256_swap128( s2 ); \
s3 = mm256_rotl256_1x64( s3 );
#define LYRA_12_ROUNDS_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
LYRA_ROUND_AVX2( s0, s1, s2, s3 ) \
#else
// only available with avx
@@ -148,6 +154,7 @@ static __inline uint64_t rotr64(const uint64_t w, const unsigned c) {
#endif // AVX2
/*
#if defined __AVX__
// can coexist with AVX2
@@ -161,7 +168,7 @@ static __inline uint64_t rotr64(const uint64_t w, const unsigned c) {
#define mm128_swap128(s0, s1) s0 = _mm_xor_si128(s0, s1); \
s1 = _mm_xor_si128(s0, s1); \
s0 = _mm_xor_si128(s0, s1);
// swap uint64 in 128 bit source vector, equivalent of rotating 128 bits by
// 64 bits (8 bytes)
// __m128i
@@ -193,30 +200,33 @@ static __inline uint64_t rotr64(const uint64_t w, const unsigned c) {
} while(0)
#endif // AVX
*/
/* Blake2b's G function */
#define G(r,i,a,b,c,d) do { \
a = a + b; \
d = rotr64(d ^ a, 32); \
c = c + d; \
b = rotr64(b ^ c, 24); \
a = a + b; \
d = rotr64(d ^ a, 16); \
c = c + d; \
b = rotr64(b ^ c, 63); \
// Scalar
//Blake2b's G function
#define G(r,i,a,b,c,d) \
do { \
a = a + b; \
d = rotr64(d ^ a, 32); \
c = c + d; \
b = rotr64(b ^ c, 24); \
a = a + b; \
d = rotr64(d ^ a, 16); \
c = c + d; \
b = rotr64(b ^ c, 63); \
} while(0)
/*One Round of the Blake2b's compression function*/
#define ROUND_LYRA(r) \
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
G(r,7,v[ 3],v[ 4],v[ 9],v[14]);
#define ROUND_LYRA(r) \
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
G(r,7,v[ 3],v[ 4],v[ 9],v[14]);
//---- Housekeeping
@@ -224,18 +234,31 @@ void initState(uint64_t state[/*16*/]);
//---- Squeezes
void squeeze(uint64_t *state, unsigned char *out, unsigned int len);
void reducedSqueezeRow0(uint64_t* state, uint64_t* row, const uint32_t nCols);
void reducedSqueezeRow0(uint64_t* state, uint64_t* row, uint64_t nCols);
//---- Absorbs
void absorbBlock(uint64_t *state, const uint64_t *in);
void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in);
//---- Duplexes
void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, const uint32_t nCols);
void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, const uint32_t nCols);
void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, const uint32_t nCols);
void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, uint64_t nCols);
void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols);
void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, uint64_t nCols);
//---- Misc
void printArray(unsigned char *array, unsigned int size, char *name);
////////////////////////////////////////////////////////////////////////////////////////////////
////TESTS////
//void reducedDuplexRowc(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowSetupv4(uint64_t *state, uint64_t *rowIn1, uint64_t *rowIn2, uint64_t *rowOut1, uint64_t *rowOut2);
//void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
//void reducedDuplexRowSetupv5d(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut);
/////////////
#endif /* SPONGE_H_ */

77
algo/lyra2/zcoin.c Normal file
View File

@@ -0,0 +1,77 @@
#include <memory.h>
#include "miner.h"
#include "algo-gate-api.h"
#include "lyra2.h"
void zcoin_hash(void *state, const void *input, uint32_t height)
{
uint32_t _ALIGN(256) hash[16];
LYRA2Z(hash, 32, input, 80, input, 80, 2, height, 256);
memcpy(state, hash, 32);
}
//int scanhash_zcoin(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done, uint32_t height)
int scanhash_zcoin( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done )
{
uint32_t _ALIGN(128) hash[8];
uint32_t _ALIGN(128) endiandata[20];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t Htarg = ptarget[7];
const uint32_t first_nonce = pdata[19];
uint32_t nonce = first_nonce;
if (opt_benchmark)
ptarget[7] = 0x0000ff;
for (int i=0; i < 19; i++) {
be32enc(&endiandata[i], pdata[i]);
}
do {
be32enc(&endiandata[19], nonce);
zcoin_hash( hash, endiandata, work->height );
if (hash[7] <= Htarg && fulltest(hash, ptarget)) {
work_set_target_ratio(work, hash);
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce;
return 1;
}
nonce++;
} while (nonce < max_nonce && !work_restart[thr_id].restart);
pdata[19] = nonce;
*hashes_done = pdata[19] - first_nonce + 1;
return 0;
}
int64_t get_max64_0xffffLL() { return 0xffffLL; };
void zcoin_set_target( struct work* work, double job_diff )
{
work_set_target( work, job_diff / (256.0 * opt_diff_factor) );
}
bool zcoin_get_work_height( struct work* work, struct stratum_ctx* sctx )
{
work->height = sctx->bloc_height;
return false;
}
bool register_zcoin_algo( algo_gate_t* gate )
{
gate->optimizations = SSE2_OPT | AES_OPT | AVX_OPT | AVX2_OPT;
gate->scanhash = (void*)&scanhash_zcoin;
gate->hash = (void*)&zcoin_hash;
gate->hash_alt = (void*)&zcoin_hash;
gate->get_max64 = (void*)&get_max64_0xffffLL;
gate->set_target = (void*)&zcoin_set_target;
gate->prevent_dupes = (void*)&zcoin_get_work_height;
return true;
};