This commit is contained in:
Jay D Dee
2017-01-28 17:49:14 -05:00
parent 7af5b7cf80
commit c1d6c3a57f
22 changed files with 210 additions and 379 deletions

3
.gitignore vendored
View File

@@ -29,7 +29,8 @@ config.sub
mingw32-config.cache
*/.dirstamp
*/*/.dirstamp
*/*/*/.dirstamp
*.iml
*.vcxproj.user

View File

@@ -1,9 +1,18 @@
Compile instructions for Linux and Windows are at the bottom of this file.
Change Log
----------
v3.5.3
More optimizations
Timetravel +16%
Xevan +3%
Qubit +12%
v3.5.2
Timetrave (machinecoin) added and optimized.
Timetravel (machinecoin) added and optimized.
v3.5.1
@@ -125,8 +134,6 @@ It is assumed users know how to install packages on their system and
are able to compile standard source packages. This is basic Linux and
beyond the scope of cpuminer-opt.
Make sure you have the basic development packages installed.
Here is a good start:

View File

@@ -49,9 +49,8 @@
/* digest up to len bytes of input (full blocks only) */
void Transform(hashState_groestl *ctx,
const u8 *in,
unsigned long long len) {
void Transform( hashState_groestl *ctx, const u8 *in, unsigned long long len )
{
/* increment block counter */
ctx->block_counter += len/SIZE;
@@ -67,7 +66,8 @@ void Transform(hashState_groestl *ctx,
}
/* given state h, do h <- P(h)+h */
void OutputTransformation(hashState_groestl *ctx) {
void OutputTransformation( hashState_groestl *ctx )
{
/* determine variant */
#if (LENGTH <= 256)
OF512((u64*)ctx->chaining);
@@ -79,7 +79,8 @@ void OutputTransformation(hashState_groestl *ctx) {
}
/* initialise context */
HashReturn_gr init_groestl(hashState_groestl* ctx) {
HashReturn_gr init_groestl( hashState_groestl* ctx )
{
u8 i = 0;
/* output size (in bits) must be a positive integer less than or
equal to 512, and divisible by 8 */
@@ -114,14 +115,12 @@ HashReturn_gr init_groestl(hashState_groestl* ctx) {
/* set other variables */
ctx->buf_ptr = 0;
ctx->block_counter = 0;
ctx->bits_in_last_byte = 0;
return SUCCESS_GR;
}
HashReturn_gr reinit_groestl(hashState_groestl* ctx)
{
HashReturn_gr reinit_groestl( hashState_groestl* ctx )
{
int i;
for (i=0; i<SIZE/8; i++)
ctx->chaining[i] = 0;
@@ -140,146 +139,63 @@ HashReturn_gr reinit_groestl(hashState_groestl* ctx)
ctx->buf_ptr = 0;
ctx->block_counter = 0;
// not used
ctx->bits_in_last_byte = 0;
return SUCCESS_GR;
}
/* update state with databitlen bits of input */
HashReturn_gr update_groestl(hashState_groestl* ctx,
const BitSequence_gr* input,
DataLength_gr databitlen) {
HashReturn_gr update_groestl( hashState_groestl* ctx,
const BitSequence_gr* input,
DataLength_gr databitlen )
{
int index = 0;
int msglen = (int)(databitlen/8);
int rem = (int)(databitlen%8); // not used
// The only data length used is either 64 bytes (512 bits,
// or 80 bytes (640 bits). The sph version of groestl used a byte
// size for the data length, so odd bits aren't supported there.
// No need to support them here either, change the arg to bytes
// for consistency.
/* non-integral number of message bytes can only be supplied in the
last call to this function */
if (ctx->bits_in_last_byte) return FAIL_GR;
/* if the buffer contains data that has not yet been digested, first
add data to buffer until full */
//// This code can never run, it is indeed dead. buf_ptr is initialized
//// to 0 in init_groestl and hasn't been changed yet
// The following block of code never gets hit when hashing x11 or quark
// leave it here in case it might be needed.
// if (ctx->buf_ptr)
// {
// while (ctx->buf_ptr < ctx->statesize && index < msglen)
// {
// ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
// }
// if (ctx->buf_ptr < ctx->statesize)
// {
// /* buffer still not full, return */
// if (rem)
// {
// ctx->bits_in_last_byte = rem;
// ctx->buffer[(int)ctx->buf_ptr++] = input[index];
// }
// return SUCCESS_GR;
// }
// /* digest buffer */
// ctx->buf_ptr = 0;
// printf("error\n");
// Transform(ctx, ctx->buffer, ctx->statesize);
// end dead code
// }
/* digest bulk of message */
Transform(ctx, input+index, msglen-index);
Transform( ctx, input+index, msglen-index );
// index is always zero here, the following line sets it == msglen
// meaning the next while test will always fail. it's all part of
// supporting odd bits.
// this line makes no sense, index = 0 before and after
// but removing this line breaks the hash.
index += ((msglen-index)/ctx->statesize)*ctx->statesize;
/* store remaining data in buffer */
while (index < msglen)
{
ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
}
// buf_ptr should be msglen now.
//// This code isn't quite dead but but would only run if datalen
/// is not a multiple of 8. As a result bits_in_last_byte is never
//// modified from its initial zero.
// Another block that doesn't get used by x11 or quark
// /* if non-integral number of bytes have been supplied, store
// remaining bits in last byte, together with information about
// number of bits */
// if (rem)
// {
// ctx->bits_in_last_byte = rem;
// ctx->buffer[(int)ctx->buf_ptr++] = input[index];
// }
return SUCCESS_GR;
}
#define BILB ctx->bits_in_last_byte
/* finalise: process remaining data (including padding), perform
output transformation, and write hash result to 'output' */
HashReturn_gr final_groestl(hashState_groestl* ctx,
BitSequence_gr* output) {
HashReturn_gr final_groestl( hashState_groestl* ctx,
BitSequence_gr* output )
{
int i, j = 0, hashbytelen = LENGTH/8;
u8 *s = (BitSequence_gr*)ctx->chaining;
/* pad with '1'-bit and first few '0'-bits */
if (BILB) {
ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-BILB);
BILB = 0;
}
//This sets the first pad byte
else ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
// buf_ptr is left == msglen after update_groestl, 64 (bytes).
// It has now been incrememnted to 65. The test below should fail
// with 64 and 80 and require 1 pad block. Why does 64 bit need a pad block?
// length padding?
/* pad with '0'-bits */
if (ctx->buf_ptr > ctx->statesize-LENGTHFIELDLEN) {
if ( ctx->buf_ptr > ctx->statesize-LENGTHFIELDLEN )
{
/* padding requires two blocks */
while (ctx->buf_ptr < ctx->statesize) {
while ( ctx->buf_ptr < ctx->statesize )
ctx->buffer[(int)ctx->buf_ptr++] = 0;
}
/* digest first padding block */
Transform(ctx, ctx->buffer, ctx->statesize);
Transform( ctx, ctx->buffer, ctx->statesize );
ctx->buf_ptr = 0;
}
// the padding can be vectorized, including the first pad byte above
// 64 bit: buffer[64..79] = {0x80000000,0,0,0}
// buffer[80..95] = {0,0,0,0}
// buffer[96..111] = {0,0,0,0}
// buffer[112..128 = {0,0,length padding}
// 80 bit: buffer[64..79] = unchanged
// buffer[80..95] = {0x800000000,0,0,0}
// buffer[96..111] = {0,0,0,0}
// buffer[112..128 = {0,0,length padding}
// this will pad up to 120 bytes
while (ctx->buf_ptr < ctx->statesize-LENGTHFIELDLEN) {
// this will pad up to 120 bytes
while (ctx->buf_ptr < ctx->statesize-LENGTHFIELDLEN)
ctx->buffer[(int)ctx->buf_ptr++] = 0;
}
/* length padding */
ctx->block_counter++;
ctx->buf_ptr = ctx->statesize;
while (ctx->buf_ptr > ctx->statesize-LENGTHFIELDLEN) {
while (ctx->buf_ptr > ctx->statesize-LENGTHFIELDLEN)
{
ctx->buffer[(int)--ctx->buf_ptr] = (u8)ctx->block_counter;
ctx->block_counter >>= 8;
}
@@ -290,22 +206,8 @@ HashReturn_gr final_groestl(hashState_groestl* ctx,
OutputTransformation(ctx);
/* store hash result in output */
for (i = ctx->statesize-hashbytelen; i < ctx->statesize; i++,j++) {
for (i = ctx->statesize-hashbytelen; i < ctx->statesize; i++,j++)
output[j] = s[i];
}
// the following is redundant as init_groestl will reset to zero.
/* zeroise relevant variables and deallocate memory */
for (i = 0; i < ctx->columns; i++) {
ctx->chaining[i] = 0;
}
for (i = 0; i < ctx->statesize; i++) {
ctx->buffer[i] = 0;
}
// free(ctx->chaining);
// free(ctx->buffer);
return SUCCESS_GR;
}

View File

@@ -95,18 +95,18 @@ typedef struct {
__attribute__ ((aligned (32))) BitSequence_gr buffer[SIZE]; /* data buffer */
u64 block_counter; /* message block counter */
int buf_ptr; /* data buffer pointer */
int bits_in_last_byte; /* no. of message bits in last byte of
data buffer */
int columns; /* no. of columns in state */
int statesize; /* total no. of bytes in state */
Var v; /* LONG or SHORT */
} hashState_groestl;
HashReturn_gr init_groestl(hashState_groestl*);
HashReturn_gr reinit_groestl(hashState_groestl*);
HashReturn_gr update_groestl(hashState_groestl*, const BitSequence_gr*, DataLength_gr);
HashReturn_gr final_groestl(hashState_groestl*, BitSequence_gr*);
HashReturn_gr hash_groestl(int, const BitSequence_gr*, DataLength_gr, BitSequence_gr*);
HashReturn_gr init_groestl( hashState_groestl* );
HashReturn_gr reinit_groestl( hashState_groestl* );
HashReturn_gr update_groestl( hashState_groestl*, const BitSequence_gr*,
DataLength_gr );
HashReturn_gr final_groestl( hashState_groestl*, BitSequence_gr* );
HashReturn_gr hash_groestl( int, const BitSequence_gr*, DataLength_gr,
BitSequence_gr* );
/* NIST API end */
#endif /* __hash_h */

View File

@@ -57,11 +57,7 @@ void Transform256(hashState_groestl256 *ctx,
/* digest message, one block at a time */
for (; len >= SIZE; len -= SIZE, in += SIZE)
//#if LENGTH<=256
TF512((u64*)ctx->chaining, (u64*)in);
//#else
// TF1024((u64*)ctx->chaining, (u64*)in);
//#endif
asm volatile ("emms");
}
@@ -69,11 +65,7 @@ void Transform256(hashState_groestl256 *ctx,
/* given state h, do h <- P(h)+h */
void OutputTransformation256(hashState_groestl256 *ctx) {
/* determine variant */
//#if (LENGTH <= 256)
OF512((u64*)ctx->chaining);
//#else
// OF1024((u64*)ctx->chaining);
//#endif
asm volatile ("emms");
}
@@ -83,18 +75,12 @@ HashReturn_gr init_groestl256(hashState_groestl256* ctx) {
u8 i = 0;
/* output size (in bits) must be a positive integer less than or
equal to 512, and divisible by 8 */
// if (LENGTH <= 0 || (LENGTH%8) || LENGTH > 512)
// return BAD_HASHBITLEN_GR;
/* set number of state columns and state size depending on
variant */
ctx->columns = COLS;
ctx->statesize = SIZE;
//#if (LENGTH <= 256)
ctx->v = SHoRT;
//#else
// ctx->v = LoNG;
//#endif
SET_CONSTANTS();
@@ -107,7 +93,6 @@ HashReturn_gr init_groestl256(hashState_groestl256* ctx) {
return FAIL_GR;
/* set initial value */
// ctx->chaining[ctx->columns-1] = U64BIG((u64)LENGTH);
ctx->chaining[ctx->columns-1] = U64BIG((u64)256);
INIT256(ctx->chaining);
@@ -115,7 +100,6 @@ HashReturn_gr init_groestl256(hashState_groestl256* ctx) {
/* set other variables */
ctx->buf_ptr = 0;
ctx->block_counter = 0;
ctx->bits_in_last_byte = 0;
return SUCCESS_GR;
}
@@ -133,7 +117,6 @@ HashReturn_gr reinit_groestl256(hashState_groestl256* ctx)
return FAIL_GR;
/* set initial value */
// ctx->chaining[ctx->columns-1] = U64BIG((u64)LENGTH);
ctx->chaining[ctx->columns-1] = 256;
INIT256(ctx->chaining);
@@ -141,111 +124,61 @@ HashReturn_gr reinit_groestl256(hashState_groestl256* ctx)
/* set other variables */
ctx->buf_ptr = 0;
ctx->block_counter = 0;
ctx->bits_in_last_byte = 0;
return SUCCESS_GR;
}
/* update state with databitlen bits of input */
HashReturn_gr update_groestl256(hashState_groestl256* ctx,
const BitSequence_gr* input,
DataLength_gr databitlen) {
HashReturn_gr update_groestl256( hashState_groestl256* ctx,
const BitSequence_gr* input,
DataLength_gr databitlen )
{
int index = 0;
int msglen = (int)(databitlen/8);
int rem = (int)(databitlen%8);
/* non-integral number of message bytes can only be supplied in the
last call to this function */
if (ctx->bits_in_last_byte) return FAIL_GR;
/* if the buffer contains data that has not yet been digested, first
add data to buffer until full */
// The following block of code never gets hit when hashing x11 or quark
// leave it here in case it might be needed.
// if (ctx->buf_ptr)
// {
// while (ctx->buf_ptr < ctx->statesize && index < msglen)
// {
// ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
// }
// if (ctx->buf_ptr < ctx->statesize)
// {
// /* buffer still not full, return */
// if (rem)
// {
// ctx->bits_in_last_byte = rem;
// ctx->buffer[(int)ctx->buf_ptr++] = input[index];
// }
// return SUCCESS_GR;
// }
// /* digest buffer */
// ctx->buf_ptr = 0;
// printf("error\n");
// Transform(ctx, ctx->buffer, ctx->statesize);
// end dead code
// }
/* digest bulk of message */
Transform256(ctx, input+index, msglen-index);
Transform256( ctx, input+index, msglen-index );
index += ((msglen-index)/ctx->statesize)*ctx->statesize;
/* store remaining data in buffer */
while (index < msglen)
{
ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
}
// Another block that doesn't get used by x11 or quark
// /* if non-integral number of bytes have been supplied, store
// remaining bits in last byte, together with information about
// number of bits */
// if (rem)
// {
// ctx->bits_in_last_byte = rem;
// ctx->buffer[(int)ctx->buf_ptr++] = input[index];
// }
return SUCCESS_GR;
}
#define BILB ctx->bits_in_last_byte
/* finalise: process remaining data (including padding), perform
output transformation, and write hash result to 'output' */
HashReturn_gr final_groestl256(hashState_groestl256* ctx,
BitSequence_gr* output) {
// int i, j = 0, hashbytelen = LENGTH/8;
HashReturn_gr final_groestl256( hashState_groestl256* ctx,
BitSequence_gr* output )
{
int i, j = 0, hashbytelen = 256/8;
u8 *s = (BitSequence_gr*)ctx->chaining;
/* pad with '1'-bit and first few '0'-bits */
if (BILB) {
ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-BILB);
BILB = 0;
}
else ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
/* pad with '0'-bits */
if (ctx->buf_ptr > ctx->statesize-LENGTHFIELDLEN) {
if ( ctx->buf_ptr > ctx->statesize-LENGTHFIELDLEN )
{
/* padding requires two blocks */
while (ctx->buf_ptr < ctx->statesize) {
while ( ctx->buf_ptr < ctx->statesize )
ctx->buffer[(int)ctx->buf_ptr++] = 0;
}
/* digest first padding block */
Transform256(ctx, ctx->buffer, ctx->statesize);
Transform256( ctx, ctx->buffer, ctx->statesize );
ctx->buf_ptr = 0;
}
while (ctx->buf_ptr < ctx->statesize-LENGTHFIELDLEN) {
while ( ctx->buf_ptr < ctx->statesize-LENGTHFIELDLEN )
ctx->buffer[(int)ctx->buf_ptr++] = 0;
}
/* length padding */
ctx->block_counter++;
ctx->buf_ptr = ctx->statesize;
while (ctx->buf_ptr > ctx->statesize-LENGTHFIELDLEN) {
while ( ctx->buf_ptr > ctx->statesize-LENGTHFIELDLEN )
{
ctx->buffer[(int)--ctx->buf_ptr] = (u8)ctx->block_counter;
ctx->block_counter >>= 8;
}
@@ -256,21 +189,8 @@ HashReturn_gr final_groestl256(hashState_groestl256* ctx,
OutputTransformation256(ctx);
/* store hash result in output */
for (i = ctx->statesize-hashbytelen; i < ctx->statesize; i++,j++) {
for ( i = ctx->statesize-hashbytelen; i < ctx->statesize; i++,j++ )
output[j] = s[i];
}
/* zeroise relevant variables and deallocate memory */
for (i = 0; i < ctx->columns; i++) {
ctx->chaining[i] = 0;
}
for (i = 0; i < ctx->statesize; i++) {
ctx->buffer[i] = 0;
}
// free(ctx->chaining);
// free(ctx->buffer);
return SUCCESS_GR;
}

View File

@@ -93,18 +93,18 @@ typedef struct {
__attribute__ ((aligned (32))) BitSequence_gr buffer[SIZE]; /* data buffer */
u64 block_counter; /* message block counter */
int buf_ptr; /* data buffer pointer */
int bits_in_last_byte; /* no. of message bits in last byte of
data buffer */
int columns; /* no. of columns in state */
int statesize; /* total no. of bytes in state */
Var v; /* LONG or SHORT */
} hashState_groestl256;
HashReturn_gr init_groestl(hashState_groestl256*);
HashReturn_gr reinit_groestl(hashState_groestl256*);
HashReturn_gr update_groestl(hashState_groestl256*, const BitSequence_gr*, DataLength_gr);
HashReturn_gr final_groestl(hashState_groestl256*, BitSequence_gr*);
HashReturn_gr hash_groestl(int, const BitSequence_gr*, DataLength_gr, BitSequence_gr*);
HashReturn_gr init_groestl( hashState_groestl256* );
HashReturn_gr reinit_groestl( hashState_groestl256* );
HashReturn_gr update_groestl( hashState_groestl256*, const BitSequence_gr*,
DataLength_gr );
HashReturn_gr final_groestl( hashState_groestl256*, BitSequence_gr* );
HashReturn_gr hash_groestl( int, const BitSequence_gr*, DataLength_gr,
BitSequence_gr* );
/* NIST API end */
#endif /* __hash_h */

View File

@@ -47,7 +47,7 @@ void bastionhash(void *output, const void *input)
HEFTY1(input, 80, hash);
init_luffa( &ctx_luffa, 512 );
update_luffa( &ctx_luffa, hash, 512 );
update_luffa( &ctx_luffa, hash, 64 );
final_luffa( &ctx_luffa, hash );
if (hash[0] & 0x8)
@@ -86,7 +86,7 @@ void bastionhash(void *output, const void *input)
#endif
} else {
init_luffa( &ctx_luffa, 512 );
update_luffa( &ctx_luffa, hash, 512 );
update_luffa( &ctx_luffa, hash, 64 );
final_luffa( &ctx_luffa, hash );
}
@@ -124,7 +124,7 @@ void bastionhash(void *output, const void *input)
sph_hamsi512_close(&ctx_hamsi, hash);
} else {
init_luffa( &ctx_luffa, 512 );
update_luffa( &ctx_luffa, hash, 512 );
update_luffa( &ctx_luffa, hash, 64 );
final_luffa( &ctx_luffa, hash );
}

View File

@@ -169,7 +169,7 @@ extern void hmq1725hash(void *state, const void *input)
sph_bmw512_close(&ctx.bmw2, hashB); //5
}
update_luffa( &ctx.luffa1, (BitSequence*)hashB, 512 );
update_luffa( &ctx.luffa1, (BitSequence*)hashB, 64 );
final_luffa( &ctx.luffa1, (BitSequence*)hashA );
cubehashUpdate( &ctx.cube, (BitSequence *)hashA, 64 );
@@ -222,7 +222,7 @@ extern void hmq1725hash(void *state, const void *input)
}
else
{
update_luffa( &ctx.luffa2, (BitSequence *)hashB, 512 );
update_luffa( &ctx.luffa2, (BitSequence *)hashB, 64 );
final_luffa( &ctx.luffa2, (BitSequence *)hashA );
}

View File

@@ -208,7 +208,7 @@ static void finalization512(hashState_luffa *state, uint32 *b);
/* initial values of chaining variables */
static const uint32 IV[40] = {
static const uint32 IV[40] __attribute((aligned(16))) = {
0xdbf78465,0x4eaa6fb4,0x44b051e0,0x6d251e69,
0xdef610bb,0xee058139,0x90152df4,0x6e292011,
0xde099fa3,0x70eee9a0,0xd9d2f256,0xc3b44b95,
@@ -222,7 +222,7 @@ static const uint32 IV[40] = {
};
/* Round Constants */
static const uint32 CNS_INIT[128] = {
static const uint32 CNS_INIT[128] __attribute((aligned(16))) = {
0xb213afa5,0xfc20d9d2,0xb6de10ed,0x303994a6,
0xe028c9bf,0xe25e72c1,0x01685f3d,0xe0337818,
0xc84ebe95,0x34552e25,0x70f47aae,0xc0e65299,
@@ -257,6 +257,7 @@ static const uint32 CNS_INIT[128] = {
0x00000000,0x00000000,0x00000000,0xfc053c31
};
__m128i CNS128[32];
__m128i ALLONE;
__m128i MASK;
@@ -265,58 +266,64 @@ HashReturn init_luffa(hashState_luffa *state, int hashbitlen)
{
int i;
state->hashbitlen = hashbitlen;
/* set the lower 32 bits to '1' */
MASK= _mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0xffffffff);
/* set all bits to '1' */
ALLONE = _mm_set_epi32(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff);
/* set the 32-bit round constant values to the 128-bit data field */
for ( i=0; i<32; i++ )
CNS128[i] = _mm_loadu_si128( (__m128i*)&CNS_INIT[i*4] );
CNS128[i] = _mm_load_si128( (__m128i*)&CNS_INIT[i*4] );
for ( i=0; i<10; i++ )
state->chainv[i] = _mm_loadu_si128( (__m128i*)&IV[i*4] );
state->bitlen[0] = 0;
state->bitlen[1] = 0;
state->rembitlen = 0;
memset(state->buffer, 0, sizeof state->buffer );
state->chainv[i] = _mm_load_si128( (__m128i*)&IV[i*4] );
// memset(state->buffer, 0, sizeof state->buffer );
return SUCCESS;
}
HashReturn update_luffa(hashState_luffa *state, const BitSequence *data, DataLength databitlen)
HashReturn update_luffa( hashState_luffa *state, const BitSequence *data,
size_t len )
{
HashReturn ret=SUCCESS;
int i;
int i, j;
int rem = len % 32;
int blocks = (int)( len / 32 );
uint8 *p = (uint8*)state->buffer;
for ( i=0; i<8; i++ )
// full blocks
for ( j = 0; j < blocks; j++ )
{
state->buffer[0] = BYTES_SWAP32( ((uint32*)data)[0] );
state->buffer[1] = BYTES_SWAP32( ((uint32*)data)[1] );
state->buffer[2] = BYTES_SWAP32( ((uint32*)data)[2] );
state->buffer[3] = BYTES_SWAP32( ((uint32*)data)[3] );
state->buffer[4] = BYTES_SWAP32( ((uint32*)data)[4] );
state->buffer[5] = BYTES_SWAP32( ((uint32*)data)[5] );
state->buffer[6] = BYTES_SWAP32( ((uint32*)data)[6] );
state->buffer[7] = BYTES_SWAP32( ((uint32*)data)[7] );
rnd512( state );
data += MSG_BLOCK_BYTE_LEN;
}
// remaining partial block, if any
for ( i = 0; i < rem/4; i++ )
state->buffer[i] = BYTES_SWAP32( ((uint32*)data)[i] );
rnd512( state );
data += MSG_BLOCK_BYTE_LEN;
state->rembitlen = 0;
for ( i=0; i<8; i++ )
state->buffer[i] = BYTES_SWAP32(((uint32*)data)[i]);
rnd512(state);
data += MSG_BLOCK_BYTE_LEN;
memset(p+1, 0, 31*sizeof(uint8));
p[0] = 0x80;
for ( i=0; i<8; i++ )
// padding of partial block
memset( p+rem+1, 0, (31-rem)*sizeof(uint8) );
p[rem] = 0x80;
for ( i = rem/4; i < 8; i++ )
state->buffer[i] = BYTES_SWAP32(state->buffer[i]);
rnd512(state);
return ret;
}
HashReturn final_luffa(hashState_luffa *state, BitSequence *hashval)
{
finalization512(state, (uint32*) hashval);
if ( state->hashbitlen > 512 )
finalization512( state, (uint32*)( hashval+128 ) );
return SUCCESS;
}
@@ -348,8 +355,8 @@ static void rnd512(hashState_luffa *state)
MULT2( t[0], t[1]);
msg[0] = _mm_loadu_si128 ( (__m128i*)&state->buffer[0] );
msg[1] = _mm_loadu_si128 ( (__m128i*)&state->buffer[4] );
msg[0] = _mm_load_si128 ( (__m128i*)&state->buffer[0] );
msg[1] = _mm_load_si128 ( (__m128i*)&state->buffer[4] );
msg[0] = _mm_shuffle_epi32( msg[0], 27 );
msg[1] = _mm_shuffle_epi32( msg[1], 27 );
@@ -457,10 +464,14 @@ static void rnd512(hashState_luffa *state)
chainv[1],chainv[3],chainv[5],chainv[7],
x[4], x[5], x[6], x[7] );
for ( i=0; i<8 ; i++ )
{
STEP_PART( &x[0], &CNS128[i*2], &tmp[0] );
}
STEP_PART( &x[0], &CNS128[ 0], &tmp[0] );
STEP_PART( &x[0], &CNS128[ 2], &tmp[0] );
STEP_PART( &x[0], &CNS128[ 4], &tmp[0] );
STEP_PART( &x[0], &CNS128[ 6], &tmp[0] );
STEP_PART( &x[0], &CNS128[ 8], &tmp[0] );
STEP_PART( &x[0], &CNS128[10], &tmp[0] );
STEP_PART( &x[0], &CNS128[12], &tmp[0] );
STEP_PART( &x[0], &CNS128[14], &tmp[0] );
MIXTON1024( x[0], x[1], x[2], x[3],
chainv[0], chainv[2], chainv[4],chainv[6],
@@ -468,11 +479,22 @@ static void rnd512(hashState_luffa *state)
chainv[1],chainv[3],chainv[5],chainv[7]);
/* Process last 256-bit block */
for ( i=0; i<8; i++ )
{
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS128[16+2*i],
CNS128[17+2*i], tmp[0], tmp[1] );
}
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS128[16], CNS128[17],
tmp[0], tmp[1] );
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS128[18], CNS128[19],
tmp[0], tmp[1] );
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS128[20], CNS128[21],
tmp[0], tmp[1] );
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS128[22], CNS128[23],
tmp[0], tmp[1] );
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS128[24], CNS128[25],
tmp[0], tmp[1] );
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS128[26], CNS128[27],
tmp[0], tmp[1] );
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS128[28], CNS128[29],
tmp[0], tmp[1] );
STEP_PART2( chainv[8], chainv[9], t[0], t[1], CNS128[30], CNS128[31],
tmp[0], tmp[1] );
return;
}
@@ -486,7 +508,7 @@ static void finalization512(hashState_luffa *state, uint32 *b)
{
__m128i* chainv = state->chainv;
__m128i t[2];
uint32 hash[8];
uint32 hash[8] __attribute((aligned(16)));
int i;
/*---- blank round with m=0 ----*/
@@ -507,10 +529,17 @@ static void finalization512(hashState_luffa *state, uint32 *b)
t[0] = _mm_shuffle_epi32(t[0], 27);
t[1] = _mm_shuffle_epi32(t[1], 27);
_mm_storeu_si128((__m128i*)&hash[0], t[0]);
_mm_storeu_si128((__m128i*)&hash[4], t[1]);
_mm_store_si128((__m128i*)&hash[0], t[0]);
_mm_store_si128((__m128i*)&hash[4], t[1]);
for (i=0;i<8;i++) b[i] = BYTES_SWAP32(hash[i]);
b[0] = BYTES_SWAP32(hash[0]);
b[1] = BYTES_SWAP32(hash[1]);
b[2] = BYTES_SWAP32(hash[2]);
b[3] = BYTES_SWAP32(hash[3]);
b[4] = BYTES_SWAP32(hash[4]);
b[5] = BYTES_SWAP32(hash[5]);
b[6] = BYTES_SWAP32(hash[6]);
b[7] = BYTES_SWAP32(hash[7]);
memset(state->buffer, 0, sizeof state->buffer );
rnd512(state);
@@ -529,10 +558,17 @@ static void finalization512(hashState_luffa *state, uint32 *b)
t[0] = _mm_shuffle_epi32(t[0], 27);
t[1] = _mm_shuffle_epi32(t[1], 27);
_mm_storeu_si128((__m128i*)&hash[0], t[0]);
_mm_storeu_si128((__m128i*)&hash[4], t[1]);
_mm_store_si128((__m128i*)&hash[0], t[0]);
_mm_store_si128((__m128i*)&hash[4], t[1]);
for (i=0;i<8;i++) b[8+i] = BYTES_SWAP32(hash[i]);
b[ 8] = BYTES_SWAP32(hash[0]);
b[ 9] = BYTES_SWAP32(hash[1]);
b[10] = BYTES_SWAP32(hash[2]);
b[11] = BYTES_SWAP32(hash[3]);
b[12] = BYTES_SWAP32(hash[4]);
b[13] = BYTES_SWAP32(hash[5]);
b[14] = BYTES_SWAP32(hash[6]);
b[15] = BYTES_SWAP32(hash[7]);
return;
}

View File

@@ -46,13 +46,18 @@
/*********************************/
typedef struct {
uint32 buffer[8]; /* Buffer to be hashed */
uint32 buffer[8] __attribute((aligned(16)));
__m128i chainv[10]; /* Chaining values */
uint64 bitlen[2]; /* Message length in bits */
uint32 rembitlen; /* Length of buffer data to be hashed */
// uint64 bitlen[2]; /* Message length in bits */
// uint32 rembitlen; /* Length of buffer data to be hashed */
int hashbitlen;
} hashState_luffa;
HashReturn init_luffa(hashState_luffa *state, int hashbitlen);
HashReturn update_luffa(hashState_luffa *state, const BitSequence *data, DataLength databitlen);
HashReturn final_luffa(hashState_luffa *state, BitSequence *hashval);
HashReturn init_luffa( hashState_luffa *state, int hashbitlen );
// len is in bytes
HashReturn update_luffa( hashState_luffa *state, const BitSequence *data,
size_t len );
HashReturn final_luffa( hashState_luffa *state, BitSequence *hashval );

View File

@@ -6,9 +6,6 @@
#include <string.h>
#include <stdio.h>
// Optimized luffa doesn't find blocks with qubit
#define LUFFA_SSE2_BROKEN
#include "algo/luffa/sph_luffa.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/shavite/sph_shavite.h"
@@ -26,11 +23,7 @@
typedef struct
{
#ifdef LUFFA_SSE2_BROKEN
sph_luffa512_context luffa;
#else
hashState_luffa luffa;
#endif
hashState_luffa luffa;
cubehashParam cubehash;
sph_shavite512_context shavite;
hashState_sd simd;
@@ -45,11 +38,7 @@ qubit_ctx_holder qubit_ctx;
void init_qubit_ctx()
{
#ifdef LUFFA_SSE2_BROKEN
sph_luffa512_init(&qubit_ctx.luffa);
#else
init_luffa(&qubit_ctx.luffa,512);
#endif
cubehashInit(&qubit_ctx.cubehash,512,16,32);
sph_shavite512_init(&qubit_ctx.shavite);
init_sd(&qubit_ctx.simd,512);
@@ -68,14 +57,8 @@ void qubithash(void *output, const void *input)
qubit_ctx_holder ctx;
memcpy( &ctx, &qubit_ctx, sizeof(qubit_ctx) );
#ifdef LUFFA_SSE2_BROKEN
sph_luffa512 (&ctx.luffa, input, 80);
sph_luffa512_close(&ctx.luffa, (void*) hash);
#else
// init_luffa(&qubit_ctx.luffa,512);
update_luffa( &ctx.luffa, (const BitSequence*)input, 80 );
final_luffa( &ctx.luffa, (BitSequence*)hash);
#endif
cubehashUpdate( &ctx.cubehash, (const byte*) hash,64);
cubehashDigest( &ctx.cubehash, (byte*)hash);

View File

@@ -11,7 +11,6 @@
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/luffa/sse2/luffa_for_sse2.h"
#include "algo/cubehash/sse2/cubehash_sse2.h"
@@ -27,7 +26,6 @@
#define HASH_FUNC_COUNT 8
#define HASH_FUNC_COUNT_PERMUTATIONS 40320
//static int permutation[HASH_FUNC_COUNT] = { 0 };
static __thread uint32_t s_ntime = UINT32_MAX;
static __thread int permutation[HASH_FUNC_COUNT] = { 0 };
@@ -84,15 +82,13 @@ typedef struct {
sph_skein512_context skein;
sph_jh512_context jh;
sph_keccak512_context keccak;
sph_luffa512_context luffa;
// hashState_luffa luffa;
hashState_luffa luffa;
cubehashParam cube;
// ctx optimization doesn't work for groestl, do it the old way
//#ifdef NO_AES_NI
// sph_groestl512_context groestl;
//#else
// hashState_groestl groestl;
//#endif
#ifdef NO_AES_NI
sph_groestl512_context groestl;
#else
hashState_groestl groestl;
#endif
} tt_ctx_holder;
tt_ctx_holder tt_ctx;
@@ -104,14 +100,13 @@ void init_tt_ctx()
sph_skein512_init( &tt_ctx.skein );
sph_jh512_init( &tt_ctx.jh );
sph_keccak512_init( &tt_ctx.keccak );
sph_luffa512_init( &tt_ctx.luffa );
// init_luffa( &tt_ctx.luffa, 512 );
init_luffa( &tt_ctx.luffa, 512 );
cubehashInit( &tt_ctx.cube, 512, 16, 32 );
//#ifdef NO_AES_NI
// sph_groestl512_init( &tt_ctx.groestl );
//#else
// init_groestl( &tt_ctx.groestl );
//#endif
#ifdef NO_AES_NI
sph_groestl512_init( &tt_ctx.groestl );
#else
init_groestl( &tt_ctx.groestl );
#endif
};
void timetravel_hash(void *output, const void *input)
@@ -120,18 +115,10 @@ void timetravel_hash(void *output, const void *input)
uint32_t *hashA, *hashB;
uint32_t dataLen = 64;
uint32_t *work_data = (uint32_t *)input;
const uint32_t timestamp = work_data[17];
tt_ctx_holder ctx;
memcpy( &ctx, &tt_ctx, sizeof(tt_ctx) );
int i;
// workaround for initializing groestl ctx
#ifdef NO_AES_NI
sph_groestl512_context ctx_groestl;
#else
hashState_groestl ctx_groestl;
#endif
for ( i = 0; i < HASH_FUNC_COUNT; i++ )
{
if (i == 0)
@@ -149,51 +136,39 @@ void timetravel_hash(void *output, const void *input)
switch ( permutation[i] )
{
case 0:
// sph_blake512_init( &ctx.blake );
sph_blake512( &ctx.blake, hashA, dataLen );
sph_blake512_close( &ctx.blake, hashB );
break;
case 1:
// sph_bmw512_init( &ctx.bmw );
sph_bmw512( &ctx.bmw, hashA, dataLen );
sph_bmw512_close( &ctx.bmw, hashB );
break;
case 2:
#ifdef NO_AES_NI
sph_groestl512_init( &ctx_groestl );
sph_groestl512( &ctx_groestl, hashA, dataLen );
sph_groestl512_close( &ctx_groestl, hashB );
sph_groestl512( &ctx.groestl, hashA, dataLen );
sph_groestl512_close( &ctx.groestl, hashB );
#else
init_groestl( &ctx_groestl );
update_groestl( &ctx_groestl, (char*)hashA, dataLen*8 );
final_groestl( &ctx_groestl, (char*)hashB );
update_groestl( &ctx.groestl, (char*)hashA, dataLen*8 );
final_groestl( &ctx.groestl, (char*)hashB );
#endif
break;
case 3:
// sph_skein512_init( &ctx.skein );
sph_skein512( &ctx.skein, hashA, dataLen );
sph_skein512_close( &ctx.skein, hashB );
break;
case 4:
// sph_jh512_init( &ctx.jh );
sph_jh512( &ctx.jh, hashA, dataLen );
sph_jh512_close( &ctx.jh, hashB);
break;
case 5:
// sph_keccak512_init( &ctx.keccak );
sph_keccak512( &ctx.keccak, hashA, dataLen );
sph_keccak512_close( &ctx.keccak, hashB );
break;
case 6:
// sph_luffa512_init( &ctx.luffa );
sph_luffa512 ( &ctx.luffa, hashA, dataLen );
sph_luffa512_close( &ctx.luffa, hashB );
// init_luffa( &ctx.luffa, 512 );
// update_luffa( &ctx.luffa, (const BitSequence*)hashA, dataLen*8 );
// final_luffa( &ctx.luffa, (BitSequence*)hashB );
update_luffa( &ctx.luffa, (const BitSequence*)hashA, dataLen );
final_luffa( &ctx.luffa, (BitSequence*)hashB );
break;
case 7:
// cubehashInit( &ctx.cube, 512, 16, 32 );
cubehashUpdate( &ctx.cube, (const byte*) hashA, dataLen );
cubehashDigest( &ctx.cube, (byte*)hashB );
break;

View File

@@ -115,7 +115,7 @@ void c11hash( void *output, const void *input )
SKN_U;
SKN_C;
update_luffa( &ctx.luffa, (const BitSequence*)hash,512);
update_luffa( &ctx.luffa, (const BitSequence*)hash,64);
final_luffa( &ctx.luffa, (BitSequence*)hash+64);
cubehashUpdate( &ctx.cube, (const byte*) hash+64,64);

View File

@@ -111,7 +111,7 @@ static void x11_hash( void *state, const void *input )
// asm volatile ("emms");
update_luffa( &ctx.luffa, (const BitSequence*)hash, 512 );
update_luffa( &ctx.luffa, (const BitSequence*)hash, 64 );
final_luffa( &ctx.luffa, (BitSequence*)hash+64 );
cubehashUpdate( &ctx.cube, (const byte*) hash+64, 64 );

View File

@@ -200,7 +200,7 @@ static inline void x11evo_hash( void *state, const void *input )
sph_keccak512_close( &ctx.keccak, (char*)hash );
break;
case 6:
update_luffa( &ctx.luffa, (char*)hash, 512 );
update_luffa( &ctx.luffa, (char*)hash, 64 );
final_luffa( &ctx.luffa, (char*)hash );
break;
case 7:

View File

@@ -113,7 +113,7 @@ void sibhash(void *output, const void *input)
sph_gost512(&ctx.gost, hashA, 64);
sph_gost512_close(&ctx.gost, hashB);
update_luffa( &ctx.luffa, (const BitSequence*)hashB,512);
update_luffa( &ctx.luffa, (const BitSequence*)hashB,64);
final_luffa( &ctx.luffa, (BitSequence*)hashA);
cubehashUpdate( &ctx.cube, (const byte*) hashA,64);

View File

@@ -137,7 +137,7 @@ static void x13hash(void *output, const void *input)
KEC_C;
//--- luffa7
update_luffa( &ctx.luffa, (const BitSequence*)hash,512);
update_luffa( &ctx.luffa, (const BitSequence*)hash,64);
final_luffa( &ctx.luffa, (BitSequence*)hashB);
// 8 Cube

View File

@@ -139,7 +139,7 @@ static void x14hash(void *output, const void *input)
KEC_C;
//--- luffa7
update_luffa( &ctx.luffa, (const BitSequence*)hash,512);
update_luffa( &ctx.luffa, (const BitSequence*)hash,64);
final_luffa( &ctx.luffa, (BitSequence*)hashB);
// 8 Cube

View File

@@ -140,7 +140,7 @@ static void x15hash(void *output, const void *input)
KEC_C;
//--- luffa7
update_luffa( &ctx.luffa, (const BitSequence*)hash,512);
update_luffa( &ctx.luffa, (const BitSequence*)hash,64);
final_luffa( &ctx.luffa, (BitSequence*)hashB);
// 8 Cube

View File

@@ -146,7 +146,7 @@ static void x17hash(void *output, const void *input)
KEC_C;
//--- luffa7
update_luffa( &ctx.luffa, (const BitSequence*)hash,512);
update_luffa( &ctx.luffa, (const BitSequence*)hash,64);
final_luffa( &ctx.luffa, (BitSequence*)hashB);
// 8 Cube

View File

@@ -12,7 +12,7 @@
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/shavite/sph_shavite.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/luffa/sse2/luffa_for_sse2.h"
#include "algo/hamsi/sph_hamsi.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/shabal/sph_shabal.h"
@@ -36,7 +36,7 @@ typedef struct {
sph_skein512_context skein;
sph_jh512_context jh;
sph_keccak512_context keccak;
sph_luffa512_context luffa;
hashState_luffa luffa;
cubehashParam cubehash;
sph_shavite512_context shavite;
hashState_sd simd;
@@ -64,7 +64,7 @@ void init_xevan_ctx()
sph_skein512_init(&xevan_ctx.skein);
sph_jh512_init(&xevan_ctx.jh);
sph_keccak512_init(&xevan_ctx.keccak);
sph_luffa512_init(&xevan_ctx.luffa);
init_luffa( &xevan_ctx.luffa, 512 );
cubehashInit( &xevan_ctx.cubehash, 512, 16, 32 );
sph_shavite512_init( &xevan_ctx.shavite );
init_sd( &xevan_ctx.simd, 512 );
@@ -116,8 +116,10 @@ void xevan_hash(void *output, const void *input)
sph_keccak512(&ctx.keccak, hash, dataLen);
sph_keccak512_close(&ctx.keccak, hash);
sph_luffa512(&ctx.luffa, hash, dataLen);
sph_luffa512_close(&ctx.luffa, hash);
update_luffa( &ctx.luffa, (const BitSequence*)hash, dataLen );
final_luffa( &ctx.luffa, (BitSequence*)hash );
// sph_luffa512(&ctx.luffa, hash, dataLen);
// sph_luffa512_close(&ctx.luffa, hash);
cubehashUpdate( &ctx.cubehash, (const byte*) hash, dataLen );
cubehashDigest( &ctx.cubehash, (byte*)hash);
@@ -181,8 +183,8 @@ void xevan_hash(void *output, const void *input)
sph_keccak512(&ctx.keccak, hash, dataLen);
sph_keccak512_close(&ctx.keccak, hash);
sph_luffa512(&ctx.luffa, hash, dataLen);
sph_luffa512_close(&ctx.luffa, hash);
update_luffa( &ctx.luffa, (const BitSequence*)hash, dataLen );
final_luffa( &ctx.luffa, (BitSequence*)hash );
cubehashUpdate( &ctx.cubehash, (const byte*) hash, dataLen );
cubehashDigest( &ctx.cubehash, (byte*)hash);

View File

@@ -1,4 +1,4 @@
AC_INIT([cpuminer-opt], [3.5.2])
AC_INIT([cpuminer-opt], [3.5.3])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM