This commit is contained in:
Jay D Dee
2019-05-19 13:39:45 -04:00
parent bfd1c002f9
commit e1aead3c76
139 changed files with 10907 additions and 4218 deletions

View File

@@ -60,336 +60,174 @@ MYALIGN const unsigned int zero[] = {0x00000000, 0x00000000, 0x00000000, 0x000
MYALIGN const unsigned int mul2ipt[] = {0x728efc00, 0x6894e61a, 0x3fc3b14d, 0x25d9ab57, 0xfd5ba600, 0x2a8c71d7, 0x1eb845e3, 0xc96f9234};
//#include "crypto_hash.h"
int crypto_hash(
unsigned char *out,
const unsigned char *in,
unsigned long long inlen
)
{
if(hash_echo(512, in, inlen * 8, out) == SUCCESS)
return 0;
return -1;
}
/*
int main()
{
return 0;
}
*/
#if 0
void DumpState(__m128i *ps)
{
int i, j, k;
unsigned int ucol;
for(j = 0; j < 4; j++)
{
for(i = 0; i < 4; i++)
{
printf("row %d,col %d : ", i, j);
for(k = 0; k < 4; k++)
{
ucol = *((int*)ps + 16 * i + 4 * j + k);
printf("%02x%02x%02x%02x ", (ucol >> 0) & 0xff, (ucol >> 8) & 0xff, (ucol >> 16) & 0xff, (ucol >> 24) & 0xff);
}
printf("\n");
}
}
printf("\n");
}
#endif
#ifndef NO_AES_NI
#define ECHO_SUBBYTES(state, i, j) \
state[i][j] = _mm_aesenc_si128(state[i][j], k1);\
state[i][j] = _mm_aesenc_si128(state[i][j], M128(zero));\
k1 = _mm_add_epi32(k1, M128(const1))
#else
#define ECHO_SUBBYTES(state, i, j) \
AES_ROUND_VPERM(state[i][j], t1, t2, t3, t4, s1, s2, s3);\
state[i][j] = _mm_xor_si128(state[i][j], k1);\
AES_ROUND_VPERM(state[i][j], t1, t2, t3, t4, s1, s2, s3);\
k1 = _mm_add_epi32(k1, M128(const1))
#define ECHO_SUB_AND_MIX(state, i, j, state2, c, r1, r2, r3, r4) \
AES_ROUND_VPERM_CORE(state[i][j], t1, t2, t3, t4, s1, s2, s3);\
ktemp = k1;\
TRANSFORM(ktemp, _k_ipt, t1, t4);\
state[i][j] = _mm_xor_si128(state[i][j], ktemp);\
AES_ROUND_VPERM_CORE(state[i][j], t1, t2, t3, t4, s1, s2, s3);\
k1 = _mm_add_epi32(k1, M128(const1));\
s1 = state[i][j];\
s2 = s1;\
TRANSFORM(s2, mul2ipt, t1, t2);\
s3 = _mm_xor_si128(s1, s2);\
state2[r1][c] = _mm_xor_si128(state2[r1][c], s2);\
state2[r2][c] = _mm_xor_si128(state2[r2][c], s1);\
state2[r3][c] = _mm_xor_si128(state2[r3][c], s1);\
state2[r4][c] = _mm_xor_si128(state2[r4][c], s3)
#endif
state[i][j] = _mm_aesenc_si128(state[i][j], k1);\
state[i][j] = _mm_aesenc_si128(state[i][j], M128(zero));\
k1 = _mm_add_epi32(k1, M128(const1))
#define ECHO_MIXBYTES(state1, state2, j, t1, t2, s2) \
s2 = _mm_add_epi8(state1[0][j], state1[0][j]);\
t1 = _mm_srli_epi16(state1[0][j], 7);\
t1 = _mm_and_si128(t1, M128(lsbmask));\
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
s2 = _mm_xor_si128(s2, t2);\
state2[0][j] = s2;\
state2[1][j] = state1[0][j];\
state2[2][j] = state1[0][j];\
state2[3][j] = _mm_xor_si128(s2, state1[0][j]);\
s2 = _mm_add_epi8(state1[1][(j + 1) & 3], state1[1][(j + 1) & 3]);\
t1 = _mm_srli_epi16(state1[1][(j + 1) & 3], 7);\
t1 = _mm_and_si128(t1, M128(lsbmask));\
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
s2 = _mm_xor_si128(s2, t2);\
state2[0][j] = _mm_xor_si128(state2[0][j], _mm_xor_si128(s2, state1[1][(j + 1) & 3]));\
state2[1][j] = _mm_xor_si128(state2[1][j], s2);\
state2[2][j] = _mm_xor_si128(state2[2][j], state1[1][(j + 1) & 3]);\
state2[3][j] = _mm_xor_si128(state2[3][j], state1[1][(j + 1) & 3]);\
s2 = _mm_add_epi8(state1[2][(j + 2) & 3], state1[2][(j + 2) & 3]);\
t1 = _mm_srli_epi16(state1[2][(j + 2) & 3], 7);\
t1 = _mm_and_si128(t1, M128(lsbmask));\
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
s2 = _mm_xor_si128(s2, t2);\
state2[0][j] = _mm_xor_si128(state2[0][j], state1[2][(j + 2) & 3]);\
state2[1][j] = _mm_xor_si128(state2[1][j], _mm_xor_si128(s2, state1[2][(j + 2) & 3]));\
state2[2][j] = _mm_xor_si128(state2[2][j], s2);\
state2[3][j] = _mm_xor_si128(state2[3][j], state1[2][(j + 2) & 3]);\
s2 = _mm_add_epi8(state1[3][(j + 3) & 3], state1[3][(j + 3) & 3]);\
t1 = _mm_srli_epi16(state1[3][(j + 3) & 3], 7);\
t1 = _mm_and_si128(t1, M128(lsbmask));\
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
s2 = _mm_xor_si128(s2, t2);\
state2[0][j] = _mm_xor_si128(state2[0][j], state1[3][(j + 3) & 3]);\
state2[1][j] = _mm_xor_si128(state2[1][j], state1[3][(j + 3) & 3]);\
state2[2][j] = _mm_xor_si128(state2[2][j], _mm_xor_si128(s2, state1[3][(j + 3) & 3]));\
state2[3][j] = _mm_xor_si128(state2[3][j], s2)
s2 = _mm_add_epi8(state1[0][j], state1[0][j]);\
t1 = _mm_srli_epi16(state1[0][j], 7);\
t1 = _mm_and_si128(t1, M128(lsbmask));\
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
s2 = _mm_xor_si128(s2, t2);\
state2[0][j] = s2;\
state2[1][j] = state1[0][j];\
state2[2][j] = state1[0][j];\
state2[3][j] = _mm_xor_si128(s2, state1[0][j]);\
s2 = _mm_add_epi8(state1[1][(j + 1) & 3], state1[1][(j + 1) & 3]);\
t1 = _mm_srli_epi16(state1[1][(j + 1) & 3], 7);\
t1 = _mm_and_si128(t1, M128(lsbmask));\
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
s2 = _mm_xor_si128(s2, t2);\
state2[0][j] = _mm_xor_si128(state2[0][j], _mm_xor_si128(s2, state1[1][(j + 1) & 3]));\
state2[1][j] = _mm_xor_si128(state2[1][j], s2);\
state2[2][j] = _mm_xor_si128(state2[2][j], state1[1][(j + 1) & 3]);\
state2[3][j] = _mm_xor_si128(state2[3][j], state1[1][(j + 1) & 3]);\
s2 = _mm_add_epi8(state1[2][(j + 2) & 3], state1[2][(j + 2) & 3]);\
t1 = _mm_srli_epi16(state1[2][(j + 2) & 3], 7);\
t1 = _mm_and_si128(t1, M128(lsbmask));\
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
s2 = _mm_xor_si128(s2, t2);\
state2[0][j] = _mm_xor_si128(state2[0][j], state1[2][(j + 2) & 3]);\
state2[1][j] = _mm_xor_si128(state2[1][j], _mm_xor_si128(s2, state1[2][(j + 2) & 3]));\
state2[2][j] = _mm_xor_si128(state2[2][j], s2);\
state2[3][j] = _mm_xor_si128(state2[3][j], state1[2][(j + 2) & 3]);\
s2 = _mm_add_epi8(state1[3][(j + 3) & 3], state1[3][(j + 3) & 3]);\
t1 = _mm_srli_epi16(state1[3][(j + 3) & 3], 7);\
t1 = _mm_and_si128(t1, M128(lsbmask));\
t2 = _mm_shuffle_epi8(M128(mul2mask), t1);\
s2 = _mm_xor_si128(s2, t2);\
state2[0][j] = _mm_xor_si128(state2[0][j], state1[3][(j + 3) & 3]);\
state2[1][j] = _mm_xor_si128(state2[1][j], state1[3][(j + 3) & 3]);\
state2[2][j] = _mm_xor_si128(state2[2][j], _mm_xor_si128(s2, state1[3][(j + 3) & 3]));\
state2[3][j] = _mm_xor_si128(state2[3][j], s2)
#define ECHO_ROUND_UNROLL2 \
ECHO_SUBBYTES(_state, 0, 0);\
ECHO_SUBBYTES(_state, 1, 0);\
ECHO_SUBBYTES(_state, 2, 0);\
ECHO_SUBBYTES(_state, 3, 0);\
ECHO_SUBBYTES(_state, 0, 1);\
ECHO_SUBBYTES(_state, 1, 1);\
ECHO_SUBBYTES(_state, 2, 1);\
ECHO_SUBBYTES(_state, 3, 1);\
ECHO_SUBBYTES(_state, 0, 2);\
ECHO_SUBBYTES(_state, 1, 2);\
ECHO_SUBBYTES(_state, 2, 2);\
ECHO_SUBBYTES(_state, 3, 2);\
ECHO_SUBBYTES(_state, 0, 3);\
ECHO_SUBBYTES(_state, 1, 3);\
ECHO_SUBBYTES(_state, 2, 3);\
ECHO_SUBBYTES(_state, 3, 3);\
ECHO_MIXBYTES(_state, _state2, 0, t1, t2, s2);\
ECHO_MIXBYTES(_state, _state2, 1, t1, t2, s2);\
ECHO_MIXBYTES(_state, _state2, 2, t1, t2, s2);\
ECHO_MIXBYTES(_state, _state2, 3, t1, t2, s2);\
ECHO_SUBBYTES(_state2, 0, 0);\
ECHO_SUBBYTES(_state2, 1, 0);\
ECHO_SUBBYTES(_state2, 2, 0);\
ECHO_SUBBYTES(_state2, 3, 0);\
ECHO_SUBBYTES(_state2, 0, 1);\
ECHO_SUBBYTES(_state2, 1, 1);\
ECHO_SUBBYTES(_state2, 2, 1);\
ECHO_SUBBYTES(_state2, 3, 1);\
ECHO_SUBBYTES(_state2, 0, 2);\
ECHO_SUBBYTES(_state2, 1, 2);\
ECHO_SUBBYTES(_state2, 2, 2);\
ECHO_SUBBYTES(_state2, 3, 2);\
ECHO_SUBBYTES(_state2, 0, 3);\
ECHO_SUBBYTES(_state2, 1, 3);\
ECHO_SUBBYTES(_state2, 2, 3);\
ECHO_SUBBYTES(_state2, 3, 3);\
ECHO_MIXBYTES(_state2, _state, 0, t1, t2, s2);\
ECHO_MIXBYTES(_state2, _state, 1, t1, t2, s2);\
ECHO_MIXBYTES(_state2, _state, 2, t1, t2, s2);\
ECHO_MIXBYTES(_state2, _state, 3, t1, t2, s2)
ECHO_SUBBYTES(_state, 0, 0);\
ECHO_SUBBYTES(_state, 1, 0);\
ECHO_SUBBYTES(_state, 2, 0);\
ECHO_SUBBYTES(_state, 3, 0);\
ECHO_SUBBYTES(_state, 0, 1);\
ECHO_SUBBYTES(_state, 1, 1);\
ECHO_SUBBYTES(_state, 2, 1);\
ECHO_SUBBYTES(_state, 3, 1);\
ECHO_SUBBYTES(_state, 0, 2);\
ECHO_SUBBYTES(_state, 1, 2);\
ECHO_SUBBYTES(_state, 2, 2);\
ECHO_SUBBYTES(_state, 3, 2);\
ECHO_SUBBYTES(_state, 0, 3);\
ECHO_SUBBYTES(_state, 1, 3);\
ECHO_SUBBYTES(_state, 2, 3);\
ECHO_SUBBYTES(_state, 3, 3);\
ECHO_MIXBYTES(_state, _state2, 0, t1, t2, s2);\
ECHO_MIXBYTES(_state, _state2, 1, t1, t2, s2);\
ECHO_MIXBYTES(_state, _state2, 2, t1, t2, s2);\
ECHO_MIXBYTES(_state, _state2, 3, t1, t2, s2);\
ECHO_SUBBYTES(_state2, 0, 0);\
ECHO_SUBBYTES(_state2, 1, 0);\
ECHO_SUBBYTES(_state2, 2, 0);\
ECHO_SUBBYTES(_state2, 3, 0);\
ECHO_SUBBYTES(_state2, 0, 1);\
ECHO_SUBBYTES(_state2, 1, 1);\
ECHO_SUBBYTES(_state2, 2, 1);\
ECHO_SUBBYTES(_state2, 3, 1);\
ECHO_SUBBYTES(_state2, 0, 2);\
ECHO_SUBBYTES(_state2, 1, 2);\
ECHO_SUBBYTES(_state2, 2, 2);\
ECHO_SUBBYTES(_state2, 3, 2);\
ECHO_SUBBYTES(_state2, 0, 3);\
ECHO_SUBBYTES(_state2, 1, 3);\
ECHO_SUBBYTES(_state2, 2, 3);\
ECHO_SUBBYTES(_state2, 3, 3);\
ECHO_MIXBYTES(_state2, _state, 0, t1, t2, s2);\
ECHO_MIXBYTES(_state2, _state, 1, t1, t2, s2);\
ECHO_MIXBYTES(_state2, _state, 2, t1, t2, s2);\
ECHO_MIXBYTES(_state2, _state, 3, t1, t2, s2)
#define SAVESTATE(dst, src)\
dst[0][0] = src[0][0];\
dst[0][1] = src[0][1];\
dst[0][2] = src[0][2];\
dst[0][3] = src[0][3];\
dst[1][0] = src[1][0];\
dst[1][1] = src[1][1];\
dst[1][2] = src[1][2];\
dst[1][3] = src[1][3];\
dst[2][0] = src[2][0];\
dst[2][1] = src[2][1];\
dst[2][2] = src[2][2];\
dst[2][3] = src[2][3];\
dst[3][0] = src[3][0];\
dst[3][1] = src[3][1];\
dst[3][2] = src[3][2];\
dst[3][3] = src[3][3]
dst[0][0] = src[0][0];\
dst[0][1] = src[0][1];\
dst[0][2] = src[0][2];\
dst[0][3] = src[0][3];\
dst[1][0] = src[1][0];\
dst[1][1] = src[1][1];\
dst[1][2] = src[1][2];\
dst[1][3] = src[1][3];\
dst[2][0] = src[2][0];\
dst[2][1] = src[2][1];\
dst[2][2] = src[2][2];\
dst[2][3] = src[2][3];\
dst[3][0] = src[3][0];\
dst[3][1] = src[3][1];\
dst[3][2] = src[3][2];\
dst[3][3] = src[3][3]
void Compress(hashState_echo *ctx, const unsigned char *pmsg, unsigned int uBlockCount)
{
unsigned int r, b, i, j;
// __m128i t1, t2, t3, t4, s1, s2, s3, k1, ktemp;
__m128i t1, t2, s2, k1;
__m128i _state[4][4], _state2[4][4], _statebackup[4][4];
unsigned int r, b, i, j;
__m128i t1, t2, s2, k1;
__m128i _state[4][4], _state2[4][4], _statebackup[4][4];
for(i = 0; i < 4; i++)
for(j = 0; j < ctx->uHashSize / 256; j++)
_state[i][j] = ctx->state[i][j];
for(i = 0; i < 4; i++)
for(j = 0; j < ctx->uHashSize / 256; j++)
_state[i][j] = ctx->state[i][j];
for(b = 0; b < uBlockCount; b++)
{
ctx->k = _mm_add_epi64(ctx->k, ctx->const1536);
#ifdef NO_AES_NI
// transform cv
for(i = 0; i < 4; i++)
for(j = 0; j < ctx->uHashSize / 256; j++)
{
TRANSFORM(_state[i][j], _k_ipt, t1, t2);
}
#endif
for(b = 0; b < uBlockCount; b++)
// load message
for(j = ctx->uHashSize / 256; j < 4; j++)
{
ctx->k = _mm_add_epi64(ctx->k, ctx->const1536);
// load message
for(j = ctx->uHashSize / 256; j < 4; j++)
{
for(i = 0; i < 4; i++)
{
_state[i][j] = _mm_loadu_si128((__m128i*)pmsg + 4 * (j - (ctx->uHashSize / 256)) + i);
#ifdef NO_AES_NI
// transform message
TRANSFORM(_state[i][j], _k_ipt, t1, t2);
#endif
}
}
// save state
SAVESTATE(_statebackup, _state);
k1 = ctx->k;
#ifndef NO_AES_NI
for(r = 0; r < ctx->uRounds / 2; r++)
{
ECHO_ROUND_UNROLL2;
}
#else
for(r = 0; r < ctx->uRounds / 2; r++)
{
_state2[0][0] = M128(zero); _state2[1][0] = M128(zero); _state2[2][0] = M128(zero); _state2[3][0] = M128(zero);
_state2[0][1] = M128(zero); _state2[1][1] = M128(zero); _state2[2][1] = M128(zero); _state2[3][1] = M128(zero);
_state2[0][2] = M128(zero); _state2[1][2] = M128(zero); _state2[2][2] = M128(zero); _state2[3][2] = M128(zero);
_state2[0][3] = M128(zero); _state2[1][3] = M128(zero); _state2[2][3] = M128(zero); _state2[3][3] = M128(zero);
ECHO_SUB_AND_MIX(_state, 0, 0, _state2, 0, 0, 1, 2, 3);
ECHO_SUB_AND_MIX(_state, 1, 0, _state2, 3, 1, 2, 3, 0);
ECHO_SUB_AND_MIX(_state, 2, 0, _state2, 2, 2, 3, 0, 1);
ECHO_SUB_AND_MIX(_state, 3, 0, _state2, 1, 3, 0, 1, 2);
ECHO_SUB_AND_MIX(_state, 0, 1, _state2, 1, 0, 1, 2, 3);
ECHO_SUB_AND_MIX(_state, 1, 1, _state2, 0, 1, 2, 3, 0);
ECHO_SUB_AND_MIX(_state, 2, 1, _state2, 3, 2, 3, 0, 1);
ECHO_SUB_AND_MIX(_state, 3, 1, _state2, 2, 3, 0, 1, 2);
ECHO_SUB_AND_MIX(_state, 0, 2, _state2, 2, 0, 1, 2, 3);
ECHO_SUB_AND_MIX(_state, 1, 2, _state2, 1, 1, 2, 3, 0);
ECHO_SUB_AND_MIX(_state, 2, 2, _state2, 0, 2, 3, 0, 1);
ECHO_SUB_AND_MIX(_state, 3, 2, _state2, 3, 3, 0, 1, 2);
ECHO_SUB_AND_MIX(_state, 0, 3, _state2, 3, 0, 1, 2, 3);
ECHO_SUB_AND_MIX(_state, 1, 3, _state2, 2, 1, 2, 3, 0);
ECHO_SUB_AND_MIX(_state, 2, 3, _state2, 1, 2, 3, 0, 1);
ECHO_SUB_AND_MIX(_state, 3, 3, _state2, 0, 3, 0, 1, 2);
_state[0][0] = M128(zero); _state[1][0] = M128(zero); _state[2][0] = M128(zero); _state[3][0] = M128(zero);
_state[0][1] = M128(zero); _state[1][1] = M128(zero); _state[2][1] = M128(zero); _state[3][1] = M128(zero);
_state[0][2] = M128(zero); _state[1][2] = M128(zero); _state[2][2] = M128(zero); _state[3][2] = M128(zero);
_state[0][3] = M128(zero); _state[1][3] = M128(zero); _state[2][3] = M128(zero); _state[3][3] = M128(zero);
ECHO_SUB_AND_MIX(_state2, 0, 0, _state, 0, 0, 1, 2, 3);
ECHO_SUB_AND_MIX(_state2, 1, 0, _state, 3, 1, 2, 3, 0);
ECHO_SUB_AND_MIX(_state2, 2, 0, _state, 2, 2, 3, 0, 1);
ECHO_SUB_AND_MIX(_state2, 3, 0, _state, 1, 3, 0, 1, 2);
ECHO_SUB_AND_MIX(_state2, 0, 1, _state, 1, 0, 1, 2, 3);
ECHO_SUB_AND_MIX(_state2, 1, 1, _state, 0, 1, 2, 3, 0);
ECHO_SUB_AND_MIX(_state2, 2, 1, _state, 3, 2, 3, 0, 1);
ECHO_SUB_AND_MIX(_state2, 3, 1, _state, 2, 3, 0, 1, 2);
ECHO_SUB_AND_MIX(_state2, 0, 2, _state, 2, 0, 1, 2, 3);
ECHO_SUB_AND_MIX(_state2, 1, 2, _state, 1, 1, 2, 3, 0);
ECHO_SUB_AND_MIX(_state2, 2, 2, _state, 0, 2, 3, 0, 1);
ECHO_SUB_AND_MIX(_state2, 3, 2, _state, 3, 3, 0, 1, 2);
ECHO_SUB_AND_MIX(_state2, 0, 3, _state, 3, 0, 1, 2, 3);
ECHO_SUB_AND_MIX(_state2, 1, 3, _state, 2, 1, 2, 3, 0);
ECHO_SUB_AND_MIX(_state2, 2, 3, _state, 1, 2, 3, 0, 1);
ECHO_SUB_AND_MIX(_state2, 3, 3, _state, 0, 3, 0, 1, 2);
}
#endif
if(ctx->uHashSize == 256)
{
for(i = 0; i < 4; i++)
{
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][1]);
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][2]);
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][3]);
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][0]);
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][1]);
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][2]);
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][3]);
}
}
else
{
for(i = 0; i < 4; i++)
{
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][2]);
_state[i][1] = _mm_xor_si128(_state[i][1], _state[i][3]);
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][0]);
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][2]);
_state[i][1] = _mm_xor_si128(_state[i][1], _statebackup[i][1]);
_state[i][1] = _mm_xor_si128(_state[i][1], _statebackup[i][3]);
}
}
pmsg += ctx->uBlockLength;
for(i = 0; i < 4; i++)
{
_state[i][j] = _mm_loadu_si128((__m128i*)pmsg + 4 * (j - (ctx->uHashSize / 256)) + i);
}
}
#ifdef NO_AES_NI
// transform state
for(i = 0; i < 4; i++)
for(j = 0; j < 4; j++)
{
TRANSFORM(_state[i][j], _k_opt, t1, t2);
}
#endif
// save state
SAVESTATE(_statebackup, _state);
SAVESTATE(ctx->state, _state);
k1 = ctx->k;
for(r = 0; r < ctx->uRounds / 2; r++)
{
ECHO_ROUND_UNROLL2;
}
if(ctx->uHashSize == 256)
{
for(i = 0; i < 4; i++)
{
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][1]);
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][2]);
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][3]);
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][0]);
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][1]);
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][2]);
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][3]);
}
}
else
{
for(i = 0; i < 4; i++)
{
_state[i][0] = _mm_xor_si128(_state[i][0], _state[i][2]);
_state[i][1] = _mm_xor_si128(_state[i][1], _state[i][3]);
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][0]);
_state[i][0] = _mm_xor_si128(_state[i][0], _statebackup[i][2]);
_state[i][1] = _mm_xor_si128(_state[i][1], _statebackup[i][1]);
_state[i][1] = _mm_xor_si128(_state[i][1], _statebackup[i][3]);
}
}
pmsg += ctx->uBlockLength;
}
SAVESTATE(ctx->state, _state);
}

View File

@@ -30,6 +30,7 @@
typedef struct
{
__m128i state[4][4];
BitSequence buffer[192];
__m128i k;
__m128i hashsize;
__m128i const1536;
@@ -39,9 +40,8 @@ typedef struct
unsigned int uBlockLength;
unsigned int uBufferBytes;
DataLength processed_bits;
BitSequence buffer[192];
} hashState_echo;
} hashState_echo __attribute__ ((aligned (64)));
HashReturn init_echo(hashState_echo *state, int hashbitlen);

File diff suppressed because it is too large Load Diff

View File

@@ -1,320 +0,0 @@
/* $Id: sph_echo.h 216 2010-06-08 09:46:57Z tp $ */
/**
* ECHO interface. ECHO is a family of functions which differ by
* their output size; this implementation defines ECHO for output
* sizes 224, 256, 384 and 512 bits.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file sph_echo.h
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
*/
#ifndef SPH_ECHO_H__
#define SPH_ECHO_H__
#ifdef __cplusplus
extern "C"{
#endif
#include <stddef.h>
#include "algo/sha/sph_types.h"
/**
* Output size (in bits) for ECHO-224.
*/
#define SPH_SIZE_echo224 224
/**
* Output size (in bits) for ECHO-256.
*/
#define SPH_SIZE_echo256 256
/**
* Output size (in bits) for ECHO-384.
*/
#define SPH_SIZE_echo384 384
/**
* Output size (in bits) for ECHO-512.
*/
#define SPH_SIZE_echo512 512
/**
* This structure is a context for ECHO computations: it contains the
* intermediate values and some data from the last entered block. Once
* an ECHO computation has been performed, the context can be reused for
* another computation. This specific structure is used for ECHO-224
* and ECHO-256.
*
* The contents of this structure are private. A running ECHO computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[192]; /* first field, for alignment */
size_t ptr;
union {
sph_u32 Vs[4][4];
#if SPH_64
sph_u64 Vb[4][2];
#endif
} u;
sph_u32 C0, C1, C2, C3;
#endif
} sph_echo_small_context;
/**
* This structure is a context for ECHO computations: it contains the
* intermediate values and some data from the last entered block. Once
* an ECHO computation has been performed, the context can be reused for
* another computation. This specific structure is used for ECHO-384
* and ECHO-512.
*
* The contents of this structure are private. A running ECHO computation
* can be cloned by copying the context (e.g. with a simple
* <code>memcpy()</code>).
*/
typedef struct {
#ifndef DOXYGEN_IGNORE
unsigned char buf[128]; /* first field, for alignment */
size_t ptr;
union {
sph_u32 Vs[8][4];
#if SPH_64
sph_u64 Vb[8][2];
#endif
} u;
sph_u32 C0, C1, C2, C3;
#endif
} sph_echo_big_context;
/**
* Type for a ECHO-224 context (identical to the common "small" context).
*/
typedef sph_echo_small_context sph_echo224_context;
/**
* Type for a ECHO-256 context (identical to the common "small" context).
*/
typedef sph_echo_small_context sph_echo256_context;
/**
* Type for a ECHO-384 context (identical to the common "big" context).
*/
typedef sph_echo_big_context sph_echo384_context;
/**
* Type for a ECHO-512 context (identical to the common "big" context).
*/
typedef sph_echo_big_context sph_echo512_context;
/**
* Initialize an ECHO-224 context. This process performs no memory allocation.
*
* @param cc the ECHO-224 context (pointer to a
* <code>sph_echo224_context</code>)
*/
void sph_echo224_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the ECHO-224 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_echo224(void *cc, const void *data, size_t len);
/**
* Terminate the current ECHO-224 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (28 bytes). The context is automatically
* reinitialized.
*
* @param cc the ECHO-224 context
* @param dst the destination buffer
*/
void sph_echo224_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (28 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the ECHO-224 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_echo224_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize an ECHO-256 context. This process performs no memory allocation.
*
* @param cc the ECHO-256 context (pointer to a
* <code>sph_echo256_context</code>)
*/
void sph_echo256_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the ECHO-256 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_echo256(void *cc, const void *data, size_t len);
/**
* Terminate the current ECHO-256 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (32 bytes). The context is automatically
* reinitialized.
*
* @param cc the ECHO-256 context
* @param dst the destination buffer
*/
void sph_echo256_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (32 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the ECHO-256 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_echo256_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize an ECHO-384 context. This process performs no memory allocation.
*
* @param cc the ECHO-384 context (pointer to a
* <code>sph_echo384_context</code>)
*/
void sph_echo384_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the ECHO-384 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_echo384(void *cc, const void *data, size_t len);
/**
* Terminate the current ECHO-384 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (48 bytes). The context is automatically
* reinitialized.
*
* @param cc the ECHO-384 context
* @param dst the destination buffer
*/
void sph_echo384_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (48 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the ECHO-384 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_echo384_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
/**
* Initialize an ECHO-512 context. This process performs no memory allocation.
*
* @param cc the ECHO-512 context (pointer to a
* <code>sph_echo512_context</code>)
*/
void sph_echo512_init(void *cc);
/**
* Process some data bytes. It is acceptable that <code>len</code> is zero
* (in which case this function does nothing).
*
* @param cc the ECHO-512 context
* @param data the input data
* @param len the input data length (in bytes)
*/
void sph_echo512(void *cc, const void *data, size_t len);
/**
* Terminate the current ECHO-512 computation and output the result into
* the provided buffer. The destination buffer must be wide enough to
* accomodate the result (64 bytes). The context is automatically
* reinitialized.
*
* @param cc the ECHO-512 context
* @param dst the destination buffer
*/
void sph_echo512_close(void *cc, void *dst);
/**
* Add a few additional bits (0 to 7) to the current computation, then
* terminate it and output the result in the provided buffer, which must
* be wide enough to accomodate the result (64 bytes). If bit number i
* in <code>ub</code> has value 2^i, then the extra bits are those
* numbered 7 downto 8-n (this is the big-endian convention at the byte
* level). The context is automatically reinitialized.
*
* @param cc the ECHO-512 context
* @param ub the extra bits
* @param n the number of extra bits (0 to 7)
* @param dst the destination buffer
*/
void sph_echo512_addbits_and_close(
void *cc, unsigned ub, unsigned n, void *dst);
#ifdef __cplusplus
}
#endif
#endif