This commit is contained in:
Jay D Dee
2018-01-08 22:04:43 -05:00
parent 2d2e54f001
commit bee78eac76
58 changed files with 2817 additions and 499 deletions

258
algo/x13/drop.c Normal file
View File

@@ -0,0 +1,258 @@
/**
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2015 kernels10, tpruvot
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @file drop.c
* @author kernels10 <kernels10@gmail.com.com>
* @author tpruvot <tpruvot@github>
*/
#define POK_BOOL_MASK 0x00008000
#define POK_DATA_MASK 0xFFFF0000
#include "algo-gate-api.h"
#include <string.h>
#include "algo/blake/sph_blake.h"
#include "algo/groestl/sph_groestl.h"
#include "algo/jh/sph_jh.h"
#include "algo/keccak/sph_keccak.h"
#include "algo/skein/sph_skein.h"
#include "algo/cubehash/sph_cubehash.h"
#include "algo/echo/sph_echo.h"
#include "algo/fugue//sph_fugue.h"
#include "algo/luffa/sph_luffa.h"
#include "algo/simd/sph_simd.h"
#include "algo/shavite/sph_shavite.h"
/*
 * Bit-shift a 16-word (64-byte) buffer across word boundaries.
 *
 * Each word of `input` is shifted left by `shift` bits and the bits that fall
 * off the top of word i are OR-ed into the low bits of word i + 1 (despite the
 * "shiftr" in the name).  Bits shifted out of word 15 are discarded.
 * A shift of 0 degenerates to a plain 64-byte copy.
 *
 * input  - 16 source words (read only)
 * output - 16 destination words, fully overwritten
 * shift  - bit count; callers in this file only pass 0..3, and values >= 32
 *          are not meaningful for 32-bit words
 */
static void shiftr_lp(const uint32_t *input, uint32_t *output, unsigned int shift)
{
    if (shift == 0) {
        memcpy(output, input, 64);
        return;
    }

    /* number of bits that spill from one word into the next */
    const unsigned int spill = 32 - shift;
    int w = 0;

    memset(output, 0, 64);
    while (w < 15) {
        output[w + 1] |= input[w] >> spill;
        output[w] |= input[w] << shift;
        ++w;
    }
    /* last word has no successor to spill into */
    output[15] |= input[15] << shift;
}
/*
 * Intended to run the 512-bit hash selected by `id` (0..9, see the disabled
 * switch below) over the 64-byte `input` and store the digest in `output`.
 *
 * NOTE(review): the whole body is currently commented out, so this function
 * is a no-op: `output` is never written and `input` / `id` are unused.
 * Consequently the loops in droplp_hash() never modify hashA after the
 * initial JH pass.  Confirm whether disabling the body was intentional
 * before relying on this algorithm.
 */
static void switchHash(const void *input, void *output, int id)
{
/*
sph_keccak512_context ctx_keccak;
sph_blake512_context ctx_blake;
sph_groestl512_context ctx_groestl;
sph_skein512_context ctx_skein;
sph_luffa512_context ctx_luffa;
sph_echo512_context ctx_echo;
sph_simd512_context ctx_simd;
sph_cubehash512_context ctx_cubehash;
sph_fugue512_context ctx_fugue;
sph_shavite512_context ctx_shavite;
switch(id) {
case 0:
sph_keccak512_init(&ctx_keccak); sph_keccak512(&ctx_keccak, input, 64); sph_keccak512_close(&ctx_keccak, output);
break;
case 1:
sph_blake512_init(&ctx_blake); sph_blake512(&ctx_blake, input, 64); sph_blake512_close(&ctx_blake, output);
break;
case 2:
sph_groestl512_init(&ctx_groestl); sph_groestl512(&ctx_groestl, input, 64); sph_groestl512_close(&ctx_groestl, output);
break;
case 3:
sph_skein512_init(&ctx_skein); sph_skein512(&ctx_skein, input, 64); sph_skein512_close(&ctx_skein, output);
break;
case 4:
sph_luffa512_init(&ctx_luffa); sph_luffa512(&ctx_luffa, input, 64); sph_luffa512_close(&ctx_luffa, output);
break;
case 5:
sph_echo512_init(&ctx_echo); sph_echo512(&ctx_echo, input, 64); sph_echo512_close(&ctx_echo, output);
break;
case 6:
sph_shavite512_init(&ctx_shavite); sph_shavite512(&ctx_shavite, input, 64); sph_shavite512_close(&ctx_shavite, output);
break;
case 7:
sph_fugue512_init(&ctx_fugue); sph_fugue512(&ctx_fugue, input, 64); sph_fugue512_close(&ctx_fugue, output);
break;
case 8:
sph_simd512_init(&ctx_simd); sph_simd512(&ctx_simd, input, 64); sph_simd512_close(&ctx_simd, output);
break;
case 9:
sph_cubehash512_init(&ctx_cubehash); sph_cubehash512(&ctx_cubehash, input, 64); sph_cubehash512_close(&ctx_cubehash, output);
break;
default:
break;
}
*/
}
/*
 * One round of the drop chain: visit all ten switchHash ids exactly once,
 * beginning at (round % 10) and wrapping around.  Before every hash the
 * current value in hashA is shifted by (round & 3) bits into hashB, which is
 * then handed to switchHash as input with hashA as the output buffer.
 */
static void droplp_round(uint32_t *hashA, uint32_t *hashB, unsigned int round)
{
    const int first = (int)(round % 10);
    const unsigned int shift = round & 3;

    for (int step = 0; step < 10; step++) {
        const int id = (first + step) % 10;
        shiftr_lp(hashA, hashB, shift);
        switchHash((const void*)hashB, (void*)hashA, id);
    }
}

/*
 * Compute the drop hash of an 80-byte block header.
 *
 * state - receives the first 32 bytes of the final 64-byte value
 * input - 80 bytes of header data
 *
 * The header is first hashed with JH-512.  The first word of that digest,
 * modulo 31, picks a start position; rounds are then run for the positions
 * start, start+9, ... (< 31) followed by 0, 9, ... (< start).
 */
void droplp_hash(void *state, const void *input)
{
    uint32_t _ALIGN(64) hash[2][16];
    uint32_t *const hashA = hash[0];
    uint32_t *const hashB = hash[1];
    sph_jh512_context ctx_jh;

    sph_jh512_init(&ctx_jh);
    sph_jh512(&ctx_jh, input, 80);
    sph_jh512_close(&ctx_jh, (void*)(hashA));

    const unsigned int startPosition = hashA[0] % 31;
    unsigned int round;

    /* from the start position up to the end of the 31-slot cycle ... */
    for (round = startPosition; round < 31; round += 9)
        droplp_round(hashA, hashB, round);

    /* ... then from slot 0 up to (but excluding) the start position */
    for (round = 0; round < startPosition; round += 9)
        droplp_round(hashA, hashB, round);

    memcpy(state, hashA, 32);
}
static void droplp_hash_pok(void *output, uint32_t *pdata, const uint32_t version)
{
uint32_t _ALIGN(64) hash[8];
uint32_t pok;
pdata[0] = version;
droplp_hash(hash, pdata);
// fill PoK
pok = version | (hash[0] & POK_DATA_MASK);
if (pdata[0] != pok) {
pdata[0] = pok;
droplp_hash(hash, pdata);
}
memcpy(output, hash, 32);
}
/*
 * Scan nonces for the drop algorithm.
 *
 * Starting at work->data[19], each nonce is written into the header, hashed
 * with droplp_hash_pok() and compared against work->target.
 *
 * thr_id      - index into work_restart[] for this thread
 * work        - work item; data[0] and data[19] are modified in place
 * max_nonce   - scanning stops once the nonce reaches this value
 * hashes_done - receives the hash count reported for this call
 *
 * Returns 1 when a hash passing fulltest() was found (data[19] holds the
 * winning nonce), 0 when the range was exhausted or a restart was requested.
 */
int scanhash_drop(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done)
{
    uint32_t _ALIGN(64) hash[16];
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    /* version word with the PoK data bits stripped */
    const uint32_t version = pdata[0] & (~POK_DATA_MASK);
    const uint32_t first_nonce = pdata[19];

    if (opt_benchmark)
        ptarget[7] = 0x07ff;
    /* read after the benchmark override so the quick pre-check matches it */
    const uint32_t htarg = ptarget[7];

    for (uint32_t nonce = first_nonce; ; ) {
        pdata[19] = nonce;
        droplp_hash_pok(hash, pdata, version);

        if (hash[7] <= htarg && fulltest(hash, ptarget)) {
            /* pdata[0] already carries the PoK word written by the hash */
            pdata[19] = nonce;
            *hashes_done = pdata[19] - first_nonce + 1;
            if (opt_debug)
                applog(LOG_INFO, "found nonce %x", nonce);
            return 1;
        }

        ++nonce;
        if (!(nonce < max_nonce) || work_restart[thr_id].restart) {
            pdata[19] = nonce;
            *hashes_done = pdata[19] - first_nonce + 1;
            return 0;
        }
    }
}
/*
 * Refresh a thread's work item from the global work, ignoring the PoK word.
 *
 * work          - this thread's work item (replaced or nonce-advanced)
 * g_work        - latest global work
 * thr_id        - thread index, used to carve out this thread's nonce range
 * end_nonce_ptr - in/out: end of this thread's nonce range
 * clean_job     - when true, differing work is taken up immediately
 *
 * Words 1..18 of the header are compared (word 0 carries the PoK bits and is
 * skipped).  If they differ and either clean_job is set or the current nonce
 * range is used up, the work is replaced by a copy of g_work and the nonce
 * range is reset to this thread's slice.  Otherwise the nonce is simply
 * incremented.
 */
void drop_get_new_work( struct work* work, struct work* g_work, int thr_id,
                        uint32_t* end_nonce_ptr, bool clean_job )
{
    // ignore POK in first word
    // const int nonce_i = 19;
    const int wkcmp_sz = 72; // (19-1) * sizeof(uint32_t)
    uint32_t *nonceptr = algo_gate.get_nonceptr( work->data );

    if ( memcmp( &work->data[1], &g_work->data[1], wkcmp_sz )
         && ( clean_job || ( *nonceptr >= *end_nonce_ptr ) ) )
    {
        work_free( work );
        work_copy( work, g_work );
        *nonceptr = ( 0xffffffffU / opt_n_threads ) * thr_id;
        if ( opt_randomize )
        {
            // Multiply in unsigned arithmetic: rand() returns int and
            // rand() * 4 overflows (undefined behaviour) when RAND_MAX is
            // large.  The low 32 bits are the same as the wrapped result.
            *nonceptr += ( ( (uint32_t)rand() * 4 ) & UINT32_MAX )
                         / opt_n_threads;
        }
        *end_nonce_ptr = ( 0xffffffffU / opt_n_threads ) * (thr_id+1) - 0x20;
    }
    else
        ++(*nonceptr);
}
/*
 * Log the version word of `work` when its PoK flag bit is set.
 *
 * The tested bit, 0x00008000, is the same value as POK_BOOL_MASK defined at
 * the top of this file.
 */
void drop_display_pok( struct work* work )
{
    if ( work->data[0] & 0x00008000 )
    {
        // "%08x": the previous "%08xx" printed a stray 'x' after the value.
        applog(LOG_BLUE, "POK received: %08x", work->data[0] );
    }
}
// Need to fix POK offset problems like zr5
/*
 * Install the drop algorithm's callbacks and sizes into `gate`.
 * Always returns true.  algo_not_tested() is invoked first (presumably to
 * warn that this algorithm has not been verified).
 */
bool register_drop_algo( algo_gate_t* gate )
{
    algo_not_tested();

    /* hashing entry points */
    gate->hash                  = (void*)&droplp_hash_pok;
    gate->scanhash              = (void*)&scanhash_drop;

    /* work handling */
    gate->get_new_work          = (void*)&drop_get_new_work;
    gate->work_decode           = (void*)&std_be_work_decode;
    gate->set_work_data_endian  = (void*)&set_work_data_big_endian;
    gate->set_target            = (void*)&scrypt_set_target;
    gate->work_data_size        = 80;
    gate->work_cmp_size         = 72;

    /* submission and reporting */
    gate->build_stratum_request = (void*)&std_be_build_stratum_request;
    gate->submit_getwork_result = (void*)&std_be_submit_getwork_result;
    gate->display_extra_data    = (void*)&drop_display_pok;

    return true;
}

158
algo/x13/skunk-4way.c Normal file
View File

@@ -0,0 +1,158 @@
#include "skunk-gate.h"
#ifdef __AVX2__
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/skein/skein-hash-4way.h"
#include "algo/gost/sph_gost.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/cubehash/sse2/cubehash_sse2.h"
// Bundle of hash contexts used by skunk_4way_hash(), one per stage of the
// chain in the order the stages are applied: skein (4-way), cubehash,
// fugue, gost.
typedef struct {
skein512_4way_context skein;
cubehashParam cube;
sph_fugue512_context fugue;
sph_gost512_context gost;
} skunk_4way_ctx_holder;
// Thread-local template: initialised by skunk_4way_thread_init() and copied
// into a local working context at the start of every skunk_4way_hash() call.
static __thread skunk_4way_ctx_holder skunk_4way_ctx;
/*
 * Hash four interleaved 80-byte headers with the skunk chain.
 *
 * output - receives 4 x 32 bytes, lane k at byte offset 32 * k
 * input  - four headers interleaved 4x64 (as built by scanhash_skunk_4way)
 *
 * Skein runs 4-way on the interleaved data; the result is de-interleaved and
 * cubehash, fugue and gost are then applied to each lane in turn.  The
 * working contexts start as a copy of the thread-local template
 * skunk_4way_ctx, so skunk_4way_thread_init() must have run on this thread.
 */
void skunk_4way_hash( void *output, const void *input )
{
    uint64_t lane[4][8] __attribute__ ((aligned (64)));
    uint64_t vhash[8*4] __attribute__ ((aligned (64)));
    skunk_4way_ctx_holder ctx __attribute__ ((aligned (64)));
    uint8_t *out = (uint8_t*)output;
    int k;

    memcpy( &ctx, &skunk_4way_ctx, sizeof(skunk_4way_ctx) );

    // Skein, all four lanes at once
    skein512_4way( &ctx.skein, input, 80 );
    skein512_4way_close( &ctx.skein, vhash );
    mm256_deinterleave_4x64( lane[0], lane[1], lane[2], lane[3], vhash, 512 );

    // Cubehash: lane 0 uses the context copied above, later lanes restore
    // it from the template first.
    for ( k = 0; k < 4; k++ )
    {
        if ( k > 0 )
            memcpy( &ctx.cube, &skunk_4way_ctx.cube, sizeof(cubehashParam) );
        cubehashUpdateDigest( &ctx.cube, (byte*)lane[k],
                              (const byte*)lane[k], 64 );
    }

    // Fugue: explicit re-init between lanes
    for ( k = 0; k < 4; k++ )
    {
        if ( k > 0 )
            sph_fugue512_init( &ctx.fugue );
        sph_fugue512( &ctx.fugue, lane[k], 64 );
        sph_fugue512_close( &ctx.fugue, lane[k] );
    }

    // Gost: explicit re-init between lanes
    for ( k = 0; k < 4; k++ )
    {
        if ( k > 0 )
            sph_gost512_init( &ctx.gost );
        sph_gost512( &ctx.gost, lane[k], 64 );
        sph_gost512_close( &ctx.gost, lane[k] );
    }

    // Only the first 256 bits of each lane are returned
    for ( k = 0; k < 4; k++ )
        memcpy( out + 32 * k, lane[k], 32 );
}
/*
 * Scan nonces for skunk, four at a time.
 *
 * thr_id      - index into work_restart[] for this thread
 * work        - work item; data[19], nonces[] and nfound[] are updated
 * max_nonce   - scanning stops once the nonce counter reaches this value
 * hashes_done - receives the hash count reported for this call
 *
 * Returns the number of lanes (0..4) whose hash passed fulltest() in the last
 * batch examined.  For every such lane k, work->nfound[k] is true and
 * work->nonces[k] holds the nonce.
 */
int scanhash_skunk_4way( int thr_id, struct work *work, uint32_t max_nonce,
                         uint64_t *hashes_done )
{
    uint32_t hash[4*8] __attribute__ ((aligned (64)));
    uint32_t vdata[24*4] __attribute__ ((aligned (64)));
    uint32_t endiandata[20] __attribute__((aligned(64)));
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    const uint32_t first_nonce = pdata[19];
    uint32_t n = first_nonce;
    uint32_t *nonces = work->nonces;
    bool *found = work->nfound;
    int num_found = 0;
    // Nonce word of lane 0 inside the interleaved buffer (9*8 + 1); the
    // other lanes follow at a stride of two 32-bit words.
    uint32_t *noncep = vdata + 73;
    volatile uint8_t *restart = &(work_restart[thr_id].restart);

    if ( opt_benchmark )
        ((uint32_t*)ptarget)[7] = 0x0cff;
    // Read the quick-reject threshold only after the benchmark override so
    // it agrees with the target fulltest() sees (as scanhash_skunk does).
    const uint32_t Htarg = ptarget[7];

    for ( int k = 0; k < 19; k++ )
        be32enc( &endiandata[k], pdata[k] );
    // Word 19 is the nonce; give it a defined value before the buffer is
    // read by the interleave.  The per-lane nonce is written below before
    // every hash.
    endiandata[19] = 0;

    uint64_t *edata = (uint64_t*)endiandata;
    mm256_interleave_4x64( (uint64_t*)vdata, edata, edata, edata, edata, 640 );

    do
    {
        for ( int lane = 0; lane < 4; lane++ )
        {
            found[lane] = false;
            be32enc( noncep + 2 * lane, n + lane );
        }

        skunk_4way_hash( hash, vdata );
        pdata[19] = n;

        for ( int lane = 0; lane < 4; lane++ )
        {
            uint32_t *lane_hash = hash + 8 * lane;
            if ( lane_hash[7] <= Htarg && fulltest( lane_hash, ptarget ) )
            {
                found[lane] = true;
                num_found++;
                nonces[lane] = n + lane;
                work_set_target_ratio( work, lane_hash );
            }
        }
        n += 4;
    } while ( ( num_found == 0 ) && ( n < max_nonce ) && !(*restart) );

    *hashes_done = n - first_nonce + 1;
    return num_found;
}
bool skunk_4way_thread_init()
{
skein512_4way_init( &skunk_4way_ctx.skein );
cubehashInit( &skunk_4way_ctx.cube, 512, 16, 32 );
sph_fugue512_init( &skunk_4way_ctx.fugue );
sph_gost512_init( &skunk_4way_ctx.gost );
return true;
}
#endif

18
algo/x13/skunk-gate.c Normal file
View File

@@ -0,0 +1,18 @@
#include "skunk-gate.h"
/*
 * Install the skunk callbacks into `gate`.  The 4-way implementation is used
 * when SKUNK_4WAY is defined, the scalar one otherwise.  Always returns true.
 */
bool register_skunk_algo( algo_gate_t* gate )
{
    gate->optimizations = SSE2_OPT | AVX_OPT | AVX2_OPT | FOUR_WAY_OPT;

#ifdef SKUNK_4WAY
    gate->hash              = (void*)&skunk_4way_hash;
    gate->scanhash          = (void*)&scanhash_skunk_4way;
    gate->miner_thread_init = (void*)&skunk_4way_thread_init;
#else
    gate->hash              = (void*)&skunkhash;
    gate->scanhash          = (void*)&scanhash_skunk;
    gate->miner_thread_init = (void*)&skunk_thread_init;
#endif

    return true;
}

33
algo/x13/skunk-gate.h Normal file
View File

@@ -0,0 +1,33 @@
// Declarations shared by the skunk algorithm's gate, scalar and 4-way
// implementations.
#ifndef SKUNK_GATE_H__
#define SKUNK_GATE_H__ 1
#include "algo-gate-api.h"
#include <stdint.h>
// The 4-way implementation is selected whenever the build defines HASH_4WAY.
#if defined(HASH_4WAY)
#define SKUNK_4WAY
#endif
// Installs either the 4-way or the scalar callbacks into gate.
bool register_skunk_algo( algo_gate_t* gate );
#if defined(SKUNK_4WAY)
// 4-way variant: hashes four interleaved headers per call and writes
// 4 x 32 bytes of output.
void skunk_4way_hash( void *state, const void *input );
// Scans four nonces per iteration; returns the number of lanes that hit.
int scanhash_skunk_4way( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
// Initialises the thread-local 4-way context template.
bool skunk_4way_thread_init();
//void init_skunk_4way_ctx();
#endif
// Scalar variant: hashes one header per call and writes 32 bytes of output.
void skunkhash( void *state, const void *input );
// Scans one nonce per iteration; returns 1 on a hit, 0 otherwise.
int scanhash_skunk( int thr_id, struct work *work, uint32_t max_nonce,
uint64_t *hashes_done );
// Initialises the thread-local scalar context template.
bool skunk_thread_init();
#endif

90
algo/x13/skunk.c Normal file
View File

@@ -0,0 +1,90 @@
#include "skunk-gate.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "algo/gost/sph_gost.h"
#include "algo/skein/sph_skein.h"
#include "algo/fugue/sph_fugue.h"
#include "algo/cubehash/sse2/cubehash_sse2.h"
// Bundle of hash contexts used by skunkhash(), one per stage of the chain in
// the order the stages are applied: skein, cubehash, fugue, gost.
typedef struct {
sph_skein512_context skein;
cubehashParam cube;
sph_fugue512_context fugue;
sph_gost512_context gost;
} skunk_ctx_holder;
// Thread-local template copied by every skunkhash() call.  All members are
// initialised by skunk_thread_init(); scanhash_skunk() additionally re-inits
// the skein member and feeds it the first 64 header bytes (midstate).
static __thread skunk_ctx_holder skunk_ctx;
/*
 * Hash one 80-byte header with the skunk chain (skein, cubehash, fugue,
 * gost) and return the first 32 bytes of the result.
 *
 * output - receives 32 bytes
 * input  - 80-byte header; only bytes 64..79 are read here
 *
 * The working contexts start as a copy of the thread-local skunk_ctx, whose
 * skein member is expected to already hold the state for the first 64 header
 * bytes (scanhash_skunk() prepares it), so only the 16-byte tail is fed in.
 */
void skunkhash( void *output, const void *input )
{
    unsigned char hash[128] __attribute__ ((aligned (64)));
    skunk_ctx_holder ctx __attribute__ ((aligned (64)));
    const unsigned char *tail = (const unsigned char*)input + 64;

    memcpy( &ctx, &skunk_ctx, sizeof(skunk_ctx) );

    // skein: finish from the precomputed midstate
    sph_skein512( &ctx.skein, tail, 16 );
    sph_skein512_close( &ctx.skein, (void*) hash );

    // cubehash, in place
    cubehashUpdateDigest( &ctx.cube, (byte*) hash, (const byte*)hash, 64 );

    // fugue, in place
    sph_fugue512( &ctx.fugue, hash, 64 );
    sph_fugue512_close( &ctx.fugue, hash );

    // gost, in place
    sph_gost512( &ctx.gost, hash, 64 );
    sph_gost512_close( &ctx.gost, hash );

    memcpy( output, hash, 32 );
}
/*
 * Scan nonces for skunk, one at a time.
 *
 * thr_id      - index into work_restart[] for this thread
 * work        - work item; data[19] is updated with the last nonce
 * max_nonce   - scanning stops once the nonce reaches this value
 * hashes_done - receives the hash count reported for this call
 *
 * Returns 1 when a hash passing fulltest() was found (data[19] holds the
 * winning nonce), 0 when the range was exhausted or a restart was requested.
 */
int scanhash_skunk( int thr_id, struct work *work, uint32_t max_nonce,
                    uint64_t *hashes_done )
{
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    const uint32_t first_nonce = pdata[19];
    uint32_t _ALIGN(64) endiandata[20];
    uint32_t nonce = first_nonce;
    volatile uint8_t *restart = &(work_restart[thr_id].restart);

    if ( opt_benchmark )
        ((uint32_t*)ptarget)[7] = 0x0cff;

    for ( int k = 0; k < 19; k++ )
        be32enc( &endiandata[k], pdata[k] );

    // precalc midstate: the first 64 header bytes do not depend on the
    // nonce, so they are absorbed once into the thread-local skein context
    // that skunkhash() copies on every call.
    sph_skein512_init( &skunk_ctx.skein );
    sph_skein512( &skunk_ctx.skein, endiandata, 64 );

    const uint32_t Htarg = ptarget[7];
    do
    {
        uint32_t hash[8];
        be32enc( &endiandata[19], nonce );
        skunkhash( hash, endiandata );
        if ( hash[7] <= Htarg && fulltest( hash, ptarget ) )
        {
            pdata[19] = nonce;
            // Nonces first_nonce..nonce inclusive were hashed, hence + 1.
            // Without it a hit on the very first nonce reported 0 hashes.
            *hashes_done = pdata[19] - first_nonce + 1;
            return 1;
        }
        nonce++;
    } while ( nonce < max_nonce && !(*restart) );

    pdata[19] = nonce;
    *hashes_done = pdata[19] - first_nonce + 1;
    return 0;
}
bool skunk_thread_init()
{
sph_skein512_init( &skunk_ctx.skein );
cubehashInit( &skunk_ctx.cube, 512, 16, 32 );
sph_fugue512_init( &skunk_ctx.fugue );
sph_gost512_init( &skunk_ctx.gost );
return true;
}

View File

@@ -7,7 +7,7 @@
#include <string.h>
#include <stdio.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
@@ -22,7 +22,7 @@
typedef struct {
blake512_4way_context blake;
sph_bmw512_context bmw;
bmw512_4way_context bmw;
hashState_groestl groestl;
skein512_4way_context skein;
jh512_4way_context jh;
@@ -41,7 +41,7 @@ x13_4way_ctx_holder x13_4way_ctx __attribute__ ((aligned (64)));
void init_x13_4way_ctx()
{
blake512_4way_init( &x13_4way_ctx.blake );
sph_bmw512_init( &x13_4way_ctx.bmw );
bmw512_4way_init( &x13_4way_ctx.bmw );
init_groestl( &x13_4way_ctx.groestl, 64 );
skein512_4way_init( &x13_4way_ctx.skein );
jh512_4way_init( &x13_4way_ctx.jh );
@@ -69,22 +69,13 @@ void x13_4way_hash( void *state, const void *input )
blake512_4way( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
// 2 Bmw
bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );
// Serial
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
// 2 Bmw
sph_bmw512( &ctx.bmw, hash0, 64 );
sph_bmw512_close( &ctx.bmw, hash0 );
memcpy( &ctx.bmw, &x13_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash1, 64 );
sph_bmw512_close( &ctx.bmw, hash1 );
memcpy( &ctx.bmw, &x13_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash2, 64 );
sph_bmw512_close( &ctx.bmw, hash2 );
memcpy( &ctx.bmw, &x13_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash3, 64 );
sph_bmw512_close( &ctx.bmw, hash3 );
// 3 Groestl
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
memcpy( &ctx.groestl, &x13_4way_ctx.groestl, sizeof(hashState_groestl) );

View File

@@ -7,7 +7,7 @@
#include <string.h>
#include <stdio.h>
#include "algo/blake/blake-hash-4way.h"
#include "algo/bmw/sph_bmw.h"
#include "algo/bmw/bmw-hash-4way.h"
#include "algo/groestl/aes_ni/hash-groestl.h"
#include "algo/skein/skein-hash-4way.h"
#include "algo/jh/jh-hash-4way.h"
@@ -23,7 +23,7 @@
typedef struct {
blake512_4way_context blake;
sph_bmw512_context bmw;
bmw512_4way_context bmw;
hashState_groestl groestl;
skein512_4way_context skein;
jh512_4way_context jh;
@@ -44,7 +44,7 @@ static __thread blake512_4way_context x13sm3_ctx_mid;
void init_x13sm3_4way_ctx()
{
blake512_4way_init( &x13sm3_4way_ctx.blake );
sph_bmw512_init( &x13sm3_4way_ctx.bmw );
bmw512_4way_init( &x13sm3_4way_ctx.bmw );
init_groestl( &x13sm3_4way_ctx.groestl, 64 );
skein512_4way_init( &x13sm3_4way_ctx.skein );
jh512_4way_init( &x13sm3_4way_ctx.jh );
@@ -76,22 +76,13 @@ void x13sm3_4way_hash( void *state, const void *input )
// blake512_4way( &ctx.blake, input, 80 );
blake512_4way_close( &ctx.blake, vhash );
// Bmw
bmw512_4way( &ctx.bmw, vhash, 64 );
bmw512_4way_close( &ctx.bmw, vhash );
// Serial
mm256_deinterleave_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
// Bmw
sph_bmw512( &ctx.bmw, hash0, 64 );
sph_bmw512_close( &ctx.bmw, hash0 );
memcpy( &ctx.bmw, &x13sm3_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash1, 64 );
sph_bmw512_close( &ctx.bmw, hash1 );
memcpy( &ctx.bmw, &x13sm3_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash2, 64 );
sph_bmw512_close( &ctx.bmw, hash2 );
memcpy( &ctx.bmw, &x13sm3_4way_ctx.bmw, sizeof(sph_bmw512_context) );
sph_bmw512( &ctx.bmw, hash3, 64 );
sph_bmw512_close( &ctx.bmw, hash3 );
// Groestl
update_and_final_groestl( &ctx.groestl, (char*)hash0, (char*)hash0, 512 );
memcpy( &ctx.groestl, &x13sm3_4way_ctx.groestl,