/*
 * Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2014 pooler
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * This file was originally written by Colin Percival as part of the Tarsnap
 * online backup system.
 */

#include "algo-gate-api.h"

#include <stdlib.h>
#include <string.h>
#include <inttypes.h>

/*
 * Pre-built SHA-256 padding fragments (big-endian word values).  The last
 * word of each table is the message length in bits:
 *   keypad   : 0x280 =  640 bits =  80-byte key (block header)
 *   innerpad : 0x4a0 = 1184 bits =  64 (ipad) + 80 (salt) + 4 (block index)
 *   outerpad : 0x300 =  768 bits =  64 (opad) + 32 (inner digest)
 *   finalblk : 0x620 = 1568 bits =  64 (ipad) + 128 (salt) + 4 (block index),
 *              with the block index (1) as its first word.
 */
static const uint32_t keypad[12] = {
    0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00000280
};
static const uint32_t innerpad[11] = {
    0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x000004a0
};
static const uint32_t outerpad[8] = {
    0x80000000, 0, 0, 0, 0, 0, 0, 0x00000300
};
static const uint32_t finalblk[16] = {
    0x00000001, 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00000620
};

/* Per-thread scrypt scratchpad, allocated by scrypt_miner_thread_init(). */
static __thread unsigned char *scratchbuf;

/*
 * Despite its name this holds the scrypt N parameter (set in
 * register_scrypt_algo()); it is passed as N to scrypt_buffer_alloc() and
 * to the scrypt_1024_1_1_256*() routines.
 */
int scratchbuf_size = 0;

/*
 * Set up the HMAC-SHA256 inner (tstate) and outer (ostate) states for an
 * 80-byte key.
 *
 * key    : 20 words; only words 16..19 are read here.
 * tstate : in  - midstate of the first 64 key bytes (see original note:
 *                "tstate is assumed to contain the midstate of key");
 *          out - SHA-256 state after absorbing (K' ^ ipad).
 * ostate : out - SHA-256 state after absorbing (K' ^ opad).
 * K' is the SHA-256 digest of the 80-byte key.
 */
static inline void HMAC_SHA256_80_init(const uint32_t *key,
    uint32_t *tstate, uint32_t *ostate)
{
    uint32_t ihash[8];
    uint32_t pad[16];
    int i;

    /* Finish hashing the key: last 4 key words followed by the padding. */
    memcpy(pad, key + 16, 16);
    memcpy(pad + 4, keypad, 48);
    sha256_transform(tstate, pad, 0);
    memcpy(ihash, tstate, 32);

    /* Outer state: hashed key XOR 0x5c, remainder of the block is 0x5c. */
    sha256_init(ostate);
    for (i = 0; i < 8; i++)
        pad[i] = ihash[i] ^ 0x5c5c5c5c;
    for (; i < 16; i++)
        pad[i] = 0x5c5c5c5c;
    sha256_transform(ostate, pad, 0);

    /* Inner state: hashed key XOR 0x36, remainder of the block is 0x36. */
    sha256_init(tstate);
    for (i = 0; i < 8; i++)
        pad[i] = ihash[i] ^ 0x36363636;
    for (; i < 16; i++)
        pad[i] = 0x36363636;
    sha256_transform(tstate, pad, 0);
}

/*
 * Single-iteration PBKDF2-HMAC-SHA256 with an 80-byte salt producing
 * 128 bytes (4 blocks of 8 words) of output.
 *
 * tstate/ostate : HMAC inner/outer states from HMAC_SHA256_80_init()
 *                 (not modified).
 * salt          : 20 words.
 * output        : 32 words, each stored through swab32().
 */
static inline void PBKDF2_SHA256_80_128(const uint32_t *tstate,
    const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
    uint32_t istate[8], ostate2[8];
    uint32_t ibuf[16], obuf[16];
    int i, j;

    /* Absorb the first 64 salt bytes once; it is shared by all 4 blocks. */
    memcpy(istate, tstate, 32);
    sha256_transform(istate, salt, 0);

    /* ibuf = salt[16..19] | block index (slot 4) | padding. */
    memcpy(ibuf, salt + 16, 16);
    memcpy(ibuf + 5, innerpad, 44);
    /* obuf = inner digest (slots 0..7, filled per block) | padding. */
    memcpy(obuf + 8, outerpad, 32);

    for (i = 0; i < 4; i++) {
        memcpy(obuf, istate, 32);
        ibuf[4] = i + 1;                    /* 1-based PBKDF2 block index */
        sha256_transform(obuf, ibuf, 0);

        memcpy(ostate2, ostate, 32);
        sha256_transform(ostate2, obuf, 0);
        for (j = 0; j < 8; j++)
            output[8 * i + j] = swab32(ostate2[j]);
    }
}

/*
 * Single-iteration PBKDF2-HMAC-SHA256 with a 128-byte salt producing
 * 32 bytes (8 words) of output.
 *
 * tstate/ostate : HMAC inner/outer states; both are CLOBBERED.
 * salt          : 32 words.
 * output        : 8 words, each stored through swab32().
 *
 * NOTE(review): the third argument of sha256_transform() is 1 for the salt
 * blocks here; presumably that requests byte-swapping of the input block -
 * confirm against sha256_transform()'s declaration.
 */
static inline void PBKDF2_SHA256_128_32(uint32_t *tstate, uint32_t *ostate,
    const uint32_t *salt, uint32_t *output)
{
    uint32_t buf[16];
    int i;

    sha256_transform(tstate, salt, 1);
    sha256_transform(tstate, salt + 16, 1);
    sha256_transform(tstate, finalblk, 0);
    memcpy(buf, tstate, 32);
    memcpy(buf + 8, outerpad, 32);

    sha256_transform(ostate, buf, 0);
    for (i = 0; i < 8; i++)
        output[i] = swab32(ostate[i]);
}


#ifdef HAVE_SHA256_4WAY

/*
 * 4-lane versions of the padding tables above: every scalar word is
 * replicated four times so that word i of lane k sits at index 4 * i + k.
 */
static const uint32_t keypad_4way[4 * 12] = {
    0x80000000, 0x80000000, 0x80000000, 0x80000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000280, 0x00000280, 0x00000280, 0x00000280
};
static const uint32_t innerpad_4way[4 * 11] = {
    0x80000000, 0x80000000, 0x80000000, 0x80000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x000004a0, 0x000004a0, 0x000004a0, 0x000004a0
};
static const uint32_t outerpad_4way[4 * 8] = {
    0x80000000, 0x80000000, 0x80000000, 0x80000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000300, 0x00000300, 0x00000300, 0x00000300
};
static const uint32_t _ALIGN(16) finalblk_4way[4 * 16] = {
    0x00000001, 0x00000001, 0x00000001, 0x00000001,
    0x80000000, 0x80000000, 0x80000000, 0x80000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000620, 0x00000620, 0x00000620, 0x00000620
};

/*
 * 4-lane HMAC_SHA256_80_init(): same computation on four interleaved
 * keys/states at once.  All buffers use the 4 * i + k lane layout.
 */
static inline void HMAC_SHA256_80_init_4way(const uint32_t *key,
    uint32_t *tstate, uint32_t *ostate)
{
    uint32_t _ALIGN(16) ihash[4 * 8];
    uint32_t _ALIGN(16) pad[4 * 16];
    int i;

    /* tstate is assumed to contain the midstate of key */
    memcpy(pad, key + 4 * 16, 4 * 16);
    memcpy(pad + 4 * 4, keypad_4way, 4 * 48);
    sha256_transform_4way(tstate, pad, 0);
    memcpy(ihash, tstate, 4 * 32);

    /* Outer (opad, 0x5c) state. */
    sha256_init_4way(ostate);
    for (i = 0; i < 4 * 8; i++)
        pad[i] = ihash[i] ^ 0x5c5c5c5c;
    for (; i < 4 * 16; i++)
        pad[i] = 0x5c5c5c5c;
    sha256_transform_4way(ostate, pad, 0);

    /* Inner (ipad, 0x36) state. */
    sha256_init_4way(tstate);
    for (i = 0; i < 4 * 8; i++)
        pad[i] = ihash[i] ^ 0x36363636;
    for (; i < 4 * 16; i++)
        pad[i] = 0x36363636;
    sha256_transform_4way(tstate, pad, 0);
}

/*
 * 4-lane PBKDF2_SHA256_80_128().  salt is read completely before the loop
 * writes output, and callers in this file pass the same buffer for both.
 */
static inline void PBKDF2_SHA256_80_128_4way(const uint32_t *tstate,
    const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
    uint32_t _ALIGN(16) istate[4 * 8];
    uint32_t _ALIGN(16) ostate2[4 * 8];
    uint32_t _ALIGN(16) ibuf[4 * 16];
    uint32_t _ALIGN(16) obuf[4 * 16];
    int i, j;

    memcpy(istate, tstate, 4 * 32);
    sha256_transform_4way(istate, salt, 0);

    memcpy(ibuf, salt + 4 * 16, 4 * 16);
    memcpy(ibuf + 4 * 5, innerpad_4way, 4 * 44);
    memcpy(obuf + 4 * 8, outerpad_4way, 4 * 32);

    for (i = 0; i < 4; i++) {
        memcpy(obuf, istate, 4 * 32);
        /* Same 1-based block index in every lane. */
        ibuf[4 * 4 + 0] = i + 1;
        ibuf[4 * 4 + 1] = i + 1;
        ibuf[4 * 4 + 2] = i + 1;
        ibuf[4 * 4 + 3] = i + 1;
        sha256_transform_4way(obuf, ibuf, 0);

        memcpy(ostate2, ostate, 4 * 32);
        sha256_transform_4way(ostate2, obuf, 0);
        for (j = 0; j < 4 * 8; j++)
            output[4 * 8 * i + j] = swab32(ostate2[j]);
    }
}

/*
 * 4-lane PBKDF2_SHA256_128_32().  tstate and ostate are clobbered.
 */
static inline void PBKDF2_SHA256_128_32_4way(uint32_t *tstate,
    uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
    uint32_t _ALIGN(16) buf[4 * 16];
    int i;

    sha256_transform_4way(tstate, salt, 1);
    sha256_transform_4way(tstate, salt + 4 * 16, 1);
    sha256_transform_4way(tstate, finalblk_4way, 0);
    memcpy(buf, tstate, 4 * 32);
    memcpy(buf + 4 * 8, outerpad_4way, 4 * 32);

    sha256_transform_4way(ostate, buf, 0);
    for (i = 0; i < 4 * 8; i++)
        output[i] = swab32(ostate[i]);
}

#endif /* HAVE_SHA256_4WAY */


#ifdef HAVE_SHA256_8WAY

/* 8-lane version of finalblk: word i of lane k sits at index 8 * i + k. */
static const uint32_t _ALIGN(32) finalblk_8way[8 * 16] = {
    0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001,
    0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620
};

/*
 * 8-lane HMAC_SHA256_80_init().  The key padding is generated inline
 * instead of being copied from a table.
 */
static inline void HMAC_SHA256_80_init_8way(const uint32_t *key,
    uint32_t *tstate, uint32_t *ostate)
{
    uint32_t _ALIGN(32) ihash[8 * 8];
    uint32_t _ALIGN(32) pad[8 * 16];
    int i;

    /* tstate is assumed to contain the midstate of key */
    memcpy(pad, key + 8 * 16, 8 * 16);
    for (i = 0; i < 8; i++)
        pad[8 * 4 + i] = 0x80000000;
    memset(pad + 8 * 5, 0x00, 8 * 40);
    for (i = 0; i < 8; i++)
        pad[8 * 15 + i] = 0x00000280;
    sha256_transform_8way(tstate, pad, 0);
    memcpy(ihash, tstate, 8 * 32);

    /* Outer (opad, 0x5c) state. */
    sha256_init_8way(ostate);
    for (i = 0; i < 8 * 8; i++)
        pad[i] = ihash[i] ^ 0x5c5c5c5c;
    for (; i < 8 * 16; i++)
        pad[i] = 0x5c5c5c5c;
    sha256_transform_8way(ostate, pad, 0);

    /* Inner (ipad, 0x36) state. */
    sha256_init_8way(tstate);
    for (i = 0; i < 8 * 8; i++)
        pad[i] = ihash[i] ^ 0x36363636;
    for (; i < 8 * 16; i++)
        pad[i] = 0x36363636;
    sha256_transform_8way(tstate, pad, 0);
}

/*
 * 8-lane PBKDF2_SHA256_80_128().  salt is read completely before the loop
 * writes output, and callers in this file pass the same buffer for both.
 */
static inline void PBKDF2_SHA256_80_128_8way(const uint32_t *tstate,
    const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
    uint32_t _ALIGN(32) istate[8 * 8];
    uint32_t _ALIGN(32) ostate2[8 * 8];
    uint32_t _ALIGN(32) ibuf[8 * 16];
    uint32_t _ALIGN(32) obuf[8 * 16];
    int i, j;

    memcpy(istate, tstate, 8 * 32);
    sha256_transform_8way(istate, salt, 0);

    /* ibuf = salt[16..19] | block index (row 4) | inner padding. */
    memcpy(ibuf, salt + 8 * 16, 8 * 16);
    for (i = 0; i < 8; i++)
        ibuf[8 * 5 + i] = 0x80000000;
    memset(ibuf + 8 * 6, 0x00, 8 * 36);
    for (i = 0; i < 8; i++)
        ibuf[8 * 15 + i] = 0x000004a0;

    /* obuf = inner digest (rows 0..7, filled per block) | outer padding. */
    for (i = 0; i < 8; i++)
        obuf[8 * 8 + i] = 0x80000000;
    memset(obuf + 8 * 9, 0x00, 8 * 24);
    for (i = 0; i < 8; i++)
        obuf[8 * 15 + i] = 0x00000300;

    for (i = 0; i < 4; i++) {
        memcpy(obuf, istate, 8 * 32);
        /* Same 1-based block index in every lane. */
        ibuf[8 * 4 + 0] = i + 1;
        ibuf[8 * 4 + 1] = i + 1;
        ibuf[8 * 4 + 2] = i + 1;
        ibuf[8 * 4 + 3] = i + 1;
        ibuf[8 * 4 + 4] = i + 1;
        ibuf[8 * 4 + 5] = i + 1;
        ibuf[8 * 4 + 6] = i + 1;
        ibuf[8 * 4 + 7] = i + 1;
        sha256_transform_8way(obuf, ibuf, 0);

        memcpy(ostate2, ostate, 8 * 32);
        sha256_transform_8way(ostate2, obuf, 0);
        for (j = 0; j < 8 * 8; j++)
            output[8 * 8 * i + j] = swab32(ostate2[j]);
    }
}

/*
 * 8-lane PBKDF2_SHA256_128_32().  tstate and ostate are clobbered.
 */
static inline void PBKDF2_SHA256_128_32_8way(uint32_t *tstate,
    uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
    uint32_t _ALIGN(32) buf[8 * 16];
    int i;

    sha256_transform_8way(tstate, salt, 1);
    sha256_transform_8way(tstate, salt + 8 * 16, 1);
    sha256_transform_8way(tstate, finalblk_8way, 0);

    memcpy(buf, tstate, 8 * 32);
    for (i = 0; i < 8; i++)
        buf[8 * 8 + i] = 0x80000000;
    memset(buf + 8 * 9, 0x00, 8 * 24);
    for (i = 0; i < 8; i++)
        buf[8 * 15 + i] = 0x00000300;

    sha256_transform_8way(ostate, buf, 0);

    for (i = 0; i < 8 * 8; i++)
        output[i] = swab32(ostate[i]);
}

#endif /* HAVE_SHA256_8WAY */


//#if defined(USE_ASM) && defined(__x86_64__)

#define SCRYPT_MAX_WAYS 12
#define HAVE_SCRYPT_3WAY 1
int scrypt_best_throughput(void);
void scrypt_core(uint32_t *X, uint32_t *V, int N);
void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);

#if defined(USE_AVX2)
#undef SCRYPT_MAX_WAYS
#define SCRYPT_MAX_WAYS 24
#define HAVE_SCRYPT_6WAY 1
void scrypt_core_6way(uint32_t *X, uint32_t *V, int N);
#endif

/*
 * Fallback for builds without the multi-way cores.  Unreachable while the
 * architecture guard above stays commented out, because SCRYPT_MAX_WAYS is
 * then always defined.
 */
#ifndef SCRYPT_MAX_WAYS
#define SCRYPT_MAX_WAYS 1
#define scrypt_best_throughput() 1
#endif

/*
 * Allocate a scratchpad large enough for SCRYPT_MAX_WAYS parallel scrypt
 * instances with parameter N (128 bytes per way per N), plus 63 bytes of
 * slack so the users can round the pointer up to a 64-byte boundary.
 *
 * Returns NULL on allocation failure.  The caller owns the buffer.
 */
unsigned char *scrypt_buffer_alloc(int N)
{
    return malloc((size_t)N * SCRYPT_MAX_WAYS * 128 + 63);
}

/*
 * Scalar scrypt hash of one 80-byte header.
 *
 * input      : 20 words (the header).
 * output     : 8 words (the hash).
 * midstate   : SHA-256 state after the first 64 header bytes.
 * scratchpad : buffer from scrypt_buffer_alloc(); aligned up to 64 bytes.
 * N          : scrypt N parameter, forwarded to scrypt_core().
 * thr_id     : unused here.
 *
 * Always returns true.
 */
static bool scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
    uint32_t *midstate, unsigned char *scratchpad, int N, int thr_id )
{
    uint32_t tstate[8], ostate[8];
    uint32_t X[32];
    uint32_t *V;

    V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

    memcpy(tstate, midstate, 32);
    HMAC_SHA256_80_init(input, tstate, ostate);
    PBKDF2_SHA256_80_128(tstate, ostate, input, X);

    scrypt_core(X, V, N);

    PBKDF2_SHA256_128_32(tstate, ostate, X, output);
    return true;
}

#ifdef HAVE_SHA256_4WAY
/*
 * Hash 4 headers: 4-lane SHA-256 for the PBKDF2 stages, scalar scrypt_core()
 * run once per header (the scratchpad V is reused between runs).
 *
 * input  : 4 consecutive 20-word headers.
 * output : 4 consecutive 8-word hashes.
 * thrid  : unused here.
 *
 * Always returns true.
 */
static bool scrypt_1024_1_1_256_4way(const uint32_t *input,
    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N,
    int thrid )
{
    uint32_t _ALIGN(128) tstate[4 * 8];
    uint32_t _ALIGN(128) ostate[4 * 8];
    uint32_t _ALIGN(128) W[4 * 32];
    uint32_t _ALIGN(128) X[4 * 32];
    uint32_t *V;
    int i, k;

    V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

    /* Interleave the headers: word i of header k goes to W[4 * i + k]. */
    for (i = 0; i < 20; i++)
        for (k = 0; k < 4; k++)
            W[4 * i + k] = input[k * 20 + i];
    /* All lanes share the same midstate. */
    for (i = 0; i < 8; i++)
        for (k = 0; k < 4; k++)
            tstate[4 * i + k] = midstate[i];

    HMAC_SHA256_80_init_4way(W, tstate, ostate);
    PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);

    /* De-interleave into 4 contiguous 32-word blocks for scrypt_core(). */
    for (i = 0; i < 32; i++)
        for (k = 0; k < 4; k++)
            X[k * 32 + i] = W[4 * i + k];

    scrypt_core(X + 0 * 32, V, N);
    scrypt_core(X + 1 * 32, V, N);
    scrypt_core(X + 2 * 32, V, N);
    scrypt_core(X + 3 * 32, V, N);

    /* Re-interleave for the final 4-lane PBKDF2. */
    for (i = 0; i < 32; i++)
        for (k = 0; k < 4; k++)
            W[4 * i + k] = X[k * 32 + i];

    PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);

    for (i = 0; i < 8; i++)
        for (k = 0; k < 4; k++)
            output[k * 8 + i] = W[4 * i + k];

    return true;
}
#endif /* HAVE_SHA256_4WAY */

#ifdef HAVE_SCRYPT_3WAY

/*
 * Hash 3 headers: scalar SHA-256 per header, one scrypt_core_3way() call.
 *
 * input  : 3 consecutive 20-word headers.
 * output : 3 consecutive 8-word hashes.
 *
 * Returns false (output not valid) if work_restart[thrid].restart is seen
 * set between stages, true otherwise.
 */
static bool scrypt_1024_1_1_256_3way(const uint32_t *input,
    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N,
    int thrid )
{
    uint32_t _ALIGN(64) tstate[3 * 8], ostate[3 * 8];
    uint32_t _ALIGN(64) X[3 * 32];
    uint32_t *V;

    V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

    memcpy(tstate +  0, midstate, 32);
    memcpy(tstate +  8, midstate, 32);
    memcpy(tstate + 16, midstate, 32);

    HMAC_SHA256_80_init(input +  0, tstate +  0, ostate +  0);
    HMAC_SHA256_80_init(input + 20, tstate +  8, ostate +  8);
    HMAC_SHA256_80_init(input + 40, tstate + 16, ostate + 16);

    if ( work_restart[thrid].restart ) return false;

    PBKDF2_SHA256_80_128(tstate +  0, ostate +  0, input +  0, X +  0);
    PBKDF2_SHA256_80_128(tstate +  8, ostate +  8, input + 20, X + 32);
    PBKDF2_SHA256_80_128(tstate + 16, ostate + 16, input + 40, X + 64);

    if ( work_restart[thrid].restart ) return false;

    scrypt_core_3way(X, V, N);

    if ( work_restart[thrid].restart ) return false;

    PBKDF2_SHA256_128_32(tstate +  0, ostate +  0, X +  0, output +  0);
    PBKDF2_SHA256_128_32(tstate +  8, ostate +  8, X + 32, output +  8);
    PBKDF2_SHA256_128_32(tstate + 16, ostate + 16, X + 64, output + 16);

    return true;
}

#ifdef HAVE_SHA256_4WAY
/*
 * Hash 12 headers: three groups of 4-lane SHA-256 (j = group, k = lane),
 * then four scrypt_core_3way() calls over the 12 contiguous 32-word blocks.
 *
 * input  : 12 consecutive 20-word headers.
 * output : 12 consecutive 8-word hashes.
 *
 * Returns false (output not valid) if work_restart[thrid].restart is seen
 * set between stages, true otherwise.
 */
static bool scrypt_1024_1_1_256_12way(const uint32_t *input,
    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N,
    int thrid )
{
    uint32_t _ALIGN(128) tstate[12 * 8];
    uint32_t _ALIGN(128) ostate[12 * 8];
    uint32_t _ALIGN(128) W[12 * 32];
    uint32_t _ALIGN(128) X[12 * 32];
    uint32_t *V;
    int i, j, k;

    V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

    /* Interleave each group of 4 headers into its own 128-word slice of W. */
    for (j = 0; j < 3; j++)
        for (i = 0; i < 20; i++)
            for (k = 0; k < 4; k++)
                W[128 * j + 4 * i + k] = input[80 * j + k * 20 + i];

    for (j = 0; j < 3; j++)
        for (i = 0; i < 8; i++)
            for (k = 0; k < 4; k++)
                tstate[32 * j + 4 * i + k] = midstate[i];

    HMAC_SHA256_80_init_4way(W +   0, tstate +  0, ostate +  0);
    HMAC_SHA256_80_init_4way(W + 128, tstate + 32, ostate + 32);
    HMAC_SHA256_80_init_4way(W + 256, tstate + 64, ostate + 64);

    if ( work_restart[thrid].restart ) return false;

    PBKDF2_SHA256_80_128_4way(tstate +  0, ostate +  0, W +   0, W +   0);
    PBKDF2_SHA256_80_128_4way(tstate + 32, ostate + 32, W + 128, W + 128);
    PBKDF2_SHA256_80_128_4way(tstate + 64, ostate + 64, W + 256, W + 256);

    if ( work_restart[thrid].restart ) return false;

    /* De-interleave: header (4 * j + k) becomes block X[(4 * j + k) * 32]. */
    for (j = 0; j < 3; j++)
        for (i = 0; i < 32; i++)
            for (k = 0; k < 4; k++)
                X[128 * j + k * 32 + i] = W[128 * j + 4 * i + k];

    scrypt_core_3way(X + 0 * 96, V, N);
    scrypt_core_3way(X + 1 * 96, V, N);
    scrypt_core_3way(X + 2 * 96, V, N);
    scrypt_core_3way(X + 3 * 96, V, N);

    if ( work_restart[thrid].restart ) return false;

    for (j = 0; j < 3; j++)
        for (i = 0; i < 32; i++)
            for (k = 0; k < 4; k++)
                W[128 * j + 4 * i + k] = X[128 * j + k * 32 + i];

    PBKDF2_SHA256_128_32_4way(tstate +  0, ostate +  0, W +   0, W +   0);
    PBKDF2_SHA256_128_32_4way(tstate + 32, ostate + 32, W + 128, W + 128);
    PBKDF2_SHA256_128_32_4way(tstate + 64, ostate + 64, W + 256, W + 256);

    for (j = 0; j < 3; j++)
        for (i = 0; i < 8; i++)
            for (k = 0; k < 4; k++)
                output[32 * j + k * 8 + i] = W[128 * j + 4 * i + k];

    return true;
}
#endif /* HAVE_SHA256_4WAY */

#endif /* HAVE_SCRYPT_3WAY */

#ifdef HAVE_SCRYPT_6WAY
/*
 * Hash 24 headers: three groups of 8-lane SHA-256 (j = group, k = lane),
 * then four scrypt_core_6way() calls over the 24 contiguous 32-word blocks.
 *
 * input  : 24 consecutive 20-word headers.
 * output : 24 consecutive 8-word hashes.
 *
 * Returns false (output not valid) if work_restart[thrid].restart is seen
 * set between stages, true otherwise.
 *
 * NOTE(review): this uses the *_8way helpers, which only exist when
 * HAVE_SHA256_8WAY is defined; presumably the build defines it together
 * with USE_AVX2 - confirm.
 */
static bool scrypt_1024_1_1_256_24way( const uint32_t *input,
    uint32_t *output, uint32_t *midstate,
    unsigned char *scratchpad, int N, int thrid )
{
    uint32_t _ALIGN(128) tstate[24 * 8];
    uint32_t _ALIGN(128) ostate[24 * 8];
    uint32_t _ALIGN(128) W[24 * 32];
    uint32_t _ALIGN(128) X[24 * 32];
    uint32_t *V;
    int i, j, k;

    V = (uint32_t *)( ( (uintptr_t)(scratchpad) + 63 ) & ~ (uintptr_t)(63) );

    /* Interleave each group of 8 headers into its own 256-word slice of W. */
    for ( j = 0; j < 3; j++ )
        for ( i = 0; i < 20; i++ )
            for ( k = 0; k < 8; k++ )
                W[8 * 32 * j + 8 * i + k] = input[8 * 20 * j + k * 20 + i];

    for ( j = 0; j < 3; j++ )
        for ( i = 0; i < 8; i++ )
            for ( k = 0; k < 8; k++ )
                tstate[8 * 8 * j + 8 * i + k] = midstate[i];

    HMAC_SHA256_80_init_8way( W +   0, tstate +   0, ostate +   0 );
    HMAC_SHA256_80_init_8way( W + 256, tstate +  64, ostate +  64 );
    HMAC_SHA256_80_init_8way( W + 512, tstate + 128, ostate + 128 );

    if ( work_restart[thrid].restart ) return false;

    PBKDF2_SHA256_80_128_8way( tstate +   0, ostate +   0, W +   0, W +   0 );
    PBKDF2_SHA256_80_128_8way( tstate +  64, ostate +  64, W + 256, W + 256 );
    PBKDF2_SHA256_80_128_8way( tstate + 128, ostate + 128, W + 512, W + 512 );

    if ( work_restart[thrid].restart ) return false;

    /* De-interleave: header (8 * j + k) becomes block X[(8 * j + k) * 32]. */
    for ( j = 0; j < 3; j++ )
        for ( i = 0; i < 32; i++ )
            for ( k = 0; k < 8; k++ )
                X[8 * 32 * j + k * 32 + i] = W[8 * 32 * j + 8 * i + k];

    scrypt_core_6way( X +  0 * 32, V, N );
    scrypt_core_6way( X +  6 * 32, V, N );
    scrypt_core_6way( X + 12 * 32, V, N );
    scrypt_core_6way( X + 18 * 32, V, N );

    if ( work_restart[thrid].restart ) return false;

    for ( j = 0; j < 3; j++ )
        for ( i = 0; i < 32; i++ )
            for ( k = 0; k < 8; k++ )
                W[8 * 32 * j + 8 * i + k] = X[8 * 32 * j + k * 32 + i];

    PBKDF2_SHA256_128_32_8way( tstate +   0, ostate +   0, W +   0, W +   0 );
    PBKDF2_SHA256_128_32_8way( tstate +  64, ostate +  64, W + 256, W + 256 );
    PBKDF2_SHA256_128_32_8way( tstate + 128, ostate + 128, W + 512, W + 512 );

    for ( j = 0; j < 3; j++ )
        for ( i = 0; i < 8; i++ )
            for ( k = 0; k < 8; k++ )
                output[8 * 8 * j + k * 8 + i] = W[8 * 32 * j + 8 * i + k];

    return true;
}
#endif /* HAVE_SCRYPT_6WAY */

/*
 * Scan nonces for scrypt hashes that satisfy work->target.
 *
 * work        : work->data[0..19] is the 80-byte header, word 19 the nonce
 *               to start from; work->target is the target.
 * max_nonce   : upper bound for the scan; the loop stops once
 *               n >= max_nonce - throughput.
 * hashes_done : out - number of nonces tried during this call.
 * mythr       : thread info; mythr->id selects the work_restart[] slot.
 *
 * Every valid hash is submitted through submit_solution() with
 * work->data[19] temporarily set to the winning nonce.  On return
 * work->data[19] holds the last nonce tried.  Always returns 0.
 */
int scanhash_scrypt( struct work *work, uint32_t max_nonce,
                     uint64_t *hashes_done, struct thr_info *mythr )
{
    uint32_t *pdata = work->data;
    uint32_t *ptarget = work->target;
    uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
    uint32_t midstate[8];
    /*
     * Remember the starting nonce: pdata[19] is overwritten whenever a
     * solution is submitted, so it cannot serve as the base for the
     * hashes_done computation at the end.
     */
    const uint32_t first_nonce = pdata[19];
    uint32_t n = first_nonce - 1;   /* pre-incremented before each hash */
    int thr_id = mythr->id;         // thr_id arg is deprecated
    int throughput = scrypt_best_throughput();
    int i;
    volatile uint8_t *restart = &(work_restart[thr_id].restart);

#ifdef HAVE_SHA256_4WAY
    if (sha256_use_4way())
        throughput *= 4;
#endif

    /* One header copy per way; only the nonce word differs between them. */
    for (i = 0; i < throughput; i++)
        memcpy(data + i * 20, pdata, 80);

    /* The first 64 header bytes are nonce-independent: hash them once. */
    sha256_init(midstate);
    sha256_transform(midstate, data, 0);

    do {
        bool rc = true;

        for (i = 0; i < throughput; i++)
            data[i * 20 + 19] = ++n;

        /*
         * Pick the widest implementation matching the throughput.  Each
         * conditional section ends in a dangling "else", so the sections
         * chain into a single if / else-if ladder that falls back to the
         * scalar routine.
         */
#if defined(HAVE_SHA256_4WAY)
        if (throughput == 4)
            rc = scrypt_1024_1_1_256_4way(data, hash, midstate,
                                          scratchbuf, scratchbuf_size, thr_id );
        else
#endif
#if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY)
        if (throughput == 12)
            rc = scrypt_1024_1_1_256_12way(data, hash, midstate,
                                           scratchbuf, scratchbuf_size, thr_id );
        else
#endif
#if defined(HAVE_SCRYPT_6WAY)
        if (throughput == 24)
            rc = scrypt_1024_1_1_256_24way(data, hash, midstate,
                                           scratchbuf, scratchbuf_size, thr_id );
        else
#endif
#if defined(HAVE_SCRYPT_3WAY)
        if (throughput == 3)
            rc = scrypt_1024_1_1_256_3way(data, hash, midstate,
                                          scratchbuf, scratchbuf_size, thr_id );
        else
#endif
            rc = scrypt_1024_1_1_256(data, hash, midstate,
                                     scratchbuf, scratchbuf_size, thr_id );

        /* rc == false means the batch was abandoned: hash[] is not valid. */
        if ( rc ) {
            for ( i = 0; i < throughput; i++ ) {
                if ( unlikely( valid_hash( hash + i * 8, ptarget ) ) ) {
                    pdata[19] = data[i * 20 + 19];
                    submit_solution( work, hash + i * 8, mythr );
                }
            }
        }
    } while ( likely( ( n < ( max_nonce - throughput ) ) && !(*restart) ) );

    /* n is the last nonce tried, hence the inclusive count. */
    *hashes_done = (uint64_t)( n - first_nonce ) + 1;
    pdata[19] = n;
    return 0;
}

/*
 * Per-thread initialisation: allocate this thread's scratchpad.
 * Returns true on success; logs an error and returns false otherwise.
 */
bool scrypt_miner_thread_init( int thr_id )
{
    scratchbuf = scrypt_buffer_alloc( scratchbuf_size );
    if ( scratchbuf )
        return true;
    applog( LOG_ERR, "Thread %d: Scrypt buffer allocation failed", thr_id );
    return false;
}

/*
 * Hook the scrypt implementation into the algo gate and pick the scrypt N
 * parameter: opt_param_n when set, 1024 otherwise.  Always returns true.
 */
bool register_scrypt_algo( algo_gate_t* gate )
{
    gate->optimizations = SSE2_OPT | AVX2_OPT;
    gate->miner_thread_init =(void*)&scrypt_miner_thread_init;
    gate->scanhash          = (void*)&scanhash_scrypt;
    opt_target_factor = 65536.0;

    if ( !opt_param_n )
    {
        opt_param_n = 1024;
        scratchbuf_size = 1024;
    }
    else
        scratchbuf_size = opt_param_n;
    applog(LOG_INFO,"Scrypt paramaters: N= %d, R= 1.", opt_param_n );
    return true;
}