Files
mines/zano/libethash/progpow.cpp

361 lines
11 KiB
C++

// ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm.
// Copyright 2018 Pawel Bylica.
// Licensed under the Apache License, Version 2.0. See the LICENSE file.
#include <ethash/progpow.hpp>
#include "bit_manipulation.h"
#include "endianness.hpp"
#include "ethash-internal.hpp"
#include "kiss99.hpp"
#include <ethash/keccak.hpp>
#include <array>
namespace progpow
{
namespace
{
/// A variant of Keccak hash function for ProgPoW.
///
/// This Keccak hash function uses 800-bit permutation (Keccak-f[800]) with 576 bitrate.
/// It take exactly 576 bits of input (split across 3 arguments) and adds no padding.
///
/// @param header_hash The 256-bit header hash.
/// @param nonce The 64-bit nonce.
/// @param mix_hash Additional 256-bits of data.
/// @return The 256-bit output of the hash function.
hash256 keccak_progpow_256(
const hash256& header_hash, uint64_t nonce, const hash256& mix_hash) noexcept
{
static constexpr size_t num_words =
sizeof(header_hash.word32s) / sizeof(header_hash.word32s[0]);
uint32_t state[25] = {};
size_t i;
for (i = 0; i < num_words; ++i)
state[i] = le::uint32(header_hash.word32s[i]);
state[i++] = static_cast<uint32_t>(nonce);
state[i++] = static_cast<uint32_t>(nonce >> 32);
for (uint32_t mix_word : mix_hash.word32s)
state[i++] = le::uint32(mix_word);
ethash_keccakf800(state);
hash256 output;
for (i = 0; i < num_words; ++i)
output.word32s[i] = le::uint32(state[i]);
return output;
}
/// The same as keccak_progpow_256() but uses null mix
/// and returns top 64 bits of the output being a big-endian prefix of the 256-bit hash.
inline uint64_t keccak_progpow_64(const hash256& header_hash, uint64_t nonce) noexcept
{
const hash256 h = keccak_progpow_256(header_hash, nonce, {});
return be::uint64(h.word64s[0]);
}
/// ProgPoW mix RNG state.
///
/// Encapsulates the state of the random number generator used in computing ProgPoW mix.
/// This includes the state of the KISS99 RNG and the precomputed random permutation of the
/// sequence of mix item indexes.
class mix_rng_state
{
public:
inline explicit mix_rng_state(uint64_t seed) noexcept;
uint32_t next_dst() noexcept { return dst_seq[(dst_counter++) % num_regs]; }
uint32_t next_src() noexcept { return src_seq[(src_counter++) % num_regs]; }
kiss99 rng;
private:
size_t dst_counter = 0;
std::array<uint32_t, num_regs> dst_seq;
size_t src_counter = 0;
std::array<uint32_t, num_regs> src_seq;
};
mix_rng_state::mix_rng_state(uint64_t seed) noexcept
{
const auto seed_lo = static_cast<uint32_t>(seed);
const auto seed_hi = static_cast<uint32_t>(seed >> 32);
const auto z = fnv1a(fnv_offset_basis, seed_lo);
const auto w = fnv1a(z, seed_hi);
const auto jsr = fnv1a(w, seed_lo);
const auto jcong = fnv1a(jsr, seed_hi);
rng = kiss99{z, w, jsr, jcong};
// Create random permutations of mix destinations / sources.
// Uses Fisher-Yates shuffle.
for (uint32_t i = 0; i < num_regs; ++i)
{
dst_seq[i] = i;
src_seq[i] = i;
}
for (uint32_t i = num_regs; i > 1; --i)
{
std::swap(dst_seq[i - 1], dst_seq[rng() % i]);
std::swap(src_seq[i - 1], src_seq[rng() % i]);
}
}
NO_SANITIZE("unsigned-integer-overflow")
inline uint32_t random_math(uint32_t a, uint32_t b, uint32_t selector) noexcept
{
switch (selector % 11)
{
default:
case 2:
return a + b;
case 3:
return a * b;
case 4:
return mul_hi32(a, b);
case 5:
return std::min(a, b);
case 6:
return rotl32(a, b);
case 7:
return rotr32(a, b);
case 8:
return a & b;
case 9:
return a | b;
case 10:
return a ^ b;
case 0:
return clz32(a) + clz32(b);
case 1:
return popcount32(a) + popcount32(b);
}
}
/// Merge data from `b` and `a`.
/// Assuming `a` has high entropy, only do ops that retain entropy even if `b`
/// has low entropy (i.e. do not do `a & b`).
NO_SANITIZE("unsigned-integer-overflow")
inline void random_merge(uint32_t& a, uint32_t b, uint32_t selector) noexcept
{
const auto x = (selector >> 16) % 31 + 1; // Additional non-zero selector from higher bits.
switch (selector % 4)
{
case 0:
a = (a * 33) + b;
break;
case 1:
a = (a ^ b) * 33;
break;
case 2:
a = rotl32(a, x) ^ b;
break;
case 3:
a = rotr32(a, x) ^ b;
break;
}
}
using lookup_fn = hash2048 (*)(const epoch_context&, uint32_t);
using mix_array = std::array<std::array<uint32_t, num_regs>, num_lanes>;
void round(
const epoch_context& context, uint32_t r, mix_array& mix, mix_rng_state state, lookup_fn lookup)
{
const uint32_t num_items = static_cast<uint32_t>(context.full_dataset_num_items / 2);
const uint32_t item_index = mix[r % num_lanes][0] % num_items;
const hash2048 item = lookup(context, item_index);
constexpr size_t num_words_per_lane = sizeof(item) / (sizeof(uint32_t) * num_lanes);
constexpr int max_operations =
num_cache_accesses > num_math_operations ? num_cache_accesses : num_math_operations;
// Process lanes.
for (int i = 0; i < max_operations; ++i)
{
if (i < num_cache_accesses) // Random access to cached memory.
{
const auto src = state.next_src();
const auto dst = state.next_dst();
const auto sel = state.rng();
for (size_t l = 0; l < num_lanes; ++l)
{
const size_t offset = mix[l][src] % l1_cache_num_items;
random_merge(mix[l][dst], le::uint32(context.l1_cache[offset]), sel);
}
}
if (i < num_math_operations) // Random math.
{
// Generate 2 unique source indexes.
const auto src_rnd = state.rng() % (num_regs * (num_regs - 1));
const auto src1 = src_rnd % num_regs; // O <= src1 < num_regs
auto src2 = src_rnd / num_regs; // 0 <= src2 < num_regs - 1
if (src2 >= src1)
++src2;
const auto sel1 = state.rng();
const auto dst = state.next_dst();
const auto sel2 = state.rng();
for (size_t l = 0; l < num_lanes; ++l)
{
const uint32_t data = random_math(mix[l][src1], mix[l][src2], sel1);
random_merge(mix[l][dst], data, sel2);
}
}
}
// DAG access pattern.
uint32_t dsts[num_words_per_lane];
uint32_t sels[num_words_per_lane];
for (size_t i = 0; i < num_words_per_lane; ++i)
{
dsts[i] = i == 0 ? 0 : state.next_dst();
sels[i] = state.rng();
}
// DAG access.
for (size_t l = 0; l < num_lanes; ++l)
{
const auto offset = ((l ^ r) % num_lanes) * num_words_per_lane;
for (size_t i = 0; i < num_words_per_lane; ++i)
{
const auto word = le::uint32(item.word32s[offset + i]);
random_merge(mix[l][dsts[i]], word, sels[i]);
}
}
}
mix_array init_mix(uint64_t seed)
{
const uint32_t z = fnv1a(fnv_offset_basis, static_cast<uint32_t>(seed));
const uint32_t w = fnv1a(z, static_cast<uint32_t>(seed >> 32));
mix_array mix;
for (uint32_t l = 0; l < mix.size(); ++l)
{
const uint32_t jsr = fnv1a(w, l);
const uint32_t jcong = fnv1a(jsr, l);
kiss99 rng{z, w, jsr, jcong};
for (auto& row : mix[l])
row = rng();
}
return mix;
}
hash256 hash_mix(
const epoch_context& context, int block_number, uint64_t seed, lookup_fn lookup) noexcept
{
auto mix = init_mix(seed);
mix_rng_state state{uint64_t(block_number / period_length)};
for (uint32_t i = 0; i < 64; ++i)
round(context, i, mix, state, lookup);
// Reduce mix data to a single per-lane result.
uint32_t lane_hash[num_lanes];
for (size_t l = 0; l < num_lanes; ++l)
{
lane_hash[l] = fnv_offset_basis;
for (uint32_t i = 0; i < num_regs; ++i)
lane_hash[l] = fnv1a(lane_hash[l], mix[l][i]);
}
// Reduce all lanes to a single 256-bit result.
static constexpr size_t num_words = sizeof(hash256) / sizeof(uint32_t);
hash256 mix_hash;
for (uint32_t& w : mix_hash.word32s)
w = fnv_offset_basis;
for (size_t l = 0; l < num_lanes; ++l)
mix_hash.word32s[l % num_words] = fnv1a(mix_hash.word32s[l % num_words], lane_hash[l]);
return le::uint32s(mix_hash);
}
} // namespace
result hash(const epoch_context& context, int block_number, const hash256& header_hash,
uint64_t nonce) noexcept
{
const uint64_t seed = keccak_progpow_64(header_hash, nonce);
const hash256 mix_hash = hash_mix(context, block_number, seed, calculate_dataset_item_2048);
const hash256 final_hash = keccak_progpow_256(header_hash, seed, mix_hash);
return {final_hash, mix_hash};
}
result hash(const epoch_context_full& context, int block_number, const hash256& header_hash,
uint64_t nonce) noexcept
{
static const auto lazy_lookup = [](const epoch_context& context, uint32_t index) noexcept
{
auto* full_dataset_1024 = static_cast<const epoch_context_full&>(context).full_dataset;
auto* full_dataset_2048 = reinterpret_cast<hash2048*>(full_dataset_1024);
hash2048& item = full_dataset_2048[index];
if (item.word64s[0] == 0)
{
// TODO: Copy elision here makes it thread-safe?
item = calculate_dataset_item_2048(context, index);
}
return item;
};
const uint64_t seed = keccak_progpow_64(header_hash, nonce);
const hash256 mix_hash = hash_mix(context, block_number, seed, lazy_lookup);
const hash256 final_hash = keccak_progpow_256(header_hash, seed, mix_hash);
return {final_hash, mix_hash};
}
bool verify(const epoch_context& context, int block_number, const hash256& header_hash,
const hash256& mix_hash, uint64_t nonce, const hash256& boundary) noexcept
{
const uint64_t seed = keccak_progpow_64(header_hash, nonce);
const hash256 final_hash = keccak_progpow_256(header_hash, seed, mix_hash);
if (!is_less_or_equal(final_hash, boundary))
return false;
const hash256 expected_mix_hash =
hash_mix(context, block_number, seed, calculate_dataset_item_2048);
return is_equal(expected_mix_hash, mix_hash);
}
search_result search_light(const epoch_context& context, int block_number,
const hash256& header_hash, const hash256& boundary, uint64_t start_nonce,
size_t iterations) noexcept
{
const uint64_t end_nonce = start_nonce + iterations;
for (uint64_t nonce = start_nonce; nonce < end_nonce; ++nonce)
{
result r = hash(context, block_number, header_hash, nonce);
if (is_less_or_equal(r.final_hash, boundary))
return {r, nonce};
}
return {};
}
search_result search(const epoch_context_full& context, int block_number,
const hash256& header_hash, const hash256& boundary, uint64_t start_nonce,
size_t iterations) noexcept
{
const uint64_t end_nonce = start_nonce + iterations;
for (uint64_t nonce = start_nonce; nonce < end_nonce; ++nonce)
{
result r = hash(context, block_number, header_hash, nonce);
if (is_less_or_equal(r.final_hash, boundary))
return {r, nonce};
}
return {};
}
} // namespace progpow