// ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. // Copyright 2018 Pawel Bylica. // Licensed under the Apache License, Version 2.0. See the LICENSE file. #include #include "bit_manipulation.h" #include "endianness.hpp" #include "ethash-internal.hpp" #include "kiss99.hpp" #include #include namespace progpow { namespace { /// A variant of Keccak hash function for ProgPoW. /// /// This Keccak hash function uses 800-bit permutation (Keccak-f[800]) with 576 bitrate. /// It take exactly 576 bits of input (split across 3 arguments) and adds no padding. /// /// @param header_hash The 256-bit header hash. /// @param nonce The 64-bit nonce. /// @param mix_hash Additional 256-bits of data. /// @return The 256-bit output of the hash function. hash256 keccak_progpow_256( const hash256& header_hash, uint64_t nonce, const hash256& mix_hash) noexcept { static constexpr size_t num_words = sizeof(header_hash.word32s) / sizeof(header_hash.word32s[0]); uint32_t state[25] = {}; size_t i; for (i = 0; i < num_words; ++i) state[i] = le::uint32(header_hash.word32s[i]); state[i++] = static_cast(nonce); state[i++] = static_cast(nonce >> 32); for (uint32_t mix_word : mix_hash.word32s) state[i++] = le::uint32(mix_word); ethash_keccakf800(state); hash256 output; for (i = 0; i < num_words; ++i) output.word32s[i] = le::uint32(state[i]); return output; } /// The same as keccak_progpow_256() but uses null mix /// and returns top 64 bits of the output being a big-endian prefix of the 256-bit hash. inline uint64_t keccak_progpow_64(const hash256& header_hash, uint64_t nonce) noexcept { const hash256 h = keccak_progpow_256(header_hash, nonce, {}); return be::uint64(h.word64s[0]); } /// ProgPoW mix RNG state. /// /// Encapsulates the state of the random number generator used in computing ProgPoW mix. /// This includes the state of the KISS99 RNG and the precomputed random permutation of the /// sequence of mix item indexes. class mix_rng_state { public: inline explicit mix_rng_state(uint64_t seed) noexcept; uint32_t next_dst() noexcept { return dst_seq[(dst_counter++) % num_regs]; } uint32_t next_src() noexcept { return src_seq[(src_counter++) % num_regs]; } kiss99 rng; private: size_t dst_counter = 0; std::array dst_seq; size_t src_counter = 0; std::array src_seq; }; mix_rng_state::mix_rng_state(uint64_t seed) noexcept { const auto seed_lo = static_cast(seed); const auto seed_hi = static_cast(seed >> 32); const auto z = fnv1a(fnv_offset_basis, seed_lo); const auto w = fnv1a(z, seed_hi); const auto jsr = fnv1a(w, seed_lo); const auto jcong = fnv1a(jsr, seed_hi); rng = kiss99{z, w, jsr, jcong}; // Create random permutations of mix destinations / sources. // Uses Fisher-Yates shuffle. for (uint32_t i = 0; i < num_regs; ++i) { dst_seq[i] = i; src_seq[i] = i; } for (uint32_t i = num_regs; i > 1; --i) { std::swap(dst_seq[i - 1], dst_seq[rng() % i]); std::swap(src_seq[i - 1], src_seq[rng() % i]); } } NO_SANITIZE("unsigned-integer-overflow") inline uint32_t random_math(uint32_t a, uint32_t b, uint32_t selector) noexcept { switch (selector % 11) { default: case 2: return a + b; case 3: return a * b; case 4: return mul_hi32(a, b); case 5: return std::min(a, b); case 6: return rotl32(a, b); case 7: return rotr32(a, b); case 8: return a & b; case 9: return a | b; case 10: return a ^ b; case 0: return clz32(a) + clz32(b); case 1: return popcount32(a) + popcount32(b); } } /// Merge data from `b` and `a`. /// Assuming `a` has high entropy, only do ops that retain entropy even if `b` /// has low entropy (i.e. do not do `a & b`). NO_SANITIZE("unsigned-integer-overflow") inline void random_merge(uint32_t& a, uint32_t b, uint32_t selector) noexcept { const auto x = (selector >> 16) % 31 + 1; // Additional non-zero selector from higher bits. switch (selector % 4) { case 0: a = (a * 33) + b; break; case 1: a = (a ^ b) * 33; break; case 2: a = rotl32(a, x) ^ b; break; case 3: a = rotr32(a, x) ^ b; break; } } using lookup_fn = hash2048 (*)(const epoch_context&, uint32_t); using mix_array = std::array, num_lanes>; void round( const epoch_context& context, uint32_t r, mix_array& mix, mix_rng_state state, lookup_fn lookup) { const uint32_t num_items = static_cast(context.full_dataset_num_items / 2); const uint32_t item_index = mix[r % num_lanes][0] % num_items; const hash2048 item = lookup(context, item_index); constexpr size_t num_words_per_lane = sizeof(item) / (sizeof(uint32_t) * num_lanes); constexpr int max_operations = num_cache_accesses > num_math_operations ? num_cache_accesses : num_math_operations; // Process lanes. for (int i = 0; i < max_operations; ++i) { if (i < num_cache_accesses) // Random access to cached memory. { const auto src = state.next_src(); const auto dst = state.next_dst(); const auto sel = state.rng(); for (size_t l = 0; l < num_lanes; ++l) { const size_t offset = mix[l][src] % l1_cache_num_items; random_merge(mix[l][dst], le::uint32(context.l1_cache[offset]), sel); } } if (i < num_math_operations) // Random math. { // Generate 2 unique source indexes. const auto src_rnd = state.rng() % (num_regs * (num_regs - 1)); const auto src1 = src_rnd % num_regs; // O <= src1 < num_regs auto src2 = src_rnd / num_regs; // 0 <= src2 < num_regs - 1 if (src2 >= src1) ++src2; const auto sel1 = state.rng(); const auto dst = state.next_dst(); const auto sel2 = state.rng(); for (size_t l = 0; l < num_lanes; ++l) { const uint32_t data = random_math(mix[l][src1], mix[l][src2], sel1); random_merge(mix[l][dst], data, sel2); } } } // DAG access pattern. uint32_t dsts[num_words_per_lane]; uint32_t sels[num_words_per_lane]; for (size_t i = 0; i < num_words_per_lane; ++i) { dsts[i] = i == 0 ? 0 : state.next_dst(); sels[i] = state.rng(); } // DAG access. for (size_t l = 0; l < num_lanes; ++l) { const auto offset = ((l ^ r) % num_lanes) * num_words_per_lane; for (size_t i = 0; i < num_words_per_lane; ++i) { const auto word = le::uint32(item.word32s[offset + i]); random_merge(mix[l][dsts[i]], word, sels[i]); } } } mix_array init_mix(uint64_t seed) { const uint32_t z = fnv1a(fnv_offset_basis, static_cast(seed)); const uint32_t w = fnv1a(z, static_cast(seed >> 32)); mix_array mix; for (uint32_t l = 0; l < mix.size(); ++l) { const uint32_t jsr = fnv1a(w, l); const uint32_t jcong = fnv1a(jsr, l); kiss99 rng{z, w, jsr, jcong}; for (auto& row : mix[l]) row = rng(); } return mix; } hash256 hash_mix( const epoch_context& context, int block_number, uint64_t seed, lookup_fn lookup) noexcept { auto mix = init_mix(seed); mix_rng_state state{uint64_t(block_number / period_length)}; for (uint32_t i = 0; i < 64; ++i) round(context, i, mix, state, lookup); // Reduce mix data to a single per-lane result. uint32_t lane_hash[num_lanes]; for (size_t l = 0; l < num_lanes; ++l) { lane_hash[l] = fnv_offset_basis; for (uint32_t i = 0; i < num_regs; ++i) lane_hash[l] = fnv1a(lane_hash[l], mix[l][i]); } // Reduce all lanes to a single 256-bit result. static constexpr size_t num_words = sizeof(hash256) / sizeof(uint32_t); hash256 mix_hash; for (uint32_t& w : mix_hash.word32s) w = fnv_offset_basis; for (size_t l = 0; l < num_lanes; ++l) mix_hash.word32s[l % num_words] = fnv1a(mix_hash.word32s[l % num_words], lane_hash[l]); return le::uint32s(mix_hash); } } // namespace result hash(const epoch_context& context, int block_number, const hash256& header_hash, uint64_t nonce) noexcept { const uint64_t seed = keccak_progpow_64(header_hash, nonce); const hash256 mix_hash = hash_mix(context, block_number, seed, calculate_dataset_item_2048); const hash256 final_hash = keccak_progpow_256(header_hash, seed, mix_hash); return {final_hash, mix_hash}; } result hash(const epoch_context_full& context, int block_number, const hash256& header_hash, uint64_t nonce) noexcept { static const auto lazy_lookup = [](const epoch_context& context, uint32_t index) noexcept { auto* full_dataset_1024 = static_cast(context).full_dataset; auto* full_dataset_2048 = reinterpret_cast(full_dataset_1024); hash2048& item = full_dataset_2048[index]; if (item.word64s[0] == 0) { // TODO: Copy elision here makes it thread-safe? item = calculate_dataset_item_2048(context, index); } return item; }; const uint64_t seed = keccak_progpow_64(header_hash, nonce); const hash256 mix_hash = hash_mix(context, block_number, seed, lazy_lookup); const hash256 final_hash = keccak_progpow_256(header_hash, seed, mix_hash); return {final_hash, mix_hash}; } bool verify(const epoch_context& context, int block_number, const hash256& header_hash, const hash256& mix_hash, uint64_t nonce, const hash256& boundary) noexcept { const uint64_t seed = keccak_progpow_64(header_hash, nonce); const hash256 final_hash = keccak_progpow_256(header_hash, seed, mix_hash); if (!is_less_or_equal(final_hash, boundary)) return false; const hash256 expected_mix_hash = hash_mix(context, block_number, seed, calculate_dataset_item_2048); return is_equal(expected_mix_hash, mix_hash); } search_result search_light(const epoch_context& context, int block_number, const hash256& header_hash, const hash256& boundary, uint64_t start_nonce, size_t iterations) noexcept { const uint64_t end_nonce = start_nonce + iterations; for (uint64_t nonce = start_nonce; nonce < end_nonce; ++nonce) { result r = hash(context, block_number, header_hash, nonce); if (is_less_or_equal(r.final_hash, boundary)) return {r, nonce}; } return {}; } search_result search(const epoch_context_full& context, int block_number, const hash256& header_hash, const hash256& boundary, uint64_t start_nonce, size_t iterations) noexcept { const uint64_t end_nonce = start_nonce + iterations; for (uint64_t nonce = start_nonce; nonce < end_nonce; ++nonce) { result r = hash(context, block_number, header_hash, nonce); if (is_less_or_equal(r.final_hash, boundary)) return {r, nonce}; } return {}; } } // namespace progpow