progminer zano miner fork https://github.com/hyle-team/progminer
This commit is contained in:
9569
zano/libethash-cl/CL/cl2.hpp
Normal file
9569
zano/libethash-cl/CL/cl2.hpp
Normal file
File diff suppressed because it is too large
Load Diff
960
zano/libethash-cl/CLMiner.cpp
Normal file
960
zano/libethash-cl/CLMiner.cpp
Normal file
@@ -0,0 +1,960 @@
|
||||
/// OpenCL miner implementation.
|
||||
///
|
||||
/// @file
|
||||
/// @copyright GNU General Public License
|
||||
|
||||
#include <boost/dll.hpp>
|
||||
|
||||
#include <libethcore/Farm.h>
|
||||
#include "CLMiner.h"
|
||||
#include "CLMiner_kernel.h"
|
||||
#include <ethash/ethash.hpp>
|
||||
|
||||
#include "CLMiner.h"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
using namespace dev;
|
||||
using namespace eth;
|
||||
|
||||
namespace dev
|
||||
{
|
||||
namespace eth
|
||||
{
|
||||
|
||||
// WARNING: Do not change the value of the following constant
|
||||
// unless you are prepared to make the necessary adjustments
|
||||
// to the assembly code for the binary kernels.
|
||||
// Number of slots in SearchResults::rslt; also handed to the kernel source as MAX_OUTPUTS.
const size_t c_maxSearchResults = 15;
|
||||
|
||||
struct CLChannel : public LogChannel
|
||||
{
|
||||
static const char* name() { return EthOrange "cl"; }
|
||||
static const int verbosity = 2;
|
||||
static const bool debug = false;
|
||||
};
|
||||
// Shorthand for a log stream bound to the CLChannel channel.
#define cllog clog(CLChannel)
|
||||
// Streams _contents into the cllog stream.
#define ETHCL_LOG(_contents) cllog << _contents
|
||||
|
||||
/**
|
||||
* Returns the name of a numerical cl_int error
|
||||
* Takes constants from CL/cl.h and returns them in a readable format
|
||||
*/
|
||||
static const char* strClError(cl_int err)
|
||||
{
|
||||
switch (err)
|
||||
{
|
||||
case CL_SUCCESS:
|
||||
return "CL_SUCCESS";
|
||||
case CL_DEVICE_NOT_FOUND:
|
||||
return "CL_DEVICE_NOT_FOUND";
|
||||
case CL_DEVICE_NOT_AVAILABLE:
|
||||
return "CL_DEVICE_NOT_AVAILABLE";
|
||||
case CL_COMPILER_NOT_AVAILABLE:
|
||||
return "CL_COMPILER_NOT_AVAILABLE";
|
||||
case CL_MEM_OBJECT_ALLOCATION_FAILURE:
|
||||
return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
|
||||
case CL_OUT_OF_RESOURCES:
|
||||
return "CL_OUT_OF_RESOURCES";
|
||||
case CL_OUT_OF_HOST_MEMORY:
|
||||
return "CL_OUT_OF_HOST_MEMORY";
|
||||
case CL_PROFILING_INFO_NOT_AVAILABLE:
|
||||
return "CL_PROFILING_INFO_NOT_AVAILABLE";
|
||||
case CL_MEM_COPY_OVERLAP:
|
||||
return "CL_MEM_COPY_OVERLAP";
|
||||
case CL_IMAGE_FORMAT_MISMATCH:
|
||||
return "CL_IMAGE_FORMAT_MISMATCH";
|
||||
case CL_IMAGE_FORMAT_NOT_SUPPORTED:
|
||||
return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
|
||||
case CL_BUILD_PROGRAM_FAILURE:
|
||||
return "CL_BUILD_PROGRAM_FAILURE";
|
||||
case CL_MAP_FAILURE:
|
||||
return "CL_MAP_FAILURE";
|
||||
case CL_MISALIGNED_SUB_BUFFER_OFFSET:
|
||||
return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
|
||||
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
|
||||
return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
case CL_COMPILE_PROGRAM_FAILURE:
|
||||
return "CL_COMPILE_PROGRAM_FAILURE";
|
||||
case CL_LINKER_NOT_AVAILABLE:
|
||||
return "CL_LINKER_NOT_AVAILABLE";
|
||||
case CL_LINK_PROGRAM_FAILURE:
|
||||
return "CL_LINK_PROGRAM_FAILURE";
|
||||
case CL_DEVICE_PARTITION_FAILED:
|
||||
return "CL_DEVICE_PARTITION_FAILED";
|
||||
case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
|
||||
return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
|
||||
#endif // CL_VERSION_1_2
|
||||
|
||||
case CL_INVALID_VALUE:
|
||||
return "CL_INVALID_VALUE";
|
||||
case CL_INVALID_DEVICE_TYPE:
|
||||
return "CL_INVALID_DEVICE_TYPE";
|
||||
case CL_INVALID_PLATFORM:
|
||||
return "CL_INVALID_PLATFORM";
|
||||
case CL_INVALID_DEVICE:
|
||||
return "CL_INVALID_DEVICE";
|
||||
case CL_INVALID_CONTEXT:
|
||||
return "CL_INVALID_CONTEXT";
|
||||
case CL_INVALID_QUEUE_PROPERTIES:
|
||||
return "CL_INVALID_QUEUE_PROPERTIES";
|
||||
case CL_INVALID_COMMAND_QUEUE:
|
||||
return "CL_INVALID_COMMAND_QUEUE";
|
||||
case CL_INVALID_HOST_PTR:
|
||||
return "CL_INVALID_HOST_PTR";
|
||||
case CL_INVALID_MEM_OBJECT:
|
||||
return "CL_INVALID_MEM_OBJECT";
|
||||
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
|
||||
return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
|
||||
case CL_INVALID_IMAGE_SIZE:
|
||||
return "CL_INVALID_IMAGE_SIZE";
|
||||
case CL_INVALID_SAMPLER:
|
||||
return "CL_INVALID_SAMPLER";
|
||||
case CL_INVALID_BINARY:
|
||||
return "CL_INVALID_BINARY";
|
||||
case CL_INVALID_BUILD_OPTIONS:
|
||||
return "CL_INVALID_BUILD_OPTIONS";
|
||||
case CL_INVALID_PROGRAM:
|
||||
return "CL_INVALID_PROGRAM";
|
||||
case CL_INVALID_PROGRAM_EXECUTABLE:
|
||||
return "CL_INVALID_PROGRAM_EXECUTABLE";
|
||||
case CL_INVALID_KERNEL_NAME:
|
||||
return "CL_INVALID_KERNEL_NAME";
|
||||
case CL_INVALID_KERNEL_DEFINITION:
|
||||
return "CL_INVALID_KERNEL_DEFINITION";
|
||||
case CL_INVALID_KERNEL:
|
||||
return "CL_INVALID_KERNEL";
|
||||
case CL_INVALID_ARG_INDEX:
|
||||
return "CL_INVALID_ARG_INDEX";
|
||||
case CL_INVALID_ARG_VALUE:
|
||||
return "CL_INVALID_ARG_VALUE";
|
||||
case CL_INVALID_ARG_SIZE:
|
||||
return "CL_INVALID_ARG_SIZE";
|
||||
case CL_INVALID_KERNEL_ARGS:
|
||||
return "CL_INVALID_KERNEL_ARGS";
|
||||
case CL_INVALID_WORK_DIMENSION:
|
||||
return "CL_INVALID_WORK_DIMENSION";
|
||||
case CL_INVALID_WORK_GROUP_SIZE:
|
||||
return "CL_INVALID_WORK_GROUP_SIZE";
|
||||
case CL_INVALID_WORK_ITEM_SIZE:
|
||||
return "CL_INVALID_WORK_ITEM_SIZE";
|
||||
case CL_INVALID_GLOBAL_OFFSET:
|
||||
return "CL_INVALID_GLOBAL_OFFSET";
|
||||
case CL_INVALID_EVENT_WAIT_LIST:
|
||||
return "CL_INVALID_EVENT_WAIT_LIST";
|
||||
case CL_INVALID_EVENT:
|
||||
return "CL_INVALID_EVENT";
|
||||
case CL_INVALID_OPERATION:
|
||||
return "CL_INVALID_OPERATION";
|
||||
case CL_INVALID_GL_OBJECT:
|
||||
return "CL_INVALID_GL_OBJECT";
|
||||
case CL_INVALID_BUFFER_SIZE:
|
||||
return "CL_INVALID_BUFFER_SIZE";
|
||||
case CL_INVALID_MIP_LEVEL:
|
||||
return "CL_INVALID_MIP_LEVEL";
|
||||
case CL_INVALID_GLOBAL_WORK_SIZE:
|
||||
return "CL_INVALID_GLOBAL_WORK_SIZE";
|
||||
case CL_INVALID_PROPERTY:
|
||||
return "CL_INVALID_PROPERTY";
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
case CL_INVALID_IMAGE_DESCRIPTOR:
|
||||
return "CL_INVALID_IMAGE_DESCRIPTOR";
|
||||
case CL_INVALID_COMPILER_OPTIONS:
|
||||
return "CL_INVALID_COMPILER_OPTIONS";
|
||||
case CL_INVALID_LINKER_OPTIONS:
|
||||
return "CL_INVALID_LINKER_OPTIONS";
|
||||
case CL_INVALID_DEVICE_PARTITION_COUNT:
|
||||
return "CL_INVALID_DEVICE_PARTITION_COUNT";
|
||||
#endif // CL_VERSION_1_2
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
case CL_INVALID_PIPE_SIZE:
|
||||
return "CL_INVALID_PIPE_SIZE";
|
||||
case CL_INVALID_DEVICE_QUEUE:
|
||||
return "CL_INVALID_DEVICE_QUEUE";
|
||||
#endif // CL_VERSION_2_0
|
||||
|
||||
#ifdef CL_VERSION_2_2
|
||||
case CL_INVALID_SPEC_ID:
|
||||
return "CL_INVALID_SPEC_ID";
|
||||
case CL_MAX_SIZE_RESTRICTION_EXCEEDED:
|
||||
return "CL_MAX_SIZE_RESTRICTION_EXCEEDED";
|
||||
#endif // CL_VERSION_2_2
|
||||
}
|
||||
|
||||
return "Unknown CL error encountered";
|
||||
}
|
||||
|
||||
/**
|
||||
* Prints cl::Errors in a uniform way
|
||||
* @param msg text prepending the error message
|
||||
* @param clerr cl:Error object
|
||||
*
|
||||
* Prints errors in the format:
|
||||
* msg: what(), string err() (numeric err())
|
||||
*/
|
||||
static std::string ethCLErrorHelper(const char* msg, cl::Error const& clerr)
|
||||
{
|
||||
std::ostringstream osstream;
|
||||
osstream << msg << ": " << clerr.what() << ": " << strClError(clerr.err()) << " ("
|
||||
<< clerr.err() << ")";
|
||||
return osstream.str();
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
/**
 * Prepends the line "#define <_id> <_value>u\n" to a kernel source string.
 *
 * The line is assembled in a std::string, so an arbitrarily long identifier
 * cannot overrun a fixed-size stack buffer.
 *
 * @param _source  source text, modified in place
 * @param _id      macro name
 * @param _value   macro value, emitted in decimal with a 'u' suffix
 */
void addDefinition(std::string& _source, char const* _id, unsigned _value)
{
    std::string line("#define ");
    line += _id;
    line += ' ';
    line += std::to_string(_value);
    line += "u\n";
    _source.insert(0, line);
}
|
||||
|
||||
std::vector<cl::Platform> getPlatforms()
|
||||
{
|
||||
vector<cl::Platform> platforms;
|
||||
try
|
||||
{
|
||||
cl::Platform::get(&platforms);
|
||||
}
|
||||
catch (cl::Error const& err)
|
||||
{
|
||||
#if defined(CL_PLATFORM_NOT_FOUND_KHR)
|
||||
if (err.err() == CL_PLATFORM_NOT_FOUND_KHR)
|
||||
std::cerr << "No OpenCL platforms found" << std::endl;
|
||||
else
|
||||
#endif
|
||||
std::cerr << "OpenCL error : " << err.what();
|
||||
}
|
||||
return platforms;
|
||||
}
|
||||
|
||||
std::vector<cl::Device> getDevices(
|
||||
std::vector<cl::Platform> const& _platforms, unsigned _platformId)
|
||||
{
|
||||
vector<cl::Device> devices;
|
||||
size_t platform_num = min<size_t>(_platformId, _platforms.size() - 1);
|
||||
try
|
||||
{
|
||||
_platforms[platform_num].getDevices(
|
||||
CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR, &devices);
|
||||
}
|
||||
catch (cl::Error const& err)
|
||||
{
|
||||
// if simply no devices found return empty vector
|
||||
if (err.err() != CL_DEVICE_NOT_FOUND)
|
||||
throw err;
|
||||
}
|
||||
return devices;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace eth
|
||||
} // namespace dev
|
||||
|
||||
/**
 * Creates an OpenCL miner instance.
 *
 * @param _index     miner index, forwarded to the Miner base with the "cl-" prefix
 * @param _settings  OpenCL settings; a copy is stored and then normalised
 * @param _device    descriptor of the device to mine on; a copy is stored
 */
CLMiner::CLMiner(unsigned _index, CLSettings _settings, DeviceDescriptor& _device)
  : Miner("cl-", _index), m_settings(_settings)
{
    m_deviceDescriptor = _device;

    // Round the local work size up to the next multiple of 8, then derive the
    // global work size from the rounded value.
    auto& localSize = m_settings.localWorkSize;
    localSize = ((localSize + 7) / 8) * 8;
    m_settings.globalWorkSize = localSize * m_settings.globalWorkSizeMultiplier;
}
|
||||
|
||||
/// Shuts the miner down: stopWorking() first, then kick_miner().
CLMiner::~CLMiner()
{
    // The call order is kept exactly as it was.
    // NOTE(review): stopWorking() is defined outside this file - confirm the
    // ordering relative to the kick is intentional.
    this->stopWorking();
    this->kick_miner();
}
|
||||
|
||||
// NOTE: The following struct must match the one defined in
|
||||
// ethash.cl
|
||||
struct SearchResults
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t gid;
|
||||
// Can't use h256 data type here since h256 contains
|
||||
// more than raw data. Kernel returns raw mix hash.
|
||||
uint32_t mix[8];
|
||||
uint32_t pad[7]; // pad to 16 words for easy indexing
|
||||
} rslt[c_maxSearchResults];
|
||||
uint32_t count;
|
||||
uint32_t hashCount;
|
||||
uint32_t abort;
|
||||
};
|
||||
|
||||
void CLMiner::workLoop()
|
||||
{
|
||||
// Memory for zero-ing buffers. Cannot be static or const because crashes on macOS.
|
||||
static uint32_t zerox3[3] = {0, 0, 0};
|
||||
|
||||
uint64_t startNonce = 0;
|
||||
|
||||
// The work package currently processed by GPU.
|
||||
WorkPackage current;
|
||||
current.header = h256();
|
||||
uint64_t old_period_seed = -1;
|
||||
int old_epoch = -1;
|
||||
|
||||
if (!initDevice())
|
||||
return;
|
||||
|
||||
try
|
||||
{
|
||||
// Read results.
|
||||
SearchResults results;
|
||||
|
||||
// zero the result count
|
||||
m_queue.enqueueWriteBuffer(
|
||||
m_searchBuffer, CL_TRUE, offsetof(SearchResults, count), sizeof(zerox3), zerox3);
|
||||
|
||||
while (!shouldStop())
|
||||
{
|
||||
// no need to read the abort flag.
|
||||
m_queue.enqueueReadBuffer(m_searchBuffer, CL_TRUE, offsetof(SearchResults, count),
|
||||
2 * sizeof(results.count), (void*)&results.count);
|
||||
if (results.count)
|
||||
{
|
||||
m_queue.enqueueReadBuffer(m_searchBuffer, CL_TRUE, 0,
|
||||
results.count * sizeof(results.rslt[0]), (void*)&results);
|
||||
}
|
||||
// clean the solution count, hash count, and abort flag
|
||||
m_queue.enqueueWriteBuffer(
|
||||
m_searchBuffer, CL_FALSE, offsetof(SearchResults, count), sizeof(zerox3), zerox3);
|
||||
m_kickEnabled.store(true, std::memory_order_relaxed);
|
||||
|
||||
// Wait for work or 3 seconds (whichever the first)
|
||||
const WorkPackage next = work();
|
||||
if (!next)
|
||||
{
|
||||
boost::system_time const timeout =
|
||||
boost::get_system_time() + boost::posix_time::seconds(3);
|
||||
boost::mutex::scoped_lock l(x_work);
|
||||
m_new_work_signal.timed_wait(l, timeout);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (current.header != next.header)
|
||||
{
|
||||
uint64_t period_seed = next.block / PROGPOW_PERIOD;
|
||||
if (m_nextProgpowPeriod == 0)
|
||||
{
|
||||
m_nextProgpowPeriod = period_seed;
|
||||
// g_io_service.post(
|
||||
// m_progpow_io_strand.wrap(boost::bind(&CLMiner::asyncCompile, this)));
|
||||
// Use thread, don't want to block the io service
|
||||
m_compileThread = new boost::thread(boost::bind(&CLMiner::asyncCompile, this));
|
||||
}
|
||||
|
||||
if (old_period_seed != period_seed)
|
||||
{
|
||||
m_compileThread->join();
|
||||
// sanity check the next kernel
|
||||
if (period_seed != m_nextProgpowPeriod)
|
||||
{
|
||||
// This shouldn't happen!!! Try to recover
|
||||
m_nextProgpowPeriod = period_seed;
|
||||
m_compileThread =
|
||||
new boost::thread(boost::bind(&CLMiner::asyncCompile, this));
|
||||
m_compileThread->join();
|
||||
}
|
||||
m_program = m_nextProgram;
|
||||
m_searchKernel = m_nextSearchKernel;
|
||||
old_period_seed = period_seed;
|
||||
m_nextProgpowPeriod = period_seed + 1;
|
||||
cllog << "Loaded period " << period_seed << " progpow kernel";
|
||||
// g_io_service.post(
|
||||
// m_progpow_io_strand.wrap(boost::bind(&CLMiner::asyncCompile, this)));
|
||||
m_compileThread = new boost::thread(boost::bind(&CLMiner::asyncCompile, this));
|
||||
continue;
|
||||
}
|
||||
if (old_epoch != next.epoch)
|
||||
{
|
||||
if (!initEpoch())
|
||||
break; // This will simply exit the thread
|
||||
old_epoch = next.epoch;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Upper 64 bits of the boundary.
|
||||
const uint64_t target = (uint64_t)(u64)((u256)next.boundary >> 192);
|
||||
assert(target > 0);
|
||||
|
||||
startNonce = next.startNonce;
|
||||
|
||||
// Update header constant buffer.
|
||||
m_queue.enqueueWriteBuffer(m_header, CL_FALSE, 0, 32, next.header.data());
|
||||
|
||||
m_searchKernel.setArg(0, m_searchBuffer); // Supply output buffer to kernel.
|
||||
m_searchKernel.setArg(1, m_header); // Supply header buffer to kernel.
|
||||
m_searchKernel.setArg(2, *m_dag); // Supply DAG buffer to kernel.
|
||||
m_searchKernel.setArg(4, target);
|
||||
|
||||
#ifdef DEV_BUILD
|
||||
if (g_logOptions & LOG_SWITCH)
|
||||
cllog << "Switch time: "
|
||||
<< std::chrono::duration_cast<std::chrono::microseconds>(
|
||||
std::chrono::steady_clock::now() - m_workSwitchStart)
|
||||
.count()
|
||||
<< " us.";
|
||||
#endif
|
||||
}
|
||||
|
||||
// Run the kernel.
|
||||
m_searchKernel.setArg(3, startNonce);
|
||||
m_queue.enqueueNDRangeKernel(
|
||||
m_searchKernel, cl::NullRange, m_settings.globalWorkSize, m_settings.localWorkSize);
|
||||
|
||||
if (results.count)
|
||||
{
|
||||
// Report results while the kernel is running.
|
||||
for (uint32_t i = 0; i < results.count; i++)
|
||||
{
|
||||
uint64_t nonce = current.startNonce + results.rslt[i].gid;
|
||||
h256 mix;
|
||||
memcpy(mix.data(), (char*)results.rslt[i].mix, sizeof(results.rslt[i].mix));
|
||||
|
||||
Farm::f().submitProof(Solution{
|
||||
nonce, mix, current, std::chrono::steady_clock::now(), m_index});
|
||||
|
||||
cllog << EthWhite << "Job: " << current.header.abridged() << " Sol: 0x"
|
||||
<< toHex(nonce) << EthReset;
|
||||
}
|
||||
}
|
||||
|
||||
current = next; // kernel now processing newest work
|
||||
current.startNonce = startNonce;
|
||||
// Increase start nonce for following kernel execution.
|
||||
startNonce += m_settings.globalWorkSize;
|
||||
// Report hash count
|
||||
updateHashRate(m_settings.localWorkSize, results.hashCount);
|
||||
}
|
||||
|
||||
m_queue.finish();
|
||||
m_abortqueue.finish();
|
||||
}
|
||||
catch (cl::Error const& _e)
|
||||
{
|
||||
string _what = ethCLErrorHelper("OpenCL Error", _e);
|
||||
throw std::runtime_error(_what);
|
||||
}
|
||||
}
|
||||
|
||||
void CLMiner::kick_miner()
|
||||
{
|
||||
// Memory for abort Cannot be static because crashes on macOS.
|
||||
bool f = true;
|
||||
if (m_kickEnabled.compare_exchange_weak(f, false, std::memory_order_relaxed))
|
||||
{
|
||||
static const uint32_t one = 1;
|
||||
m_abortqueue.enqueueWriteBuffer(
|
||||
m_searchBuffer, CL_TRUE, offsetof(SearchResults, abort), sizeof(one), &one);
|
||||
}
|
||||
m_new_work_signal.notify_one();
|
||||
}
|
||||
|
||||
void CLMiner::enumDevices(std::map<string, DeviceDescriptor>& _DevicesCollection)
|
||||
{
|
||||
// Load available platforms
|
||||
vector<cl::Platform> platforms = getPlatforms();
|
||||
if (platforms.empty())
|
||||
return;
|
||||
|
||||
unsigned int dIdx = 0;
|
||||
for (unsigned int pIdx = 0; pIdx < platforms.size(); pIdx++)
|
||||
{
|
||||
std::string platformName = platforms.at(pIdx).getInfo<CL_PLATFORM_NAME>();
|
||||
ClPlatformTypeEnum platformType = ClPlatformTypeEnum::Unknown;
|
||||
if (platformName == "AMD Accelerated Parallel Processing")
|
||||
platformType = ClPlatformTypeEnum::Amd;
|
||||
else if (platformName == "Clover")
|
||||
platformType = ClPlatformTypeEnum::Clover;
|
||||
else if (platformName == "NVIDIA CUDA")
|
||||
platformType = ClPlatformTypeEnum::Nvidia;
|
||||
else
|
||||
{
|
||||
std::cerr << "Unrecognized platform " << platformName << std::endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
std::string platformVersion = platforms.at(pIdx).getInfo<CL_PLATFORM_VERSION>();
|
||||
unsigned int platformVersionMajor = std::stoi(platformVersion.substr(7, 1));
|
||||
unsigned int platformVersionMinor = std::stoi(platformVersion.substr(9, 1));
|
||||
|
||||
dIdx = 0;
|
||||
vector<cl::Device> devices = getDevices(platforms, pIdx);
|
||||
for (auto const& device : devices)
|
||||
{
|
||||
DeviceTypeEnum clDeviceType = DeviceTypeEnum::Unknown;
|
||||
cl_device_type detectedType = device.getInfo<CL_DEVICE_TYPE>();
|
||||
if (detectedType == CL_DEVICE_TYPE_GPU)
|
||||
clDeviceType = DeviceTypeEnum::Gpu;
|
||||
else if (detectedType == CL_DEVICE_TYPE_CPU)
|
||||
clDeviceType = DeviceTypeEnum::Cpu;
|
||||
else if (detectedType == CL_DEVICE_TYPE_ACCELERATOR)
|
||||
clDeviceType = DeviceTypeEnum::Accelerator;
|
||||
|
||||
string uniqueId;
|
||||
DeviceDescriptor deviceDescriptor;
|
||||
|
||||
if (clDeviceType == DeviceTypeEnum::Gpu && platformType == ClPlatformTypeEnum::Nvidia)
|
||||
{
|
||||
cl_int bus_id, slot_id;
|
||||
if (clGetDeviceInfo(device.get(), 0x4008, sizeof(bus_id), &bus_id, NULL) ==
|
||||
CL_SUCCESS &&
|
||||
clGetDeviceInfo(device.get(), 0x4009, sizeof(slot_id), &slot_id, NULL) ==
|
||||
CL_SUCCESS)
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << setfill('0') << setw(2) << hex << bus_id << ":" << setw(2)
|
||||
<< (unsigned int)(slot_id >> 3) << "." << (unsigned int)(slot_id & 0x7);
|
||||
uniqueId = s.str();
|
||||
}
|
||||
}
|
||||
else if (clDeviceType == DeviceTypeEnum::Gpu &&
|
||||
(platformType == ClPlatformTypeEnum::Amd ||
|
||||
platformType == ClPlatformTypeEnum::Clover))
|
||||
{
|
||||
cl_char t[24];
|
||||
if (clGetDeviceInfo(device.get(), 0x4037, sizeof(t), &t, NULL) == CL_SUCCESS)
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << setfill('0') << setw(2) << hex << (unsigned int)(t[21]) << ":" << setw(2)
|
||||
<< (unsigned int)(t[22]) << "." << (unsigned int)(t[23]);
|
||||
uniqueId = s.str();
|
||||
}
|
||||
}
|
||||
else if (clDeviceType == DeviceTypeEnum::Cpu)
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << "CPU:" << setfill('0') << setw(2) << hex << (pIdx + dIdx);
|
||||
uniqueId = s.str();
|
||||
}
|
||||
else
|
||||
{
|
||||
// We're not prepared (yet) to handle other platforms or types
|
||||
++dIdx;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (_DevicesCollection.find(uniqueId) != _DevicesCollection.end())
|
||||
deviceDescriptor = _DevicesCollection[uniqueId];
|
||||
else
|
||||
deviceDescriptor = DeviceDescriptor();
|
||||
|
||||
// Fill the blanks by OpenCL means
|
||||
deviceDescriptor.name = device.getInfo<CL_DEVICE_NAME>();
|
||||
deviceDescriptor.type = clDeviceType;
|
||||
deviceDescriptor.uniqueId = uniqueId;
|
||||
deviceDescriptor.clDetected = true;
|
||||
deviceDescriptor.clPlatformId = pIdx;
|
||||
deviceDescriptor.clPlatformName = platformName;
|
||||
deviceDescriptor.clPlatformType = platformType;
|
||||
deviceDescriptor.clPlatformVersion = platformVersion;
|
||||
deviceDescriptor.clPlatformVersionMajor = platformVersionMajor;
|
||||
deviceDescriptor.clPlatformVersionMinor = platformVersionMinor;
|
||||
deviceDescriptor.clDeviceOrdinal = dIdx;
|
||||
|
||||
deviceDescriptor.clName = deviceDescriptor.name;
|
||||
deviceDescriptor.clDeviceVersion = device.getInfo<CL_DEVICE_VERSION>();
|
||||
deviceDescriptor.clDeviceVersionMajor =
|
||||
std::stoi(deviceDescriptor.clDeviceVersion.substr(7, 1));
|
||||
deviceDescriptor.clDeviceVersionMinor =
|
||||
std::stoi(deviceDescriptor.clDeviceVersion.substr(9, 1));
|
||||
deviceDescriptor.totalMemory = device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>();
|
||||
deviceDescriptor.clMaxMemAlloc = device.getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>();
|
||||
deviceDescriptor.clMaxWorkGroup = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
|
||||
deviceDescriptor.clMaxComputeUnits = device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
|
||||
|
||||
// Apparently some 36 CU devices return a bogus 14!!!
|
||||
deviceDescriptor.clMaxComputeUnits =
|
||||
deviceDescriptor.clMaxComputeUnits == 14 ? 36 : deviceDescriptor.clMaxComputeUnits;
|
||||
|
||||
// Is it an NVIDIA card ?
|
||||
if (platformType == ClPlatformTypeEnum::Nvidia)
|
||||
{
|
||||
size_t siz;
|
||||
clGetDeviceInfo(device.get(), CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,
|
||||
sizeof(deviceDescriptor.clNvComputeMajor), &deviceDescriptor.clNvComputeMajor,
|
||||
&siz);
|
||||
clGetDeviceInfo(device.get(), CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV,
|
||||
sizeof(deviceDescriptor.clNvComputeMinor), &deviceDescriptor.clNvComputeMinor,
|
||||
&siz);
|
||||
deviceDescriptor.clNvCompute = to_string(deviceDescriptor.clNvComputeMajor) + "." +
|
||||
to_string(deviceDescriptor.clNvComputeMinor);
|
||||
}
|
||||
|
||||
// Upsert Devices Collection
|
||||
_DevicesCollection[uniqueId] = deviceDescriptor;
|
||||
++dIdx;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
bool CLMiner::initDevice()
|
||||
{
|
||||
|
||||
// LookUp device
|
||||
// Load available platforms
|
||||
vector<cl::Platform> platforms = getPlatforms();
|
||||
if (platforms.empty())
|
||||
return false;
|
||||
|
||||
vector<cl::Device> devices = getDevices(platforms, m_deviceDescriptor.clPlatformId);
|
||||
if (devices.empty())
|
||||
return false;
|
||||
|
||||
m_device = devices.at(m_deviceDescriptor.clDeviceOrdinal);
|
||||
|
||||
// create context
|
||||
m_context = cl::Context(m_device);
|
||||
m_queue = cl::CommandQueue(m_context, m_device);
|
||||
m_abortqueue = cl::CommandQueue(m_context, m_device);
|
||||
|
||||
ETHCL_LOG("Creating buffers");
|
||||
// create buffer for header
|
||||
m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32);
|
||||
|
||||
// create mining buffers
|
||||
m_searchBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, sizeof(SearchResults));
|
||||
|
||||
// Set Hardware Monitor Info
|
||||
if (m_deviceDescriptor.clPlatformType == ClPlatformTypeEnum::Nvidia)
|
||||
{
|
||||
m_hwmoninfo.deviceType = HwMonitorInfoType::NVIDIA;
|
||||
m_hwmoninfo.devicePciId = m_deviceDescriptor.uniqueId;
|
||||
m_hwmoninfo.deviceIndex = -1; // Will be later on mapped by nvml (see Farm() constructor)
|
||||
}
|
||||
else if (m_deviceDescriptor.clPlatformType == ClPlatformTypeEnum::Amd)
|
||||
{
|
||||
m_hwmoninfo.deviceType = HwMonitorInfoType::AMD;
|
||||
m_hwmoninfo.devicePciId = m_deviceDescriptor.uniqueId;
|
||||
m_hwmoninfo.deviceIndex = -1; // Will be later on mapped by nvml (see Farm() constructor)
|
||||
}
|
||||
else if (m_deviceDescriptor.clPlatformType == ClPlatformTypeEnum::Clover)
|
||||
{
|
||||
m_hwmoninfo.deviceType = HwMonitorInfoType::UNKNOWN;
|
||||
m_hwmoninfo.devicePciId = m_deviceDescriptor.uniqueId;
|
||||
m_hwmoninfo.deviceIndex = -1; // Will be later on mapped by nvml (see Farm() constructor)
|
||||
}
|
||||
else
|
||||
{
|
||||
// Don't know what to do with this
|
||||
cllog << "Unrecognized Platform";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_deviceDescriptor.clPlatformVersionMajor == 1 &&
|
||||
(m_deviceDescriptor.clPlatformVersionMinor == 0 ||
|
||||
m_deviceDescriptor.clPlatformVersionMinor == 1))
|
||||
{
|
||||
if (m_deviceDescriptor.clPlatformType == ClPlatformTypeEnum::Clover)
|
||||
{
|
||||
cllog
|
||||
<< "OpenCL " << m_deviceDescriptor.clPlatformVersion
|
||||
<< " not supported, but platform Clover might work nevertheless. USE AT OWN RISK!";
|
||||
}
|
||||
else
|
||||
{
|
||||
cllog << "OpenCL " << m_deviceDescriptor.clPlatformVersion
|
||||
<< " not supported. Minimum required version is 1.2";
|
||||
throw new std::runtime_error("OpenCL 1.2 required");
|
||||
}
|
||||
}
|
||||
|
||||
ostringstream s;
|
||||
s << "Using PciId : " << m_deviceDescriptor.uniqueId << " " << m_deviceDescriptor.clName;
|
||||
|
||||
if (!m_deviceDescriptor.clNvCompute.empty())
|
||||
s << " (Compute " + m_deviceDescriptor.clNvCompute + ")";
|
||||
else
|
||||
s << " " << m_deviceDescriptor.clDeviceVersion;
|
||||
|
||||
s << " Memory : " << dev::getFormattedMemory((double)m_deviceDescriptor.totalMemory);
|
||||
cllog << s.str();
|
||||
|
||||
if ((m_deviceDescriptor.clPlatformType == ClPlatformTypeEnum::Amd) &&
|
||||
(m_deviceDescriptor.clMaxComputeUnits != 36))
|
||||
{
|
||||
m_settings.globalWorkSize =
|
||||
(m_settings.globalWorkSize * m_deviceDescriptor.clMaxComputeUnits) / 36;
|
||||
// make sure that global work size is evenly divisible by the local workgroup size
|
||||
if (m_settings.globalWorkSize % m_settings.localWorkSize != 0)
|
||||
m_settings.globalWorkSize =
|
||||
((m_settings.globalWorkSize / m_settings.localWorkSize) + 1) *
|
||||
m_settings.localWorkSize;
|
||||
cnote << "Adjusting CL work multiplier for " << m_deviceDescriptor.clMaxComputeUnits
|
||||
<< " CUs. Adjusted work multiplier: "
|
||||
<< m_settings.globalWorkSize / m_settings.localWorkSize;
|
||||
}
|
||||
|
||||
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
bool CLMiner::initEpoch_internal()
|
||||
{
|
||||
auto startInit = std::chrono::steady_clock::now();
|
||||
size_t RequiredMemory = (m_epochContext.dagSize + m_epochContext.lightSize);
|
||||
|
||||
// Release the pause flag if any
|
||||
resume(MinerPauseEnum::PauseDueToInsufficientMemory);
|
||||
resume(MinerPauseEnum::PauseDueToInitEpochError);
|
||||
|
||||
// Check whether the current device has sufficient memory every time we recreate the dag
|
||||
if (m_deviceDescriptor.totalMemory < RequiredMemory)
|
||||
{
|
||||
cllog << "Epoch " << m_epochContext.epochNumber << " requires "
|
||||
<< dev::getFormattedMemory((double)RequiredMemory) << " memory. Only "
|
||||
<< dev::getFormattedMemory((double)m_deviceDescriptor.totalMemory)
|
||||
<< " available on device.";
|
||||
pause(MinerPauseEnum::PauseDueToInsufficientMemory);
|
||||
return true; // This will prevent to exit the thread and
|
||||
// Eventually resume mining when changing coin or epoch (NiceHash)
|
||||
}
|
||||
|
||||
cllog << "Generating DAG + Light : " << dev::getFormattedMemory((double)RequiredMemory);
|
||||
|
||||
try
|
||||
{
|
||||
char options[256] = {0};
|
||||
#ifndef __clang__
|
||||
|
||||
// Nvidia
|
||||
if (!m_deviceDescriptor.clNvCompute.empty())
|
||||
{
|
||||
m_computeCapability =
|
||||
m_deviceDescriptor.clNvComputeMajor * 10 + m_deviceDescriptor.clNvComputeMinor;
|
||||
int maxregs = m_computeCapability >= 35 ? 72 : 63;
|
||||
sprintf(m_options, "-cl-nv-maxrregcount=%d", maxregs);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
m_dagItems = m_epochContext.dagNumItems;
|
||||
|
||||
cl::Program binaryProgram;
|
||||
|
||||
std::string device_name = m_deviceDescriptor.clName;
|
||||
|
||||
/* If we have a binary kernel, we load it in tandem with the opencl,
|
||||
that way, we can use the dag generate opencl code and fall back on
|
||||
the default kernel if loading fails for whatever reason */
|
||||
bool loadedBinary = false;
|
||||
|
||||
m_settings.noBinary = true;
|
||||
if (!m_settings.noBinary)
|
||||
{
|
||||
std::ifstream kernel_file;
|
||||
vector<unsigned char> bin_data;
|
||||
std::stringstream fname_strm;
|
||||
|
||||
/* Open kernels/ethash_{devicename}_lws{local_work_size}.bin */
|
||||
std::transform(device_name.begin(), device_name.end(), device_name.begin(), ::tolower);
|
||||
fname_strm << boost::dll::program_location().parent_path().string()
|
||||
<< "/kernels/progpow_" << device_name << "_lws" << m_settings.localWorkSize
|
||||
<< ".bin";
|
||||
cllog << "Loading binary kernel " << fname_strm.str();
|
||||
try
|
||||
{
|
||||
kernel_file.open(fname_strm.str(), ios::in | ios::binary);
|
||||
|
||||
if (kernel_file.good())
|
||||
{
|
||||
/* Load the data vector with file data */
|
||||
kernel_file.unsetf(std::ios::skipws);
|
||||
bin_data.insert(bin_data.begin(),
|
||||
std::istream_iterator<unsigned char>(kernel_file),
|
||||
std::istream_iterator<unsigned char>());
|
||||
|
||||
/* Setup the program */
|
||||
cl::Program::Binaries blobs({bin_data});
|
||||
cl::Program program(m_context, {m_device}, blobs);
|
||||
try
|
||||
{
|
||||
program.build({m_device}, options);
|
||||
cllog << "Build info success:"
|
||||
<< program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(m_device);
|
||||
binaryProgram = program;
|
||||
loadedBinary = true;
|
||||
}
|
||||
catch (cl::Error const&)
|
||||
{
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
if (!loadedBinary)
|
||||
{
|
||||
cwarn << "Failed to load binary kernel: " << fname_strm.str();
|
||||
cwarn << "Falling back to OpenCL kernel...";
|
||||
}
|
||||
}
|
||||
|
||||
// create buffer for dag
|
||||
try
|
||||
{
|
||||
cllog << "Creating light cache buffer, size: "
|
||||
<< dev::getFormattedMemory((double)m_epochContext.lightSize);
|
||||
if (m_light)
|
||||
delete m_light;
|
||||
m_light = new cl::Buffer(m_context, CL_MEM_READ_ONLY, m_epochContext.lightSize);
|
||||
cllog << "Creating DAG buffer, size: "
|
||||
<< dev::getFormattedMemory((double)m_epochContext.dagSize)
|
||||
<< ", free: "
|
||||
<< dev::getFormattedMemory(
|
||||
(double)(m_deviceDescriptor.totalMemory - RequiredMemory));
|
||||
if (m_dag)
|
||||
delete m_dag;
|
||||
m_dag = new cl::Buffer(m_context, CL_MEM_READ_ONLY, m_epochContext.dagSize);
|
||||
cllog << "Loading kernels";
|
||||
|
||||
m_dagKernel = cl::Kernel(m_program, "ethash_calculate_dag_item");
|
||||
|
||||
cllog << "Writing light cache buffer";
|
||||
m_queue.enqueueWriteBuffer(
|
||||
*m_light, CL_TRUE, 0, m_epochContext.lightSize, m_epochContext.lightCache);
|
||||
}
|
||||
catch (cl::Error const& err)
|
||||
{
|
||||
cwarn << ethCLErrorHelper("Creating DAG buffer failed", err);
|
||||
pause(MinerPauseEnum::PauseDueToInitEpochError);
|
||||
return true;
|
||||
}
|
||||
// GPU DAG buffer to kernel
|
||||
m_searchKernel.setArg(2, *m_dag);
|
||||
|
||||
m_dagKernel.setArg(1, *m_light);
|
||||
m_dagKernel.setArg(2, *m_dag);
|
||||
m_dagKernel.setArg(3, -1);
|
||||
|
||||
const uint32_t workItems = m_dagItems * 2; // GPU computes partial 512-bit DAG items.
|
||||
|
||||
uint32_t start;
|
||||
const uint32_t chunk = 10000 * m_settings.localWorkSize;
|
||||
for (start = 0; start <= workItems - chunk; start += chunk)
|
||||
{
|
||||
m_dagKernel.setArg(0, start);
|
||||
m_queue.enqueueNDRangeKernel(
|
||||
m_dagKernel, cl::NullRange, chunk, m_settings.localWorkSize);
|
||||
m_queue.finish();
|
||||
}
|
||||
if (start < workItems)
|
||||
{
|
||||
uint32_t groupsLeft = workItems - start;
|
||||
groupsLeft = (groupsLeft + m_settings.localWorkSize - 1) / m_settings.localWorkSize;
|
||||
m_dagKernel.setArg(0, start);
|
||||
m_queue.enqueueNDRangeKernel(m_dagKernel, cl::NullRange,
|
||||
groupsLeft * m_settings.localWorkSize, m_settings.localWorkSize);
|
||||
m_queue.finish();
|
||||
}
|
||||
|
||||
auto dagTime = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - startInit);
|
||||
cllog << dev::getFormattedMemory((double)m_epochContext.dagSize)
|
||||
<< " of DAG data generated in "
|
||||
<< dagTime.count() << " ms.";
|
||||
}
|
||||
catch (cl::Error const& err)
|
||||
{
|
||||
cllog << ethCLErrorHelper("OpenCL init failed", err);
|
||||
pause(MinerPauseEnum::PauseDueToInitEpochError);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void CLMiner::asyncCompile()
|
||||
{
|
||||
auto saveName = getThreadName();
|
||||
setThreadName(name().c_str());
|
||||
if (!dropThreadPriority())
|
||||
cllog << "Unable to lower compiler priority.";
|
||||
|
||||
compileKernel(m_nextProgpowPeriod, m_nextProgram, m_nextSearchKernel);
|
||||
|
||||
setThreadName(saveName.c_str());
|
||||
}
|
||||
|
||||
void CLMiner::compileKernel(uint64_t period_seed, cl::Program& program, cl::Kernel& searchKernel)
|
||||
{
|
||||
std::string code = ProgPow::getKern(period_seed, ProgPow::KERNEL_CL);
|
||||
code += string(CLMiner_kernel);
|
||||
|
||||
addDefinition(code, "GROUP_SIZE", m_settings.localWorkSize);
|
||||
addDefinition(code, "ACCESSES", 64);
|
||||
addDefinition(code, "LIGHT_WORDS", m_epochContext.lightNumItems);
|
||||
addDefinition(code, "PROGPOW_DAG_BYTES", m_epochContext.dagSize);
|
||||
addDefinition(code, "PROGPOW_DAG_ELEMENTS", m_epochContext.dagNumItems / 2);
|
||||
|
||||
addDefinition(code, "MAX_OUTPUTS", c_maxSearchResults);
|
||||
int platform = 0;
|
||||
switch (m_deviceDescriptor.clPlatformType) {
|
||||
case ClPlatformTypeEnum::Nvidia:
|
||||
platform = 1;
|
||||
break;
|
||||
case ClPlatformTypeEnum::Amd:
|
||||
platform = 2;
|
||||
break;
|
||||
case ClPlatformTypeEnum::Clover:
|
||||
platform = 3;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
addDefinition(code, "PLATFORM", platform);
|
||||
addDefinition(code, "COMPUTE", m_computeCapability);
|
||||
|
||||
if (m_deviceDescriptor.clPlatformType == ClPlatformTypeEnum::Clover)
|
||||
addDefinition(code, "LEGACY", 1);
|
||||
|
||||
#ifdef DEV_BUILD
|
||||
std::string tmpDir;
|
||||
#ifdef _WIN32
|
||||
tmpDir = getenv("TEMP");
|
||||
#else
|
||||
tmpDir = "/tmp";
|
||||
#endif
|
||||
tmpDir.append("/kernel.");
|
||||
tmpDir.append(std::to_string(Index()));
|
||||
tmpDir.append(".");
|
||||
tmpDir.append(std::to_string(period_seed));
|
||||
tmpDir.append(".cl");
|
||||
cllog << "Dumping " << tmpDir;
|
||||
ofstream write;
|
||||
write.open(tmpDir);
|
||||
write << code;
|
||||
write.close();
|
||||
#endif
|
||||
|
||||
// create miner OpenCL program
|
||||
cl::Program::Sources sources{code.data()};
|
||||
program = cl::Program(m_context, sources);
|
||||
try
|
||||
{
|
||||
program.build({m_device}, m_options);
|
||||
}
|
||||
catch (cl::BuildError const& buildErr)
|
||||
{
|
||||
cwarn << "OpenCL kernel build log:\n"
|
||||
<< program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(m_device);
|
||||
cwarn << "OpenCL kernel build error (" << buildErr.err() << "):\n" << buildErr.what();
|
||||
pause(MinerPauseEnum::PauseDueToInitEpochError);
|
||||
return;
|
||||
}
|
||||
searchKernel = cl::Kernel(program, "ethash_search");
|
||||
|
||||
searchKernel.setArg(1, m_header);
|
||||
searchKernel.setArg(5, 0);
|
||||
|
||||
cllog << "Pre-compiled period " << period_seed << " OpenCL ProgPow kernel";
|
||||
}
|
||||
93
zano/libethash-cl/CLMiner.h
Normal file
93
zano/libethash-cl/CLMiner.h
Normal file
@@ -0,0 +1,93 @@
|
||||
/// OpenCL miner implementation.
|
||||
///
|
||||
/// @file
|
||||
/// @copyright GNU General Public License
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include <libprogpow/ProgPow.h>
|
||||
#include <libdevcore/Worker.h>
|
||||
#include <libethcore/EthashAux.h>
|
||||
#include <libethcore/Miner.h>
|
||||
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
#include <boost/lexical_cast.hpp>
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#if __GNUC__ >= 6
|
||||
#pragma GCC diagnostic ignored "-Wignored-attributes"
|
||||
#endif
|
||||
#pragma GCC diagnostic ignored "-Wmissing-braces"
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS true
|
||||
#define CL_HPP_ENABLE_EXCEPTIONS true
|
||||
#define CL_HPP_CL_1_2_DEFAULT_BUILD true
|
||||
#define CL_HPP_TARGET_OPENCL_VERSION 120
|
||||
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
|
||||
#include "CL/cl2.hpp"
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
// macOS OpenCL fix:
|
||||
#ifndef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
|
||||
#endif
|
||||
|
||||
#ifndef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
|
||||
#endif
|
||||
|
||||
namespace dev
|
||||
{
|
||||
namespace eth
|
||||
{
|
||||
class CLMiner : public Miner
|
||||
{
|
||||
public:
|
||||
|
||||
CLMiner(unsigned _index, CLSettings _settings, DeviceDescriptor& _device);
|
||||
~CLMiner() override;
|
||||
|
||||
static void enumDevices(std::map<string, DeviceDescriptor>& _DevicesCollection);
|
||||
|
||||
protected:
|
||||
bool initDevice() override;
|
||||
|
||||
bool initEpoch_internal() override;
|
||||
|
||||
void kick_miner() override;
|
||||
|
||||
private:
|
||||
|
||||
void workLoop() override;
|
||||
void compileKernel(uint64_t prog_seed, cl::Program& program, cl::Kernel& searchKernel);
|
||||
void asyncCompile();
|
||||
|
||||
cl::Context m_context;
|
||||
cl::CommandQueue m_queue;
|
||||
cl::CommandQueue m_abortqueue;
|
||||
cl::Kernel m_searchKernel;
|
||||
cl::Kernel m_nextSearchKernel;
|
||||
cl::Kernel m_dagKernel;
|
||||
cl::Device m_device;
|
||||
cl::Buffer m_header;
|
||||
cl::Buffer m_searchBuffer;
|
||||
|
||||
cl::Buffer* m_dag = nullptr;
|
||||
cl::Buffer* m_light = nullptr;
|
||||
|
||||
CLSettings m_settings;
|
||||
|
||||
unsigned m_dagItems = 0;
|
||||
|
||||
cl::Program m_program;
|
||||
cl::Program m_nextProgram;
|
||||
char m_options[256] = {0};
|
||||
int m_computeCapability = 0;
|
||||
|
||||
atomic<bool> m_kickEnabled = {false};
|
||||
|
||||
};
|
||||
|
||||
} // namespace eth
|
||||
} // namespace dev
|
||||
535
zano/libethash-cl/CLMiner_kernel.cl
Normal file
535
zano/libethash-cl/CLMiner_kernel.cl
Normal file
@@ -0,0 +1,535 @@
|
||||
// Values the PLATFORM define can take.
#define OPENCL_PLATFORM_UNKNOWN 0
#define OPENCL_PLATFORM_NVIDIA 1
#define OPENCL_PLATFORM_AMD 2
#define OPENCL_PLATFORM_CLOVER 3

// Capacity of SearchResults::rslt when the host does not define it.
#if !defined(MAX_OUTPUTS)
#define MAX_OUTPUTS 63U
#endif

// Platform assumed when the host does not define one.
#if !defined(PLATFORM)
#define PLATFORM OPENCL_PLATFORM_AMD
#endif

#if defined(cl_clang_storage_class_specifiers)
#pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
#endif

// Number of hashes one work-group computes: each hash occupies PROGPOW_LANES
// work-items.
#define HASHES_PER_GROUP (GROUP_SIZE / PROGPOW_LANES)

// 32-byte value stored as eight 32-bit words.
typedef struct
{
    uint32_t uint32s[32 / sizeof(uint32_t)];
} hash32_t;
|
||||
|
||||
// Keccak-f[800] building blocks, based on:
// https://github.com/mjosaarinen/tiny_sha3/blob/master/sha3.c

// Round constants XORed into word 0 by the Iota step, indexed by round.
__constant const uint32_t keccakf_rndc[24] = {0x00000001, 0x00008082, 0x0000808a, 0x80008000,
    0x0000808b, 0x80000001, 0x80008081, 0x00008009, 0x0000008a, 0x00000088, 0x80008009, 0x8000000a,
    0x8000808b, 0x0000008b, 0x00008089, 0x00008003, 0x00008002, 0x00000080, 0x0000800a, 0x8000000a,
    0x80008081, 0x00008080, 0x80000001, 0x80008008};

// Applies round r (Theta, Rho/Pi, Chi, Iota) of the Keccak-f transformation
// with a width of 800 to the 25-word state st, in place.
void keccak_f800_round(uint32_t st[25], const int r)
{
    const uint32_t keccakf_rotc[24] = {
        1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44};
    const uint32_t keccakf_piln[24] = {
        10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1};

    uint32_t carry, bc[5];

    // Theta: column parities, then mix each column with its neighbours.
    for (int x = 0; x < 5; x++)
    {
        bc[x] = st[x] ^ st[x + 5] ^ st[x + 10] ^ st[x + 15] ^ st[x + 20];
    }
    for (int x = 0; x < 5; x++)
    {
        carry = bc[(x + 4) % 5] ^ ROTL32(bc[(x + 1) % 5], 1u);
        for (uint32_t row = 0; row < 25; row += 5)
        {
            st[row + x] ^= carry;
        }
    }

    // Rho Pi: walk the permutation, rotating the word carried from the
    // previous position into the next one.
    carry = st[1];
    for (int step = 0; step < 24; step++)
    {
        const uint32_t dest = keccakf_piln[step];
        const uint32_t displaced = st[dest];
        st[dest] = ROTL32(carry, keccakf_rotc[step]);
        carry = displaced;
    }

    // Chi: non-linear step, applied to each row from a snapshot of that row.
    for (uint32_t row = 0; row < 25; row += 5)
    {
        for (int x = 0; x < 5; x++)
        {
            bc[x] = st[row + x];
        }
        for (int x = 0; x < 5; x++)
        {
            st[row + x] ^= (~bc[(x + 1) % 5]) & bc[(x + 2) % 5];
        }
    }

    // Iota
    st[0] ^= keccakf_rndc[r];
}
|
||||
|
||||
// Keccak - implemented as a variant of SHAKE
// The width is 800, with a bitrate of 576, a capacity of 224, and no padding
// Only need 64 bits of output for mining
//
// State layout before the permutation: words 0-7 = header, word 8 = low half
// of seed, word 9 = high half of seed, words 10-17 = digest, words 18-24 = 0.
// Runs rounds 0..21 and returns state words 0 and 1 as a byte-swapped 64-bit
// value.
uint64_t keccak_f800(__constant hash32_t const* g_header, uint64_t seed, hash32_t digest)
{
    uint32_t st[25];

    for (int w = 0; w < 25; w++)
    {
        st[w] = 0;
    }
    for (int w = 0; w < 8; w++)
    {
        st[w] = g_header->uint32s[w];
        st[10 + w] = digest.uint32s[w];
    }
    st[8] = seed;
    st[9] = seed >> 32;

    for (int r = 0; r < 21; r++)
    {
        keccak_f800_round(st, r);
    }
    // last round can be simplified due to partial output
    keccak_f800_round(st, 21);

    // Byte swap so byte 0 of hash is MSB of result
    const uint64_t packed = (uint64_t)st[1] << 32 | st[0];
    return as_ulong(as_uchar8(packed).s76543210);
}
|
||||
|
||||
// FNV-1a step: XORs d into the running hash h, multiplies by the FNV prime
// 0x1000193, stores the result back into h (h must be an lvalue) and yields
// it. Both arguments are parenthesised so that expressions such as `a | b`
// are folded in as a whole.
#define fnv1a(h, d) ((h) = ((h) ^ (d)) * 0x1000193)
|
||||
|
||||
// State of the KISS99 generator: two multiply-with-carry words (z, w), a
// xorshift word (jsr) and a linear congruential word (jcong).
typedef struct
{
    uint32_t z, w, jsr, jcong;
} kiss99_t;

// KISS99 is simple, fast, and passes the TestU01 suite
// https://en.wikipedia.org/wiki/KISS_(algorithm)
// http://www.cse.yorku.ca/~oz/marsaglia-rng.html
//
// Advances all four state words of *st and returns the next 32-bit output.
uint32_t kiss99(kiss99_t* st)
{
    // Multiply-with-carry pair.
    const uint32_t next_z = 36969 * (st->z & 65535) + (st->z >> 16);
    const uint32_t next_w = 18000 * (st->w & 65535) + (st->w >> 16);
    st->z = next_z;
    st->w = next_w;
    const uint32_t mwc = (next_z << 16) + next_w;

    // Three-step xorshift.
    uint32_t shifted = st->jsr;
    shifted ^= (shifted << 17);
    shifted ^= (shifted >> 13);
    shifted ^= (shifted << 5);
    st->jsr = shifted;

    // Linear congruential step.
    const uint32_t cong = 69069 * st->jcong + 1234567;
    st->jcong = cong;

    return ((mwc ^ cong) + shifted);
}
|
||||
|
||||
// Fills mix[0..PROGPOW_REGS) for one lane.
// Use FNV to expand the per-warp seed to per-lane
// Use KISS to expand the per-lane seed to fill mix
void fill_mix(uint64_t seed, uint32_t lane_id, uint32_t mix[PROGPOW_REGS])
{
    uint32_t chain = 0x811c9dc5;
    kiss99_t rng;

    // Each step continues the same FNV chain, so the order below matters.
    rng.z = fnv1a(chain, seed);
    rng.w = fnv1a(chain, seed >> 32);
    rng.jsr = fnv1a(chain, lane_id);
    rng.jcong = fnv1a(chain, lane_id);

#pragma unroll
    for (int reg = 0; reg < PROGPOW_REGS; reg++)
    {
        mix[reg] = kiss99(&rng);
    }
}
|
||||
|
||||
// Per-hash scratch area placed in local memory by ethash_search: one 32-bit
// slot per lane plus 64-bit slots.
typedef struct
{
    uint32_t uint32s[PROGPOW_LANES];
    uint64_t uint64s[PROGPOW_LANES / 2];
} shuffle_t;

// Output buffer written by ethash_search.
// NOTE: This struct must match the one defined in CLMiner.cpp
struct SearchResults
{
    struct
    {
        uint gid;     // global work-item id that found the result
        uint mix[8];  // the eight digest words of that work-item
        uint pad[7];  // pad to 16 words for easy indexing
    } rslt[MAX_OUTPUTS];
    uint count;      // incremented once per result found
    uint hashCount;  // incremented once per work-group that ran
    uint abort;      // non-zero makes the search kernel return at entry
};
|
||||
|
||||
|
||||
// Search kernel: each work-item evaluates the nonce start_nonce + global id
// and records it in g_output when the final keccak value is <= target.
//
// g_output     result buffer; a non-zero abort field makes the kernel return
//              immediately, and finding a result increments abort.
// g_header     32-byte header that is hashed together with the nonce.
// g_dag        DAG buffer; its first PROGPOW_CACHE_WORDS words are copied to
//              local memory.
// start_nonce  nonce of global work-item 0.
// target       upper bound (inclusive) for an accepted result.
// hack_false   forwarded unchanged to progPowLoop.
//
// PROGPOW_LANES consecutive work-items cooperate on one hash at a time: in
// iteration h of the outer loop all lanes of a group work on the seed owned
// by lane h, and lane h keeps the resulting digest.
#if PLATFORM != OPENCL_PLATFORM_NVIDIA  // use maxrregs on nv
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
#endif
__kernel void
ethash_search(__global struct SearchResults* restrict g_output, __constant hash32_t const* g_header,
    __global dag_t const* g_dag, ulong start_nonce, ulong target, uint hack_false)
{
    if (g_output->abort)
        return;

    __local shuffle_t share[HASHES_PER_GROUP];
    __local uint32_t c_dag[PROGPOW_CACHE_WORDS];

    uint32_t const lid = get_local_id(0);
    uint32_t const gid = get_global_id(0);
    uint64_t const nonce = start_nonce + gid;

    const uint32_t lane_id = lid & (PROGPOW_LANES - 1);
    const uint32_t group_id = lid / PROGPOW_LANES;

    // Load the first portion of the DAG into the cache
    for (uint32_t word = lid * PROGPOW_DAG_LOADS; word < PROGPOW_CACHE_WORDS;
         word += GROUP_SIZE * PROGPOW_DAG_LOADS)
    {
        dag_t load = g_dag[word / PROGPOW_DAG_LOADS];
        for (int i = 0; i < PROGPOW_DAG_LOADS; i++)
            c_dag[word + i] = load.s[i];
    }

    hash32_t digest;
    for (int i = 0; i < 8; i++)
        digest.uint32s[i] = 0;
    // keccak(header..nonce)
    uint64_t seed = keccak_f800(g_header, nonce, digest);

    // Make the cached DAG words visible to the whole work-group.
    barrier(CLK_LOCAL_MEM_FENCE);

#pragma unroll 1
    for (uint32_t h = 0; h < PROGPOW_LANES; h++)
    {
        uint32_t mix[PROGPOW_REGS];

        // share the hash's seed across all lanes
        if (lane_id == h)
            share[group_id].uint64s[0] = seed;
        barrier(CLK_LOCAL_MEM_FENCE);
        uint64_t hash_seed = share[group_id].uint64s[0];

        // initialize mix for all lanes
        fill_mix(hash_seed, lane_id, mix);

#pragma unroll 1
        for (uint32_t l = 0; l < PROGPOW_CNT_DAG; l++)
            progPowLoop(l, mix, g_dag, c_dag, share[0].uint64s, hack_false);

        // Reduce mix data to a per-lane 32-bit digest
        uint32_t mix_hash = 0x811c9dc5;
#pragma unroll
        for (int i = 0; i < PROGPOW_REGS; i++)
            fnv1a(mix_hash, mix[i]);

        // Reduce all lanes to a single 256-bit digest
        hash32_t digest_temp;
        for (int i = 0; i < 8; i++)
            digest_temp.uint32s[i] = 0x811c9dc5;
        share[group_id].uint32s[lane_id] = mix_hash;
        barrier(CLK_LOCAL_MEM_FENCE);
#pragma unroll
        for (int i = 0; i < PROGPOW_LANES; i++)
            fnv1a(digest_temp.uint32s[i % 8], share[group_id].uint32s[i]);
        // Only the lane that owns this iteration's seed keeps the digest.
        if (h == lane_id)
            digest = digest_temp;
    }

    // One increment per work-group.
    if (lid == 0)
        atomic_inc(&g_output->hashCount);

    // keccak(header .. keccak(header..nonce) .. digest);
    if (keccak_f800(g_header, seed, digest) <= target)
    {
        // count keeps growing past MAX_OUTPUTS; only the first MAX_OUTPUTS
        // results are stored.
        uint slot = atomic_inc(&g_output->count);
        if (slot < MAX_OUTPUTS)
        {
            g_output->rslt[slot].gid = gid;
            for (int i = 0; i < 8; i++)
                g_output->rslt[slot].mix[i] = digest.uint32s[i];
        }
        atomic_inc(&g_output->abort);
    }
}
|
||||
|
||||
|
||||
//
|
||||
// DAG calculation logic
|
||||
//
|
||||
|
||||
|
||||
// Number of 64-byte light-cache nodes, used when the host does not define it.
#if !defined(LIGHT_WORDS)
#define LIGHT_WORDS 262139
#endif

// Number of light-cache parents mixed into each DAG node.
#define ETHASH_DATASET_PARENTS 256
// 32-bit words in one 64-byte node.
#define NODE_WORDS (64 / 4)

#define FNV_PRIME 0x01000193

// Keccak-f[1600] round constants, one uint2 per round; the second component
// carries the 0x80000000 bits of the 64-bit constant.
__constant uint2 const Keccak_f1600_RC[24] = {
    (uint2)(0x00000001, 0x00000000),
    (uint2)(0x00008082, 0x00000000),
    (uint2)(0x0000808a, 0x80000000),
    (uint2)(0x80008000, 0x80000000),
    (uint2)(0x0000808b, 0x00000000),
    (uint2)(0x80000001, 0x00000000),
    (uint2)(0x80008081, 0x80000000),
    (uint2)(0x00008009, 0x80000000),
    (uint2)(0x0000008a, 0x00000000),
    (uint2)(0x00000088, 0x00000000),
    (uint2)(0x80008009, 0x00000000),
    (uint2)(0x8000000a, 0x00000000),
    (uint2)(0x8000808b, 0x00000000),
    (uint2)(0x0000008b, 0x80000000),
    (uint2)(0x00008089, 0x80000000),
    (uint2)(0x00008003, 0x80000000),
    (uint2)(0x00008002, 0x80000000),
    (uint2)(0x00000080, 0x80000000),
    (uint2)(0x0000800a, 0x00000000),
    (uint2)(0x8000000a, 0x80000000),
    (uint2)(0x80008081, 0x80000000),
    (uint2)(0x00008080, 0x80000000),
    (uint2)(0x80000001, 0x00000000),
    (uint2)(0x80008008, 0x80000000),
};
|
||||
|
||||
// ROL2: rotates the 64-bit value held in a uint2 (x = low word, y = high
// word) left by the given bit count. One implementation is selected per
// platform.
#if PLATFORM == OPENCL_PLATFORM_NVIDIA && COMPUTE >= 35
// NVIDIA, compute capability >= 3.5: built from the PTX shf.l.wrap.b32
// instruction; the operand order is swapped for offsets of 32 and above.
static uint2 ROL2(const uint2 a, const int offset)
{
    uint2 result;
    if (offset >= 32)
    {
        asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.x), "r"(a.y), "r"(offset));
        asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.y), "r"(a.x), "r"(offset));
    }
    else
    {
        asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.y), "r"(a.x), "r"(offset));
        asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset));
    }
    return result;
}
#elif PLATFORM == OPENCL_PLATFORM_AMD
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
// AMD: built from amd_bitalign (cl_amd_media_ops).
static uint2 ROL2(const uint2 vv, const int r)
{
    if (r <= 32)
    {
        return amd_bitalign((vv).xy, (vv).yx, 32 - r);
    }
    else
    {
        return amd_bitalign((vv).yx, (vv).xy, 64 - r);
    }
}
#else
// Portable fallback built from plain shifts, valid for n in 0..63.
// n == 0 and n == 32 are handled separately: the general formulas would
// shift a 32-bit word by 32, and OpenCL C reduces the shift count modulo the
// word size, so both words would be ORed together instead of rotated.
static uint2 ROL2(const uint2 v, const int n)
{
    if (n == 0)
    {
        return v;
    }
    if (n == 32)
    {
        // Rotating by exactly one word swaps the two halves.
        return v.yx;
    }

    uint2 result;
    if (n < 32)
    {
        result.y = ((v.y << (n)) | (v.x >> (32 - n)));
        result.x = ((v.x << (n)) | (v.y >> (32 - n)));
    }
    else
    {
        result.y = ((v.x << (n - 32)) | (v.y >> (64 - n)));
        result.x = ((v.y << (n - 32)) | (v.x >> (64 - n)));
    }
    return result;
}
#endif
|
||||
|
||||
// Chi step for one row of five lanes: reads t[n..n+4] and writes a[n..n+4].
// Each output lane k is bitselect(t[k] ^ t[k+2], t[k], t[k+1]) with the
// indices taken modulo 5 inside the row.
static void chi(uint2* a, const uint n, const uint2* t)
{
    const uint2* in = t + n;
    uint2* out = a + n;

    out[0] = bitselect(in[0] ^ in[2], in[0], in[1]);
    out[1] = bitselect(in[1] ^ in[3], in[1], in[2]);
    out[2] = bitselect(in[2] ^ in[4], in[2], in[3]);
    out[3] = bitselect(in[3] ^ in[0], in[3], in[4]);
    out[4] = bitselect(in[4] ^ in[1], in[4], in[0]);
}
|
||||
|
||||
// Applies round r of Keccak-f[1600] to the 25-lane state a (one uint2 per
// 64-bit lane), in place. t is scratch: it first holds the five column
// parities and then the rotated/permuted lanes that Chi reads.
static void keccak_f1600_round(uint2* a, uint r)
{
    uint2 t[25];
    uint2 d;

    // Theta: column parities.
    t[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20];
    t[1] = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21];
    t[2] = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22];
    t[3] = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23];
    t[4] = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24];

    // Theta: fold the neighbouring parities into every lane of a column.
    d = t[4] ^ ROL2(t[1], 1);
    a[0] ^= d;
    a[5] ^= d;
    a[10] ^= d;
    a[15] ^= d;
    a[20] ^= d;

    d = t[0] ^ ROL2(t[2], 1);
    a[1] ^= d;
    a[6] ^= d;
    a[11] ^= d;
    a[16] ^= d;
    a[21] ^= d;

    d = t[1] ^ ROL2(t[3], 1);
    a[2] ^= d;
    a[7] ^= d;
    a[12] ^= d;
    a[17] ^= d;
    a[22] ^= d;

    d = t[2] ^ ROL2(t[4], 1);
    a[3] ^= d;
    a[8] ^= d;
    a[13] ^= d;
    a[18] ^= d;
    a[23] ^= d;

    d = t[3] ^ ROL2(t[0], 1);
    a[4] ^= d;
    a[9] ^= d;
    a[14] ^= d;
    a[19] ^= d;
    a[24] ^= d;

    // Rho Pi: rotate each lane and store it at its permuted position in t.
    t[0] = a[0];
    t[10] = ROL2(a[1], 1);
    t[20] = ROL2(a[2], 62);
    t[5] = ROL2(a[3], 28);
    t[15] = ROL2(a[4], 27);

    t[16] = ROL2(a[5], 36);
    t[1] = ROL2(a[6], 44);
    t[11] = ROL2(a[7], 6);
    t[21] = ROL2(a[8], 55);
    t[6] = ROL2(a[9], 20);

    t[7] = ROL2(a[10], 3);
    t[17] = ROL2(a[11], 10);
    t[2] = ROL2(a[12], 43);
    t[12] = ROL2(a[13], 25);
    t[22] = ROL2(a[14], 39);

    t[23] = ROL2(a[15], 41);
    t[8] = ROL2(a[16], 45);
    t[18] = ROL2(a[17], 15);
    t[3] = ROL2(a[18], 21);
    t[13] = ROL2(a[19], 8);

    t[14] = ROL2(a[20], 18);
    t[24] = ROL2(a[21], 2);
    t[9] = ROL2(a[22], 61);
    t[19] = ROL2(a[23], 56);
    t[4] = ROL2(a[24], 14);

    // Chi on the first row.
    chi(a, 0, t);

    // Iota: only lane 0 receives the round constant.
    a[0] ^= Keccak_f1600_RC[r];

    // Chi on the remaining rows.
    chi(a, 5, t);
    chi(a, 10, t);
    chi(a, 15, t);
    chi(a, 20, t);
}
|
||||
|
||||
// Runs the 24 rounds of Keccak-f[1600] over the state a, in place.
//
// out_size is not used by the current implementation.
// isolate must be non-zero: the round counter only advances inside the
// `if (isolate)` branch, so a zero value would never leave the loop.
static void keccak_f1600_no_absorb(uint2* a, uint out_size, uint isolate)
{
    // Originally I unrolled the first and last rounds to interface
    // better with surrounding code, however I haven't done this
    // without causing the AMD compiler to blow up the VGPR usage.

    for (uint r = 0; r < 24;)
    {
        // This dynamic branch stops the AMD compiler unrolling the loop
        // and additionally saves about 33% of the VGPRs, enough to gain another
        // wavefront. Ideally we'd get 4 in flight, but 3 is the best I can
        // massage out of the compiler. It doesn't really seem to matter how
        // much we try and help the compiler save VGPRs because it seems to throw
        // that information away, hence the implementation of keccak here
        // doesn't bother.
        if (isolate)
        {
            keccak_f1600_round(a, r++);
        }
    }
}
|
||||
|
||||
// Copies the first `count` elements of src to dst, element by element.
// Wrapped in do { } while (0) so that `copy(...);` is a single statement and
// can be used as the body of an if/else; `count` is parenthesised so that an
// expression argument is compared as a whole.
#define copy(dst, src, count)               \
    do                                      \
    {                                       \
        for (uint i = 0; i != (count); ++i) \
        {                                   \
            (dst)[i] = (src)[i];            \
        }                                   \
    } while (0)
|
||||
|
||||
// FNV mixing step: multiplies x by FNV_PRIME, then XORs y into the product.
static uint fnv(uint x, uint y)
{
    const uint scaled = x * FNV_PRIME;
    return scaled ^ y;
}

// Component-wise fnv() over four words at once.
static uint4 fnv4(uint4 x, uint4 y)
{
    const uint4 scaled = x * FNV_PRIME;
    return scaled ^ y;
}
|
||||
|
||||
// 64-byte node, addressable as 32-bit words, uint2 pairs or uint4 quads.
typedef union
{
    uint words[64 / sizeof(uint)];
    uint2 uint2s[64 / sizeof(uint2)];
    uint4 uint4s[64 / sizeof(uint4)];
} hash64_t;

// 200-byte Keccak-f[1600] state with the same three views. The uint4 view
// has 200 / 16 = 12 whole elements, i.e. it spans the first 192 bytes.
typedef union
{
    uint words[200 / sizeof(uint)];
    uint2 uint2s[200 / sizeof(uint2)];
    uint4 uint4s[200 / sizeof(uint4)];
} hash200_t;

// 128-byte value stored as uint4 quads.
typedef struct
{
    uint4 uint4s[128 / sizeof(uint4)];
} hash128_t;
|
||||
|
||||
// Hashes the 64 bytes held in lanes s[0..7], in place: lanes 8..24 are
// cleared, lane 8 is set to the padding value (x = 0x00000001,
// y = 0x80000000) and the state is permuted. isolate is forwarded to
// keccak_f1600_no_absorb.
static void SHA3_512(uint2* s, uint isolate)
{
    for (uint lane = 8; lane != 25; ++lane)
    {
        s[lane] = (uint2){0, 0};
    }

    // Padding lane.
    s[8].x = 0x00000001;
    s[8].y = 0x80000000;

    keccak_f1600_no_absorb(s, 8, isolate);
}
|
||||
|
||||
// Computes one 64-byte DAG node per work-item and stores it in g_dag.
//
// start    index of the node handled by global work-item 0.
// g_light  light cache, LIGHT_WORDS nodes of 64 bytes.
// g_dag    destination buffer; nodes at or beyond PROGPOW_DAG_BYTES are not
//          written.
// isolate  forwarded to SHA3_512.
__kernel void ethash_calculate_dag_item(
    uint start, __global hash64_t const* g_light, __global hash64_t* g_dag, uint isolate)
{
    uint const node_index = start + get_global_id(0);
    if (node_index * sizeof(hash64_t) >= PROGPOW_DAG_BYTES)
        return;

    // Seed the node from the light cache and hash it.
    hash200_t dag_node;
    for (uint q = 0; q != 4; ++q)
    {
        dag_node.uint4s[q] = g_light[node_index % LIGHT_WORDS].uint4s[q];
    }
    dag_node.words[0] ^= node_index;
    SHA3_512(dag_node.uint2s, isolate);

    // Mix in ETHASH_DATASET_PARENTS light-cache nodes chosen from the
    // current node contents.
    for (uint parent = 0; parent != ETHASH_DATASET_PARENTS; ++parent)
    {
        uint parent_index =
            fnv(node_index ^ parent, dag_node.words[parent % NODE_WORDS]) % LIGHT_WORDS;

        for (uint q = 0; q != 4; ++q)
        {
            dag_node.uint4s[q] = fnv4(dag_node.uint4s[q], g_light[parent_index].uint4s[q]);
        }
    }

    // Final hash, then write the first 64 bytes out as the DAG node.
    SHA3_512(dag_node.uint2s, isolate);
    for (uint q = 0; q != 4; ++q)
    {
        g_dag[node_index].uint4s[q] = dag_node.uint4s[q];
    }
}
|
||||
36
zano/libethash-cl/CMakeLists.txt
Normal file
36
zano/libethash-cl/CMakeLists.txt
Normal file
@@ -0,0 +1,36 @@
|
||||
# Generate CLMiner_kernel.h from CLMiner_kernel.cl with the txt2str.cmake
# script; the header defines the variable CLMiner_kernel. The command
# depends on the kernel source, so editing the .cl file regenerates the
# header and triggers a rebuild of libethash-cl.
add_custom_command(
    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel.h
    COMMAND ${CMAKE_COMMAND} ARGS
    -DTXT2STR_SOURCE_FILE="${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel.cl"
    -DTXT2STR_VARIABLE_NAME=CLMiner_kernel
    -DTXT2STR_HEADER_FILE="${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel.h"
    -P "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/txt2str.cmake"
    COMMENT "Generating OpenCL Kernel"
    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel.cl
)
add_custom_target(cl_kernel DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel.h ${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel.cl)

# Library sources; the generated header is listed so the library depends on it.
set(SOURCES
    CLMiner.h CLMiner.cpp
    ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel.h
)

if(APPLE)
    # On macOS use system OpenCL library.
    find_package(OpenCL REQUIRED)
else()
    # Elsewhere OpenCL comes from the Hunter package manager.
    hunter_add_package(OpenCL)
    find_package(OpenCL CONFIG REQUIRED)
endif()

# The binary dir holds the generated CLMiner_kernel.h; ".." is the parent of
# this directory.
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(..)

add_library(ethash-cl ${SOURCES})
target_link_libraries(ethash-cl PUBLIC ethcore ethash progpow)
target_link_libraries(ethash-cl PRIVATE OpenCL::OpenCL)
target_link_libraries(ethash-cl PRIVATE Boost::filesystem Boost::thread)
|
||||
674
zano/libethash-cl/kernels/LICENSE
Normal file
674
zano/libethash-cl/kernels/LICENSE
Normal file
@@ -0,0 +1,674 @@
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 3, 29 June 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU General Public License is a free, copyleft license for
|
||||
software and other kinds of works.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
the GNU General Public License is intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users. We, the Free Software Foundation, use the
|
||||
GNU General Public License for most of our software; it applies also to
|
||||
any other work released this way by its authors. You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to prevent others from denying you
|
||||
these rights or asking you to surrender the rights. Therefore, you have
|
||||
certain responsibilities if you distribute copies of the software, or if
|
||||
you modify it: responsibilities to respect the freedom of others.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must pass on to the recipients the same
|
||||
freedoms that you received. You must make sure that they, too, receive
|
||||
or can get the source code. And you must show them these terms so they
|
||||
know their rights.
|
||||
|
||||
Developers that use the GNU GPL protect your rights with two steps:
|
||||
(1) assert copyright on the software, and (2) offer you this License
|
||||
giving you legal permission to copy, distribute and/or modify it.
|
||||
|
||||
For the developers' and authors' protection, the GPL clearly explains
|
||||
that there is no warranty for this free software. For both users' and
|
||||
authors' sake, the GPL requires that modified versions be marked as
|
||||
changed, so that their problems will not be attributed erroneously to
|
||||
authors of previous versions.
|
||||
|
||||
Some devices are designed to deny users access to install or run
|
||||
modified versions of the software inside them, although the manufacturer
|
||||
can do so. This is fundamentally incompatible with the aim of
|
||||
protecting users' freedom to change the software. The systematic
|
||||
pattern of such abuse occurs in the area of products for individuals to
|
||||
use, which is precisely where it is most unacceptable. Therefore, we
|
||||
have designed this version of the GPL to prohibit the practice for those
|
||||
products. If such problems arise substantially in other domains, we
|
||||
stand ready to extend this provision to those domains in future versions
|
||||
of the GPL, as needed to protect the freedom of users.
|
||||
|
||||
Finally, every program is threatened constantly by software patents.
|
||||
States should not allow patents to restrict development and use of
|
||||
software on general-purpose computers, but in those that do, we wish to
|
||||
avoid the special danger that patents applied to a free program could
|
||||
make it effectively proprietary. To prevent this, the GPL assures that
|
||||
patents cannot be used to render the program non-free.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Use with the GNU Affero General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU Affero General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the special requirements of the GNU Affero General Public License,
|
||||
section 13, concerning interaction through a network will apply to the
|
||||
combination as such.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program does terminal interaction, make it output a short
|
||||
notice like this when it starts in an interactive mode:
|
||||
|
||||
<program> Copyright (C) <year> <name of author>
|
||||
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, your program's commands
|
||||
might be different; for a GUI interface, you would use an "about box".
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU GPL, see
|
||||
<http://www.gnu.org/licenses/>.
|
||||
|
||||
The GNU General Public License does not permit incorporating your program
|
||||
into proprietary programs. If your program is a subroutine library, you
|
||||
may consider it more useful to permit linking proprietary applications with
|
||||
the library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License. But first, please read
|
||||
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
|
||||
14
zano/libethash-cl/kernels/README.md
Normal file
14
zano/libethash-cl/kernels/README.md
Normal file
@@ -0,0 +1,14 @@
|
||||
# ethash-kernels
|
||||
For whatever reason, Zawawawa released his Ethash kernels as open source code. This repo is a verbatim copy of that code with a simplistic build environment and [Progminer](https://github.com/gangnamtestnet/progminer) as a target, although the code for Progminer has yet to be released.
|
||||
|
||||
## Requirements
|
||||
On Linux, all you need is [clrxasm](https://github.com/CLRX/CLRX-mirror) installed. Everything should build fairly quickly, just make sure to ```mkdir build``` before you ```make```. MacOS should be the same. Windows ¯\_(ツ)_/¯
|
||||
|
||||
## Donations
|
||||
Please buy me alcohol:
|
||||
- BTC: 3L2S7FHvTHpjzWqvqgaZBAaqsDzWAgFAdP
|
||||
- BCH: qq22texutzx4ar4020lmqk0w9vrmvgauc5svtmg6ym
|
||||
- ETH: 0x9545144F8e473FcD1FF470ab55EF381D4f990C56
|
||||
- LTC: MWwiHTdKfQDerhQ8a5a4mavGmiAZQYWyB1
|
||||
|
||||
You should also go support Zawawawa, buy him a beer or two for being an awesome chap.
|
||||
0
zano/libethash-cl/kernels/bin/placeholder
Normal file
0
zano/libethash-cl/kernels/bin/placeholder
Normal file
458
zano/libethash-cl/kernels/cl/ethash.cl
Normal file
458
zano/libethash-cl/kernels/cl/ethash.cl
Normal file
@@ -0,0 +1,458 @@
|
||||
// Copyright 2017 Yurio Miyazawa (a.k.a zawawa) <me@yurio.net>
|
||||
//
|
||||
// This file is part of Gateless Gate Sharp.
|
||||
//
|
||||
// Gateless Gate Sharp is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// Gateless Gate Sharp is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Gateless Gate Sharp. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
|
||||
// Select the LEGACY code paths used further down in this file when the
// kernel is compiled for any of the device targets listed here.
#if defined(__Tahiti__) || defined(__Pitcairn__) || defined(__Capeverde__) || \
    defined(__Oland__) || defined(__Hainan__)
#define LEGACY
#endif

// Enable the clang storage-class-specifier extension when the compiler
// advertises it.
#if defined(cl_clang_storage_class_specifiers)
#pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
#endif
|
||||
|
||||
// amd_bitalign(src0, src1, src2)
//
// Three ways of obtaining amd_bitalign, chosen at compile time:
//   * cl_amd_media_ops is available: the extension is enabled and no
//     definition is provided here.
//   * cl_nv_pragma_unroll is defined: a function built on the
//     shf.r.wrap.b32 inline-asm instruction.
//     NOTE(review): presumably equivalent to the portable macro below --
//     confirm against the PTX documentation.
//   * otherwise: a portable macro returning the low 32 bits of
//     ((src0 << 32) | src1) >> (src2 & 31).
#if defined(cl_amd_media_ops)
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
#elif defined(cl_nv_pragma_unroll)
uint amd_bitalign(uint src0, uint src1, uint src2)
{
    uint aligned;
    asm("shf.r.wrap.b32 %0, %2, %1, %3;" : "=r"(aligned) : "r"(src0), "r"(src1), "r"(src2));
    return aligned;
}
#else
#define amd_bitalign(src0, src1, src2) \
    ((uint) (((((ulong)(src0)) << 32) | (ulong)(src1)) >> ((src2) & 31)))
#endif
|
||||
|
||||
// The search kernel derives thread_id / hash_id from the local id modulo and
// divided by 4, and sizes its local buffers as WORKSIZE / 4, so WORKSIZE must
// be divisible by 4.
#if (WORKSIZE % 4) != 0
#error "WORKSIZE has to be a multiple of 4"
#endif

// Multiplier used by the fnv() macro.
#define FNV_PRIME 0x01000193U
|
||||
|
||||
// Per-round constants, indexed by round number (KECCAKF_1600_RND XORs entry
// [i] into lane 0). Each 64-bit constant is stored as a uint2;
// NOTE(review): presumably (low 32 bits, high 32 bits) -- confirm.
static __constant uint2 const Keccak_f1600_RC[24] = {
    (uint2)(0x00000001, 0x00000000), /* round  0 */
    (uint2)(0x00008082, 0x00000000), /* round  1 */
    (uint2)(0x0000808a, 0x80000000), /* round  2 */
    (uint2)(0x80008000, 0x80000000), /* round  3 */
    (uint2)(0x0000808b, 0x00000000), /* round  4 */
    (uint2)(0x80000001, 0x00000000), /* round  5 */
    (uint2)(0x80008081, 0x80000000), /* round  6 */
    (uint2)(0x00008009, 0x80000000), /* round  7 */
    (uint2)(0x0000008a, 0x00000000), /* round  8 */
    (uint2)(0x00000088, 0x00000000), /* round  9 */
    (uint2)(0x80008009, 0x00000000), /* round 10 */
    (uint2)(0x8000000a, 0x00000000), /* round 11 */
    (uint2)(0x8000808b, 0x00000000), /* round 12 */
    (uint2)(0x0000008b, 0x80000000), /* round 13 */
    (uint2)(0x00008089, 0x80000000), /* round 14 */
    (uint2)(0x00008003, 0x80000000), /* round 15 */
    (uint2)(0x00008002, 0x80000000), /* round 16 */
    (uint2)(0x00000080, 0x80000000), /* round 17 */
    (uint2)(0x0000800a, 0x00000000), /* round 18 */
    (uint2)(0x8000000a, 0x80000000), /* round 19 */
    (uint2)(0x80008081, 0x80000000), /* round 20 */
    (uint2)(0x00008080, 0x80000000), /* round 21 */
    (uint2)(0x80000001, 0x00000000), /* round 22 */
    (uint2)(0x80008008, 0x80000000), /* round 23 */
};
|
||||
|
||||
// 64-bit left rotations of a lane held as a uint2.
//   ROTL64_1(v, n): rotate left by n bits.
//   ROTL64_2(v, n): rotate left by n + 32 bits.
// With cl_amd_media_ops the rotations are built from amd_bitalign on the
// lane and its word-swapped copy (.s10); otherwise the built-in rotate() on
// the ulong view is used.
#if defined(cl_amd_media_ops)

// On LEGACY targets every later barrier() in this file becomes mem_fence().
#if defined(LEGACY)
#define barrier(x) mem_fence(x)
#endif

#define ROTL64_1(v, n) amd_bitalign((v), (v).s10, 32 - (n))
#define ROTL64_2(v, n) amd_bitalign((v).s10, (v), 32 - (n))

#else

#define ROTL64_1(v, n) as_uint2(rotate(as_ulong(v), (ulong)(n)))
#define ROTL64_2(v, n) ROTL64_1(v, (n) + 32)

#endif
|
||||
|
||||
|
||||
// One round of the Keccak-f[1600] permutation over the 25-lane state `a`
// (array of uint2), using round constant Keccak_f1600_RC[i].
//
// `outsz` limits how much of the final (chi) step is computed:
//   lane 0 is always updated;
//   outsz > 1 additionally updates lanes 1..4;
//   outsz > 4 additionally updates lanes 5..9;
//   outsz > 8 additionally updates lanes 10..24.
// Callers pass 25 for every round whose full output feeds a later round.
//
// The rotation chain below overwrites lanes in place, so its statement order
// must not be changed.
#define KECCAKF_1600_RND(a, i, outsz) do { \
    /* theta: column parities ... */ \
    const uint2 kc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]; \
    const uint2 kc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]; \
    const uint2 kc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]; \
    const uint2 kc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]; \
    const uint2 kc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]; \
    /* ... and the per-column correction words */ \
    const uint2 kd0 = kc4 ^ ROTL64_1(kc1, 1); \
    const uint2 kd1 = kc0 ^ ROTL64_1(kc2, 1); \
    const uint2 kd2 = kc1 ^ ROTL64_1(kc3, 1); \
    const uint2 kd3 = kc2 ^ ROTL64_1(kc4, 1); \
    const uint2 kd4 = kc3 ^ ROTL64_1(kc0, 1); \
    \
    /* lane 1 is consumed last by the rotation chain, so keep it aside */ \
    const uint2 k_lane1 = a[1] ^ kd1; \
    \
    a[0] ^= kd0; a[5] ^= kd0; a[10] ^= kd0; a[15] ^= kd0; a[20] ^= kd0; \
    a[6] ^= kd1; a[11] ^= kd1; a[16] ^= kd1; a[21] ^= kd1; \
    a[2] ^= kd2; a[7] ^= kd2; a[12] ^= kd2; a[17] ^= kd2; a[22] ^= kd2; \
    a[3] ^= kd3; a[8] ^= kd3; a[13] ^= kd3; a[18] ^= kd3; a[23] ^= kd3; \
    a[4] ^= kd4; a[9] ^= kd4; a[14] ^= kd4; a[19] ^= kd4; a[24] ^= kd4; \
    \
    /* rho + pi: rotate each lane and move it to its new position */ \
    a[1]  = ROTL64_2(a[6], 12); \
    a[6]  = ROTL64_1(a[9], 20); \
    a[9]  = ROTL64_2(a[22], 29); \
    a[22] = ROTL64_2(a[14], 7); \
    a[14] = ROTL64_1(a[20], 18); \
    a[20] = ROTL64_2(a[2], 30); \
    a[2]  = ROTL64_2(a[12], 11); \
    a[12] = ROTL64_1(a[13], 25); \
    a[13] = ROTL64_1(a[19], 8); \
    a[19] = ROTL64_2(a[23], 24); \
    a[23] = ROTL64_2(a[15], 9); \
    a[15] = ROTL64_1(a[4], 27); \
    a[4]  = ROTL64_1(a[24], 14); \
    a[24] = ROTL64_1(a[21], 2); \
    a[21] = ROTL64_2(a[8], 23); \
    a[8]  = ROTL64_2(a[16], 13); \
    a[16] = ROTL64_2(a[5], 4); \
    a[5]  = ROTL64_1(a[3], 28); \
    a[3]  = ROTL64_1(a[18], 21); \
    a[18] = ROTL64_1(a[17], 15); \
    a[17] = ROTL64_1(a[11], 10); \
    a[11] = ROTL64_1(a[7], 6); \
    a[7]  = ROTL64_1(a[10], 3); \
    a[10] = ROTL64_1(k_lane1, 1); \
    \
    /* chi (row by row) and iota (round constant into lane 0) */ \
    uint2 k_s0 = a[0]; \
    uint2 k_s1 = a[1]; \
    a[0] = bitselect(a[0] ^ a[2], a[0], a[1]); \
    a[0] ^= as_uint2(Keccak_f1600_RC[i]); \
    if (outsz > 1) { \
        a[1] = bitselect(a[1] ^ a[3], a[1], a[2]); \
        a[2] = bitselect(a[2] ^ a[4], a[2], a[3]); \
        a[3] = bitselect(a[3] ^ k_s0, a[3], a[4]); \
        a[4] = bitselect(a[4] ^ k_s1, a[4], k_s0); \
        if (outsz > 4) { \
            k_s0 = a[5]; \
            k_s1 = a[6]; \
            a[5] = bitselect(a[5] ^ a[7], a[5], a[6]); \
            a[6] = bitselect(a[6] ^ a[8], a[6], a[7]); \
            a[7] = bitselect(a[7] ^ a[9], a[7], a[8]); \
            a[8] = bitselect(a[8] ^ k_s0, a[8], a[9]); \
            a[9] = bitselect(a[9] ^ k_s1, a[9], k_s0); \
            if (outsz > 8) { \
                k_s0 = a[10]; \
                k_s1 = a[11]; \
                a[10] = bitselect(a[10] ^ a[12], a[10], a[11]); \
                a[11] = bitselect(a[11] ^ a[13], a[11], a[12]); \
                a[12] = bitselect(a[12] ^ a[14], a[12], a[13]); \
                a[13] = bitselect(a[13] ^ k_s0, a[13], a[14]); \
                a[14] = bitselect(a[14] ^ k_s1, a[14], k_s0); \
                k_s0 = a[15]; \
                k_s1 = a[16]; \
                a[15] = bitselect(a[15] ^ a[17], a[15], a[16]); \
                a[16] = bitselect(a[16] ^ a[18], a[16], a[17]); \
                a[17] = bitselect(a[17] ^ a[19], a[17], a[18]); \
                a[18] = bitselect(a[18] ^ k_s0, a[18], a[19]); \
                a[19] = bitselect(a[19] ^ k_s1, a[19], k_s0); \
                k_s0 = a[20]; \
                k_s1 = a[21]; \
                a[20] = bitselect(a[20] ^ a[22], a[20], a[21]); \
                a[21] = bitselect(a[21] ^ a[23], a[21], a[22]); \
                a[22] = bitselect(a[22] ^ a[24], a[22], a[23]); \
                a[23] = bitselect(a[23] ^ k_s0, a[23], a[24]); \
                a[24] = bitselect(a[24] ^ k_s1, a[24], k_s0); \
            } \
        } \
    } \
} while(0)
|
||||
|
||||
|
||||
// Run all 24 rounds of KECCAKF_1600_RND over the state `st`.
//
// Rounds 0..22 compute the full 25-lane output; only the last round is
// limited to `out_size` (see KECCAKF_1600_RND for the thresholds).
// `in_size` is accepted but not used by this macro.
#define KECCAK_PROCESS(st, in_size, out_size) do { \
    for (int rnd = 0; rnd < 24; ++rnd) { \
        int lanes_needed = 25; \
        if (rnd == 23) \
            lanes_needed = (out_size); \
        KECCAKF_1600_RND(st, rnd, lanes_needed); \
    } \
} while(0)
|
||||
|
||||
|
||||
// fnv(x, y): multiply x by FNV_PRIME, then XOR with y. Works on scalars and,
// in OpenCL C, component-wise on vectors.
#define fnv(x, y) ((x) * FNV_PRIME ^ (y))

// fnv_reduce(v): fold the four components x, y, z, w of v into one value by
// chaining fnv() left to right. The argument is parenthesised so that any
// expression (for example a dereferenced pointer) can be passed; note that it
// is still evaluated four times, so it must be free of side effects.
#define fnv_reduce(v) fnv(fnv(fnv((v).x, (v).y), (v).z), (v).w)
|
||||
|
||||
// A 128-byte element that can be read at several vector widths; every member
// array spans the same 128 bytes. The search kernel views the DAG buffer as
// an array of these.
typedef union {
    ulong8 ulong8s[128 / sizeof(ulong8)];
    uint16 uint16s[128 / sizeof(uint16)];
    uint8  uint8s[128 / sizeof(uint8)];
    uint4  uint4s[128 / sizeof(uint4)];
    uint2  uint2s[128 / sizeof(uint2)];
    ulong  ulongs[128 / sizeof(ulong)];
    uint   uints[128 / sizeof(uint)];
} hash128_t;
|
||||
|
||||
|
||||
// Scratch slot used by the search kernel to exchange data between the four
// work-items that cooperate on one hash. All members alias the same storage
// (one ulong8 wide).
typedef union {
    uint   uints[16];
    ulong  ulongs[8];
    uint2  uint2s[8];
    uint4  uint4s[4];
    uint8  uint8s[2];
    uint16 uint16s[1];
    ulong4 ulong4s[2];
    ulong8 ulong8s[1];
} compute_hash_share;
|
||||
|
||||
|
||||
// MIX(x): one DAG access of the inner mixing loop of the search kernel.
//
// The macro is not self-contained: it reads and writes the caller's `mix`,
// and reads `init0`, `a`, `lane_idx`, `hash_id`, `thread_id`, `buffer`,
// `g_dag` and `dag_size` from the enclosing scope.
//
// The select() chain picks component number x (0..7) of `mix`: `word` starts
// as mix.s0 and is replaced by mix.sK when x == K. The DAG index
// fnv(init0 ^ (a + x), word) % dag_size is published through `buffer`, and
// `mix` is then combined with this work-item's uint8 slice of that DAG entry.
#ifdef LEGACY

// LEGACY variant: only the work-item whose local id equals lane_idx computes
// and stores the index (one slot per hash); a barrier() separates the store
// from the read.
#define MIX(x) \
do { \
    if (get_local_id(0) == lane_idx) { \
        uint word = mix.s0; \
        word = select(mix.s1, word, (x) != 1); \
        word = select(mix.s2, word, (x) != 2); \
        word = select(mix.s3, word, (x) != 3); \
        word = select(mix.s4, word, (x) != 4); \
        word = select(mix.s5, word, (x) != 5); \
        word = select(mix.s6, word, (x) != 6); \
        word = select(mix.s7, word, (x) != 7); \
        buffer[hash_id] = fnv(init0 ^ (a + x), word) % dag_size; \
    } \
    barrier(CLK_LOCAL_MEM_FENCE); \
    mix = fnv(mix, g_dag[buffer[hash_id]].uint8s[thread_id]); \
} while(0)

#else

// Default variant: every work-item stores an index into its own slot and then
// reads the slot of lane_idx; only a mem_fence() follows, there is no barrier.
// NOTE(review): this appears to rely on the cooperating work-items executing
// in lockstep -- confirm for the targeted devices.
#define MIX(x) \
do { \
    uint word = mix.s0; \
    word = select(mix.s1, word, (x) != 1); \
    word = select(mix.s2, word, (x) != 2); \
    word = select(mix.s3, word, (x) != 3); \
    word = select(mix.s4, word, (x) != 4); \
    word = select(mix.s5, word, (x) != 5); \
    word = select(mix.s6, word, (x) != 6); \
    word = select(mix.s7, word, (x) != 7); \
    buffer[get_local_id(0)] = fnv(init0 ^ (a + x), word) % dag_size; \
    mix = fnv(mix, g_dag[buffer[lane_idx]].uint8s[thread_id]); \
    mem_fence(CLK_LOCAL_MEM_FENCE); \
} while(0)

#endif
|
||||
|
||||
// Output buffer written by the search kernel.
// NOTE: the layout must stay in sync with the matching struct in CLMiner.cpp.
struct SearchResults {
    // One entry per reported result; each entry is 1 + 8 + 7 = 16 words.
    struct {
        uint gid;     // global id of the work-item that found the result
        uint mix[8];  // the four uint2 words of the mix digest, low/high split
        uint pad[7];  // pad to 16 words for easy indexing
    } rslt[MAX_OUTPUTS];
    uint count;      // incremented atomically for every result found
    uint hashCount;  // incremented once per work-group when FAST_EXIT is defined
    uint abort;      // when FAST_EXIT is defined: set on a hit, checked at kernel entry
};
|
||||
|
||||
// Ethash search kernel: one nonce (start_nonce + global id) per work-item.
//
//   g_output    result buffer (see struct SearchResults)
//   g_header    four uint2 header words loaded into state[0..3]
//   _g_dag      DAG buffer, accessed as an array of 128-byte hash128_t
//   dag_size    modulus applied to every DAG index
//   start_nonce nonce of global id 0
//   target      a result is reported when the byte-reversed first output
//               lane is <= target
//
// Work-items cooperate in groups of four (hash_id selects the group,
// thread_id the position inside it). The names thread_id, hash_id, buffer,
// g_dag, init0, mix, a, x and lane_idx are used by the MIX macro and must
// keep these exact names.
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(
    __global struct SearchResults* restrict g_output,
    __constant uint2 const* g_header,
    __global ulong8 const* _g_dag,
    uint dag_size,
    ulong start_nonce,
    ulong target
)
{
#ifdef FAST_EXIT
    // Another work-item already reported a hit: skip this launch.
    if (g_output->abort)
        return;
#endif

    __global hash128_t const* g_dag = (__global hash128_t const*) _g_dag;

    const uint thread_id = get_local_id(0) % 4;
    const uint hash_id = get_local_id(0) / 4;
    const uint gid = get_global_id(0);

    __local compute_hash_share sharebuf[WORKSIZE / 4];
#ifdef LEGACY
    __local uint buffer[WORKSIZE / 4];
#else
    __local uint buffer[WORKSIZE];
#endif
    __local compute_hash_share * const share = &sharebuf[hash_id];

    // sha3_512(header .. nonce)
    uint2 state[25];
    state[0] = g_header[0];
    state[1] = g_header[1];
    state[2] = g_header[2];
    state[3] = g_header[3];
    state[4] = as_uint2(start_nonce + gid);
    state[5] = as_uint2(0x0000000000000001UL);
    state[6] = (uint2)(0);
    state[7] = (uint2)(0);
    state[8] = as_uint2(0x8000000000000000UL);
    state[9]  = (uint2)(0);  state[10] = (uint2)(0);
    state[11] = (uint2)(0);  state[12] = (uint2)(0);
    state[13] = (uint2)(0);  state[14] = (uint2)(0);
    state[15] = (uint2)(0);  state[16] = (uint2)(0);
    state[17] = (uint2)(0);  state[18] = (uint2)(0);
    state[19] = (uint2)(0);  state[20] = (uint2)(0);
    state[21] = (uint2)(0);  state[22] = (uint2)(0);
    state[23] = (uint2)(0);  state[24] = (uint2)(0);

    uint2 mixhash[4];

    // Two Keccak passes share one loop body: stage 0 hashes the header and is
    // followed by the DAG mixing; stage 1 hashes the result and leaves the loop.
    for (int stage = 0; stage < 2; ++stage) {
        KECCAK_PROCESS(state, select(5, 12, stage != 0), select(8, 1, stage != 0));
        if (stage != 0)
            break;

        uint init0;
        uint8 mix;

        // Each of the four cooperating work-items takes a turn as `owner`:
        // its state[0..7] is mixed by the whole group and the reduced digest
        // is handed back to it in state[8..11].
#pragma unroll 1
        for (uint owner = 0; owner < 4; owner++) {
            if (owner == thread_id) {
                share->uint2s[0] = state[0];
                share->uint2s[1] = state[1];
                share->uint2s[2] = state[2];
                share->uint2s[3] = state[3];
                share->uint2s[4] = state[4];
                share->uint2s[5] = state[5];
                share->uint2s[6] = state[6];
                share->uint2s[7] = state[7];
            }

            barrier(CLK_LOCAL_MEM_FENCE);

            mix = share->uint8s[thread_id & 1];
            init0 = share->uints[0];

            barrier(CLK_LOCAL_MEM_FENCE);

#ifndef LEGACY
#pragma unroll 1
#endif
            for (uint a = 0; a < ACCESSES; a += 8) {
                const uint lane_idx = 4 * hash_id + a / 8 % 4;
                for (uint x = 0; x < 8; ++x)
                    MIX(x);
            }

            barrier(CLK_LOCAL_MEM_FENCE);

            // Each work-item reduces its 8-word mix to one uint2.
            share->uint2s[thread_id] = (uint2)(fnv_reduce(mix.lo), fnv_reduce(mix.hi));

            barrier(CLK_LOCAL_MEM_FENCE);

            if (owner == thread_id) {
                state[8] = share->uint2s[0];
                state[9] = share->uint2s[1];
                state[10] = share->uint2s[2];
                state[11] = share->uint2s[3];
            }

            barrier(CLK_LOCAL_MEM_FENCE);
        }

        // Keep the mix digest for reporting, then prepare the state for the
        // second Keccak pass.
        mixhash[0] = state[8];
        mixhash[1] = state[9];
        mixhash[2] = state[10];
        mixhash[3] = state[11];

        state[12] = as_uint2(0x0000000000000001UL);
        state[13] = (uint2)(0);
        state[14] = (uint2)(0);
        state[15] = (uint2)(0);
        state[16] = as_uint2(0x8000000000000000UL);
        state[17] = (uint2)(0);  state[18] = (uint2)(0);
        state[19] = (uint2)(0);  state[20] = (uint2)(0);
        state[21] = (uint2)(0);  state[22] = (uint2)(0);
        state[23] = (uint2)(0);  state[24] = (uint2)(0);
    }

#ifdef FAST_EXIT
    if (get_local_id(0) == 0)
        atomic_inc(&g_output->hashCount);
#endif

    // Compare the byte-reversed first output lane with the target.
    if (as_ulong(as_uchar8(state[0]).s76543210) > target)
        return;

#ifdef FAST_EXIT
    atomic_inc(&g_output->abort);
#endif
    // Results beyond the capacity of the buffer all land in the last slot.
    uint slot = min(MAX_OUTPUTS - 1u, atomic_inc(&g_output->count));
    g_output->rslt[slot].gid = gid;

    __global uint *out_mix = g_output->rslt[slot].mix;
    out_mix[0] = mixhash[0].s0;
    out_mix[1] = mixhash[0].s1;
    out_mix[2] = mixhash[1].s0;
    out_mix[3] = mixhash[1].s1;
    out_mix[4] = mixhash[2].s0;
    out_mix[5] = mixhash[2].s1;
    out_mix[6] = mixhash[3].s0;
    out_mix[7] = mixhash[3].s1;
}
|
||||
|
||||
// A 64-byte node addressable as 16 dwords, 8 qwords (uint2) or 4 dqwords
// (uint4); used for both cache and DAG entries in GenerateDAG.
typedef union _Node {
    uint4 dqwords[4];
    uint2 qwords[8];
    uint  dwords[16];
} Node;
|
||||
|
||||
// Hash the eight uint2 words at `s` in place.
//
// The words are copied into lanes 0..7 of a 25-lane state, lane 8 is set to
// the fixed padding word, the remaining lanes are cleared, and after the
// 24-round Keccak permutation lanes 0..7 are copied back to `s`.
static void SHA3_512(uint2 *s)
{
    uint2 lanes[25];
    uint k;

    for (k = 0; k < 8; ++k)
        lanes[k] = s[k];

    lanes[8] = (uint2)(0x00000001, 0x80000000);

    for (k = 9; k < 25; ++k)
        lanes[k] = (uint2)(0);

    KECCAK_PROCESS(lanes, 8, 8);

    for (k = 0; k < 8; ++k)
        s[k] = lanes[k];
}
|
||||
|
||||
// Compute one DAG node per work-item.
//
//   start       index of the node handled by global id 0
//   _Cache      light cache, read as an array of Node
//   _DAG        output buffer, written as an array of Node
//   light_size  modulus applied to every cache index
//
// The node starts as the cache entry at (index % light_size) with the node
// index XORed into its first dword, is hashed, is then mixed with 256
// cache parents chosen by fnv(), and is hashed once more before being stored.
__kernel void GenerateDAG(uint start, __global const uint16 *_Cache, __global uint16 *_DAG, uint light_size)
{
    __global const Node *cache_nodes = (__global const Node *) _Cache;
    __global Node *dag_nodes = (__global Node *) _DAG;
    const uint node_idx = start + get_global_id(0);

    Node node = cache_nodes[node_idx % light_size];
    node.dwords[0] ^= node_idx;
    SHA3_512(node.qwords);

    for (uint round_no = 0; round_no < 256; ++round_no) {
        const uint parent_idx = fnv(node_idx ^ round_no, node.dwords[round_no % 16]) % light_size;
        __global const Node *parent = &cache_nodes[parent_idx];

        // FNV-combine the node with its parent, one uint4 at a time.
#pragma unroll
        for (uint q = 0; q < 4; ++q)
            node.dqwords[q] = (node.dqwords[q] * (uint4)(FNV_PRIME)) ^ parent->dqwords[q];
    }

    SHA3_512(node.qwords);

    // NOTE(review): the original carried a disabled bounds check
    // ("if (NodeIdx < DAG_SIZE)") here; the store is unconditional, so the
    // launch size presumably must not exceed the DAG buffer -- confirm on the
    // host side.
    dag_nodes[node_idx] = node;
}
|
||||
0
zano/libethash-cl/kernels/isa/placeholder
Normal file
0
zano/libethash-cl/kernels/isa/placeholder
Normal file
Reference in New Issue
Block a user