This commit is contained in:
Dobromir Popov
2025-09-05 13:22:21 +03:00
parent 8a189d271f
commit c5be4c88c1
21 changed files with 4897 additions and 0 deletions

View File

@@ -0,0 +1,33 @@
# Dockerfile for building RinHash HIP implementation on Linux
# Using your existing ROCm toolbox image
FROM kyuz0/amd-strix-halo-toolboxes:rocm-7rc-rocwmma
# Install additional build tools if needed
RUN apt-get update && apt-get install -y \
cmake \
ninja-build \
git \
&& rm -rf /var/lib/apt/lists/* 2>/dev/null || true
# Create build directory
WORKDIR /build
# Copy source files
COPY gpu/RinHash-hip/ /build/
# Create output directory
RUN mkdir -p /output
# Build using CMake
RUN mkdir -p build && \
cd build && \
cmake -G "Ninja" \
-DHIP_PLATFORM=amd \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=/output \
.. && \
cmake --build . -j$(nproc) && \
cmake --install . || cp rinhash-hip-miner /output/
# Default command to copy binaries to output
CMD ["sh", "-c", "cp build/rinhash-hip-miner /output/ 2>/dev/null || cp /output/rinhash-hip-miner /output/ && echo 'HIP build completed successfully! Binary copied to /output/'"]

View File

@@ -0,0 +1,86 @@
#!/bin/bash
# Build script for RinHash HIP implementation using Docker
# This script builds the HIP miner in a containerized ROCm environment
echo "================================================"
echo " RinHash HIP/ROCm Linux Docker Build Script"
echo "================================================"
# Check if Docker is available
if ! command -v docker &> /dev/null; then
echo "ERROR: Docker not found in PATH"
echo "Please install Docker first"
exit 1
fi
echo "Docker found:"
docker --version
echo ""
# Build directory setup
BUILD_DIR="$(dirname "$0")"
cd "$BUILD_DIR" || exit 1
echo "Building Docker image for HIP/ROCm compilation..."
echo ""
# Build the Docker image
sudo docker build -f Dockerfile.hip-linux -t rinhash-hip-builder .
if [ $? -ne 0 ]; then
echo ""
echo "==============================================="
echo " DOCKER BUILD FAILED!"
echo "==============================================="
echo ""
echo "Common issues:"
echo "1. Docker not properly installed"
echo "2. Insufficient permissions"
echo "3. Network connectivity issues"
echo "4. ROCm base image not available"
echo ""
exit 1
fi
echo ""
echo "Docker image built successfully!"
echo ""
# Create output directory
mkdir -p hip-output
echo "Running container to build HIP binaries..."
echo ""
# Run the container and extract binaries
sudo docker run --rm \
-v "$(pwd)/hip-output:/output" \
rinhash-hip-builder
if [ $? -eq 0 ]; then
echo ""
echo "==============================================="
echo " BUILD SUCCESSFUL!"
echo "==============================================="
echo ""
echo "Binaries created in hip-output/:"
ls -la hip-output/
echo ""
echo "To test the miner (requires AMD GPU with ROCm):"
echo " ./hip-output/rinhash-hip-miner --help"
echo ""
echo "Note: To run on AMD GPU, you'll need ROCm runtime installed:"
echo " sudo apt install rocm-dev hip-runtime-amd"
echo ""
else
echo ""
echo "==============================================="
echo " BUILD FAILED!"
echo "==============================================="
echo ""
echo "Check the error messages above for details."
echo ""
exit 1
fi
echo "HIP build completed successfully!"

View File

@@ -0,0 +1,92 @@
#!/bin/bash
# RinHash HIP Build Script for Linux
# This script builds the HIP implementation of RinHash for AMD GPUs
echo "======================================"
echo " RinHash HIP Miner Build Script"
echo "======================================"
# Check if hipcc is available
if ! command -v hipcc &> /dev/null; then
echo "ERROR: hipcc not found in PATH"
echo "Please install ROCm/HIP toolkit"
echo "On Ubuntu/Debian: sudo apt install rocm-dev hip-runtime-amd"
echo "Or download from: https://rocm.docs.amd.com/en/latest/deploy/linux/quick_start.html"
exit 1
fi
echo "HIP compiler found:"
hipcc --version
echo ""
# Check if cmake is available
if ! command -v cmake &> /dev/null; then
echo "ERROR: CMake not found in PATH"
echo "Please install cmake: sudo apt install cmake"
exit 1
fi
echo "CMake found:"
cmake --version | head -1
echo ""
echo "Building RinHash HIP miner..."
echo ""
# Create build directory
mkdir -p build
cd build
# Configure with CMake
cmake -G "Ninja" \
-DHIP_PLATFORM=amd \
-DCMAKE_BUILD_TYPE=Release \
..
if [ $? -ne 0 ]; then
echo "CMake configuration failed!"
echo "Trying without Ninja..."
cmake -DHIP_PLATFORM=amd \
-DCMAKE_BUILD_TYPE=Release \
..
if [ $? -ne 0 ]; then
echo "CMake configuration failed completely!"
exit 1
fi
fi
# Build
cmake --build . -j$(nproc)
if [ $? -eq 0 ]; then
echo ""
echo "======================================"
echo " BUILD SUCCESSFUL!"
echo "======================================"
echo ""
echo "Executable created:"
echo " build/rinhash-hip-miner"
echo ""
echo "To test the miner:"
echo " cd build && ./rinhash-hip-miner --help"
echo ""
echo "To check AMD GPU availability:"
echo " rocm-smi"
echo ""
else
echo ""
echo "======================================"
echo " BUILD FAILED!"
echo "======================================"
echo ""
echo "Common issues:"
echo "1. Missing ROCm development libraries"
echo "2. Incompatible HIP version"
echo "3. Missing development tools"
echo ""
exit 1
fi
echo "Build completed successfully!"

View File

@@ -0,0 +1,21 @@
cmake_minimum_required(VERSION 3.21)
project(RinHashHIP LANGUAGES CXX HIP)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_HIP_STANDARD 17)
# Enable HIP
find_package(HIP REQUIRED)
set(SOURCES
rinhash.hip.cu
sha3-256.hip.cu
)
add_executable(rinhash-hip-miner ${SOURCES})
target_include_directories(rinhash-hip-miner PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_compile_definitions(rinhash-hip-miner PRIVATE __HIP_PLATFORM_AMD__)
target_link_libraries(rinhash-hip-miner PRIVATE HIP::device)

View File

@@ -0,0 +1,929 @@
#include <stdint.h>
#include <string.h>
#include <stdio.h>
//=== Argon2 定数 ===//
#define ARGON2_BLOCK_SIZE 1024
#define ARGON2_QWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 8)
#define ARGON2_OWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 16)
#define ARGON2_HWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 32)
#define ARGON2_SYNC_POINTS 4
#define ARGON2_PREHASH_DIGEST_LENGTH 64
#define ARGON2_PREHASH_SEED_LENGTH 72
#define ARGON2_VERSION_10 0x10
#define ARGON2_VERSION_13 0x13
#define ARGON2_ADDRESSES_IN_BLOCK 128
//=== Blake2b 定数 ===//
#define BLAKE2B_BLOCKBYTES 128
#define BLAKE2B_OUTBYTES 64
#define BLAKE2B_KEYBYTES 64
#define BLAKE2B_SALTBYTES 16
#define BLAKE2B_PERSONALBYTES 16
#define BLAKE2B_ROUNDS 12
//=== 構造体定義 ===//
typedef struct __align__(64) block_ {
uint64_t v[ARGON2_QWORDS_IN_BLOCK];
} block;
typedef struct Argon2_instance_t {
block *memory; /* Memory pointer */
uint32_t version;
uint32_t passes; /* Number of passes */
uint32_t memory_blocks; /* Number of blocks in memory */
uint32_t segment_length;
uint32_t lane_length;
uint32_t lanes;
uint32_t threads;
int print_internals; /* whether to print the memory blocks */
} argon2_instance_t;
/*
* Argon2 position: where we construct the block right now. Used to distribute
* work between threads.
*/
typedef struct Argon2_position_t {
uint32_t pass;
uint32_t lane;
uint8_t slice;
uint32_t index;
} argon2_position_t;
typedef struct __blake2b_state {
uint64_t h[8];
uint64_t t[2];
uint64_t f[2];
uint8_t buf[BLAKE2B_BLOCKBYTES];
unsigned buflen;
unsigned outlen;
uint8_t last_node;
} blake2b_state;
typedef struct __blake2b_param {
uint8_t digest_length; /* 1 */
uint8_t key_length; /* 2 */
uint8_t fanout; /* 3 */
uint8_t depth; /* 4 */
uint32_t leaf_length; /* 8 */
uint64_t node_offset; /* 16 */
uint8_t node_depth; /* 17 */
uint8_t inner_length; /* 18 */
uint8_t reserved[14]; /* 32 */
uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
} blake2b_param;
//=== 定数メモリ ===//
__constant__ uint64_t blake2b_IV[8] = {
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
__constant__ uint8_t blake2b_sigma[12][16] = {
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}
};
//=== 共通ヘルパー関数 ===//
__device__ __forceinline__ uint64_t rotr64(uint64_t x, uint32_t n) {
return (x >> n) | (x << (64 - n));
}
// fBlaMka関数をCリファレンス実装と完全に一致させる
__device__ __forceinline__ uint64_t fBlaMka(uint64_t x, uint64_t y) {
const uint64_t m = 0xFFFFFFFFULL;
uint64_t xy = (x & m) * (y & m);
return x + y + 2 * xy;
}
// Blake2b G関数 - リファレンス実装と完全に一致させる
__device__ __forceinline__ void blake2b_G(uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d, uint64_t m1, uint64_t m2) {
a = a + b + m1;
d = rotr64(d ^ a, 32);
c = c + d;
b = rotr64(b ^ c, 24);
a = a + b + m2;
d = rotr64(d ^ a, 16);
c = c + d;
b = rotr64(b ^ c, 63);
}
// リトルエンディアンでの32ビット値の格納
__device__ __forceinline__ void store32(void *dst, uint32_t w) {
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = (uint8_t *)dst;
*p++ = (uint8_t)w;
w >>= 8;
*p++ = (uint8_t)w;
w >>= 8;
*p++ = (uint8_t)w;
w >>= 8;
*p++ = (uint8_t)w;
#endif
}
__device__ __forceinline__ void blake2b_increment_counter(blake2b_state *S,
uint64_t inc) {
S->t[0] += inc;
S->t[1] += (S->t[0] < inc);
}
__device__ __forceinline__ void blake2b_set_lastnode(blake2b_state *S) {
S->f[1] = (uint64_t)-1;
}
__device__ __forceinline__ void blake2b_set_lastblock(blake2b_state *S) {
if (S->last_node) {
blake2b_set_lastnode(S);
}
S->f[0] = (uint64_t)-1;
}
// Add structure-specific memset function
__device__ void blake2b_state_memset(blake2b_state* S) {
for (int i = 0; i < sizeof(blake2b_state); i++) {
((uint8_t*)S)[i] = 0;
}
}
// Add missing xor_block function
__device__ void xor_block(block* dst, const block* src) {
for (int i = 0; i < ARGON2_QWORDS_IN_BLOCK; i++) {
dst->v[i] ^= src->v[i];
}
}
// custom memcpy, apparently cuda's memcpy is slow
// when called within a kernel
__device__ void c_memcpy(void *dest, const void *src, size_t n) {
uint8_t *d = (uint8_t*)dest;
const uint8_t *s = (const uint8_t*)src;
for (size_t i = 0; i < n; i++) {
d[i] = s[i];
}
}
// Add missing copy_block function
__device__ void copy_block(block* dst, const block* src) {
c_memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK);
}
// fill_blockをCリファレンス実装と完全に一致させる
__device__ void fill_block(const block* prev_block, const block* ref_block, block* next_block, int with_xor) {
block blockR = {};
block block_tmp = {};
unsigned i;
copy_block(&blockR, ref_block);
xor_block(&blockR, prev_block);
copy_block(&block_tmp, &blockR);
if (with_xor) {
xor_block(&block_tmp, next_block);
}
// G function without macro
auto g = [](uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d) {
a = fBlaMka(a, b);
d = rotr64(d ^ a, 32);
c = fBlaMka(c, d);
b = rotr64(b ^ c, 24);
a = fBlaMka(a, b);
d = rotr64(d ^ a, 16);
c = fBlaMka(c, d);
b = rotr64(b ^ c, 63);
};
// BLAKE2_ROUND_NOMSG function without macro
auto blake2_round = [&g](uint64_t& v0, uint64_t& v1, uint64_t& v2, uint64_t& v3,
uint64_t& v4, uint64_t& v5, uint64_t& v6, uint64_t& v7,
uint64_t& v8, uint64_t& v9, uint64_t& v10, uint64_t& v11,
uint64_t& v12, uint64_t& v13, uint64_t& v14, uint64_t& v15) {
do {
g(v0, v4, v8, v12);
g(v1, v5, v9, v13);
g(v2, v6, v10, v14);
g(v3, v7, v11, v15);
g(v0, v5, v10, v15);
g(v1, v6, v11, v12);
g(v2, v7, v8, v13);
g(v3, v4, v9, v14);
} while ((void)0, 0);
};
// Apply Blake2 on columns
for (i = 0; i < 8; ++i) {
blake2_round(
blockR.v[16 * i], blockR.v[16 * i + 1], blockR.v[16 * i + 2],
blockR.v[16 * i + 3], blockR.v[16 * i + 4], blockR.v[16 * i + 5],
blockR.v[16 * i + 6], blockR.v[16 * i + 7], blockR.v[16 * i + 8],
blockR.v[16 * i + 9], blockR.v[16 * i + 10], blockR.v[16 * i + 11],
blockR.v[16 * i + 12], blockR.v[16 * i + 13], blockR.v[16 * i + 14],
blockR.v[16 * i + 15]
);
}
// Apply Blake2 on rows
for (i = 0; i < 8; i++) {
blake2_round(
blockR.v[2 * i], blockR.v[2 * i + 1], blockR.v[2 * i + 16],
blockR.v[2 * i + 17], blockR.v[2 * i + 32], blockR.v[2 * i + 33],
blockR.v[2 * i + 48], blockR.v[2 * i + 49], blockR.v[2 * i + 64],
blockR.v[2 * i + 65], blockR.v[2 * i + 80], blockR.v[2 * i + 81],
blockR.v[2 * i + 96], blockR.v[2 * i + 97], blockR.v[2 * i + 112],
blockR.v[2 * i + 113]
);
}
copy_block(next_block, &block_tmp);
xor_block(next_block, &blockR);
}
template<typename T, typename ptr_t>
__device__ void c_memset(ptr_t dest, T val, int count) {
for(int i=0; i<count; i++)
dest[i] = val;
}
__device__ void init_block_value(block *b, uint8_t in) { c_memset(b->v, in, sizeof(b->v)); }
__device__ void next_addresses(block *address_block, block *input_block,
const block *zero_block) {
input_block->v[6]++;
fill_block(zero_block, input_block, address_block, 0);
fill_block(zero_block, address_block, address_block, 0);
}
__device__ void G1(uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d, uint64_t x, uint64_t y) {
a = a + b + x;
d = rotr64(d ^ a, 32);
c = c + d;
b = rotr64(b ^ c, 24);
a = a + b + y;
d = rotr64(d ^ a, 16);
c = c + d;
b = rotr64(b ^ c, 63);
}
// Blake2b compression function F
__device__ void blake2b_compress(blake2b_state* S, const uint8_t block[BLAKE2B_BLOCKBYTES]) {
uint64_t m[16];
uint64_t v[16];
// Load message block into m[16]
for (int i = 0; i < 16; i++) {
const uint8_t* p = block + i * 8;
m[i] = ((uint64_t)p[0])
| ((uint64_t)p[1] << 8)
| ((uint64_t)p[2] << 16)
| ((uint64_t)p[3] << 24)
| ((uint64_t)p[4] << 32)
| ((uint64_t)p[5] << 40)
| ((uint64_t)p[6] << 48)
| ((uint64_t)p[7] << 56);
}
// Initialize v[0..15]
for (int i = 0; i < 8; i++) {
v[i] = S->h[i];
v[i + 8] = blake2b_IV[i];
}
v[12] ^= S->t[0];
v[13] ^= S->t[1];
v[14] ^= S->f[0];
v[15] ^= S->f[1];
for (int r = 0; r < BLAKE2B_ROUNDS; r++) {
const uint8_t* s = blake2b_sigma[r];
// Column step
G1(v[0], v[4], v[8], v[12], m[s[0]], m[s[1]]);
G1(v[1], v[5], v[9], v[13], m[s[2]], m[s[3]]);
G1(v[2], v[6], v[10], v[14], m[s[4]], m[s[5]]);
G1(v[3], v[7], v[11], v[15], m[s[6]], m[s[7]]);
// Diagonal step
G1(v[0], v[5], v[10], v[15], m[s[8]], m[s[9]]);
G1(v[1], v[6], v[11], v[12], m[s[10]], m[s[11]]);
G1(v[2], v[7], v[8], v[13], m[s[12]], m[s[13]]);
G1(v[3], v[4], v[9], v[14], m[s[14]], m[s[15]]);
}
// Finalization
for (int i = 0; i < 8; i++) {
S->h[i] ^= v[i] ^ v[i + 8];
}
}
// Helper functions to load/store 64-bit values in little-endian order
__device__ __forceinline__ uint64_t load64(const void* src) {
const uint8_t* p = (const uint8_t*)src;
return ((uint64_t)(p[0]))
| ((uint64_t)(p[1]) << 8)
| ((uint64_t)(p[2]) << 16)
| ((uint64_t)(p[3]) << 24)
| ((uint64_t)(p[4]) << 32)
| ((uint64_t)(p[5]) << 40)
| ((uint64_t)(p[6]) << 48)
| ((uint64_t)(p[7]) << 56);
}
__device__ __forceinline__ void store64(void* dst, uint64_t w) {
uint8_t* p = (uint8_t*)dst;
p[0] = (uint8_t)(w);
p[1] = (uint8_t)(w >> 8);
p[2] = (uint8_t)(w >> 16);
p[3] = (uint8_t)(w >> 24);
p[4] = (uint8_t)(w >> 32);
p[5] = (uint8_t)(w >> 40);
p[6] = (uint8_t)(w >> 48);
p[7] = (uint8_t)(w >> 56);
}
__device__ void load_block(block *dst, const void *input) {
unsigned i;
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i]));
}
}
__device__ void store_block(void *output, const block *src) {
unsigned i;
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]);
}
}
// Blake2b init function to match reference implementation exactly
__device__ int blake2b_init(blake2b_state* S, size_t outlen) {
blake2b_param P;
// Clear state using our custom function
blake2b_state_memset(S);
// Set parameters according to Blake2b spec
P.digest_length = (uint8_t)outlen;
P.key_length = 0;
P.fanout = 1;
P.depth = 1;
P.leaf_length = 0;
P.node_offset = 0;
P.node_depth = 0;
P.inner_length = 0;
c_memset(P.reserved, 0, sizeof(P.reserved));
c_memset(P.salt, 0, sizeof(P.salt));
c_memset(P.personal, 0, sizeof(P.personal));
// Initialize state vector with IV
for (int i = 0; i < 8; i++) {
S->h[i] = blake2b_IV[i];
}
const unsigned char *p = (const unsigned char *)(&P);
/* IV XOR Parameter Block */
for (int i = 0; i < 8; ++i) {
S->h[i] ^= load64(&p[i * sizeof(S->h[i])]);
}
S->outlen = P.digest_length;
return 0; // Success
}
__device__ int FLAG_clear_internal_memory = 0;
__device__ void clear_internal_memory(void *v, size_t n) {
if (FLAG_clear_internal_memory && v) {
// secure_wipe_memory(v, n);
}
}
// Blake2b update function to match reference implementation
__device__ int blake2b_update(blake2b_state* S, const uint8_t* in, size_t inlen) {
const uint8_t *pin = (const uint8_t *)in;
if (inlen == 0) {
return 0;
}
/* Sanity check */
if (S == NULL || in == NULL) {
return -1;
}
/* Is this a reused state? */
if (S->f[0] != 0) {
return -1;
}
if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) {
/* Complete current block */
size_t left = S->buflen;
size_t fill = BLAKE2B_BLOCKBYTES - left;
c_memcpy(&S->buf[left], pin, fill);
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
blake2b_compress(S, S->buf);
S->buflen = 0;
inlen -= fill;
pin += fill;
/* Avoid buffer copies when possible */
while (inlen > BLAKE2B_BLOCKBYTES) {
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
blake2b_compress(S, pin);
inlen -= BLAKE2B_BLOCKBYTES;
pin += BLAKE2B_BLOCKBYTES;
}
}
c_memcpy(&S->buf[S->buflen], pin, inlen);
S->buflen += (unsigned int)inlen;
return 0; // Success
}
// Blake2b final function to match reference implementation
__device__ int blake2b_final(blake2b_state* S, uint8_t* out, size_t outlen) {
if (!S || !out)
return -1;
uint8_t buffer[BLAKE2B_OUTBYTES] = {0};
unsigned int i;
blake2b_increment_counter(S, S->buflen);
blake2b_set_lastblock(S);
c_memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
blake2b_compress(S, S->buf);
for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
}
c_memcpy(out, buffer, S->outlen);
return 0;
}
__device__ int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
size_t keylen) {
blake2b_param P;
if (S == NULL) {
return -1;
}
/* Setup Parameter Block for keyed BLAKE2 */
P.digest_length = (uint8_t)outlen;
P.key_length = (uint8_t)keylen;
P.fanout = 1;
P.depth = 1;
P.leaf_length = 0;
P.node_offset = 0;
P.node_depth = 0;
P.inner_length = 0;
c_memset(P.reserved, 0, sizeof(P.reserved));
c_memset(P.salt, 0, sizeof(P.salt));
c_memset(P.personal, 0, sizeof(P.personal));
// Initialize state vector with IV
for (int i = 0; i < 8; i++) {
S->h[i] = blake2b_IV[i];
}
// XOR first element with param
const unsigned char *p = (const unsigned char *)(&P);
/* IV XOR Parameter Block */
for (int i = 0; i < 8; ++i) {
S->h[i] ^= load64(&p[i * sizeof(S->h[i])]);
}
S->outlen = P.digest_length;
uint8_t block[BLAKE2B_BLOCKBYTES];
c_memset(block, 0, BLAKE2B_BLOCKBYTES);
c_memcpy(block, key, keylen);
blake2b_update(S, block, BLAKE2B_BLOCKBYTES);
/* Burn the key from stack */
clear_internal_memory(block, BLAKE2B_BLOCKBYTES);
return 0;
}
// Blake2b all-in-one function
__device__ int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
const void *key, size_t keylen) {
blake2b_state S;
int ret = -1;
/* Verify parameters */
if (NULL == in && inlen > 0) {
goto fail;
}
if (NULL == out || outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
goto fail;
}
if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) {
goto fail;
}
if (keylen > 0) {
if (blake2b_init_key(&S, outlen, key, keylen) < 0) {
goto fail;
}
} else {
if (blake2b_init(&S, outlen) < 0) {
goto fail;
}
}
if (blake2b_update(&S, (const uint8_t*)in, inlen) < 0) {
goto fail;
}
ret = blake2b_final(&S, (uint8_t*)out, outlen);
fail:
clear_internal_memory(&S, sizeof(S));
return ret;
}
// index_alpha関数を完全にCリファレンス実装と一致させる関数のシグネチャも含め
__device__ uint32_t index_alpha(const argon2_instance_t *instance,
const argon2_position_t *position, uint32_t pseudo_rand,
int same_lane) {
uint32_t reference_area_size;
uint64_t relative_position;
uint32_t start_position, absolute_position;
if (0 == position->pass) {
/* First pass */
if (0 == position->slice) {
/* First slice */
reference_area_size =
position->index - 1; /* all but the previous */
} else {
if (same_lane) {
/* The same lane => add current segment */
reference_area_size =
position->slice * instance->segment_length +
position->index - 1;
} else {
reference_area_size =
position->slice * instance->segment_length +
((position->index == 0) ? (-1) : 0);
}
}
} else {
/* Second pass */
if (same_lane) {
reference_area_size = instance->lane_length -
instance->segment_length + position->index -
1;
} else {
reference_area_size = instance->lane_length -
instance->segment_length +
((position->index == 0) ? (-1) : 0);
}
}
/* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
* relative position */
relative_position = pseudo_rand;
relative_position = relative_position * relative_position >> 32;
relative_position = reference_area_size - 1 -
(reference_area_size * relative_position >> 32);
/* 1.2.5 Computing starting position */
start_position = 0;
if (0 != position->pass) {
start_position = (position->slice == ARGON2_SYNC_POINTS - 1)
? 0
: (position->slice + 1) * instance->segment_length;
}
/* 1.2.6. Computing absolute position */
absolute_position = (start_position + relative_position) %
instance->lane_length; /* absolute position */
return absolute_position;
}
// fill_segment関数を追加Cリファレンス実装と完全に一致
__device__ void fill_segment(const argon2_instance_t *instance,
argon2_position_t position) {
block *ref_block = NULL, *curr_block = NULL;
block address_block, input_block, zero_block;
uint64_t pseudo_rand, ref_index, ref_lane;
uint32_t prev_offset, curr_offset;
uint32_t starting_index;
uint32_t i;
int data_independent_addressing;
data_independent_addressing = false;
if (data_independent_addressing) {
init_block_value(&zero_block, 0);
init_block_value(&input_block, 0);
input_block.v[0] = position.pass;
input_block.v[1] = position.lane;
input_block.v[2] = position.slice;
input_block.v[3] = instance->memory_blocks;
input_block.v[4] = instance->passes;
input_block.v[5] = 0;
}
starting_index = 0;
if ((0 == position.pass) && (0 == position.slice)) {
starting_index = 2; /* we have already generated the first two blocks */
/* Don't forget to generate the first block of addresses: */
if (data_independent_addressing) {
next_addresses(&address_block, &input_block, &zero_block);
}
}
/* Offset of the current block */
curr_offset = position.lane * instance->lane_length +
position.slice * instance->segment_length + starting_index;
if (0 == curr_offset % instance->lane_length) {
/* Last block in this lane */
prev_offset = curr_offset + instance->lane_length - 1;
} else {
/* Previous block */
prev_offset = curr_offset - 1;
}
for (i = starting_index; i < instance->segment_length;
++i, ++curr_offset, ++prev_offset) {
/*1.1 Rotating prev_offset if needed */
if (curr_offset % instance->lane_length == 1) {
prev_offset = curr_offset - 1;
}
/* 1.2 Computing the index of the reference block */
/* 1.2.1 Taking pseudo-random value from the previous block */
if (data_independent_addressing) {
if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
next_addresses(&address_block, &input_block, &zero_block);
}
pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
} else {
pseudo_rand = instance->memory[prev_offset].v[0];
}
/* 1.2.2 Computing the lane of the reference block */
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
if ((position.pass == 0) && (position.slice == 0)) {
/* Can not reference other lanes yet */
ref_lane = position.lane;
}
/* 1.2.3 Computing the number of possible reference block within the
* lane.
*/
position.index = i;
ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
ref_lane == position.lane);
/* 2 Creating a new block */
ref_block =
instance->memory + instance->lane_length * ref_lane + ref_index;
curr_block = instance->memory + curr_offset;
if (ARGON2_VERSION_10 == instance->version) {
/* version 1.2.1 and earlier: overwrite, not XOR */
fill_block(instance->memory + prev_offset, ref_block, curr_block, 0);
} else {
if(0 == position.pass) {
fill_block(instance->memory + prev_offset, ref_block,
curr_block, 0);
} else {
fill_block(instance->memory + prev_offset, ref_block,
curr_block, 1);
}
}
}
}
// fill_memory関数をCリファレンス実装と完全に一致させる
__device__ void fill_memory(block* memory, uint32_t passes, uint32_t lanes, uint32_t lane_length, uint32_t segment_length) {
argon2_instance_t instance;
instance.version = ARGON2_VERSION_13;
instance.passes = passes;
instance.memory = memory;
instance.memory_blocks = lanes * lane_length;
instance.segment_length = segment_length;
instance.lane_length = lane_length;
instance.lanes = lanes;
instance.threads = lanes;
instance.print_internals = 0;
argon2_position_t position;
for (uint32_t pass = 0; pass < passes; ++pass) {
position.pass = pass;
for (uint32_t slice = 0; slice < ARGON2_SYNC_POINTS; ++slice) {
position.slice = slice;
for (uint32_t lane = 0; lane < lanes; ++lane) {
position.lane = lane;
fill_segment(&instance, position);
}
}
}
}
// blake2b_long関数をCリファレンス実装と完全に一致させる
__device__ int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
uint8_t *out = (uint8_t *)pout;
blake2b_state blake_state;
uint8_t outlen_bytes[sizeof(uint32_t)] = {0};
int ret = -1;
if (outlen > UINT32_MAX) {
goto fail;
}
/* Ensure little-endian byte order! */
store32(outlen_bytes, (uint32_t)outlen);
#define TRY(statement) \
do { \
ret = statement; \
if (ret < 0) { \
goto fail; \
} \
} while ((void)0, 0)
if (outlen <= BLAKE2B_OUTBYTES) {
TRY(blake2b_init(&blake_state, outlen));
TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
TRY(blake2b_update(&blake_state, (const uint8_t*)in, inlen));
TRY(blake2b_final(&blake_state, out, outlen));
} else {
uint32_t toproduce;
uint8_t out_buffer[BLAKE2B_OUTBYTES];
uint8_t in_buffer[BLAKE2B_OUTBYTES];
TRY(blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
TRY(blake2b_update(&blake_state, (const uint8_t*)in, inlen));
TRY(blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
c_memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
out += BLAKE2B_OUTBYTES / 2;
toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
while (toproduce > BLAKE2B_OUTBYTES) {
c_memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
TRY(blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer, BLAKE2B_OUTBYTES, NULL, 0));
c_memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
out += BLAKE2B_OUTBYTES / 2;
toproduce -= BLAKE2B_OUTBYTES / 2;
}
c_memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
TRY(blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
0));
c_memcpy(out, out_buffer, toproduce);
}
fail:
clear_internal_memory(&blake_state, sizeof(blake_state));
return ret;
#undef TRY
}
// device_argon2d_hash関数を完全にCリファレンス実装と一致させる
__device__ void device_argon2d_hash(
uint8_t* output,
const uint8_t* input, size_t input_len,
uint32_t t_cost, uint32_t m_cost, uint32_t lanes,
block* memory,
const uint8_t* salt, size_t salt_len
) {
argon2_instance_t instance;
// 1. メモリサイズの調整
uint32_t memory_blocks = m_cost;
if (memory_blocks < 2 * ARGON2_SYNC_POINTS * lanes) {
memory_blocks = 2 * ARGON2_SYNC_POINTS * lanes;
}
uint32_t segment_length = memory_blocks / (lanes * ARGON2_SYNC_POINTS);
memory_blocks = segment_length * (lanes * ARGON2_SYNC_POINTS);
uint32_t lane_length = segment_length * ARGON2_SYNC_POINTS;
// Initialize instance with the provided memory pointer
instance.version = ARGON2_VERSION_13;
instance.memory = memory; // Use the provided memory pointer
instance.passes = t_cost;
instance.memory_blocks = memory_blocks;
instance.segment_length = segment_length;
instance.lane_length = lane_length;
instance.lanes = lanes;
instance.threads = 1;
// 2. 初期ハッシュの計算
uint8_t blockhash[ARGON2_PREHASH_DIGEST_LENGTH];
blake2b_state BlakeHash;
blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH);
uint8_t value[sizeof(uint32_t)];
store32(&value, lanes);
blake2b_update(&BlakeHash, (uint8_t*)&value, sizeof(value));
store32(&value, 32);
blake2b_update(&BlakeHash, (uint8_t*)&value, sizeof(value));
store32(&value, memory_blocks);
blake2b_update(&BlakeHash, (uint8_t*)&value, sizeof(value));
store32(&value, t_cost);
blake2b_update(&BlakeHash, (uint8_t*)&value, sizeof(value));
store32(&value, ARGON2_VERSION_13);
blake2b_update(&BlakeHash, (uint8_t*)&value, sizeof(value));
store32(&value, 0);
blake2b_update(&BlakeHash, (uint8_t*)&value, sizeof(value));
store32(&value, input_len);
blake2b_update(&BlakeHash, (uint8_t*)&value, sizeof(value));
blake2b_update(&BlakeHash, (const uint8_t *)input, input_len);
store32(&value, salt_len);
blake2b_update(&BlakeHash, (uint8_t*)&value, sizeof(value));
blake2b_update(&BlakeHash, (const uint8_t *)salt, salt_len);
store32(&value, 0);
blake2b_update(&BlakeHash, (uint8_t*)&value, sizeof(value));
store32(&value, 0);
blake2b_update(&BlakeHash, (uint8_t*)&value, sizeof(value));
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
// 3. Initialize first blocks in each lane
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
uint8_t initial_hash[ARGON2_PREHASH_SEED_LENGTH];
c_memcpy(initial_hash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
c_memset(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 0, ARGON2_PREHASH_SEED_LENGTH - ARGON2_PREHASH_DIGEST_LENGTH);
for (uint32_t l = 0; l < lanes; ++l) {
store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l);
blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, initial_hash, ARGON2_PREHASH_SEED_LENGTH);
load_block(&memory[l * lane_length], blockhash_bytes);
store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, initial_hash, ARGON2_PREHASH_SEED_LENGTH);
load_block(&memory[l * lane_length + 1], blockhash_bytes);
}
// 4. Fill memory
fill_memory(memory, t_cost, lanes, lane_length, segment_length);
// 5. Final block mixing
block final_block;
copy_block(&final_block, &memory[0 * lane_length + (lane_length - 1)]);
for (uint32_t l = 1; l < lanes; ++l) {
uint32_t last_block_in_lane = l * lane_length + (lane_length - 1);
xor_block(&final_block, &memory[last_block_in_lane]);
}
// 6. Final hash
uint8_t final_block_bytes[ARGON2_BLOCK_SIZE];
store_block(final_block_bytes, &final_block);
blake2b_long(output, 32, final_block_bytes, ARGON2_BLOCK_SIZE);
}
//=== __global__ カーネル例salt 指定版)===//
// ホスト側でブロック用メモリをあらかじめ確保し、そのポインタmemory_ptrを渡すことを前提としています。
__global__ void argon2d_hash_device_kernel(
uint8_t* output,
const uint8_t* input, size_t input_len,
uint32_t t_cost, uint32_t m_cost, uint32_t lanes,
block* memory_ptr, // ホスト側で確保したメモリ領域へのポインタ
const uint8_t* salt, size_t salt_len
) {
if (threadIdx.x == 0 && blockIdx.x == 0) {
device_argon2d_hash(output, input, input_len, t_cost, m_cost, lanes, memory_ptr, salt, salt_len);
}
}

View File

@@ -0,0 +1,272 @@
#include "blaze3_cpu.cuh"
// Number of threads per thread block
__constant__ const int NUM_THREADS = 16;
// redefine functions, but for the GPU
// all of them are the same but with g_ prefixed
__constant__ const u32 g_IV[8] = {
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
};
__constant__ const int g_MSG_PERMUTATION[] = {
2, 6, 3, 10, 7, 0, 4, 13,
1, 11, 12, 5, 9, 14, 15, 8
};
__device__ u32 g_rotr(u32 value, int shift) {
return (value >> shift)|(value << (usize - shift));
}
__device__ void g_g(u32 state[16], u32 a, u32 b, u32 c, u32 d, u32 mx, u32 my) {
state[a] = state[a] + state[b] + mx;
state[d] = g_rotr((state[d] ^ state[a]), 16);
state[c] = state[c] + state[d];
state[b] = g_rotr((state[b] ^ state[c]), 12);
state[a] = state[a] + state[b] + my;
state[d] = g_rotr((state[d] ^ state[a]), 8);
state[c] = state[c] + state[d];
state[b] = g_rotr((state[b] ^ state[c]), 7);
}
__device__ void g_round(u32 state[16], u32 m[16]) {
// Mix the columns.
g_g(state, 0, 4, 8, 12, m[0], m[1]);
g_g(state, 1, 5, 9, 13, m[2], m[3]);
g_g(state, 2, 6, 10, 14, m[4], m[5]);
g_g(state, 3, 7, 11, 15, m[6], m[7]);
// Mix the diagonals.
g_g(state, 0, 5, 10, 15, m[8], m[9]);
g_g(state, 1, 6, 11, 12, m[10], m[11]);
g_g(state, 2, 7, 8, 13, m[12], m[13]);
g_g(state, 3, 4, 9, 14, m[14], m[15]);
}
__device__ void g_permute(u32 m[16]) {
u32 permuted[16];
for(int i=0; i<16; i++)
permuted[i] = m[g_MSG_PERMUTATION[i]];
for(int i=0; i<16; i++)
m[i] = permuted[i];
}
// custom memcpy, apparently cuda's memcpy is slow
// when called within a kernel
__device__ void g_memcpy(u32 *lhs, const u32 *rhs, int size) {
// assuming u32 is 4 bytes
int len = size / 4;
for(int i=0; i<len; i++)
lhs[i] = rhs[i];
}
// custom memset
template<typename T, typename ptr_t>
__device__ void g_memset(ptr_t dest, T val, int count) {
for(int i=0; i<count; i++)
dest[i] = val;
}
__device__ void g_compress(
u32 *chaining_value,
u32 *block_words,
u64 counter,
u32 block_len,
u32 flags,
u32 *state
) {
// Search for better alternative
g_memcpy(state, chaining_value, 32);
g_memcpy(state+8, g_IV, 16);
state[12] = (u32)counter;
state[13] = (u32)(counter >> 32);
state[14] = block_len;
state[15] = flags;
u32 block[16];
g_memcpy(block, block_words, 64);
g_round(state, block); // round 1
g_permute(block);
g_round(state, block); // round 2
g_permute(block);
g_round(state, block); // round 3
g_permute(block);
g_round(state, block); // round 4
g_permute(block);
g_round(state, block); // round 5
g_permute(block);
g_round(state, block); // round 6
g_permute(block);
g_round(state, block); // round 7
for(int i=0; i<8; i++){
state[i] ^= state[i + 8];
state[i + 8] ^= chaining_value[i];
}
}
__device__ void g_words_from_little_endian_bytes(
u8 *bytes, u32 *words, u32 bytes_len
) {
u32 tmp;
for(u32 i=0; i<bytes_len; i+=4) {
tmp = (bytes[i+3]<<24) | (bytes[i+2]<<16) | (bytes[i+1]<<8) | bytes[i];
words[i/4] = tmp;
}
}
__device__ void Chunk::g_compress_chunk(u32 out_flags) {
if(flags&PARENT) {
g_compress(
key,
data,
0, // counter is always zero for parent nodes
BLOCK_LEN,
flags | out_flags,
raw_hash
);
return;
}
u32 chaining_value[8];
u32 block_len = BLOCK_LEN, flagger;
g_memcpy(chaining_value, key, 32);
bool empty_input = (leaf_len==0);
if(empty_input) {
for(u32 i=0; i<BLOCK_LEN; i++)
leaf_data[i] = 0U;
leaf_len = BLOCK_LEN;
}
// move all mem allocs outside loop
u32 block_words[16];
u8 block_cast[BLOCK_LEN];
for(u32 i=0; i<leaf_len; i+=BLOCK_LEN) {
flagger = flags;
// for the last message block
if(i+BLOCK_LEN > leaf_len)
block_len = leaf_len%BLOCK_LEN;
else
block_len = BLOCK_LEN;
// special case
if(empty_input)
block_len = 0;
// clear up block_words
g_memset(block_words, 0, 16);
u32 new_block_len(block_len);
if(block_len%4)
new_block_len += 4 - (block_len%4);
// This memcpy is fine since data is a byte array
memcpy(block_cast, leaf_data+i, new_block_len*sizeof(*block_cast));
g_words_from_little_endian_bytes(leaf_data+i, block_words, new_block_len);
if(i==0)
flagger |= CHUNK_START;
if(i+BLOCK_LEN >= leaf_len)
flagger |= CHUNK_END | out_flags;
// raw hash for root node
g_compress(
chaining_value,
block_words,
counter,
block_len,
flagger,
raw_hash
);
g_memcpy(chaining_value, raw_hash, 32);
}
}
__global__ void compute(Chunk *data, int l, int r) {
// n is always a power of 2
int n = r-l;
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if(tid >= n)
return;
if(n==1) {
data[l].g_compress_chunk();
// printf("Compressing : %d\n", l);
}
else {
compute<<<n/2,16>>>(data, l, l+n/2);
cudaDeviceSynchronize();
compute<<<n/2,16>>>(data, l+n/2, r);
cudaDeviceSynchronize();
data[l].flags |= PARENT;
memcpy(data[l].data, data[l].raw_hash, 32);
memcpy(data[l].data+8, data[l+n/2].raw_hash, 32);
data[l].g_compress_chunk();
// printf("Compressing : %d to %d\n", l, r);
}
}
// CPU version of light_hash (unchanged)
void light_hash(Chunk *data, int N, Chunk *result, Chunk *memory_bar) {
const int data_size = N*sizeof(Chunk);
// Device settings
// Allows DeviceSync to be called upto 16 levels of recursion
cudaDeviceSetLimit(cudaLimitDevRuntimeSyncDepth, 16);
// Device vector
Chunk *g_data = memory_bar;
cudaMemcpy(g_data, data, data_size, cudaMemcpyHostToDevice);
// Actual computation of hash
compute<<<N,32>>>(g_data, 0, N);
cudaMemcpy(result, g_data, sizeof(Chunk), cudaMemcpyDeviceToHost);
}
// Device-callable version of light_hash
__device__ void light_hash_device(const uint8_t* input, size_t input_len, uint8_t* output) {
// Create a single chunk for processing the input
Chunk chunk;
// Initialize the chunk with the input data
for (int i = 0; i < 8; i++) {
chunk.key[i] = g_IV[i]; // Use device constant IV
}
// Copy the input data to leaf_data (with bounds checking)
size_t copy_len = min(input_len, (size_t)BLOCK_LEN * 16); // Ensure we don't overflow
for (size_t i = 0; i < copy_len; i++) {
chunk.leaf_data[i] = input[i];
}
chunk.leaf_len = copy_len;
chunk.counter = 0;
chunk.flags = 0; // Default flags
// Process the chunk directly
chunk.g_compress_chunk(ROOT); // Set ROOT flag for final output
// Copy the raw hash to the output
for (int i = 0; i < 8; i++) {
// Convert 32-bit words to bytes in little-endian format
output[i*4] = (uint8_t)(chunk.raw_hash[i]);
output[i*4+1] = (uint8_t)(chunk.raw_hash[i] >> 8);
output[i*4+2] = (uint8_t)(chunk.raw_hash[i] >> 16);
output[i*4+3] = (uint8_t)(chunk.raw_hash[i] >> 24);
}
}
// Alias for compatibility with other device code
__device__ void blake3_hash_device(const uint8_t* input, size_t input_len, uint8_t* output) {
light_hash_device(input, input_len, output);
}

View File

@@ -0,0 +1,420 @@
#include <iostream>
#include <algorithm>
#include <cstring>
#include <vector>
using namespace std;
// Let's use a pinned memory vector!
#include <thrust/host_vector.h>
#include <thrust/system/cuda/experimental/pinned_allocator.h>
using u32 = uint32_t;
using u64 = uint64_t;
using u8 = uint8_t;
const u32 OUT_LEN = 32;
const u32 KEY_LEN = 32;
const u32 BLOCK_LEN = 64;
const u32 CHUNK_LEN = 1024;
// Multiple chunks make a snicker bar :)
const u32 SNICKER = 1U << 10;
// Factory height and snicker size have an inversly propotional relationship
// FACTORY_HT * (log2 SNICKER) + 10 >= 64
const u32 FACTORY_HT = 5;
const u32 CHUNK_START = 1 << 0;
const u32 CHUNK_END = 1 << 1;
const u32 PARENT = 1 << 2;
const u32 ROOT = 1 << 3;
const u32 KEYED_HASH = 1 << 4;
const u32 DERIVE_KEY_CONTEXT = 1 << 5;
const u32 DERIVE_KEY_MATERIAL = 1 << 6;
const int usize = sizeof(u32) * 8;
u32 IV[8] = {
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
};
const int MSG_PERMUTATION[] = {
2, 6, 3, 10, 7, 0, 4, 13,
1, 11, 12, 5, 9, 14, 15, 8
};
u32 rotr(u32 value, int shift) {
return (value >> shift)|(value << (usize - shift));
}
void g(u32 state[16], u32 a, u32 b, u32 c, u32 d, u32 mx, u32 my) {
state[a] = state[a] + state[b] + mx;
state[d] = rotr((state[d] ^ state[a]), 16);
state[c] = state[c] + state[d];
state[b] = rotr((state[b] ^ state[c]), 12);
state[a] = state[a] + state[b] + my;
state[d] = rotr((state[d] ^ state[a]), 8);
state[c] = state[c] + state[d];
state[b] = rotr((state[b] ^ state[c]), 7);
}
void round(u32 state[16], u32 m[16]) {
// Mix the columns.
g(state, 0, 4, 8, 12, m[0], m[1]);
g(state, 1, 5, 9, 13, m[2], m[3]);
g(state, 2, 6, 10, 14, m[4], m[5]);
g(state, 3, 7, 11, 15, m[6], m[7]);
// Mix the diagonals.
g(state, 0, 5, 10, 15, m[8], m[9]);
g(state, 1, 6, 11, 12, m[10], m[11]);
g(state, 2, 7, 8, 13, m[12], m[13]);
g(state, 3, 4, 9, 14, m[14], m[15]);
}
void permute(u32 m[16]) {
u32 permuted[16];
for(int i=0; i<16; i++)
permuted[i] = m[MSG_PERMUTATION[i]];
for(int i=0; i<16; i++)
m[i] = permuted[i];
}
void compress(
u32 *chaining_value,
u32 *block_words,
u64 counter,
u32 block_len,
u32 flags,
u32 *state
) {
memcpy(state, chaining_value, 8*sizeof(*state));
memcpy(state+8, IV, 4*sizeof(*state));
state[12] = (u32)counter;
state[13] = (u32)(counter >> 32);
state[14] = block_len;
state[15] = flags;
u32 block[16];
memcpy(block, block_words, 16*sizeof(*block));
round(state, block); // round 1
permute(block);
round(state, block); // round 2
permute(block);
round(state, block); // round 3
permute(block);
round(state, block); // round 4
permute(block);
round(state, block); // round 5
permute(block);
round(state, block); // round 6
permute(block);
round(state, block); // round 7
for(int i=0; i<8; i++){
state[i] ^= state[i + 8];
state[i + 8] ^= chaining_value[i];
}
}
void words_from_little_endian_bytes(u8 *bytes, u32 *words, u32 bytes_len) {
u32 tmp;
for(u32 i=0; i<bytes_len; i+=4) {
tmp = (bytes[i+3]<<24) | (bytes[i+2]<<16) | (bytes[i+1]<<8) | bytes[i];
words[i/4] = tmp;
}
}
struct Chunk {
// use only when it is a leaf node
// leaf data may have less than 1024 bytes
u8 leaf_data[1024];
u32 leaf_len;
// use in all other cases
// data will always have 64 bytes
u32 data[16];
u32 flags;
u32 raw_hash[16];
u32 key[8];
// only useful for leaf nodes
u64 counter;
// Constructor for leaf nodes
__device__ __host__ Chunk(char *input, int size, u32 _flags, u32 *_key, u64 ctr){
counter = ctr;
flags = _flags;
memcpy(key, _key, 8*sizeof(*key));
memset(leaf_data, 0, 1024);
memcpy(leaf_data, input, size);
leaf_len = size;
}
__device__ __host__ Chunk(u32 _flags, u32 *_key) {
counter = 0;
flags = _flags;
memcpy(key, _key, 8*sizeof(*key));
leaf_len = 0;
}
__device__ __host__ Chunk() {}
// Chunk() : leaf_len(0) {}
// process data in sizes of message blocks and store cv in hash
void compress_chunk(u32=0);
__device__ void g_compress_chunk(u32=0);
};
void Chunk::compress_chunk(u32 out_flags) {
if(flags&PARENT) {
compress(
key,
data,
0, // counter is always zero for parent nodes
BLOCK_LEN,
flags | out_flags,
raw_hash
);
return;
}
u32 chaining_value[8], block_len = BLOCK_LEN, flagger;
memcpy(chaining_value, key, 8*sizeof(*chaining_value));
bool empty_input = (leaf_len==0);
if(empty_input) {
for(u32 i=0; i<BLOCK_LEN; i++)
leaf_data[i] = 0U;
leaf_len = BLOCK_LEN;
}
for(u32 i=0; i<leaf_len; i+=BLOCK_LEN) {
flagger = flags;
// for the last message block
if(i+BLOCK_LEN > leaf_len)
block_len = leaf_len%BLOCK_LEN;
else
block_len = BLOCK_LEN;
// special case
if(empty_input)
block_len = 0;
u32 block_words[16];
memset(block_words, 0, 16*sizeof(*block_words));
u32 new_block_len(block_len);
if(block_len%4)
new_block_len += 4 - (block_len%4);
// BLOCK_LEN is the max possible length of block_cast
u8 block_cast[BLOCK_LEN];
memset(block_cast, 0, new_block_len*sizeof(*block_cast));
memcpy(block_cast, leaf_data+i, block_len*sizeof(*block_cast));
words_from_little_endian_bytes(block_cast, block_words, new_block_len);
if(i==0)
flagger |= CHUNK_START;
if(i+BLOCK_LEN >= leaf_len)
flagger |= CHUNK_END | out_flags;
// raw hash for root node
compress(
chaining_value,
block_words,
counter,
block_len,
flagger,
raw_hash
);
memcpy(chaining_value, raw_hash, 8*sizeof(*chaining_value));
}
}
using thrust_vector = thrust::host_vector<
Chunk,
thrust::system::cuda::experimental::pinned_allocator<Chunk>
>;
// The GPU hasher
void light_hash(Chunk*, int, Chunk*, Chunk*);
// Sanity checks
Chunk hash_many(Chunk *data, int first, int last, Chunk *memory_bar) {
// n will always be a power of 2
int n = last-first;
// Reduce GPU calling overhead
if(n == 1) {
data[first].compress_chunk();
return data[first];
}
Chunk ret;
light_hash(data+first, n, &ret, memory_bar);
return ret;
// CPU style execution
// Chunk left, right;
// left = hash_many(data, first, first+n/2);
// right = hash_many(data, first+n/2, last);
// Chunk parent(left.flags, left.key);
// parent.flags |= PARENT;
// memcpy(parent.data, left.raw_hash, 32);
// memcpy(parent.data+8, right.raw_hash, 32);
// parent.compress_chunk();
// return parent;
}
Chunk merge(Chunk &left, Chunk &right);
void hash_root(Chunk &node, vector<u8> &out_slice);
struct Hasher {
u32 key[8];
u32 flags;
u64 ctr;
u64 file_size;
// A memory bar for CUDA to use during it's computation
Chunk* memory_bar;
// Factory is an array of FACTORY_HT possible SNICKER bars
thrust_vector factory[FACTORY_HT];
// methods
static Hasher new_internal(u32 key[8], u32 flags, u64 fsize);
static Hasher _new(u64);
// initializes cuda memory (if needed)
void init();
// frees cuda memory (if it is there)
// free nullptr is a no-op
~Hasher() {
if(memory_bar)
cudaFree(memory_bar);
else
free(memory_bar);
}
void update(char *input, int size);
void finalize(vector<u8> &out_slice);
void propagate();
};
Hasher Hasher::new_internal(u32 key[8], u32 flags, u64 fsize) {
return Hasher{
{
key[0], key[1], key[2], key[3],
key[4], key[5], key[6], key[7]
},
flags,
0, // counter
fsize
};
}
Hasher Hasher::_new(u64 fsize) { return new_internal(IV, 0, fsize); }
void Hasher::init() {
if(file_size<1) {
memory_bar = nullptr;
return;
}
u64 num_chunks = ceil(file_size / CHUNK_LEN);
u32 bar_size = min(num_chunks, (u64)SNICKER);
// Just for safety :)
++bar_size;
cudaMalloc(&memory_bar, bar_size*sizeof(Chunk));
// Let the most commonly used places always have memory
// +1 so that it does not resize when it hits CHUNK_LEN
u32 RESERVE = SNICKER + 1;
factory[0].reserve(RESERVE);
factory[1].reserve(RESERVE);
}
void Hasher::propagate() {
int level=0;
// nodes move to upper levels if lower one is one SNICKER long
while(factory[level].size() == SNICKER) {
Chunk subtree = hash_many(factory[level].data(), 0, SNICKER, memory_bar);
factory[level].clear();
++level;
factory[level].push_back(subtree);
}
}
void Hasher::update(char *input, int size) {
factory[0].push_back(Chunk(input, size, flags, key, ctr));
++ctr;
if(factory[0].size() == SNICKER)
propagate();
}
void Hasher::finalize(vector<u8> &out_slice) {
Chunk root(flags, key);
for(int i=0; i<FACTORY_HT; i++) {
vector<Chunk> subtrees;
u32 n = factory[i].size(), divider=SNICKER;
if(!n)
continue;
int start = 0;
while(divider) {
if(n&divider) {
Chunk subtree = hash_many(factory[i].data(), start, start+divider, memory_bar);
subtrees.push_back(subtree);
start += divider;
}
divider >>= 1;
}
while(subtrees.size()>1) {
Chunk tmp1 = subtrees.back();
subtrees.pop_back();
Chunk tmp2 = subtrees.back();
subtrees.pop_back();
// tmp2 is the left child
// tmp1 is the right child
// that's the order they appear within the array
Chunk tmp = merge(tmp2, tmp1);
subtrees.push_back(tmp);
}
if(i<FACTORY_HT-1)
factory[i+1].push_back(subtrees[0]);
else
root = subtrees[0];
}
hash_root(root, out_slice);
}
Chunk merge(Chunk &left, Chunk &right) {
// cout << "Called merge once\n";
left.compress_chunk();
right.compress_chunk();
Chunk parent(left.flags, left.key);
parent.flags |= PARENT;
// 32 bytes need to be copied for all of these
memcpy(parent.data, left.raw_hash, 32);
memcpy(parent.data+8, right.raw_hash, 32);
return parent;
}
void hash_root(Chunk &node, vector<u8> &out_slice) {
// the last message block must not be hashed like the others
// it needs to be hashed with the root flag
u64 output_block_counter = 0;
u64 i=0, k=2*OUT_LEN;
u32 words[16] = {};
for(; int(out_slice.size()-i)>0; i+=k) {
node.counter = output_block_counter;
node.compress_chunk(ROOT);
// words is u32[16]
memcpy(words, node.raw_hash, 16*sizeof(*words));
vector<u8> out_block(min(k, (u64)out_slice.size()-i));
for(u32 l=0; l<out_block.size(); l+=4) {
for(u32 j=0; j<min(4U, (u32)out_block.size()-l); j++)
out_block[l+j] = (words[l/4]>>(8*j)) & 0x000000FF;
}
for(u32 j=0; j<out_block.size(); j++)
out_slice[i+j] = out_block[j];
++output_block_counter;
}
}

View File

@@ -0,0 +1,91 @@
#!/bin/bash
# RinHash HIP Build Script for Linux
# This script builds the HIP implementation of RinHash for AMD GPUs
echo "======================================"
echo " RinHash HIP Miner Build Script"
echo "======================================"
# Check if hipcc is available
if ! command -v hipcc &> /dev/null; then
echo "ERROR: hipcc not found in PATH"
echo "Please install ROCm/HIP toolkit"
echo "On Ubuntu/Debian: sudo apt install rocm-dev hip-runtime-amd"
echo "Or download from: https://rocm.docs.amd.com/en/latest/deploy/linux/quick_start.html"
exit 1
fi
echo "HIP compiler found:"
hipcc --version
echo ""
# Check if cmake is available
if ! command -v cmake &> /dev/null; then
echo "ERROR: CMake not found in PATH"
echo "Please install cmake: sudo apt install cmake"
exit 1
fi
echo "CMake found:"
cmake --version | head -1
echo ""
echo "Building RinHash HIP miner..."
echo ""
# Create build directory
mkdir -p build
cd build
# Configure with CMake
cmake -G "Ninja" \
-DHIP_PLATFORM=amd \
-DCMAKE_BUILD_TYPE=Release \
..
if [ $? -ne 0 ]; then
echo "CMake configuration failed!"
echo "Trying without Ninja..."
cmake -DHIP_PLATFORM=amd \
-DCMAKE_BUILD_TYPE=Release \
..
if [ $? -ne 0 ]; then
echo "CMake configuration failed completely!"
exit 1
fi
fi
# Build
cmake --build . -j$(nproc)
if [ $? -eq 0 ]; then
echo ""
echo "======================================"
echo " BUILD SUCCESSFUL!"
echo "======================================"
echo ""
echo "Executable created:"
echo " build/rinhash-hip-miner"
echo ""
echo "To test the miner:"
echo " cd build && ./rinhash-hip-miner --help"
echo ""
echo "To check AMD GPU availability:"
echo " rocm-smi"
echo ""
else
echo ""
echo "======================================"
echo " BUILD FAILED!"
echo "======================================"
echo ""
echo "Common issues:"
echo "1. Missing ROCm development libraries"
echo "2. Incompatible HIP version"
echo "3. Missing development tools"
echo ""
exit 1
fi
echo "Build completed successfully!"

View File

@@ -0,0 +1,18 @@
@echo off
setlocal
where hipcc >nul 2>nul
if errorlevel 1 (
echo ERROR: hipcc not found. Please install ROCm/HIP toolchain.
exit /b 1
)
if not exist build mkdir build
cd build
cmake -G "Ninja" -DHIP_PLATFORM=amd -DCMAKE_BUILD_TYPE=Release ..
if errorlevel 1 exit /b 1
cmake --build . -j
if errorlevel 1 exit /b 1
cd ..
echo Build done. Executable should be at build\rinhash-hip-miner.exe

View File

@@ -0,0 +1,175 @@
# This is the CMakeCache file.
# For build in directory: /tmp/rinhash-hip/build
# It was generated by CMake: /usr/bin/cmake
# You can edit this file to change values found and used by cmake.
# If you do not want to change any of the values, simply exit the editor.
# If you do want to change a value, simply edit, save, and exit the editor.
# The syntax for the file is as follows:
# KEY:TYPE=VALUE
# KEY is the name of a variable in the cache.
# TYPE is a hint to GUIs for the type of VALUE, DO NOT EDIT TYPE!.
# VALUE is the current value for the KEY.
########################
# EXTERNAL cache entries
########################
//Path to a program.
CMAKE_ADDR2LINE:FILEPATH=CMAKE_ADDR2LINE-NOTFOUND
//Path to a program.
CMAKE_AR:FILEPATH=CMAKE_AR-NOTFOUND
//No help, variable specified on the command line.
CMAKE_BUILD_TYPE:UNINITIALIZED=Release
//CXX compiler
CMAKE_CXX_COMPILER:FILEPATH=/opt/rocm-7.0/llvm/bin/clang++
//LLVM archiver
CMAKE_CXX_COMPILER_AR:FILEPATH=CMAKE_CXX_COMPILER_AR-NOTFOUND
//`clang-scan-deps` dependency scanner
CMAKE_CXX_COMPILER_CLANG_SCAN_DEPS:FILEPATH=CMAKE_CXX_COMPILER_CLANG_SCAN_DEPS-NOTFOUND
//Generate index for LLVM archive
CMAKE_CXX_COMPILER_RANLIB:FILEPATH=CMAKE_CXX_COMPILER_RANLIB-NOTFOUND
//Path to a program.
CMAKE_DLLTOOL:FILEPATH=CMAKE_DLLTOOL-NOTFOUND
//Value Computed by CMake.
CMAKE_FIND_PACKAGE_REDIRECTS_DIR:STATIC=/tmp/rinhash-hip/build/CMakeFiles/pkgRedirects
//HIP compiler
CMAKE_HIP_COMPILER:FILEPATH=/opt/rocm-7.0/llvm/bin/clang++
//HIP platform
CMAKE_HIP_PLATFORM:STRING=amd
//Path to a program.
CMAKE_LINKER:FILEPATH=/opt/rocm-7.0/llvm/bin/ld.lld
//Path to a program.
CMAKE_MAKE_PROGRAM:FILEPATH=/usr/sbin/gmake
//Path to a program.
CMAKE_NM:FILEPATH=/opt/rocm-7.0/llvm/bin/llvm-nm
//Path to a program.
CMAKE_OBJCOPY:FILEPATH=/opt/rocm-7.0/llvm/bin/llvm-objcopy
//Path to a program.
CMAKE_OBJDUMP:FILEPATH=/opt/rocm-7.0/llvm/bin/llvm-objdump
//Value Computed by CMake
CMAKE_PROJECT_DESCRIPTION:STATIC=
//Value Computed by CMake
CMAKE_PROJECT_HOMEPAGE_URL:STATIC=
//Value Computed by CMake
CMAKE_PROJECT_NAME:STATIC=RinHashHIP
//Path to a program.
CMAKE_READELF:FILEPATH=CMAKE_READELF-NOTFOUND
//Path to a program.
CMAKE_STRIP:FILEPATH=/opt/rocm-7.0/llvm/bin/llvm-strip
//Path to a program.
CMAKE_TAPI:FILEPATH=CMAKE_TAPI-NOTFOUND
//No help, variable specified on the command line.
HIP_PLATFORM:UNINITIALIZED=amd
//Value Computed by CMake
RinHashHIP_BINARY_DIR:STATIC=/tmp/rinhash-hip/build
//Value Computed by CMake
RinHashHIP_IS_TOP_LEVEL:STATIC=ON
//Value Computed by CMake
RinHashHIP_SOURCE_DIR:STATIC=/tmp/rinhash-hip
########################
# INTERNAL cache entries
########################
//ADVANCED property for variable: CMAKE_ADDR2LINE
CMAKE_ADDR2LINE-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_AR
CMAKE_AR-ADVANCED:INTERNAL=1
//This is the directory where this CMakeCache.txt was created
CMAKE_CACHEFILE_DIR:INTERNAL=/tmp/rinhash-hip/build
//Major version of cmake used to create the current loaded cache
CMAKE_CACHE_MAJOR_VERSION:INTERNAL=3
//Minor version of cmake used to create the current loaded cache
CMAKE_CACHE_MINOR_VERSION:INTERNAL=31
//Patch version of cmake used to create the current loaded cache
CMAKE_CACHE_PATCH_VERSION:INTERNAL=6
//Path to CMake executable.
CMAKE_COMMAND:INTERNAL=/usr/bin/cmake
//Path to cpack program executable.
CMAKE_CPACK_COMMAND:INTERNAL=/usr/bin/cpack
//Path to ctest program executable.
CMAKE_CTEST_COMMAND:INTERNAL=/usr/bin/ctest
//ADVANCED property for variable: CMAKE_CXX_COMPILER
CMAKE_CXX_COMPILER-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_CXX_COMPILER_AR
CMAKE_CXX_COMPILER_AR-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_CXX_COMPILER_CLANG_SCAN_DEPS
CMAKE_CXX_COMPILER_CLANG_SCAN_DEPS-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_CXX_COMPILER_RANLIB
CMAKE_CXX_COMPILER_RANLIB-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_DLLTOOL
CMAKE_DLLTOOL-ADVANCED:INTERNAL=1
//Path to cache edit program executable.
CMAKE_EDIT_COMMAND:INTERNAL=/usr/bin/ccmake
//Executable file format
CMAKE_EXECUTABLE_FORMAT:INTERNAL=ELF
//Name of external makefile project generator.
CMAKE_EXTRA_GENERATOR:INTERNAL=
//Name of generator.
CMAKE_GENERATOR:INTERNAL=Unix Makefiles
//Generator instance identifier.
CMAKE_GENERATOR_INSTANCE:INTERNAL=
//Name of generator platform.
CMAKE_GENERATOR_PLATFORM:INTERNAL=
//Name of generator toolset.
CMAKE_GENERATOR_TOOLSET:INTERNAL=
//ADVANCED property for variable: CMAKE_HIP_COMPILER
CMAKE_HIP_COMPILER-ADVANCED:INTERNAL=1
//Source directory with the top level CMakeLists.txt file for this
// project
CMAKE_HOME_DIRECTORY:INTERNAL=/tmp/rinhash-hip
//ADVANCED property for variable: CMAKE_LINKER
CMAKE_LINKER-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_MAKE_PROGRAM
CMAKE_MAKE_PROGRAM-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_NM
CMAKE_NM-ADVANCED:INTERNAL=1
//number of local generators
CMAKE_NUMBER_OF_MAKEFILES:INTERNAL=1
//ADVANCED property for variable: CMAKE_OBJCOPY
CMAKE_OBJCOPY-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_OBJDUMP
CMAKE_OBJDUMP-ADVANCED:INTERNAL=1
//Platform information initialized
CMAKE_PLATFORM_INFO_INITIALIZED:INTERNAL=1
//ADVANCED property for variable: CMAKE_RANLIB
CMAKE_RANLIB-ADVANCED:INTERNAL=1
//noop for ranlib
CMAKE_RANLIB:INTERNAL=:
//ADVANCED property for variable: CMAKE_READELF
CMAKE_READELF-ADVANCED:INTERNAL=1
//Path to CMake installation.
CMAKE_ROOT:INTERNAL=/usr/share/cmake
//ADVANCED property for variable: CMAKE_STRIP
CMAKE_STRIP-ADVANCED:INTERNAL=1
//ADVANCED property for variable: CMAKE_TAPI
CMAKE_TAPI-ADVANCED:INTERNAL=1
//uname command
CMAKE_UNAME:INTERNAL=/usr/sbin/uname

View File

@@ -0,0 +1,97 @@
set(CMAKE_CXX_COMPILER "/opt/rocm-7.0/llvm/bin/clang++")
set(CMAKE_CXX_COMPILER_ARG1 "")
set(CMAKE_CXX_COMPILER_ID "Clang")
set(CMAKE_CXX_COMPILER_VERSION "20.0.0")
set(CMAKE_CXX_COMPILER_VERSION_INTERNAL "")
set(CMAKE_CXX_COMPILER_WRAPPER "")
set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "17")
set(CMAKE_CXX_EXTENSIONS_COMPUTED_DEFAULT "ON")
set(CMAKE_CXX_STANDARD_LATEST "")
set(CMAKE_CXX_COMPILE_FEATURES "")
set(CMAKE_CXX98_COMPILE_FEATURES "")
set(CMAKE_CXX11_COMPILE_FEATURES "")
set(CMAKE_CXX14_COMPILE_FEATURES "")
set(CMAKE_CXX17_COMPILE_FEATURES "")
set(CMAKE_CXX20_COMPILE_FEATURES "")
set(CMAKE_CXX23_COMPILE_FEATURES "")
set(CMAKE_CXX26_COMPILE_FEATURES "")
set(CMAKE_CXX_PLATFORM_ID "Linux")
set(CMAKE_CXX_SIMULATE_ID "")
set(CMAKE_CXX_COMPILER_FRONTEND_VARIANT "GNU")
set(CMAKE_CXX_SIMULATE_VERSION "")
set(CMAKE_AR "CMAKE_AR-NOTFOUND")
set(CMAKE_CXX_COMPILER_AR "CMAKE_CXX_COMPILER_AR-NOTFOUND")
set(CMAKE_RANLIB ":")
set(CMAKE_CXX_COMPILER_RANLIB "CMAKE_CXX_COMPILER_RANLIB-NOTFOUND")
set(CMAKE_LINKER "/opt/rocm-7.0/llvm/bin/ld.lld")
set(CMAKE_LINKER_LINK "")
set(CMAKE_LINKER_LLD "")
set(CMAKE_CXX_COMPILER_LINKER "")
set(CMAKE_CXX_COMPILER_LINKER_ID "")
set(CMAKE_CXX_COMPILER_LINKER_VERSION )
set(CMAKE_CXX_COMPILER_LINKER_FRONTEND_VARIANT )
set(CMAKE_MT "")
set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND")
set(CMAKE_COMPILER_IS_GNUCXX )
set(CMAKE_CXX_COMPILER_LOADED 1)
set(CMAKE_CXX_COMPILER_WORKS )
set(CMAKE_CXX_ABI_COMPILED )
set(CMAKE_CXX_COMPILER_ENV_VAR "CXX")
set(CMAKE_CXX_COMPILER_ID_RUN 1)
set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm;mpp;CPP;ixx;cppm;ccm;cxxm;c++m)
set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC)
foreach (lang IN ITEMS C OBJC OBJCXX)
if (CMAKE_${lang}_COMPILER_ID_RUN)
foreach(extension IN LISTS CMAKE_${lang}_SOURCE_FILE_EXTENSIONS)
list(REMOVE_ITEM CMAKE_CXX_SOURCE_FILE_EXTENSIONS ${extension})
endforeach()
endif()
endforeach()
set(CMAKE_CXX_LINKER_PREFERENCE 30)
set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1)
set(CMAKE_CXX_LINKER_DEPFILE_SUPPORTED )
# Save compiler ABI information.
set(CMAKE_CXX_SIZEOF_DATA_PTR "")
set(CMAKE_CXX_COMPILER_ABI "")
set(CMAKE_CXX_BYTE_ORDER "")
set(CMAKE_CXX_LIBRARY_ARCHITECTURE "")
if(CMAKE_CXX_SIZEOF_DATA_PTR)
set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}")
endif()
if(CMAKE_CXX_COMPILER_ABI)
set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}")
endif()
if(CMAKE_CXX_LIBRARY_ARCHITECTURE)
set(CMAKE_LIBRARY_ARCHITECTURE "")
endif()
set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "")
if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX)
set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}")
endif()
set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "")
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "")
set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "")
set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
set(CMAKE_CXX_COMPILER_CLANG_RESOURCE_DIR "/opt/rocm-7.0/lib/llvm/lib/clang/20")
set(CMAKE_CXX_COMPILER_IMPORT_STD "")

View File

@@ -0,0 +1,15 @@
set(CMAKE_HOST_SYSTEM "Linux-6.8.0-79-generic")
set(CMAKE_HOST_SYSTEM_NAME "Linux")
set(CMAKE_HOST_SYSTEM_VERSION "6.8.0-79-generic")
set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64")
set(CMAKE_SYSTEM "Linux-6.8.0-79-generic")
set(CMAKE_SYSTEM_NAME "Linux")
set(CMAKE_SYSTEM_VERSION "6.8.0-79-generic")
set(CMAKE_SYSTEM_PROCESSOR "x86_64")
set(CMAKE_CROSSCOMPILING "FALSE")
set(CMAKE_SYSTEM_LOADED 1)

View File

@@ -0,0 +1,919 @@
/* This source file must have a .cpp extension so that all C++ compilers
recognize the extension without flags. Borland does not know .cxx for
example. */
#ifndef __cplusplus
# error "A C compiler has been selected for C++."
#endif
#if !defined(__has_include)
/* If the compiler does not have __has_include, pretend the answer is
always no. */
# define __has_include(x) 0
#endif
/* Version number components: V=Version, R=Revision, P=Patch
Version date components: YYYY=Year, MM=Month, DD=Day */
#if defined(__INTEL_COMPILER) || defined(__ICC)
# define COMPILER_ID "Intel"
# if defined(_MSC_VER)
# define SIMULATE_ID "MSVC"
# endif
# if defined(__GNUC__)
# define SIMULATE_ID "GNU"
# endif
/* __INTEL_COMPILER = VRP prior to 2021, and then VVVV for 2021 and later,
except that a few beta releases use the old format with V=2021. */
# if __INTEL_COMPILER < 2021 || __INTEL_COMPILER == 202110 || __INTEL_COMPILER == 202111
# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100)
# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10)
# if defined(__INTEL_COMPILER_UPDATE)
# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE)
# else
# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10)
# endif
# else
# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER)
# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER_UPDATE)
/* The third version component from --version is an update index,
but no macro is provided for it. */
# define COMPILER_VERSION_PATCH DEC(0)
# endif
# if defined(__INTEL_COMPILER_BUILD_DATE)
/* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */
# define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE)
# endif
# if defined(_MSC_VER)
/* _MSC_VER = VVRR */
# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
# endif
# if defined(__GNUC__)
# define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
# elif defined(__GNUG__)
# define SIMULATE_VERSION_MAJOR DEC(__GNUG__)
# endif
# if defined(__GNUC_MINOR__)
# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
# endif
# if defined(__GNUC_PATCHLEVEL__)
# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
# endif
#elif (defined(__clang__) && defined(__INTEL_CLANG_COMPILER)) || defined(__INTEL_LLVM_COMPILER)
# define COMPILER_ID "IntelLLVM"
#if defined(_MSC_VER)
# define SIMULATE_ID "MSVC"
#endif
#if defined(__GNUC__)
# define SIMULATE_ID "GNU"
#endif
/* __INTEL_LLVM_COMPILER = VVVVRP prior to 2021.2.0, VVVVRRPP for 2021.2.0 and
* later. Look for 6 digit vs. 8 digit version number to decide encoding.
* VVVV is no smaller than the current year when a version is released.
*/
#if __INTEL_LLVM_COMPILER < 1000000L
# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/100)
# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/10 % 10)
# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 10)
#else
# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/10000)
# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/100 % 100)
# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 100)
#endif
#if defined(_MSC_VER)
/* _MSC_VER = VVRR */
# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
#endif
#if defined(__GNUC__)
# define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
#elif defined(__GNUG__)
# define SIMULATE_VERSION_MAJOR DEC(__GNUG__)
#endif
#if defined(__GNUC_MINOR__)
# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
#endif
#if defined(__GNUC_PATCHLEVEL__)
# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
#endif
#elif defined(__PATHCC__)
# define COMPILER_ID "PathScale"
# define COMPILER_VERSION_MAJOR DEC(__PATHCC__)
# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__)
# if defined(__PATHCC_PATCHLEVEL__)
# define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__)
# endif
#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__)
# define COMPILER_ID "Embarcadero"
# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF)
# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF)
# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF)
#elif defined(__BORLANDC__)
# define COMPILER_ID "Borland"
/* __BORLANDC__ = 0xVRR */
# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8)
# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF)
#elif defined(__WATCOMC__) && __WATCOMC__ < 1200
# define COMPILER_ID "Watcom"
/* __WATCOMC__ = VVRR */
# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100)
# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
# if (__WATCOMC__ % 10) > 0
# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
# endif
#elif defined(__WATCOMC__)
# define COMPILER_ID "OpenWatcom"
/* __WATCOMC__ = VVRP + 1100 */
# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100)
# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
# if (__WATCOMC__ % 10) > 0
# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
# endif
#elif defined(__SUNPRO_CC)
# define COMPILER_ID "SunPro"
# if __SUNPRO_CC >= 0x5100
/* __SUNPRO_CC = 0xVRRP */
# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>12)
# define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xFF)
# define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF)
# else
/* __SUNPRO_CC = 0xVRP */
# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>8)
# define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xF)
# define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF)
# endif
#elif defined(__HP_aCC)
# define COMPILER_ID "HP"
/* __HP_aCC = VVRRPP */
# define COMPILER_VERSION_MAJOR DEC(__HP_aCC/10000)
# define COMPILER_VERSION_MINOR DEC(__HP_aCC/100 % 100)
# define COMPILER_VERSION_PATCH DEC(__HP_aCC % 100)
#elif defined(__DECCXX)
# define COMPILER_ID "Compaq"
/* __DECCXX_VER = VVRRTPPPP */
# define COMPILER_VERSION_MAJOR DEC(__DECCXX_VER/10000000)
# define COMPILER_VERSION_MINOR DEC(__DECCXX_VER/100000 % 100)
# define COMPILER_VERSION_PATCH DEC(__DECCXX_VER % 10000)
#elif defined(__IBMCPP__) && defined(__COMPILER_VER__)
# define COMPILER_ID "zOS"
/* __IBMCPP__ = VRP */
# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)
#elif defined(__open_xl__) && defined(__clang__)
# define COMPILER_ID "IBMClang"
# define COMPILER_VERSION_MAJOR DEC(__open_xl_version__)
# define COMPILER_VERSION_MINOR DEC(__open_xl_release__)
# define COMPILER_VERSION_PATCH DEC(__open_xl_modification__)
# define COMPILER_VERSION_TWEAK DEC(__open_xl_ptf_fix_level__)
#elif defined(__ibmxl__) && defined(__clang__)
# define COMPILER_ID "XLClang"
# define COMPILER_VERSION_MAJOR DEC(__ibmxl_version__)
# define COMPILER_VERSION_MINOR DEC(__ibmxl_release__)
# define COMPILER_VERSION_PATCH DEC(__ibmxl_modification__)
# define COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__)
#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ >= 800
# define COMPILER_ID "XL"
/* __IBMCPP__ = VRP */
# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)
#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ < 800
# define COMPILER_ID "VisualAge"
/* __IBMCPP__ = VRP */
# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)
#elif defined(__NVCOMPILER)
# define COMPILER_ID "NVHPC"
# define COMPILER_VERSION_MAJOR DEC(__NVCOMPILER_MAJOR__)
# define COMPILER_VERSION_MINOR DEC(__NVCOMPILER_MINOR__)
# if defined(__NVCOMPILER_PATCHLEVEL__)
# define COMPILER_VERSION_PATCH DEC(__NVCOMPILER_PATCHLEVEL__)
# endif
#elif defined(__PGI)
# define COMPILER_ID "PGI"
# define COMPILER_VERSION_MAJOR DEC(__PGIC__)
# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__)
# if defined(__PGIC_PATCHLEVEL__)
# define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__)
# endif
#elif defined(__clang__) && defined(__cray__)
# define COMPILER_ID "CrayClang"
# define COMPILER_VERSION_MAJOR DEC(__cray_major__)
# define COMPILER_VERSION_MINOR DEC(__cray_minor__)
# define COMPILER_VERSION_PATCH DEC(__cray_patchlevel__)
# define COMPILER_VERSION_INTERNAL_STR __clang_version__
#elif defined(_CRAYC)
# define COMPILER_ID "Cray"
# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR)
# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR)
#elif defined(__TI_COMPILER_VERSION__)
# define COMPILER_ID "TI"
/* __TI_COMPILER_VERSION__ = VVVRRRPPP */
# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000)
# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000)
# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000)
#elif defined(__CLANG_FUJITSU)
# define COMPILER_ID "FujitsuClang"
# define COMPILER_VERSION_MAJOR DEC(__FCC_major__)
# define COMPILER_VERSION_MINOR DEC(__FCC_minor__)
# define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
# define COMPILER_VERSION_INTERNAL_STR __clang_version__
#elif defined(__FUJITSU)
# define COMPILER_ID "Fujitsu"
# if defined(__FCC_version__)
# define COMPILER_VERSION __FCC_version__
# elif defined(__FCC_major__)
# define COMPILER_VERSION_MAJOR DEC(__FCC_major__)
# define COMPILER_VERSION_MINOR DEC(__FCC_minor__)
# define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
# endif
# if defined(__fcc_version)
# define COMPILER_VERSION_INTERNAL DEC(__fcc_version)
# elif defined(__FCC_VERSION)
# define COMPILER_VERSION_INTERNAL DEC(__FCC_VERSION)
# endif
#elif defined(__ghs__)
# define COMPILER_ID "GHS"
/* __GHS_VERSION_NUMBER = VVVVRP */
# ifdef __GHS_VERSION_NUMBER
# define COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100)
# define COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10)
# define COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER % 10)
# endif
#elif defined(__TASKING__)
# define COMPILER_ID "Tasking"
# define COMPILER_VERSION_MAJOR DEC(__VERSION__/1000)
# define COMPILER_VERSION_MINOR DEC(__VERSION__ % 100)
# define COMPILER_VERSION_INTERNAL DEC(__VERSION__)
#elif defined(__ORANGEC__)
# define COMPILER_ID "OrangeC"
# define COMPILER_VERSION_MAJOR DEC(__ORANGEC_MAJOR__)
# define COMPILER_VERSION_MINOR DEC(__ORANGEC_MINOR__)
# define COMPILER_VERSION_PATCH DEC(__ORANGEC_PATCHLEVEL__)
#elif defined(__SCO_VERSION__)
# define COMPILER_ID "SCO"
#elif defined(__ARMCC_VERSION) && !defined(__clang__)
# define COMPILER_ID "ARMCC"
#if __ARMCC_VERSION >= 1000000
/* __ARMCC_VERSION = VRRPPPP */
# define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000)
# define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100)
# define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000)
#else
/* __ARMCC_VERSION = VRPPPP */
# define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000)
# define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10)
# define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000)
#endif
#elif defined(__clang__) && defined(__apple_build_version__)
# define COMPILER_ID "AppleClang"
# if defined(_MSC_VER)
# define SIMULATE_ID "MSVC"
# endif
# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
# if defined(_MSC_VER)
/* _MSC_VER = VVRR */
# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
# endif
# define COMPILER_VERSION_TWEAK DEC(__apple_build_version__)
#elif defined(__clang__) && defined(__ARMCOMPILER_VERSION)
# define COMPILER_ID "ARMClang"
# define COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000)
# define COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100)
# define COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION/100 % 100)
# define COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION)
#elif defined(__clang__) && defined(__ti__)
# define COMPILER_ID "TIClang"
# define COMPILER_VERSION_MAJOR DEC(__ti_major__)
# define COMPILER_VERSION_MINOR DEC(__ti_minor__)
# define COMPILER_VERSION_PATCH DEC(__ti_patchlevel__)
# define COMPILER_VERSION_INTERNAL DEC(__ti_version__)
#elif defined(__clang__)
# define COMPILER_ID "Clang"
# if defined(_MSC_VER)
# define SIMULATE_ID "MSVC"
# endif
# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
# if defined(_MSC_VER)
/* _MSC_VER = VVRR */
# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
# endif
#elif defined(__LCC__) && (defined(__GNUC__) || defined(__GNUG__) || defined(__MCST__))
# define COMPILER_ID "LCC"
# define COMPILER_VERSION_MAJOR DEC(__LCC__ / 100)
# define COMPILER_VERSION_MINOR DEC(__LCC__ % 100)
# if defined(__LCC_MINOR__)
# define COMPILER_VERSION_PATCH DEC(__LCC_MINOR__)
# endif
# if defined(__GNUC__) && defined(__GNUC_MINOR__)
# define SIMULATE_ID "GNU"
# define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
# if defined(__GNUC_PATCHLEVEL__)
# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
# endif
# endif
#elif defined(__GNUC__) || defined(__GNUG__)
# define COMPILER_ID "GNU"
# if defined(__GNUC__)
# define COMPILER_VERSION_MAJOR DEC(__GNUC__)
# else
# define COMPILER_VERSION_MAJOR DEC(__GNUG__)
# endif
# if defined(__GNUC_MINOR__)
# define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__)
# endif
# if defined(__GNUC_PATCHLEVEL__)
# define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
# endif
#elif defined(_MSC_VER)
# define COMPILER_ID "MSVC"
/* _MSC_VER = VVRR */
# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100)
# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100)
# if defined(_MSC_FULL_VER)
# if _MSC_VER >= 1400
/* _MSC_FULL_VER = VVRRPPPPP */
# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000)
# else
/* _MSC_FULL_VER = VVRRPPPP */
# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000)
# endif
# endif
# if defined(_MSC_BUILD)
# define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD)
# endif
#elif defined(_ADI_COMPILER)
# define COMPILER_ID "ADSP"
#if defined(__VERSIONNUM__)
/* __VERSIONNUM__ = 0xVVRRPPTT */
# define COMPILER_VERSION_MAJOR DEC(__VERSIONNUM__ >> 24 & 0xFF)
# define COMPILER_VERSION_MINOR DEC(__VERSIONNUM__ >> 16 & 0xFF)
# define COMPILER_VERSION_PATCH DEC(__VERSIONNUM__ >> 8 & 0xFF)
# define COMPILER_VERSION_TWEAK DEC(__VERSIONNUM__ & 0xFF)
#endif
#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
# define COMPILER_ID "IAR"
# if defined(__VER__) && defined(__ICCARM__)
# define COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000)
# define COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000)
# define COMPILER_VERSION_PATCH DEC((__VER__) % 1000)
# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
# elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__) || defined(__ICCV850__) || defined(__ICC8051__) || defined(__ICCSTM8__))
# define COMPILER_VERSION_MAJOR DEC((__VER__) / 100)
# define COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100))
# define COMPILER_VERSION_PATCH DEC(__SUBVERSION__)
# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
# endif
/* These compilers are either not known or too old to define an
identification macro. Try to identify the platform and guess that
it is the native compiler. */
#elif defined(__hpux) || defined(__hpua)
# define COMPILER_ID "HP"
#else /* unknown compiler */
# define COMPILER_ID ""
#endif
/* Construct the string literal in pieces to prevent the source from
getting matched. Store it in a pointer rather than an array
because some compilers will just produce instructions to fill the
array rather than assigning a pointer to a static array. */
char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]";
#ifdef SIMULATE_ID
char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]";
#endif
#ifdef __QNXNTO__
char const* qnxnto = "INFO" ":" "qnxnto[]";
#endif
#if defined(__CRAYXT_COMPUTE_LINUX_TARGET)
char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]";
#endif
#define STRINGIFY_HELPER(X) #X
#define STRINGIFY(X) STRINGIFY_HELPER(X)
/* Identify known platforms by name. */
#if defined(__linux) || defined(__linux__) || defined(linux)
# define PLATFORM_ID "Linux"
#elif defined(__MSYS__)
# define PLATFORM_ID "MSYS"
#elif defined(__CYGWIN__)
# define PLATFORM_ID "Cygwin"
#elif defined(__MINGW32__)
# define PLATFORM_ID "MinGW"
#elif defined(__APPLE__)
# define PLATFORM_ID "Darwin"
#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
# define PLATFORM_ID "Windows"
#elif defined(__FreeBSD__) || defined(__FreeBSD)
# define PLATFORM_ID "FreeBSD"
#elif defined(__NetBSD__) || defined(__NetBSD)
# define PLATFORM_ID "NetBSD"
#elif defined(__OpenBSD__) || defined(__OPENBSD)
# define PLATFORM_ID "OpenBSD"
#elif defined(__sun) || defined(sun)
# define PLATFORM_ID "SunOS"
#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__)
# define PLATFORM_ID "AIX"
#elif defined(__hpux) || defined(__hpux__)
# define PLATFORM_ID "HP-UX"
#elif defined(__HAIKU__)
# define PLATFORM_ID "Haiku"
#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS)
# define PLATFORM_ID "BeOS"
#elif defined(__QNX__) || defined(__QNXNTO__)
# define PLATFORM_ID "QNX"
#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__)
# define PLATFORM_ID "Tru64"
#elif defined(__riscos) || defined(__riscos__)
# define PLATFORM_ID "RISCos"
#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__)
# define PLATFORM_ID "SINIX"
#elif defined(__UNIX_SV__)
# define PLATFORM_ID "UNIX_SV"
#elif defined(__bsdos__)
# define PLATFORM_ID "BSDOS"
#elif defined(_MPRAS) || defined(MPRAS)
# define PLATFORM_ID "MP-RAS"
#elif defined(__osf) || defined(__osf__)
# define PLATFORM_ID "OSF1"
#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv)
# define PLATFORM_ID "SCO_SV"
#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX)
# define PLATFORM_ID "ULTRIX"
#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX)
# define PLATFORM_ID "Xenix"
#elif defined(__WATCOMC__)
# if defined(__LINUX__)
# define PLATFORM_ID "Linux"
# elif defined(__DOS__)
# define PLATFORM_ID "DOS"
# elif defined(__OS2__)
# define PLATFORM_ID "OS2"
# elif defined(__WINDOWS__)
# define PLATFORM_ID "Windows3x"
# elif defined(__VXWORKS__)
# define PLATFORM_ID "VxWorks"
# else /* unknown platform */
# define PLATFORM_ID
# endif
#elif defined(__INTEGRITY)
# if defined(INT_178B)
# define PLATFORM_ID "Integrity178"
# else /* regular Integrity */
# define PLATFORM_ID "Integrity"
# endif
# elif defined(_ADI_COMPILER)
# define PLATFORM_ID "ADSP"
#else /* unknown platform */
# define PLATFORM_ID
#endif
/* For windows compilers MSVC and Intel we can determine
the architecture of the compiler being used. This is because
the compilers do not have flags that can change the architecture,
but rather depend on which compiler is being used
*/
#if defined(_WIN32) && defined(_MSC_VER)
# if defined(_M_IA64)
# define ARCHITECTURE_ID "IA64"
# elif defined(_M_ARM64EC)
# define ARCHITECTURE_ID "ARM64EC"
# elif defined(_M_X64) || defined(_M_AMD64)
# define ARCHITECTURE_ID "x64"
# elif defined(_M_IX86)
# define ARCHITECTURE_ID "X86"
# elif defined(_M_ARM64)
# define ARCHITECTURE_ID "ARM64"
# elif defined(_M_ARM)
# if _M_ARM == 4
# define ARCHITECTURE_ID "ARMV4I"
# elif _M_ARM == 5
# define ARCHITECTURE_ID "ARMV5I"
# else
# define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM)
# endif
# elif defined(_M_MIPS)
# define ARCHITECTURE_ID "MIPS"
# elif defined(_M_SH)
# define ARCHITECTURE_ID "SHx"
# else /* unknown architecture */
# define ARCHITECTURE_ID ""
# endif
#elif defined(__WATCOMC__)
# if defined(_M_I86)
# define ARCHITECTURE_ID "I86"
# elif defined(_M_IX86)
# define ARCHITECTURE_ID "X86"
# else /* unknown architecture */
# define ARCHITECTURE_ID ""
# endif
#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
# if defined(__ICCARM__)
# define ARCHITECTURE_ID "ARM"
# elif defined(__ICCRX__)
# define ARCHITECTURE_ID "RX"
# elif defined(__ICCRH850__)
# define ARCHITECTURE_ID "RH850"
# elif defined(__ICCRL78__)
# define ARCHITECTURE_ID "RL78"
# elif defined(__ICCRISCV__)
# define ARCHITECTURE_ID "RISCV"
# elif defined(__ICCAVR__)
# define ARCHITECTURE_ID "AVR"
# elif defined(__ICC430__)
# define ARCHITECTURE_ID "MSP430"
# elif defined(__ICCV850__)
# define ARCHITECTURE_ID "V850"
# elif defined(__ICC8051__)
# define ARCHITECTURE_ID "8051"
# elif defined(__ICCSTM8__)
# define ARCHITECTURE_ID "STM8"
# else /* unknown architecture */
# define ARCHITECTURE_ID ""
# endif
#elif defined(__ghs__)
# if defined(__PPC64__)
# define ARCHITECTURE_ID "PPC64"
# elif defined(__ppc__)
# define ARCHITECTURE_ID "PPC"
# elif defined(__ARM__)
# define ARCHITECTURE_ID "ARM"
# elif defined(__x86_64__)
# define ARCHITECTURE_ID "x64"
# elif defined(__i386__)
# define ARCHITECTURE_ID "X86"
# else /* unknown architecture */
# define ARCHITECTURE_ID ""
# endif
#elif defined(__clang__) && defined(__ti__)
# if defined(__ARM_ARCH)
# define ARCHITECTURE_ID "ARM"
# else /* unknown architecture */
# define ARCHITECTURE_ID ""
# endif
#elif defined(__TI_COMPILER_VERSION__)
# if defined(__TI_ARM__)
# define ARCHITECTURE_ID "ARM"
# elif defined(__MSP430__)
# define ARCHITECTURE_ID "MSP430"
# elif defined(__TMS320C28XX__)
# define ARCHITECTURE_ID "TMS320C28x"
# elif defined(__TMS320C6X__) || defined(_TMS320C6X)
# define ARCHITECTURE_ID "TMS320C6x"
# else /* unknown architecture */
# define ARCHITECTURE_ID ""
# endif
# elif defined(__ADSPSHARC__)
# define ARCHITECTURE_ID "SHARC"
# elif defined(__ADSPBLACKFIN__)
# define ARCHITECTURE_ID "Blackfin"
#elif defined(__TASKING__)
# if defined(__CTC__) || defined(__CPTC__)
# define ARCHITECTURE_ID "TriCore"
# elif defined(__CMCS__)
# define ARCHITECTURE_ID "MCS"
# elif defined(__CARM__)
# define ARCHITECTURE_ID "ARM"
# elif defined(__CARC__)
# define ARCHITECTURE_ID "ARC"
# elif defined(__C51__)
# define ARCHITECTURE_ID "8051"
# elif defined(__CPCP__)
# define ARCHITECTURE_ID "PCP"
# else
# define ARCHITECTURE_ID ""
# endif
#else
# define ARCHITECTURE_ID
#endif
/* Convert integer to decimal digit literals. */
#define DEC(n) \
('0' + (((n) / 10000000)%10)), \
('0' + (((n) / 1000000)%10)), \
('0' + (((n) / 100000)%10)), \
('0' + (((n) / 10000)%10)), \
('0' + (((n) / 1000)%10)), \
('0' + (((n) / 100)%10)), \
('0' + (((n) / 10)%10)), \
('0' + ((n) % 10))
/* Convert integer to hex digit literals. */
#define HEX(n) \
('0' + ((n)>>28 & 0xF)), \
('0' + ((n)>>24 & 0xF)), \
('0' + ((n)>>20 & 0xF)), \
('0' + ((n)>>16 & 0xF)), \
('0' + ((n)>>12 & 0xF)), \
('0' + ((n)>>8 & 0xF)), \
('0' + ((n)>>4 & 0xF)), \
('0' + ((n) & 0xF))
/* Construct a string literal encoding the version number. */
#ifdef COMPILER_VERSION
char const* info_version = "INFO" ":" "compiler_version[" COMPILER_VERSION "]";
/* Construct a string literal encoding the version number components. */
#elif defined(COMPILER_VERSION_MAJOR)
char const info_version[] = {
'I', 'N', 'F', 'O', ':',
'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[',
COMPILER_VERSION_MAJOR,
# ifdef COMPILER_VERSION_MINOR
'.', COMPILER_VERSION_MINOR,
# ifdef COMPILER_VERSION_PATCH
'.', COMPILER_VERSION_PATCH,
# ifdef COMPILER_VERSION_TWEAK
'.', COMPILER_VERSION_TWEAK,
# endif
# endif
# endif
']','\0'};
#endif
/* Construct a string literal encoding the internal version number. */
#ifdef COMPILER_VERSION_INTERNAL
char const info_version_internal[] = {
'I', 'N', 'F', 'O', ':',
'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_',
'i','n','t','e','r','n','a','l','[',
COMPILER_VERSION_INTERNAL,']','\0'};
#elif defined(COMPILER_VERSION_INTERNAL_STR)
char const* info_version_internal = "INFO" ":" "compiler_version_internal[" COMPILER_VERSION_INTERNAL_STR "]";
#endif
/* Construct a string literal encoding the version number components. */
#ifdef SIMULATE_VERSION_MAJOR
char const info_simulate_version[] = {
'I', 'N', 'F', 'O', ':',
's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[',
SIMULATE_VERSION_MAJOR,
# ifdef SIMULATE_VERSION_MINOR
'.', SIMULATE_VERSION_MINOR,
# ifdef SIMULATE_VERSION_PATCH
'.', SIMULATE_VERSION_PATCH,
# ifdef SIMULATE_VERSION_TWEAK
'.', SIMULATE_VERSION_TWEAK,
# endif
# endif
# endif
']','\0'};
#endif
/* Construct the string literal in pieces to prevent the source from
getting matched. Store it in a pointer rather than an array
because some compilers will just produce instructions to fill the
array rather than assigning a pointer to a static array. */
char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]";
char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]";
#define CXX_STD_98 199711L
#define CXX_STD_11 201103L
#define CXX_STD_14 201402L
#define CXX_STD_17 201703L
#define CXX_STD_20 202002L
#define CXX_STD_23 202302L
#if defined(__INTEL_COMPILER) && defined(_MSVC_LANG)
# if _MSVC_LANG > CXX_STD_17
# define CXX_STD _MSVC_LANG
# elif _MSVC_LANG == CXX_STD_17 && defined(__cpp_aggregate_paren_init)
# define CXX_STD CXX_STD_20
# elif _MSVC_LANG > CXX_STD_14 && __cplusplus > CXX_STD_17
# define CXX_STD CXX_STD_20
# elif _MSVC_LANG > CXX_STD_14
# define CXX_STD CXX_STD_17
# elif defined(__INTEL_CXX11_MODE__) && defined(__cpp_aggregate_nsdmi)
# define CXX_STD CXX_STD_14
# elif defined(__INTEL_CXX11_MODE__)
# define CXX_STD CXX_STD_11
# else
# define CXX_STD CXX_STD_98
# endif
#elif defined(_MSC_VER) && defined(_MSVC_LANG)
# if _MSVC_LANG > __cplusplus
# define CXX_STD _MSVC_LANG
# else
# define CXX_STD __cplusplus
# endif
#elif defined(__NVCOMPILER)
# if __cplusplus == CXX_STD_17 && defined(__cpp_aggregate_paren_init)
# define CXX_STD CXX_STD_20
# else
# define CXX_STD __cplusplus
# endif
#elif defined(__INTEL_COMPILER) || defined(__PGI)
# if __cplusplus == CXX_STD_11 && defined(__cpp_namespace_attributes)
# define CXX_STD CXX_STD_17
# elif __cplusplus == CXX_STD_11 && defined(__cpp_aggregate_nsdmi)
# define CXX_STD CXX_STD_14
# else
# define CXX_STD __cplusplus
# endif
#elif (defined(__IBMCPP__) || defined(__ibmxl__)) && defined(__linux__)
# if __cplusplus == CXX_STD_11 && defined(__cpp_aggregate_nsdmi)
# define CXX_STD CXX_STD_14
# else
# define CXX_STD __cplusplus
# endif
#elif __cplusplus == 1 && defined(__GXX_EXPERIMENTAL_CXX0X__)
# define CXX_STD CXX_STD_11
#else
# define CXX_STD __cplusplus
#endif
const char* info_language_standard_default = "INFO" ":" "standard_default["
#if CXX_STD > CXX_STD_23
"26"
#elif CXX_STD > CXX_STD_20
"23"
#elif CXX_STD > CXX_STD_17
"20"
#elif CXX_STD > CXX_STD_14
"17"
#elif CXX_STD > CXX_STD_11
"14"
#elif CXX_STD >= CXX_STD_11
"11"
#else
"98"
#endif
"]";
const char* info_language_extensions_default = "INFO" ":" "extensions_default["
#if (defined(__clang__) || defined(__GNUC__) || defined(__xlC__) || \
defined(__TI_COMPILER_VERSION__)) && \
!defined(__STRICT_ANSI__)
"ON"
#else
"OFF"
#endif
"]";
/*--------------------------------------------------------------------------*/
int main(int argc, char* argv[])
{
int require = 0;
require += info_compiler[argc];
require += info_platform[argc];
require += info_arch[argc];
#ifdef COMPILER_VERSION_MAJOR
require += info_version[argc];
#endif
#ifdef COMPILER_VERSION_INTERNAL
require += info_version_internal[argc];
#endif
#ifdef SIMULATE_ID
require += info_simulate[argc];
#endif
#ifdef SIMULATE_VERSION_MAJOR
require += info_simulate_version[argc];
#endif
#if defined(__CRAYXT_COMPUTE_LINUX_TARGET)
require += info_cray[argc];
#endif
require += info_language_standard_default[argc];
require += info_language_extensions_default[argc];
(void)argv;
return require;
}

View File

@@ -0,0 +1,926 @@
#if !defined(__HIP__) && !defined(__NVCC__)
# error "A C or C++ compiler has been selected for HIP"
#endif
/* Version number components: V=Version, R=Revision, P=Patch
Version date components: YYYY=Year, MM=Month, DD=Day */
#if defined(__NVCC__)
# define COMPILER_ID "NVIDIA"
# if defined(_MSC_VER)
# define SIMULATE_ID "MSVC"
# elif defined(__clang__)
# define SIMULATE_ID "Clang"
# elif defined(__GNUC__)
# define SIMULATE_ID "GNU"
# endif
# if defined(__CUDACC_VER_MAJOR__)
# define COMPILER_VERSION_MAJOR DEC(__CUDACC_VER_MAJOR__)
# define COMPILER_VERSION_MINOR DEC(__CUDACC_VER_MINOR__)
# define COMPILER_VERSION_PATCH DEC(__CUDACC_VER_BUILD__)
# endif
# if defined(_MSC_VER)
/* _MSC_VER = VVRR */
# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
# elif defined(__clang__)
# define SIMULATE_VERSION_MAJOR DEC(__clang_major__)
# define SIMULATE_VERSION_MINOR DEC(__clang_minor__)
# elif defined(__GNUC__)
# define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
# endif
#elif defined(__clang__)
# define COMPILER_ID "Clang"
# if defined(_MSC_VER)
# define SIMULATE_ID "MSVC"
# endif
# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
# if defined(_MSC_VER)
/* _MSC_VER = VVRR */
# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
# endif
/* These compilers are either not known or too old to define an
identification macro. Try to identify the platform and guess that
it is the native compiler. */
#elif defined(__hpux) || defined(__hpua)
# define COMPILER_ID "HP"
#else /* unknown compiler */
# define COMPILER_ID ""
#endif
/* Detect host compiler used by NVCC. */
#ifdef __NVCC__
/* Version number components: V=Version, R=Revision, P=Patch
Version date components: YYYY=Year, MM=Month, DD=Day */
#if defined(__INTEL_COMPILER) || defined(__ICC)
# define HOST_COMPILER_ID "Intel"
# if defined(_MSC_VER)
# define HOST_SIMULATE_ID "MSVC"
# endif
# if defined(__GNUC__)
# define HOST_SIMULATE_ID "GNU"
# endif
/* __INTEL_COMPILER = VRP prior to 2021, and then VVVV for 2021 and later,
except that a few beta releases use the old format with V=2021. */
# if __INTEL_COMPILER < 2021 || __INTEL_COMPILER == 202110 || __INTEL_COMPILER == 202111
# define HOST_COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100)
# define HOST_COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10)
# if defined(__INTEL_COMPILER_UPDATE)
# define HOST_COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE)
# else
# define HOST_COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10)
# endif
# else
# define HOST_COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER)
# define HOST_COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER_UPDATE)
/* The third version component from --version is an update index,
but no macro is provided for it. */
# define HOST_COMPILER_VERSION_PATCH DEC(0)
# endif
# if defined(__INTEL_COMPILER_BUILD_DATE)
/* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */
# define HOST_COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE)
# endif
# if defined(_MSC_VER)
/* _MSC_VER = VVRR */
# define HOST_SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
# define HOST_SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
# endif
# if defined(__GNUC__)
# define HOST_SIMULATE_VERSION_MAJOR DEC(__GNUC__)
# elif defined(__GNUG__)
# define HOST_SIMULATE_VERSION_MAJOR DEC(__GNUG__)
# endif
# if defined(__GNUC_MINOR__)
# define HOST_SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
# endif
# if defined(__GNUC_PATCHLEVEL__)
# define HOST_SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
# endif
#elif (defined(__clang__) && defined(__INTEL_CLANG_COMPILER)) || defined(__INTEL_LLVM_COMPILER)
# define HOST_COMPILER_ID "IntelLLVM"
#if defined(_MSC_VER)
# define HOST_SIMULATE_ID "MSVC"
#endif
#if defined(__GNUC__)
# define HOST_SIMULATE_ID "GNU"
#endif
/* __INTEL_LLVM_COMPILER = VVVVRP prior to 2021.2.0, VVVVRRPP for 2021.2.0 and
* later. Look for 6 digit vs. 8 digit version number to decide encoding.
* VVVV is no smaller than the current year when a version is released.
*/
#if __INTEL_LLVM_COMPILER < 1000000L
# define HOST_COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/100)
# define HOST_COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/10 % 10)
# define HOST_COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 10)
#else
# define HOST_COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/10000)
# define HOST_COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/100 % 100)
# define HOST_COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 100)
#endif
#if defined(_MSC_VER)
/* _MSC_VER = VVRR */
# define HOST_SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
# define HOST_SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
#endif
#if defined(__GNUC__)
# define HOST_SIMULATE_VERSION_MAJOR DEC(__GNUC__)
#elif defined(__GNUG__)
# define HOST_SIMULATE_VERSION_MAJOR DEC(__GNUG__)
#endif
#if defined(__GNUC_MINOR__)
# define HOST_SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
#endif
#if defined(__GNUC_PATCHLEVEL__)
# define HOST_SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
#endif
#elif defined(__PATHCC__)
# define HOST_COMPILER_ID "PathScale"
# define HOST_COMPILER_VERSION_MAJOR DEC(__PATHCC__)
# define HOST_COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__)
# if defined(__PATHCC_PATCHLEVEL__)
# define HOST_COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__)
# endif
#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__)
# define HOST_COMPILER_ID "Embarcadero"
# define HOST_COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF)
# define HOST_COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF)
# define HOST_COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF)
#elif defined(__BORLANDC__)
# define HOST_COMPILER_ID "Borland"
/* __BORLANDC__ = 0xVRR */
# define HOST_COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8)
# define HOST_COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF)
#elif defined(__WATCOMC__) && __WATCOMC__ < 1200
# define HOST_COMPILER_ID "Watcom"
/* __WATCOMC__ = VVRR */
# define HOST_COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100)
# define HOST_COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
# if (__WATCOMC__ % 10) > 0
# define HOST_COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
# endif
#elif defined(__WATCOMC__)
# define HOST_COMPILER_ID "OpenWatcom"
/* __WATCOMC__ = VVRP + 1100 */
# define HOST_COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100)
# define HOST_COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
# if (__WATCOMC__ % 10) > 0
# define HOST_COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
# endif
#elif defined(__SUNPRO_CC)
# define HOST_COMPILER_ID "SunPro"
# if __SUNPRO_CC >= 0x5100
/* __SUNPRO_CC = 0xVRRP */
# define HOST_COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>12)
# define HOST_COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xFF)
# define HOST_COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF)
# else
/* __SUNPRO_CC = 0xVRP */
# define HOST_COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>8)
# define HOST_COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xF)
# define HOST_COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF)
# endif
#elif defined(__HP_aCC)
# define HOST_COMPILER_ID "HP"
/* __HP_aCC = VVRRPP */
# define HOST_COMPILER_VERSION_MAJOR DEC(__HP_aCC/10000)
# define HOST_COMPILER_VERSION_MINOR DEC(__HP_aCC/100 % 100)
# define HOST_COMPILER_VERSION_PATCH DEC(__HP_aCC % 100)
#elif defined(__DECCXX)
# define HOST_COMPILER_ID "Compaq"
/* __DECCXX_VER = VVRRTPPPP */
# define HOST_COMPILER_VERSION_MAJOR DEC(__DECCXX_VER/10000000)
# define HOST_COMPILER_VERSION_MINOR DEC(__DECCXX_VER/100000 % 100)
# define HOST_COMPILER_VERSION_PATCH DEC(__DECCXX_VER % 10000)
#elif defined(__IBMCPP__) && defined(__COMPILER_VER__)
# define HOST_COMPILER_ID "zOS"
/* __IBMCPP__ = VRP */
# define HOST_COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
# define HOST_COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
# define HOST_COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)
#elif defined(__open_xl__) && defined(__clang__)
# define HOST_COMPILER_ID "IBMClang"
# define HOST_COMPILER_VERSION_MAJOR DEC(__open_xl_version__)
# define HOST_COMPILER_VERSION_MINOR DEC(__open_xl_release__)
# define HOST_COMPILER_VERSION_PATCH DEC(__open_xl_modification__)
# define HOST_COMPILER_VERSION_TWEAK DEC(__open_xl_ptf_fix_level__)
#elif defined(__ibmxl__) && defined(__clang__)
# define HOST_COMPILER_ID "XLClang"
# define HOST_COMPILER_VERSION_MAJOR DEC(__ibmxl_version__)
# define HOST_COMPILER_VERSION_MINOR DEC(__ibmxl_release__)
# define HOST_COMPILER_VERSION_PATCH DEC(__ibmxl_modification__)
# define HOST_COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__)
#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ >= 800
# define HOST_COMPILER_ID "XL"
/* __IBMCPP__ = VRP */
# define HOST_COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
# define HOST_COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
# define HOST_COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)
#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ < 800
# define HOST_COMPILER_ID "VisualAge"
/* __IBMCPP__ = VRP */
# define HOST_COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
# define HOST_COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
# define HOST_COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)
#elif defined(__NVCOMPILER)
# define HOST_COMPILER_ID "NVHPC"
# define HOST_COMPILER_VERSION_MAJOR DEC(__NVCOMPILER_MAJOR__)
# define HOST_COMPILER_VERSION_MINOR DEC(__NVCOMPILER_MINOR__)
# if defined(__NVCOMPILER_PATCHLEVEL__)
# define HOST_COMPILER_VERSION_PATCH DEC(__NVCOMPILER_PATCHLEVEL__)
# endif
#elif defined(__PGI)
# define HOST_COMPILER_ID "PGI"
# define HOST_COMPILER_VERSION_MAJOR DEC(__PGIC__)
# define HOST_COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__)
# if defined(__PGIC_PATCHLEVEL__)
# define HOST_COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__)
# endif
#elif defined(__clang__) && defined(__cray__)
# define HOST_COMPILER_ID "CrayClang"
# define HOST_COMPILER_VERSION_MAJOR DEC(__cray_major__)
# define HOST_COMPILER_VERSION_MINOR DEC(__cray_minor__)
# define HOST_COMPILER_VERSION_PATCH DEC(__cray_patchlevel__)
# define HOST_COMPILER_VERSION_INTERNAL_STR __clang_version__
#elif defined(_CRAYC)
# define HOST_COMPILER_ID "Cray"
# define HOST_COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR)
# define HOST_COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR)
#elif defined(__TI_COMPILER_VERSION__)
# define HOST_COMPILER_ID "TI"
/* __TI_COMPILER_VERSION__ = VVVRRRPPP */
# define HOST_COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000)
# define HOST_COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000)
# define HOST_COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000)
#elif defined(__CLANG_FUJITSU)
# define HOST_COMPILER_ID "FujitsuClang"
# define HOST_COMPILER_VERSION_MAJOR DEC(__FCC_major__)
# define HOST_COMPILER_VERSION_MINOR DEC(__FCC_minor__)
# define HOST_COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
# define HOST_COMPILER_VERSION_INTERNAL_STR __clang_version__
#elif defined(__FUJITSU)
# define HOST_COMPILER_ID "Fujitsu"
# if defined(__FCC_version__)
# define HOST_COMPILER_VERSION __FCC_version__
# elif defined(__FCC_major__)
# define HOST_COMPILER_VERSION_MAJOR DEC(__FCC_major__)
# define HOST_COMPILER_VERSION_MINOR DEC(__FCC_minor__)
# define HOST_COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
# endif
# if defined(__fcc_version)
# define HOST_COMPILER_VERSION_INTERNAL DEC(__fcc_version)
# elif defined(__FCC_VERSION)
# define HOST_COMPILER_VERSION_INTERNAL DEC(__FCC_VERSION)
# endif
#elif defined(__ghs__)
# define HOST_COMPILER_ID "GHS"
/* __GHS_VERSION_NUMBER = VVVVRP */
# ifdef __GHS_VERSION_NUMBER
# define HOST_COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100)
# define HOST_COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10)
# define HOST_COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER % 10)
# endif
#elif defined(__TASKING__)
# define HOST_COMPILER_ID "Tasking"
# define HOST_COMPILER_VERSION_MAJOR DEC(__VERSION__/1000)
# define HOST_COMPILER_VERSION_MINOR DEC(__VERSION__ % 100)
# define HOST_COMPILER_VERSION_INTERNAL DEC(__VERSION__)
#elif defined(__ORANGEC__)
# define HOST_COMPILER_ID "OrangeC"
# define HOST_COMPILER_VERSION_MAJOR DEC(__ORANGEC_MAJOR__)
# define HOST_COMPILER_VERSION_MINOR DEC(__ORANGEC_MINOR__)
# define HOST_COMPILER_VERSION_PATCH DEC(__ORANGEC_PATCHLEVEL__)
#elif defined(__SCO_VERSION__)
# define HOST_COMPILER_ID "SCO"
#elif defined(__ARMCC_VERSION) && !defined(__clang__)
# define HOST_COMPILER_ID "ARMCC"
#if __ARMCC_VERSION >= 1000000
/* __ARMCC_VERSION = VRRPPPP */
# define HOST_COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000)
# define HOST_COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100)
# define HOST_COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000)
#else
/* __ARMCC_VERSION = VRPPPP */
# define HOST_COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000)
# define HOST_COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10)
# define HOST_COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000)
#endif
#elif defined(__clang__) && defined(__apple_build_version__)
# define HOST_COMPILER_ID "AppleClang"
# if defined(_MSC_VER)
# define HOST_SIMULATE_ID "MSVC"
# endif
# define HOST_COMPILER_VERSION_MAJOR DEC(__clang_major__)
# define HOST_COMPILER_VERSION_MINOR DEC(__clang_minor__)
# define HOST_COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
# if defined(_MSC_VER)
/* _MSC_VER = VVRR */
# define HOST_SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
# define HOST_SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
# endif
# define HOST_COMPILER_VERSION_TWEAK DEC(__apple_build_version__)
#elif defined(__clang__) && defined(__ARMCOMPILER_VERSION)
# define HOST_COMPILER_ID "ARMClang"
# define HOST_COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000)
# define HOST_COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100)
# define HOST_COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION/100 % 100)
# define HOST_COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION)
#elif defined(__clang__) && defined(__ti__)
# define HOST_COMPILER_ID "TIClang"
# define HOST_COMPILER_VERSION_MAJOR DEC(__ti_major__)
# define HOST_COMPILER_VERSION_MINOR DEC(__ti_minor__)
# define HOST_COMPILER_VERSION_PATCH DEC(__ti_patchlevel__)
# define HOST_COMPILER_VERSION_INTERNAL DEC(__ti_version__)
#elif defined(__clang__)
# define HOST_COMPILER_ID "Clang"
# if defined(_MSC_VER)
# define HOST_SIMULATE_ID "MSVC"
# endif
# define HOST_COMPILER_VERSION_MAJOR DEC(__clang_major__)
# define HOST_COMPILER_VERSION_MINOR DEC(__clang_minor__)
# define HOST_COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
# if defined(_MSC_VER)
/* _MSC_VER = VVRR */
# define HOST_SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
# define HOST_SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
# endif
#elif defined(__LCC__) && (defined(__GNUC__) || defined(__GNUG__) || defined(__MCST__))
# define HOST_COMPILER_ID "LCC"
# define HOST_COMPILER_VERSION_MAJOR DEC(__LCC__ / 100)
# define HOST_COMPILER_VERSION_MINOR DEC(__LCC__ % 100)
# if defined(__LCC_MINOR__)
# define HOST_COMPILER_VERSION_PATCH DEC(__LCC_MINOR__)
# endif
# if defined(__GNUC__) && defined(__GNUC_MINOR__)
# define HOST_SIMULATE_ID "GNU"
# define HOST_SIMULATE_VERSION_MAJOR DEC(__GNUC__)
# define HOST_SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
# if defined(__GNUC_PATCHLEVEL__)
# define HOST_SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
# endif
# endif
#elif defined(__GNUC__) || defined(__GNUG__)
# define HOST_COMPILER_ID "GNU"
# if defined(__GNUC__)
# define HOST_COMPILER_VERSION_MAJOR DEC(__GNUC__)
# else
# define HOST_COMPILER_VERSION_MAJOR DEC(__GNUG__)
# endif
# if defined(__GNUC_MINOR__)
# define HOST_COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__)
# endif
# if defined(__GNUC_PATCHLEVEL__)
# define HOST_COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
# endif
#elif defined(_MSC_VER)
# define HOST_COMPILER_ID "MSVC"
/* _MSC_VER = VVRR */
# define HOST_COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100)
# define HOST_COMPILER_VERSION_MINOR DEC(_MSC_VER % 100)
# if defined(_MSC_FULL_VER)
# if _MSC_VER >= 1400
/* _MSC_FULL_VER = VVRRPPPPP */
# define HOST_COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000)
# else
/* _MSC_FULL_VER = VVRRPPPP */
# define HOST_COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000)
# endif
# endif
# if defined(_MSC_BUILD)
# define HOST_COMPILER_VERSION_TWEAK DEC(_MSC_BUILD)
# endif
#elif defined(_ADI_COMPILER)
# define HOST_COMPILER_ID "ADSP"
#if defined(__VERSIONNUM__)
/* __VERSIONNUM__ = 0xVVRRPPTT */
# define HOST_COMPILER_VERSION_MAJOR DEC(__VERSIONNUM__ >> 24 & 0xFF)
# define HOST_COMPILER_VERSION_MINOR DEC(__VERSIONNUM__ >> 16 & 0xFF)
# define HOST_COMPILER_VERSION_PATCH DEC(__VERSIONNUM__ >> 8 & 0xFF)
# define HOST_COMPILER_VERSION_TWEAK DEC(__VERSIONNUM__ & 0xFF)
#endif
#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
# define HOST_COMPILER_ID "IAR"
# if defined(__VER__) && defined(__ICCARM__)
# define HOST_COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000)
# define HOST_COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000)
# define HOST_COMPILER_VERSION_PATCH DEC((__VER__) % 1000)
# define HOST_COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
# elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__) || defined(__ICCV850__) || defined(__ICC8051__) || defined(__ICCSTM8__))
# define HOST_COMPILER_VERSION_MAJOR DEC((__VER__) / 100)
# define HOST_COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100))
# define HOST_COMPILER_VERSION_PATCH DEC(__SUBVERSION__)
# define HOST_COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
# endif
#endif
#endif /* __NVCC__ */
/* Construct the string literal in pieces to prevent the source from
getting matched. Store it in a pointer rather than an array
because some compilers will just produce instructions to fill the
array rather than assigning a pointer to a static array. */
char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]";
#ifdef SIMULATE_ID
char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]";
#endif
#define STRINGIFY_HELPER(X) #X
#define STRINGIFY(X) STRINGIFY_HELPER(X)
/* Identify known platforms by name. */
#if defined(__linux) || defined(__linux__) || defined(linux)
# define PLATFORM_ID "Linux"
#elif defined(__MSYS__)
# define PLATFORM_ID "MSYS"
#elif defined(__CYGWIN__)
# define PLATFORM_ID "Cygwin"
#elif defined(__MINGW32__)
# define PLATFORM_ID "MinGW"
#elif defined(__APPLE__)
# define PLATFORM_ID "Darwin"
#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
# define PLATFORM_ID "Windows"
#elif defined(__FreeBSD__) || defined(__FreeBSD)
# define PLATFORM_ID "FreeBSD"
#elif defined(__NetBSD__) || defined(__NetBSD)
# define PLATFORM_ID "NetBSD"
#elif defined(__OpenBSD__) || defined(__OPENBSD)
# define PLATFORM_ID "OpenBSD"
#elif defined(__sun) || defined(sun)
# define PLATFORM_ID "SunOS"
#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__)
# define PLATFORM_ID "AIX"
#elif defined(__hpux) || defined(__hpux__)
# define PLATFORM_ID "HP-UX"
#elif defined(__HAIKU__)
# define PLATFORM_ID "Haiku"
#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS)
# define PLATFORM_ID "BeOS"
#elif defined(__QNX__) || defined(__QNXNTO__)
# define PLATFORM_ID "QNX"
#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__)
# define PLATFORM_ID "Tru64"
#elif defined(__riscos) || defined(__riscos__)
# define PLATFORM_ID "RISCos"
#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__)
# define PLATFORM_ID "SINIX"
#elif defined(__UNIX_SV__)
# define PLATFORM_ID "UNIX_SV"
#elif defined(__bsdos__)
# define PLATFORM_ID "BSDOS"
#elif defined(_MPRAS) || defined(MPRAS)
# define PLATFORM_ID "MP-RAS"
#elif defined(__osf) || defined(__osf__)
# define PLATFORM_ID "OSF1"
#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv)
# define PLATFORM_ID "SCO_SV"
#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX)
# define PLATFORM_ID "ULTRIX"
#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX)
# define PLATFORM_ID "Xenix"
#elif defined(__WATCOMC__)
# if defined(__LINUX__)
# define PLATFORM_ID "Linux"
# elif defined(__DOS__)
# define PLATFORM_ID "DOS"
# elif defined(__OS2__)
# define PLATFORM_ID "OS2"
# elif defined(__WINDOWS__)
# define PLATFORM_ID "Windows3x"
# elif defined(__VXWORKS__)
# define PLATFORM_ID "VxWorks"
# else /* unknown platform */
# define PLATFORM_ID
# endif
#elif defined(__INTEGRITY)
# if defined(INT_178B)
# define PLATFORM_ID "Integrity178"
# else /* regular Integrity */
# define PLATFORM_ID "Integrity"
# endif
# elif defined(_ADI_COMPILER)
# define PLATFORM_ID "ADSP"
#else /* unknown platform */
# define PLATFORM_ID
#endif
/* For windows compilers MSVC and Intel we can determine
the architecture of the compiler being used. This is because
the compilers do not have flags that can change the architecture,
but rather depend on which compiler is being used
*/
#if defined(_WIN32) && defined(_MSC_VER)
# if defined(_M_IA64)
# define ARCHITECTURE_ID "IA64"
# elif defined(_M_ARM64EC)
# define ARCHITECTURE_ID "ARM64EC"
# elif defined(_M_X64) || defined(_M_AMD64)
# define ARCHITECTURE_ID "x64"
# elif defined(_M_IX86)
# define ARCHITECTURE_ID "X86"
# elif defined(_M_ARM64)
# define ARCHITECTURE_ID "ARM64"
# elif defined(_M_ARM)
# if _M_ARM == 4
# define ARCHITECTURE_ID "ARMV4I"
# elif _M_ARM == 5
# define ARCHITECTURE_ID "ARMV5I"
# else
# define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM)
# endif
# elif defined(_M_MIPS)
# define ARCHITECTURE_ID "MIPS"
# elif defined(_M_SH)
# define ARCHITECTURE_ID "SHx"
# else /* unknown architecture */
# define ARCHITECTURE_ID ""
# endif
#elif defined(__WATCOMC__)
# if defined(_M_I86)
# define ARCHITECTURE_ID "I86"
# elif defined(_M_IX86)
# define ARCHITECTURE_ID "X86"
# else /* unknown architecture */
# define ARCHITECTURE_ID ""
# endif
#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
# if defined(__ICCARM__)
# define ARCHITECTURE_ID "ARM"
# elif defined(__ICCRX__)
# define ARCHITECTURE_ID "RX"
# elif defined(__ICCRH850__)
# define ARCHITECTURE_ID "RH850"
# elif defined(__ICCRL78__)
# define ARCHITECTURE_ID "RL78"
# elif defined(__ICCRISCV__)
# define ARCHITECTURE_ID "RISCV"
# elif defined(__ICCAVR__)
# define ARCHITECTURE_ID "AVR"
# elif defined(__ICC430__)
# define ARCHITECTURE_ID "MSP430"
# elif defined(__ICCV850__)
# define ARCHITECTURE_ID "V850"
# elif defined(__ICC8051__)
# define ARCHITECTURE_ID "8051"
# elif defined(__ICCSTM8__)
# define ARCHITECTURE_ID "STM8"
# else /* unknown architecture */
# define ARCHITECTURE_ID ""
# endif
#elif defined(__ghs__)
# if defined(__PPC64__)
# define ARCHITECTURE_ID "PPC64"
# elif defined(__ppc__)
# define ARCHITECTURE_ID "PPC"
# elif defined(__ARM__)
# define ARCHITECTURE_ID "ARM"
# elif defined(__x86_64__)
# define ARCHITECTURE_ID "x64"
# elif defined(__i386__)
# define ARCHITECTURE_ID "X86"
# else /* unknown architecture */
# define ARCHITECTURE_ID ""
# endif
#elif defined(__clang__) && defined(__ti__)
# if defined(__ARM_ARCH)
# define ARCHITECTURE_ID "ARM"
# else /* unknown architecture */
# define ARCHITECTURE_ID ""
# endif
#elif defined(__TI_COMPILER_VERSION__)
# if defined(__TI_ARM__)
# define ARCHITECTURE_ID "ARM"
# elif defined(__MSP430__)
# define ARCHITECTURE_ID "MSP430"
# elif defined(__TMS320C28XX__)
# define ARCHITECTURE_ID "TMS320C28x"
# elif defined(__TMS320C6X__) || defined(_TMS320C6X)
# define ARCHITECTURE_ID "TMS320C6x"
# else /* unknown architecture */
# define ARCHITECTURE_ID ""
# endif
# elif defined(__ADSPSHARC__)
# define ARCHITECTURE_ID "SHARC"
# elif defined(__ADSPBLACKFIN__)
# define ARCHITECTURE_ID "Blackfin"
#elif defined(__TASKING__)
# if defined(__CTC__) || defined(__CPTC__)
# define ARCHITECTURE_ID "TriCore"
# elif defined(__CMCS__)
# define ARCHITECTURE_ID "MCS"
# elif defined(__CARM__)
# define ARCHITECTURE_ID "ARM"
# elif defined(__CARC__)
# define ARCHITECTURE_ID "ARC"
# elif defined(__C51__)
# define ARCHITECTURE_ID "8051"
# elif defined(__CPCP__)
# define ARCHITECTURE_ID "PCP"
# else
# define ARCHITECTURE_ID ""
# endif
#else
# define ARCHITECTURE_ID
#endif
/* Convert integer to decimal digit literals. */
#define DEC(n) \
('0' + (((n) / 10000000)%10)), \
('0' + (((n) / 1000000)%10)), \
('0' + (((n) / 100000)%10)), \
('0' + (((n) / 10000)%10)), \
('0' + (((n) / 1000)%10)), \
('0' + (((n) / 100)%10)), \
('0' + (((n) / 10)%10)), \
('0' + ((n) % 10))
/* Convert integer to hex digit literals. */
#define HEX(n) \
('0' + ((n)>>28 & 0xF)), \
('0' + ((n)>>24 & 0xF)), \
('0' + ((n)>>20 & 0xF)), \
('0' + ((n)>>16 & 0xF)), \
('0' + ((n)>>12 & 0xF)), \
('0' + ((n)>>8 & 0xF)), \
('0' + ((n)>>4 & 0xF)), \
('0' + ((n) & 0xF))
/* Construct a string literal encoding the version number. */
#ifdef COMPILER_VERSION
char const* info_version = "INFO" ":" "compiler_version[" COMPILER_VERSION "]";
/* Construct a string literal encoding the version number components. */
#elif defined(COMPILER_VERSION_MAJOR)
char const info_version[] = {
'I', 'N', 'F', 'O', ':',
'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[',
COMPILER_VERSION_MAJOR,
# ifdef COMPILER_VERSION_MINOR
'.', COMPILER_VERSION_MINOR,
# ifdef COMPILER_VERSION_PATCH
'.', COMPILER_VERSION_PATCH,
# ifdef COMPILER_VERSION_TWEAK
'.', COMPILER_VERSION_TWEAK,
# endif
# endif
# endif
']','\0'};
#endif
/* Construct a string literal encoding the internal version number. */
#ifdef COMPILER_VERSION_INTERNAL
char const info_version_internal[] = {
'I', 'N', 'F', 'O', ':',
'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_',
'i','n','t','e','r','n','a','l','[',
COMPILER_VERSION_INTERNAL,']','\0'};
#elif defined(COMPILER_VERSION_INTERNAL_STR)
char const* info_version_internal = "INFO" ":" "compiler_version_internal[" COMPILER_VERSION_INTERNAL_STR "]";
#endif
/* Construct a string literal encoding the version number components. */
#ifdef SIMULATE_VERSION_MAJOR
char const info_simulate_version[] = {
'I', 'N', 'F', 'O', ':',
's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[',
SIMULATE_VERSION_MAJOR,
# ifdef SIMULATE_VERSION_MINOR
'.', SIMULATE_VERSION_MINOR,
# ifdef SIMULATE_VERSION_PATCH
'.', SIMULATE_VERSION_PATCH,
# ifdef SIMULATE_VERSION_TWEAK
'.', SIMULATE_VERSION_TWEAK,
# endif
# endif
# endif
']','\0'};
#endif
/* Construct the string literal in pieces to prevent the source from
getting matched. Store it in a pointer rather than an array
because some compilers will just produce instructions to fill the
array rather than assigning a pointer to a static array. */
char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]";
char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]";
#ifdef HOST_COMPILER_ID
char const* info_host_compiler = "INFO" ":" "host_compiler[" HOST_COMPILER_ID "]";
#endif
#ifdef HOST_COMPILER_VERSION
char const* info_host_compiler_version = "INFO" ":" "host_compiler_version[" HOST_COMPILER_VERSION "]";
#elif defined(HOST_COMPILER_VERSION_MAJOR)
char const info_host_compiler_version[] = {
'I', 'N', 'F', 'O', ':','h','o','s','t','_',
'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[',
HOST_COMPILER_VERSION_MAJOR,
# ifdef HOST_COMPILER_VERSION_MINOR
'.', HOST_COMPILER_VERSION_MINOR,
# ifdef HOST_COMPILER_VERSION_PATCH
'.', HOST_COMPILER_VERSION_PATCH,
# ifdef HOST_COMPILER_VERSION_TWEAK
'.', HOST_COMPILER_VERSION_TWEAK,
# endif
# endif
# endif
']','\0'};
#endif
#
#define CXX_STD_98 199711L
#define CXX_STD_11 201103L
#define CXX_STD_14 201402L
#define CXX_STD_17 201703L
#define CXX_STD_20 202002L
#define CXX_STD_23 202302L
#define CXX_STD __cplusplus
const char* info_language_standard_default = "INFO" ":" "standard_default["
#if CXX_STD > CXX_STD_23
"26"
#elif CXX_STD > CXX_STD_20
"23"
#elif CXX_STD > CXX_STD_17
"20"
#elif CXX_STD > CXX_STD_14
"17"
#elif CXX_STD > CXX_STD_11
"14"
#elif CXX_STD >= CXX_STD_11
"11"
#else
"98"
#endif
"]";
const char* info_language_extensions_default = "INFO" ":" "extensions_default["
#if (defined(__clang__) || defined(__GNUC__)) && !defined(__STRICT_ANSI__)
"ON"
#else
"OFF"
#endif
"]";
/*--------------------------------------------------------------------------*/
int main(int argc, char* argv[])
{
int require = 0;
require += info_compiler[argc];
require += info_platform[argc];
#ifdef COMPILER_VERSION_MAJOR
require += info_version[argc];
#endif
#ifdef SIMULATE_ID
require += info_simulate[argc];
#endif
#ifdef SIMULATE_VERSION_MAJOR
require += info_simulate_version[argc];
#endif
#ifdef HOST_COMPILER_ID
require += info_host_compiler[argc];
#endif
#ifdef HOST_COMPILER_VERSION_MAJOR
require += info_host_compiler_version[argc];
#endif
require += info_language_standard_default[argc];
require += info_language_extensions_default[argc];
(void)argv;
return require;
}

View File

@@ -0,0 +1,337 @@
---
events:
-
kind: "message-v1"
backtrace:
- "/usr/share/cmake/Modules/CMakeDetermineSystem.cmake:205 (message)"
- "CMakeLists.txt:2 (project)"
message: |
The system is: Linux - 6.8.0-79-generic - x86_64
-
kind: "message-v1"
backtrace:
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:17 (message)"
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:64 (__determine_compiler_id_test)"
- "/usr/share/cmake/Modules/CMakeDetermineCXXCompiler.cmake:126 (CMAKE_DETERMINE_COMPILER_ID)"
- "CMakeLists.txt:2 (project)"
message: |
Compiling the CXX compiler identification source file "CMakeCXXCompilerId.cpp" failed.
Compiler: /opt/rocm-7.0/llvm/bin/clang++
Build flags:
Id flags:
The output was:
1
ld.lld: error: cannot open Scrt1.o: No such file or directory
ld.lld: error: cannot open crti.o: No such file or directory
ld.lld: error: unable to find library -lstdc++
ld.lld: error: unable to find library -lm
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
ld.lld: error: unable to find library -lgcc_s
ld.lld: error: unable to find library -lc
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
ld.lld: error: unable to find library -lgcc_s
ld.lld: error: cannot open crtn.o: No such file or directory
clang++: error: linker command failed with exit code 1 (use -v to see invocation)
-
kind: "message-v1"
backtrace:
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:17 (message)"
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:64 (__determine_compiler_id_test)"
- "/usr/share/cmake/Modules/CMakeDetermineCXXCompiler.cmake:126 (CMAKE_DETERMINE_COMPILER_ID)"
- "CMakeLists.txt:2 (project)"
message: |
Compiling the CXX compiler identification source file "CMakeCXXCompilerId.cpp" succeeded.
Compiler: /opt/rocm-7.0/llvm/bin/clang++
Build flags:
Id flags: -c
The output was:
0
Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" produced "CMakeCXXCompilerId.o"
The CXX compiler identification is Clang, found in:
/tmp/rinhash-hip/build/CMakeFiles/3.31.6/CompilerIdCXX/CMakeCXXCompilerId.o
-
kind: "message-v1"
backtrace:
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:1250 (message)"
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:95 (CMAKE_DETERMINE_COMPILER_ID_VENDOR)"
- "CMakeLists.txt:2 (project)"
message: |
Checking whether the HIP compiler is NVIDIA using "" did not match "nvcc: NVIDIA \\(R\\) Cuda compiler driver":
AMD clang version 20.0.0git (https://github.com/ROCm/llvm-project.git 32697402bdd2c9b01f45d53f123dc646206d3eb5+PATCHED:6509c030a655df7073b63a5b3d705e00f4f461ca)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /opt/rocm-7.0/lib/llvm/bin
-
kind: "message-v1"
backtrace:
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:1237 (message)"
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:95 (CMAKE_DETERMINE_COMPILER_ID_VENDOR)"
- "CMakeLists.txt:2 (project)"
message: |
Checking whether the HIP compiler is Clang using "" matched "(clang version)":
AMD clang version 20.0.0git (https://github.com/ROCm/llvm-project.git 32697402bdd2c9b01f45d53f123dc646206d3eb5+PATCHED:6509c030a655df7073b63a5b3d705e00f4f461ca)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /opt/rocm-7.0/lib/llvm/bin
-
kind: "message-v1"
backtrace:
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:17 (message)"
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:64 (__determine_compiler_id_test)"
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:136 (CMAKE_DETERMINE_COMPILER_ID)"
- "CMakeLists.txt:2 (project)"
message: |
Compiling the HIP compiler identification source file "CMakeHIPCompilerId.hip" failed.
Compiler: /opt/rocm-7.0/llvm/bin/clang++
Build flags:
Id flags: -v
The output was:
1
AMD clang version 20.0.0git (https://github.com/ROCm/llvm-project.git 32697402bdd2c9b01f45d53f123dc646206d3eb5+PATCHED:6509c030a655df7073b63a5b3d705e00f4f461ca)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /opt/rocm-7.0/lib/llvm/bin
Found HIP installation: /opt/rocm-7.0, version 3.5.0
"/opt/rocm-7.0/lib/llvm/bin/clang-20" -cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -Werror=atomic-alignment -emit-obj -dumpdir a- -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name CMakeHIPCompilerId.hip -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fno-rounding-math -mconstructor-aliases -aux-target-cpu x86-64 -fcuda-is-device -fno-threadsafe-statics -mllvm -amdgpu-internalize-symbols -fcuda-allow-variadic-functions -fvisibility=hidden -fapply-global-visibility-to-externs -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/hip.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/ocml.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/ockl.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_daz_opt_off.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_unsafe_math_off.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_finite_only_off.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_wavefrontsize64_on.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_isa_version_906.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_abi_version_600.bc -target-cpu gfx906 -debugger-tuning=gdb -fdebug-compilation-dir=/tmp/rinhash-hip/build/CMakeFiles/3.31.6/CompilerIdHIP -v -resource-dir /opt/rocm-7.0/lib/llvm/lib/clang/20 -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20 -idirafter /opt/rocm-7.0/include -I/opt/rocm-7.0/include -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -fdeprecated-macro -fno-autolink -ferror-limit 19 -fhip-new-launch-api -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -cuid=7bec080eaa19a2fa -fcuda-allow-variadic-functions -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/CMakeHIPCompilerId-gfx906-ca7e26.o -x hip CMakeHIPCompilerId.hip
clang -cc1 version 20.0.0git based upon LLVM 20.0.0git default target x86_64-unknown-linux-gnu
ignoring nonexistent directory "/opt/rocm-7.0/include"
ignoring nonexistent directory "/opt/rocm-7.0/include"
ignoring nonexistent directory "/include"
ignoring nonexistent directory "/include"
ignoring duplicate directory "/opt/rocm-7.0/lib/llvm/lib/clang/20/include"
ignoring duplicate directory "/usr/local/include"
ignoring duplicate directory "/usr/include"
ignoring duplicate directory "/usr/local/include"
ignoring duplicate directory "/opt/rocm-7.0/lib/llvm/lib/clang/20/include"
ignoring duplicate directory "/usr/include"
#include "..." search starts here:
#include <...> search starts here:
/opt/rocm-7.0/lib/llvm/lib/clang/20
/opt/rocm-7.0/lib/llvm/lib/clang/20/include
/usr/local/include
/usr/include
End of search list.
"/opt/rocm-7.0/lib/llvm/bin/lld" -flavor gnu -m elf64_amdgpu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols --lto-partitions=8 -plugin-opt=mcpu=gfx906 --whole-archive -o /tmp/CMakeHIPCompilerId-gfx906-df2108.out /tmp/CMakeHIPCompilerId-gfx906-ca7e26.o --no-whole-archive
"/opt/rocm-7.0/lib/llvm/bin/clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx906 -input=/dev/null -input=/tmp/CMakeHIPCompilerId-gfx906-df2108.out -output=/tmp/CMakeHIPCompilerId-36bb2a.hipfb -verbose
"/opt/rocm-7.0/lib/llvm/bin/clang-20" -cc1 -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-obj -dumpdir a- -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name CMakeHIPCompilerId.hip -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/tmp/rinhash-hip/build/CMakeFiles/3.31.6/CompilerIdHIP -v -fcoverage-compilation-dir=/tmp/rinhash-hip/build/CMakeFiles/3.31.6/CompilerIdHIP -resource-dir /opt/rocm-7.0/lib/llvm/lib/clang/20 -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20 -idirafter /opt/rocm-7.0/include -I/opt/rocm-7.0/include -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -fdeprecated-macro -ferror-limit 19 -fhip-new-launch-api -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -fcuda-include-gpubinary /tmp/CMakeHIPCompilerId-36bb2a.hipfb -cuid=7bec080eaa19a2fa -fcuda-allow-variadic-functions -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/CMakeHIPCompilerId-23fde8.o -x hip CMakeHIPCompilerId.hip
clang -cc1 version 20.0.0git based upon LLVM 20.0.0git default target x86_64-unknown-linux-gnu
ignoring nonexistent directory "/opt/rocm-7.0/include"
ignoring nonexistent directory "/opt/rocm-7.0/include"
ignoring nonexistent directory "/include"
ignoring nonexistent directory "/include"
ignoring duplicate directory "/opt/rocm-7.0/lib/llvm/lib/clang/20/include"
ignoring duplicate directory "/usr/local/include"
ignoring duplicate directory "/usr/include"
#include "..." search starts here:
#include <...> search starts here:
/opt/rocm-7.0/lib/llvm/lib/clang/20
/opt/rocm-7.0/lib/llvm/lib/clang/20/include
/usr/local/include
/usr/include
End of search list.
"/opt/rocm-7.0/lib/llvm/bin/ld.lld" --hash-style=gnu --eh-frame-hdr -m elf_x86_64 -pie -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o a.out Scrt1.o crti.o /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/linux/clang_rt.crtbegin-x86_64.o -L/lib/../lib64 -L/usr/lib/../lib64 -L/lib -L/usr/lib -L/opt/rocm-7.0/lib -L/opt/rocm-7.0/lib64 /tmp/CMakeHIPCompilerId-23fde8.o -L/opt/rocm-7.0/lib -l:libamdhip64.so.7 -lstdc++ -lm /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a -lgcc_s -lc /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a -lgcc_s /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/linux/clang_rt.crtend-x86_64.o crtn.o
ld.lld: error: cannot open Scrt1.o: No such file or directory
ld.lld: error: cannot open crti.o: No such file or directory
ld.lld: error: unable to find library -lstdc++
ld.lld: error: unable to find library -lm
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
ld.lld: error: unable to find library -lgcc_s
ld.lld: error: unable to find library -lc
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
ld.lld: error: unable to find library -lgcc_s
ld.lld: error: cannot open crtn.o: No such file or directory
clang++: error: linker command failed with exit code 1 (use -v to see invocation)
-
kind: "message-v1"
backtrace:
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:17 (message)"
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:64 (__determine_compiler_id_test)"
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:136 (CMAKE_DETERMINE_COMPILER_ID)"
- "CMakeLists.txt:2 (project)"
message: |
Compiling the HIP compiler identification source file "CMakeHIPCompilerId.hip" failed.
Compiler: /opt/rocm-7.0/llvm/bin/clang++
Build flags:
Id flags:
The output was:
1
ld.lld: error: cannot open Scrt1.o: No such file or directory
ld.lld: error: cannot open crti.o: No such file or directory
ld.lld: error: unable to find library -lstdc++
ld.lld: error: unable to find library -lm
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
ld.lld: error: unable to find library -lgcc_s
ld.lld: error: unable to find library -lc
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
ld.lld: error: unable to find library -lgcc_s
ld.lld: error: cannot open crtn.o: No such file or directory
clang++: error: linker command failed with exit code 1 (use -v to see invocation)
-
kind: "message-v1"
backtrace:
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:17 (message)"
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:64 (__determine_compiler_id_test)"
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:136 (CMAKE_DETERMINE_COMPILER_ID)"
- "CMakeLists.txt:2 (project)"
message: |
Compiling the HIP compiler identification source file "CMakeHIPCompilerId.hip" failed.
Compiler: /opt/rocm-7.0/llvm/bin/clang++
Build flags:
Id flags: -v
The output was:
1
AMD clang version 20.0.0git (https://github.com/ROCm/llvm-project.git 32697402bdd2c9b01f45d53f123dc646206d3eb5+PATCHED:6509c030a655df7073b63a5b3d705e00f4f461ca)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /opt/rocm-7.0/lib/llvm/bin
Found HIP installation: /opt/rocm-7.0, version 3.5.0
"/opt/rocm-7.0/lib/llvm/bin/clang-20" -cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -Werror=atomic-alignment -emit-obj -dumpdir a- -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name CMakeHIPCompilerId.hip -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fno-rounding-math -mconstructor-aliases -aux-target-cpu x86-64 -fcuda-is-device -fno-threadsafe-statics -mllvm -amdgpu-internalize-symbols -fcuda-allow-variadic-functions -fvisibility=hidden -fapply-global-visibility-to-externs -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/hip.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/ocml.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/ockl.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_daz_opt_off.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_unsafe_math_off.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_finite_only_off.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_wavefrontsize64_on.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_isa_version_906.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_abi_version_600.bc -target-cpu gfx906 -debugger-tuning=gdb -fdebug-compilation-dir=/tmp/rinhash-hip/build/CMakeFiles/3.31.6/CompilerIdHIP -v -resource-dir /opt/rocm-7.0/lib/llvm/lib/clang/20 -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20 -idirafter /opt/rocm-7.0/include -I/opt/rocm-7.0/include -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -fdeprecated-macro -fno-autolink -ferror-limit 19 -fhip-new-launch-api -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -cuid=7bec080eaa19a2fa -fcuda-allow-variadic-functions -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/CMakeHIPCompilerId-gfx906-a5723d.o -x hip CMakeHIPCompilerId.hip
clang -cc1 version 20.0.0git based upon LLVM 20.0.0git default target x86_64-unknown-linux-gnu
ignoring nonexistent directory "/opt/rocm-7.0/include"
ignoring nonexistent directory "/opt/rocm-7.0/include"
ignoring nonexistent directory "/include"
ignoring nonexistent directory "/include"
ignoring duplicate directory "/opt/rocm-7.0/lib/llvm/lib/clang/20/include"
ignoring duplicate directory "/usr/local/include"
ignoring duplicate directory "/usr/include"
ignoring duplicate directory "/usr/local/include"
ignoring duplicate directory "/opt/rocm-7.0/lib/llvm/lib/clang/20/include"
ignoring duplicate directory "/usr/include"
#include "..." search starts here:
#include <...> search starts here:
/opt/rocm-7.0/lib/llvm/lib/clang/20
/opt/rocm-7.0/lib/llvm/lib/clang/20/include
/usr/local/include
/usr/include
End of search list.
"/opt/rocm-7.0/lib/llvm/bin/lld" -flavor gnu -m elf64_amdgpu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols --lto-partitions=8 -plugin-opt=mcpu=gfx906 --whole-archive -o /tmp/CMakeHIPCompilerId-gfx906-73cd59.out /tmp/CMakeHIPCompilerId-gfx906-a5723d.o --no-whole-archive
"/opt/rocm-7.0/lib/llvm/bin/clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx906 -input=/dev/null -input=/tmp/CMakeHIPCompilerId-gfx906-73cd59.out -output=/tmp/CMakeHIPCompilerId-af3c23.hipfb -verbose
"/opt/rocm-7.0/lib/llvm/bin/clang-20" -cc1 -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-obj -dumpdir a- -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name CMakeHIPCompilerId.hip -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/tmp/rinhash-hip/build/CMakeFiles/3.31.6/CompilerIdHIP -v -fcoverage-compilation-dir=/tmp/rinhash-hip/build/CMakeFiles/3.31.6/CompilerIdHIP -resource-dir /opt/rocm-7.0/lib/llvm/lib/clang/20 -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20 -idirafter /opt/rocm-7.0/include -I/opt/rocm-7.0/include -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -fdeprecated-macro -ferror-limit 19 -fhip-new-launch-api -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -fcuda-include-gpubinary /tmp/CMakeHIPCompilerId-af3c23.hipfb -cuid=7bec080eaa19a2fa -fcuda-allow-variadic-functions -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/CMakeHIPCompilerId-fc89bb.o -x hip CMakeHIPCompilerId.hip
clang -cc1 version 20.0.0git based upon LLVM 20.0.0git default target x86_64-unknown-linux-gnu
ignoring nonexistent directory "/opt/rocm-7.0/include"
ignoring nonexistent directory "/opt/rocm-7.0/include"
ignoring nonexistent directory "/include"
ignoring nonexistent directory "/include"
ignoring duplicate directory "/opt/rocm-7.0/lib/llvm/lib/clang/20/include"
ignoring duplicate directory "/usr/local/include"
ignoring duplicate directory "/usr/include"
#include "..." search starts here:
#include <...> search starts here:
/opt/rocm-7.0/lib/llvm/lib/clang/20
/opt/rocm-7.0/lib/llvm/lib/clang/20/include
/usr/local/include
/usr/include
End of search list.
"/opt/rocm-7.0/lib/llvm/bin/ld.lld" --hash-style=gnu --eh-frame-hdr -m elf_x86_64 -pie -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o a.out Scrt1.o crti.o /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/linux/clang_rt.crtbegin-x86_64.o -L/lib/../lib64 -L/usr/lib/../lib64 -L/lib -L/usr/lib -L/opt/rocm-7.0/lib -L/opt/rocm-7.0/lib64 /tmp/CMakeHIPCompilerId-fc89bb.o -L/opt/rocm-7.0/lib -l:libamdhip64.so.7 -lstdc++ -lm /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a -lgcc_s -lc /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a -lgcc_s /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/linux/clang_rt.crtend-x86_64.o crtn.o
ld.lld: error: cannot open Scrt1.o: No such file or directory
ld.lld: error: cannot open crti.o: No such file or directory
ld.lld: error: unable to find library -lstdc++
ld.lld: error: unable to find library -lm
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
ld.lld: error: unable to find library -lgcc_s
ld.lld: error: unable to find library -lc
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
ld.lld: error: unable to find library -lgcc_s
ld.lld: error: cannot open crtn.o: No such file or directory
clang++: error: linker command failed with exit code 1 (use -v to see invocation)
-
kind: "message-v1"
backtrace:
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:17 (message)"
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:64 (__determine_compiler_id_test)"
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:136 (CMAKE_DETERMINE_COMPILER_ID)"
- "CMakeLists.txt:2 (project)"
message: |
Compiling the HIP compiler identification source file "CMakeHIPCompilerId.hip" failed.
Compiler: /opt/rocm-7.0/llvm/bin/clang++
Build flags:
Id flags:
The output was:
1
ld.lld: error: cannot open Scrt1.o: No such file or directory
ld.lld: error: cannot open crti.o: No such file or directory
ld.lld: error: unable to find library -lstdc++
ld.lld: error: unable to find library -lm
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
ld.lld: error: unable to find library -lgcc_s
ld.lld: error: unable to find library -lc
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
ld.lld: error: unable to find library -lgcc_s
ld.lld: error: cannot open crtn.o: No such file or directory
clang++: error: linker command failed with exit code 1 (use -v to see invocation)
-
kind: "message-v1"
backtrace:
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:1250 (message)"
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:86 (CMAKE_DETERMINE_COMPILER_ID_VENDOR)"
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:136 (CMAKE_DETERMINE_COMPILER_ID)"
- "CMakeLists.txt:2 (project)"
message: |
Checking whether the HIP compiler is NVIDIA using "" did not match "nvcc: NVIDIA \\(R\\) Cuda compiler driver":
clang++: error: no input files
-
kind: "message-v1"
backtrace:
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:1250 (message)"
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:86 (CMAKE_DETERMINE_COMPILER_ID_VENDOR)"
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:136 (CMAKE_DETERMINE_COMPILER_ID)"
- "CMakeLists.txt:2 (project)"
message: |
Checking whether the HIP compiler is Clang using "" did not match "(clang version)":
clang++: error: no input files
-
kind: "message-v1"
backtrace:
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:1250 (message)"
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:86 (CMAKE_DETERMINE_COMPILER_ID_VENDOR)"
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:136 (CMAKE_DETERMINE_COMPILER_ID)"
- "CMakeLists.txt:2 (project)"
message: |
Checking whether the HIP compiler is NVIDIA using "" did not match "nvcc: NVIDIA \\(R\\) Cuda compiler driver":
clang++: error: no input files
-
kind: "message-v1"
backtrace:
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:1250 (message)"
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:86 (CMAKE_DETERMINE_COMPILER_ID_VENDOR)"
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:136 (CMAKE_DETERMINE_COMPILER_ID)"
- "CMakeLists.txt:2 (project)"
message: |
Checking whether the HIP compiler is Clang using "" did not match "(clang version)":
clang++: error: no input files
-
kind: "message-v1"
backtrace:
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:188 (message)"
- "CMakeLists.txt:2 (project)"
message: |
Parsed HIP implicit link information from compiler id output:
link line regex: [^( *|.*[/\\])(ld[0-9]*(\\.[a-z]+)?|CMAKE_LINK_STARTFILE-NOTFOUND|([^/\\]+-)?ld|collect2)[^/\\]*( |$)]
implicit libs: []
implicit objs: []
implicit dirs: []
implicit fwks: []
...

View File

@@ -0,0 +1 @@
# This file is generated by cmake for dependency checking of the CMakeCache.txt file

View File

@@ -0,0 +1,34 @@
#pragma once
#ifdef __HIP_PLATFORM_AMD__
#include <hip/hip_runtime.h>
#include <hip/hip_runtime_api.h>
#define cudaError_t hipError_t
#define cudaSuccess hipSuccess
#define cudaMalloc hipMalloc
#define cudaFree hipFree
#define cudaMemcpy hipMemcpy
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
#define cudaDeviceSynchronize hipDeviceSynchronize
#define cudaGetErrorString hipGetErrorString
#define cudaGetLastError hipGetLastError
#define cudaMemGetInfo hipMemGetInfo
#define cudaDeviceReset hipDeviceReset
#define __global__ __global__
#define __device__ __device__
#define __host__ __host__
#define __shared__ __shared__
#define __syncthreads __syncthreads
#define __forceinline__ __forceinline__
#define __constant__ __constant__
#define __align__(x) __attribute__((aligned(x)))
#define blockIdx hipBlockIdx_x
#define threadIdx hipThreadIdx_x
#define blockDim hipBlockDim_x
#define gridDim hipGridDim_x
#define hipLaunchKernelGGL(F,GRID,BLOCK,SHMEM,STREAM,...) hipLaunchKernelGGL(F, dim3(GRID), dim3(BLOCK), SHMEM, STREAM, __VA_ARGS__)
#else
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#endif

View File

@@ -0,0 +1,283 @@
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <vector>
#include <stdexcept>
// Include shared device functions
#include "rinhash_device.cuh"
#include "argon2d_device.cuh"
#include "sha3-256.hip.cu"
#include "blake3_device.cuh"
// Modified kernel to use device functions and write output
extern "C" __global__ void rinhash_cuda_kernel(
const uint8_t* input,
size_t input_len,
uint8_t* output,
block* argon2_memory
) {
__shared__ uint8_t blake3_out[32];
__shared__ uint8_t argon2_out[32];
if (threadIdx.x == 0) {
// Step 1: BLAKE3 hash
light_hash_device(input, input_len, blake3_out);
// Step 2: Argon2d hash (t_cost=2, m_cost=64, lanes=1)
uint8_t salt[11] = { 'R','i','n','C','o','i','n','S','a','l','t' };
device_argon2d_hash(argon2_out, blake3_out, 32, 2, 64, 1, argon2_memory, salt, 11);
// Step 3: SHA3-256 hash
uint8_t sha3_out[32];
sha3_256_device(argon2_out, 32, sha3_out);
// Write result to output
for (int i = 0; i < 32; i++) {
output[i] = sha3_out[i];
}
}
__syncthreads();
}
// RinHash HIP implementation for a single header
extern "C" void rinhash_cuda(const uint8_t* input, size_t input_len, uint8_t* output) {
// Argon2 parameters
const uint32_t m_cost = 64; // blocks (64 KiB)
uint8_t *d_input = nullptr;
uint8_t *d_output = nullptr;
block *d_memory = nullptr;
cudaError_t err;
// Allocate device buffers
err = cudaMalloc(&d_input, input_len);
if (err != cudaSuccess) {
fprintf(stderr, "HIP error: Failed to allocate input memory: %s\n", cudaGetErrorString(err));
return;
}
err = cudaMalloc(&d_output, 32);
if (err != cudaSuccess) {
fprintf(stderr, "HIP error: Failed to allocate output memory: %s\n", cudaGetErrorString(err));
cudaFree(d_input);
return;
}
// Allocate Argon2 memory once per hash
err = cudaMalloc(&d_memory, m_cost * sizeof(block));
if (err != cudaSuccess) {
fprintf(stderr, "HIP error: Failed to allocate argon2 memory: %s\n", cudaGetErrorString(err));
cudaFree(d_input);
cudaFree(d_output);
return;
}
// Copy input header
err = cudaMemcpy(d_input, input, input_len, cudaMemcpyHostToDevice);
if (err != cudaSuccess) {
fprintf(stderr, "HIP error: Failed to copy input to device: %s\n", cudaGetErrorString(err));
cudaFree(d_memory);
cudaFree(d_input);
cudaFree(d_output);
return;
}
// Launch the kernel (single thread is fine for single hash)
rinhash_cuda_kernel<<<1, 1>>>(d_input, input_len, d_output, d_memory);
// Wait
err = cudaDeviceSynchronize();
if (err != cudaSuccess) {
fprintf(stderr, "HIP error during kernel execution: %s\n", cudaGetErrorString(err));
cudaFree(d_memory);
cudaFree(d_input);
cudaFree(d_output);
return;
}
// Copy result
err = cudaMemcpy(output, d_output, 32, cudaMemcpyDeviceToHost);
if (err != cudaSuccess) {
fprintf(stderr, "HIP error: Failed to copy output from device: %s\n", cudaGetErrorString(err));
}
// Free
cudaFree(d_memory);
cudaFree(d_input);
cudaFree(d_output);
}
// Helper function to convert a block header to bytes
extern "C" void blockheader_to_bytes(
const uint32_t* version,
const uint32_t* prev_block,
const uint32_t* merkle_root,
const uint32_t* timestamp,
const uint32_t* bits,
const uint32_t* nonce,
uint8_t* output,
size_t* output_len
) {
size_t offset = 0;
memcpy(output + offset, version, 4); offset += 4;
memcpy(output + offset, prev_block, 32); offset += 32;
memcpy(output + offset, merkle_root, 32); offset += 32;
memcpy(output + offset, timestamp, 4); offset += 4;
memcpy(output + offset, bits, 4); offset += 4;
memcpy(output + offset, nonce, 4); offset += 4;
*output_len = offset;
}
// Batch processing version for mining (sequential per header for correctness)
extern "C" void rinhash_cuda_batch(
const uint8_t* block_headers,
size_t block_header_len,
uint8_t* outputs,
uint32_t num_blocks
) {
// Argon2 parameters
const uint32_t m_cost = 64;
// Allocate reusable device buffers
uint8_t *d_input = nullptr;
uint8_t *d_output = nullptr;
block *d_memory = nullptr;
cudaError_t err;
err = cudaMalloc(&d_input, block_header_len);
if (err != cudaSuccess) {
fprintf(stderr, "HIP error: Failed to allocate header buffer: %s\n", cudaGetErrorString(err));
return;
}
err = cudaMalloc(&d_output, 32);
if (err != cudaSuccess) {
fprintf(stderr, "HIP error: Failed to allocate output buffer: %s\n", cudaGetErrorString(err));
cudaFree(d_input);
return;
}
err = cudaMalloc(&d_memory, m_cost * sizeof(block));
if (err != cudaSuccess) {
fprintf(stderr, "HIP error: Failed to allocate argon2 memory: %s\n", cudaGetErrorString(err));
cudaFree(d_input);
cudaFree(d_output);
return;
}
for (uint32_t i = 0; i < num_blocks; i++) {
const uint8_t* header = block_headers + i * block_header_len;
uint8_t* out = outputs + i * 32;
err = cudaMemcpy(d_input, header, block_header_len, cudaMemcpyHostToDevice);
if (err != cudaSuccess) {
fprintf(stderr, "HIP error: copy header %u failed: %s\n", i, cudaGetErrorString(err));
break;
}
rinhash_cuda_kernel<<<1, 1>>>(d_input, block_header_len, d_output, d_memory);
err = cudaDeviceSynchronize();
if (err != cudaSuccess) {
fprintf(stderr, "HIP error in kernel %u: %s\n", i, cudaGetErrorString(err));
break;
}
err = cudaMemcpy(out, d_output, 32, cudaMemcpyDeviceToHost);
if (err != cudaSuccess) {
fprintf(stderr, "HIP error: copy out %u failed: %s\n", i, cudaGetErrorString(err));
break;
}
}
cudaFree(d_memory);
cudaFree(d_output);
cudaFree(d_input);
}
// Main RinHash function that would be called from outside
extern "C" void RinHash(
const uint32_t* version,
const uint32_t* prev_block,
const uint32_t* merkle_root,
const uint32_t* timestamp,
const uint32_t* bits,
const uint32_t* nonce,
uint8_t* output
) {
uint8_t block_header[80];
size_t block_header_len;
blockheader_to_bytes(
version,
prev_block,
merkle_root,
timestamp,
bits,
nonce,
block_header,
&block_header_len
);
rinhash_cuda(block_header, block_header_len, output);
}
// Mining function that tries different nonces (host-side best selection)
extern "C" void RinHash_mine(
const uint32_t* version,
const uint32_t* prev_block,
const uint32_t* merkle_root,
const uint32_t* timestamp,
const uint32_t* bits,
uint32_t start_nonce,
uint32_t num_nonces,
uint32_t* found_nonce,
uint8_t* target_hash,
uint8_t* best_hash
) {
const size_t block_header_len = 80;
std::vector<uint8_t> block_headers(block_header_len * num_nonces);
std::vector<uint8_t> hashes(32 * num_nonces);
for (uint32_t i = 0; i < num_nonces; i++) {
uint32_t current_nonce = start_nonce + i;
uint8_t* header = block_headers.data() + i * block_header_len;
size_t header_len;
blockheader_to_bytes(
version,
prev_block,
merkle_root,
timestamp,
bits,
&current_nonce,
header,
&header_len
);
}
rinhash_cuda_batch(block_headers.data(), block_header_len, hashes.data(), num_nonces);
memcpy(best_hash, hashes.data(), 32);
*found_nonce = start_nonce;
for (uint32_t i = 1; i < num_nonces; i++) {
uint8_t* current_hash = hashes.data() + i * 32;
bool is_better = false;
for (int j = 0; j < 32; j++) {
if (current_hash[j] < best_hash[j]) { is_better = true; break; }
else if (current_hash[j] > best_hash[j]) { break; }
}
if (is_better) {
memcpy(best_hash, current_hash, 32);
*found_nonce = start_nonce + i;
}
}
}

View File

@@ -0,0 +1,8 @@
#ifndef RINHASH_DEVICE_CUH
#define RINHASH_DEVICE_CUH
#include <stdint.h>
#endif // RINHASH_DEVICE_CUH

View File

@@ -0,0 +1,140 @@
#include <stdint.h>
#include <stddef.h>
#define KECCAKF_ROUNDS 24
// 64bit 値のビット回転(左回転)
__device__ inline uint64_t rotate(uint64_t x, int n) {
return (x << n) | (x >> (64 - n));
}
// Keccakf[1600] 変換(内部状態 st[25] に対して 24 ラウンドの permutation を実行)
__device__ inline uint64_t ROTL64(uint64_t x, int n) {
return (x << n) | (x >> (64 - n));
}
__device__ void keccakf(uint64_t st[25]) {
const int R[24] = {
1, 3, 6, 10, 15, 21,
28, 36, 45, 55, 2, 14,
27, 41, 56, 8, 25, 43,
62, 18, 39, 61, 20, 44
};
const int P[24] = {
10, 7, 11, 17, 18, 3,
5, 16, 8, 21, 24, 4,
15, 23, 19, 13, 12, 2,
20, 14, 22, 9, 6, 1
};
const uint64_t RC[24] = {
0x0000000000000001ULL, 0x0000000000008082ULL,
0x800000000000808aULL, 0x8000000080008000ULL,
0x000000000000808bULL, 0x0000000080000001ULL,
0x8000000080008081ULL, 0x8000000000008009ULL,
0x000000000000008aULL, 0x0000000000000088ULL,
0x0000000080008009ULL, 0x000000008000000aULL,
0x000000008000808bULL, 0x800000000000008bULL,
0x8000000000008089ULL, 0x8000000000008003ULL,
0x8000000000008002ULL, 0x8000000000000080ULL,
0x000000000000800aULL, 0x800000008000000aULL,
0x8000000080008081ULL, 0x8000000000008080ULL,
0x0000000080000001ULL, 0x8000000080008008ULL
};
int i, j, round;
uint64_t t, bc[5];
for (round = 0; round < 24; round++) {
// Theta
for (i = 0; i < 5; i++)
bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20];
for (i = 0; i < 5; i++) {
t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
for (j = 0; j < 25; j += 5)
st[j + i] ^= t;
}
// Rho and Pi
t = st[1];
for (i = 0; i < 24; i++) {
j = P[i];
bc[0] = st[j];
st[j] = ROTL64(t, R[i]);
t = bc[0];
}
// Chi
for (j = 0; j < 25; j += 5) {
for (i = 0; i < 5; i++)
bc[i] = st[j + i];
for (i = 0; i < 5; i++)
st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5];
}
// Iota
st[0] ^= RC[round];
}
}
// little-endian で 64bit 値を読み込む8 バイトの配列から)
__device__ inline uint64_t load64_le(const uint8_t *src) {
uint64_t x = 0;
#pragma unroll
for (int i = 0; i < 8; i++) {
x |= ((uint64_t)src[i]) << (8 * i);
}
return x;
}
// little-endian で 64bit 値を書き込む8 バイトの配列へ)
__device__ inline void store64_le(uint8_t *dst, uint64_t x) {
#pragma unroll
for (int i = 0; i < 8; i++) {
dst[i] = (uint8_t)(x >> (8 * i));
}
}
/*
__device__ 関数 sha3_256_device
・引数 input, inlen で与えられる入力データを吸収し、
SHA3-256 仕様によりパディングおよび Keccak-f[1600] 変換を実行します。
・最終的に内部状態の先頭 32 バイト4 ワード)を little-endian 形式で
hash_out に出力します。
・SHA3-256 ではレート(吸収部サイズ)が 136 バイトです。
*/
__device__ void sha3_256_device(const uint8_t *input, size_t inlen, uint8_t *hash_out) {
const size_t rate = 136; // SHA3-256 の吸収部サイズ(バイト単位)
uint64_t st[25] = {0}; // 内部状態25ワード1600ビット
for (int i = 0; i < 25; i++) st[i] = 0;
size_t offset = 0;
// 通常ブロックrateバイト処理今回inlen=32なのでスキップされるはず
while (inlen >= rate) {
// 吸収
for (int i = 0; i < (rate / 8); i++) {
st[i] ^= load64_le(input + i * 8);
}
// 最終 Keccak-f
keccakf(st);
input += rate;
inlen -= rate;
}
for (int i = 0; i < 4; i++) {
st[i] ^= load64_le(input + i * 8); // 4 * 8 = 32バイト
}
((uint8_t*)st)[32] ^= 0x06; // パディング32バイト目
((uint8_t*)st)[rate - 1] ^= 0x80; // パディング(最後のバイト)
keccakf(st); // 最終 Keccak-f
// スクイーズ出力32バイト
for (int i = 0; i < 4; i++) {
store64_le(hash_out + i * 8, st[i]);
}
}