rockm
This commit is contained in:
33
rin/miner/Dockerfile.hip-linux
Normal file
33
rin/miner/Dockerfile.hip-linux
Normal file
@@ -0,0 +1,33 @@
|
||||
# Dockerfile for building RinHash HIP implementation on Linux
# Using your existing ROCm toolbox image
FROM kyuz0/amd-strix-halo-toolboxes:rocm-7rc-rocwmma

# Install additional build tools if needed.
# NOTE(review): the trailing `|| true` makes this step best-effort, so the
# image build continues even when apt-get is missing or fails. Presumably the
# base image already ships the tools -- confirm.
RUN apt-get update && apt-get install -y \
        cmake \
        ninja-build \
        git \
    && rm -rf /var/lib/apt/lists/* 2>/dev/null || true

# Create build directory
WORKDIR /build

# Copy source files
COPY gpu/RinHash-hip/ /build/

# Create output directory
RUN mkdir -p /output

# Build using CMake.
# The braces limit the `cp` fallback to a failed `cmake --install`; a failed
# configure or compile now aborts the step directly instead of falling through
# to a `cp` of a binary that was never produced.
RUN mkdir -p build && \
    cd build && \
    cmake -G "Ninja" \
        -DHIP_PLATFORM=amd \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_INSTALL_PREFIX=/output \
        .. && \
    cmake --build . -j$(nproc) && \
    { cmake --install . || cp rinhash-hip-miner /output/; }

# Default command: copy the binary from the build tree into /output.
# /output is expected to be a bind mount at run time, which hides anything
# placed there during the image build, so the build tree is the only reliable
# source. The previous fallback copied /output/rinhash-hip-miner onto itself,
# which always fails, and has been removed.
CMD ["sh", "-c", "cp /build/build/rinhash-hip-miner /output/ && echo 'HIP build completed successfully! Binary copied to /output/'"]
|
86
rin/miner/build-hip-linux-docker.sh
Normal file
86
rin/miner/build-hip-linux-docker.sh
Normal file
@@ -0,0 +1,86 @@
|
||||
#!/bin/bash
# Build script for RinHash HIP implementation using Docker
# Builds the HIP miner inside a containerized ROCm environment and collects
# the resulting binary in ./hip-output next to this script.

echo "================================================"
echo " RinHash HIP/ROCm Linux Docker Build Script"
echo "================================================"

# Docker must be on PATH before anything else is attempted.
if ! command -v docker > /dev/null 2>&1; then
    echo "ERROR: Docker not found in PATH"
    echo "Please install Docker first"
    exit 1
fi

echo "Docker found:"
docker --version
echo ""

# Work from the directory that contains this script.
BUILD_DIR="$(dirname "$0")"
cd "$BUILD_DIR" || exit 1

echo "Building Docker image for HIP/ROCm compilation..."
echo ""

# Build the Docker image; stop with a hint list when that fails.
if ! sudo docker build -f Dockerfile.hip-linux -t rinhash-hip-builder .; then
    echo ""
    echo "==============================================="
    echo " DOCKER BUILD FAILED!"
    echo "==============================================="
    echo ""
    echo "Common issues:"
    echo "1. Docker not properly installed"
    echo "2. Insufficient permissions"
    echo "3. Network connectivity issues"
    echo "4. ROCm base image not available"
    echo ""
    exit 1
fi

echo ""
echo "Docker image built successfully!"
echo ""

# Host directory that is mounted as /output inside the container.
mkdir -p hip-output

echo "Running container to build HIP binaries..."
echo ""

# Run the container; its default command copies the binary into /output.
if ! sudo docker run --rm \
        -v "$(pwd)/hip-output:/output" \
        rinhash-hip-builder; then
    echo ""
    echo "==============================================="
    echo " BUILD FAILED!"
    echo "==============================================="
    echo ""
    echo "Check the error messages above for details."
    echo ""
    exit 1
fi

echo ""
echo "==============================================="
echo " BUILD SUCCESSFUL!"
echo "==============================================="
echo ""
echo "Binaries created in hip-output/:"
ls -la hip-output/
echo ""
echo "To test the miner (requires AMD GPU with ROCm):"
echo " ./hip-output/rinhash-hip-miner --help"
echo ""
echo "Note: To run on AMD GPU, you'll need ROCm runtime installed:"
echo " sudo apt install rocm-dev hip-runtime-amd"
echo ""

echo "HIP build completed successfully!"
|
92
rin/miner/gpu/RinHash-hip/build-hip-linux.sh
Normal file
92
rin/miner/gpu/RinHash-hip/build-hip-linux.sh
Normal file
@@ -0,0 +1,92 @@
|
||||
#!/bin/bash
# RinHash HIP Build Script for Linux
# Builds the HIP implementation of RinHash for AMD GPUs with CMake, preferring
# the Ninja generator and falling back to CMake's default generator.

echo "======================================"
echo " RinHash HIP Miner Build Script"
echo "======================================"

# Check if hipcc is available
if ! command -v hipcc &> /dev/null; then
    echo "ERROR: hipcc not found in PATH"
    echo "Please install ROCm/HIP toolkit"
    echo "On Ubuntu/Debian: sudo apt install rocm-dev hip-runtime-amd"
    echo "Or download from: https://rocm.docs.amd.com/en/latest/deploy/linux/quick_start.html"
    exit 1
fi

echo "HIP compiler found:"
hipcc --version
echo ""

# Check if cmake is available
if ! command -v cmake &> /dev/null; then
    echo "ERROR: CMake not found in PATH"
    echo "Please install cmake: sudo apt install cmake"
    exit 1
fi

echo "CMake found:"
cmake --version | head -1
echo ""

echo "Building RinHash HIP miner..."
echo ""

# Always build next to this script so `cmake ..` finds the sources no matter
# which directory the script was started from.
cd "$(dirname "$0")" || exit 1

# Create build directory; abort rather than configure in the wrong place.
mkdir -p build
cd build || exit 1

# Configure with CMake
cmake -G "Ninja" \
    -DHIP_PLATFORM=amd \
    -DCMAKE_BUILD_TYPE=Release \
    ..

if [ $? -ne 0 ]; then
    echo "CMake configuration failed!"
    echo "Trying without Ninja..."
    # A failed Ninja configure can leave a cache that pins the generator;
    # CMake then rejects a different generator in the same directory.
    rm -rf CMakeCache.txt CMakeFiles
    cmake -DHIP_PLATFORM=amd \
        -DCMAKE_BUILD_TYPE=Release \
        ..

    if [ $? -ne 0 ]; then
        echo "CMake configuration failed completely!"
        exit 1
    fi
fi

# Build
cmake --build . -j$(nproc)

if [ $? -eq 0 ]; then
    echo ""
    echo "======================================"
    echo " BUILD SUCCESSFUL!"
    echo "======================================"
    echo ""
    echo "Executable created:"
    echo " build/rinhash-hip-miner"
    echo ""
    echo "To test the miner:"
    echo " cd build && ./rinhash-hip-miner --help"
    echo ""
    echo "To check AMD GPU availability:"
    echo " rocm-smi"
    echo ""
else
    echo ""
    echo "======================================"
    echo " BUILD FAILED!"
    echo "======================================"
    echo ""
    echo "Common issues:"
    echo "1. Missing ROCm development libraries"
    echo "2. Incompatible HIP version"
    echo "3. Missing development tools"
    echo ""
    exit 1
fi

echo "Build completed successfully!"
|
||||
|
21
rin/miner/hip-output/CMakeLists.txt
Normal file
21
rin/miner/hip-output/CMakeLists.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
cmake_minimum_required(VERSION 3.21)
project(RinHashHIP LANGUAGES CXX HIP)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_HIP_STANDARD 17)

# Enable HIP
find_package(HIP REQUIRED)

set(SOURCES
    rinhash.hip.cu
    sha3-256.hip.cu
)

# The sources end in ".cu", which CMake associates with the CUDA language.
# CUDA is not enabled in this project, so mark them as HIP explicitly;
# otherwise they are not compiled with the HIP toolchain.
set_source_files_properties(${SOURCES} PROPERTIES LANGUAGE HIP)

add_executable(rinhash-hip-miner ${SOURCES})

target_include_directories(rinhash-hip-miner PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})

target_compile_definitions(rinhash-hip-miner PRIVATE __HIP_PLATFORM_AMD__)

target_link_libraries(rinhash-hip-miner PRIVATE HIP::device)

# Lets `cmake --install` place the miner directly under CMAKE_INSTALL_PREFIX.
install(TARGETS rinhash-hip-miner RUNTIME DESTINATION .)
|
929
rin/miner/hip-output/argon2d_device.cuh
Normal file
929
rin/miner/hip-output/argon2d_device.cuh
Normal file
@@ -0,0 +1,929 @@
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
//=== Argon2 constants ===//
#define ARGON2_BLOCK_SIZE 1024                            /* bytes in one memory block */
#define ARGON2_QWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 8)    /* 8-byte words per block */
#define ARGON2_OWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 16)   /* 16-byte words per block */
#define ARGON2_HWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 32)   /* 32-byte words per block */
#define ARGON2_SYNC_POINTS 4                              /* slices per pass */
#define ARGON2_PREHASH_DIGEST_LENGTH 64
#define ARGON2_PREHASH_SEED_LENGTH 72
#define ARGON2_VERSION_10 0x10
#define ARGON2_VERSION_13 0x13
#define ARGON2_ADDRESSES_IN_BLOCK 128

//=== Blake2b constants ===//
#define BLAKE2B_BLOCKBYTES 128      /* bytes per compression input block */
#define BLAKE2B_OUTBYTES 64         /* maximum digest length in bytes */
#define BLAKE2B_KEYBYTES 64         /* maximum key length in bytes */
#define BLAKE2B_SALTBYTES 16
#define BLAKE2B_PERSONALBYTES 16
#define BLAKE2B_ROUNDS 12
|
||||
|
||||
//=== Structure definitions ===//

// One Argon2 memory block, stored as ARGON2_QWORDS_IN_BLOCK 64-bit words and
// aligned to 64 bytes.
typedef struct __align__(64) block_ {
    uint64_t v[ARGON2_QWORDS_IN_BLOCK];
} block;
|
||||
|
||||
// Parameters and working memory shared by the Argon2 fill routines.
typedef struct Argon2_instance_t {
    block *memory;            /* Memory pointer */
    uint32_t version;         /* compared against ARGON2_VERSION_10 when filling */
    uint32_t passes;          /* Number of passes */
    uint32_t memory_blocks;   /* Number of blocks in memory */
    uint32_t segment_length;  /* blocks per segment */
    uint32_t lane_length;     /* blocks per lane */
    uint32_t lanes;           /* number of lanes */
    uint32_t threads;
    int print_internals;      /* whether to print the memory blocks */
} argon2_instance_t;
|
||||
|
||||
/*
 * Argon2 position: identifies the block currently being constructed.
 * Used to distribute work between threads.
 */
typedef struct Argon2_position_t {
    uint32_t pass;    /* pass number */
    uint32_t lane;    /* lane number */
    uint8_t slice;    /* slice within the pass */
    uint32_t index;   /* block index within the segment */
} argon2_position_t;
|
||||
|
||||
// Running state of one Blake2b hash computation.
typedef struct __blake2b_state {
    uint64_t h[8];                    /* chaining value */
    uint64_t t[2];                    /* byte counter (low word, high word) */
    uint64_t f[2];                    /* finalization flags */
    uint8_t buf[BLAKE2B_BLOCKBYTES];  /* input bytes not yet compressed */
    unsigned buflen;                  /* number of valid bytes in buf */
    unsigned outlen;                  /* digest length requested at init */
    uint8_t last_node;                /* when set, finalization also sets f[1] */
} blake2b_state;
|
||||
|
||||
// Blake2b parameter block. The init routines read it as eight little-endian
// 64-bit words and XOR them into the IV, so its in-memory layout must be
// exactly 64 bytes with no padding. The trailing comment on each field is the
// cumulative byte offset after that field.
typedef struct __blake2b_param {
    uint8_t digest_length;                   /* 1 */
    uint8_t key_length;                      /* 2 */
    uint8_t fanout;                          /* 3 */
    uint8_t depth;                           /* 4 */
    uint32_t leaf_length;                    /* 8 */
    uint64_t node_offset;                    /* 16 */
    uint8_t node_depth;                      /* 17 */
    uint8_t inner_length;                    /* 18 */
    uint8_t reserved[14];                    /* 32 */
    uint8_t salt[BLAKE2B_SALTBYTES];         /* 48 */
    uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
} blake2b_param;

// The struct is not declared packed; it relies on natural alignment producing
// a gap-free layout. Fail the build if a compiler ever lays it out differently.
static_assert(sizeof(blake2b_param) == 8 * sizeof(uint64_t),
              "blake2b_param must be exactly 64 bytes with no padding");
|
||||
|
||||
//=== Constant memory ===//

// Blake2b initialization vector (eight 64-bit words).
__constant__ uint64_t blake2b_IV[8] = {
    0x6a09e667f3bcc908ULL,
    0xbb67ae8584caa73bULL,
    0x3c6ef372fe94f82bULL,
    0xa54ff53a5f1d36f1ULL,
    0x510e527fade682d1ULL,
    0x9b05688c2b3e6c1fULL,
    0x1f83d9abfb41bd6bULL,
    0x5be0cd19137e2179ULL
};
|
||||
|
||||
// Message word permutation used by each of the 12 Blake2b rounds.
// Rows 10 and 11 repeat rows 0 and 1.
__constant__ uint8_t blake2b_sigma[12][16] = {
    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15},
    {14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3},
    {11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4},
    { 7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8},
    { 9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13},
    { 2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9},
    {12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11},
    {13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10},
    { 6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5},
    {10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0},
    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15},
    {14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3}
};
|
||||
|
||||
//=== Common helper functions ===//

// Rotate the 64-bit value x right by n bits.
// Both shift counts are reduced modulo 64 so that n == 0 (or any multiple of
// 64) returns x unchanged instead of performing an undefined 64-bit shift.
__device__ __forceinline__ uint64_t rotr64(uint64_t x, uint32_t n) {
    return (x >> (n & 63u)) | (x << ((64u - n) & 63u));
}
|
||||
|
||||
// fBlaMka, written to match the C reference implementation:
// x + y + 2 * (low 32 bits of x) * (low 32 bits of y), modulo 2^64.
__device__ __forceinline__ uint64_t fBlaMka(uint64_t x, uint64_t y) {
    const uint64_t low_x = x & 0xFFFFFFFFULL;
    const uint64_t low_y = y & 0xFFFFFFFFULL;
    const uint64_t product = low_x * low_y;
    return x + y + (product << 1);
}
|
||||
|
||||
// Blake2b G mixing function, written to match the reference implementation.
// Mixes the message words m1 and m2 into the four state words a, b, c, d,
// which are updated in place.
__device__ __forceinline__ void blake2b_G(uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d, uint64_t m1, uint64_t m2) {
    // First half: inject m1, rotations 32 and 24.
    a += b + m1;
    d ^= a;
    d = rotr64(d, 32);
    c += d;
    b ^= c;
    b = rotr64(b, 24);

    // Second half: inject m2, rotations 16 and 63.
    a += b + m2;
    d ^= a;
    d = rotr64(d, 16);
    c += d;
    b ^= c;
    b = rotr64(b, 63);
}
|
||||
|
||||
// Store the 32-bit value w at dst in little-endian byte order.
__device__ __forceinline__ void store32(void *dst, uint32_t w) {
#if defined(NATIVE_LITTLE_ENDIAN)
    memcpy(dst, &w, sizeof w);
#else
    uint8_t *bytes = (uint8_t *)dst;
    // Emit the least significant byte first.
    for (int k = 0; k < 4; ++k) {
        bytes[k] = (uint8_t)(w >> (8 * k));
    }
#endif
}
|
||||
// Add inc to the 128-bit byte counter held in S->t[0] (low) and S->t[1] (high).
__device__ __forceinline__ void blake2b_increment_counter(blake2b_state *S,
                                                          uint64_t inc) {
    S->t[0] += inc;
    // A low word smaller than the increment means the addition wrapped.
    if (S->t[0] < inc) {
        S->t[1] += 1;
    }
}
|
||||
|
||||
// Set the second finalization flag word, f[1], to all ones.
__device__ __forceinline__ void blake2b_set_lastnode(blake2b_state *S) {
    S->f[1] = ~(uint64_t)0;
}
|
||||
|
||||
// Mark the state as finalizing: f[0] becomes all ones, and f[1] as well when
// the state's last_node flag is set.
__device__ __forceinline__ void blake2b_set_lastblock(blake2b_state *S) {
    const uint64_t all_ones = ~(uint64_t)0;
    if (S->last_node) {
        S->f[1] = all_ones;
    }
    S->f[0] = all_ones;
}
|
||||
|
||||
// Zero every byte of a blake2b_state.
__device__ void blake2b_state_memset(blake2b_state* S) {
    uint8_t* bytes = (uint8_t*)S;
    for (size_t off = 0; off != sizeof(blake2b_state); ++off) {
        bytes[off] = 0;
    }
}
|
||||
|
||||
|
||||
// XOR src into dst, one 64-bit word at a time.
__device__ void xor_block(block* dst, const block* src) {
    uint64_t* out = dst->v;
    const uint64_t* in = src->v;
    for (int word = 0; word < ARGON2_QWORDS_IN_BLOCK; ++word) {
        out[word] = out[word] ^ in[word];
    }
}
|
||||
|
||||
// Byte-wise copy of n bytes from src to dest. Used in place of memcpy, which
// the original author reports as slow when called inside a kernel.
__device__ void c_memcpy(void *dest, const void *src, size_t n) {
    uint8_t *out = (uint8_t*)dest;
    const uint8_t *in = (const uint8_t*)src;
    const uint8_t *const stop = in + n;
    while (in != stop) {
        *out++ = *in++;
    }
}
|
||||
|
||||
// Copy all ARGON2_QWORDS_IN_BLOCK words of src into dst.
__device__ void copy_block(block* dst, const block* src) {
    const size_t block_bytes = sizeof(dst->v);
    c_memcpy(dst->v, src->v, block_bytes);
}
|
||||
|
||||
// fill_block, written to match the C reference implementation.
//
// Computes next_block from prev_block and ref_block:
//   R   = ref_block ^ prev_block
//   tmp = R                       (additionally ^ next_block when with_xor != 0)
//   R   = permutation of R        (eight 16-word groups, then eight interleaved groups)
//   next_block = tmp ^ R
__device__ void fill_block(const block* prev_block, const block* ref_block, block* next_block, int with_xor) {
    block blockR = {};
    block block_tmp = {};

    copy_block(&blockR, ref_block);
    xor_block(&blockR, prev_block);
    copy_block(&block_tmp, &blockR);

    if (with_xor) {
        // Fold the block being overwritten into the feed-forward value.
        xor_block(&block_tmp, next_block);
    }

    // Quarter-round: the Blake2b G shape with additions replaced by fBlaMka.
    auto mix = [](uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d) {
        a = fBlaMka(a, b);
        d = rotr64(d ^ a, 32);
        c = fBlaMka(c, d);
        b = rotr64(b ^ c, 24);
        a = fBlaMka(a, b);
        d = rotr64(d ^ a, 16);
        c = fBlaMka(c, d);
        b = rotr64(b ^ c, 63);
    };

    // One message-free Blake2 round over 16 words of `words`. Word k of the
    // round lives at base + (k / 2) * pair_stride + (k % 2).
    auto permute16 = [&mix](uint64_t* words, unsigned base, unsigned pair_stride) {
        uint64_t* w[16];
        for (unsigned k = 0; k < 16; ++k) {
            w[k] = words + base + (k >> 1) * pair_stride + (k & 1u);
        }
        mix(*w[0], *w[4], *w[8],  *w[12]);
        mix(*w[1], *w[5], *w[9],  *w[13]);
        mix(*w[2], *w[6], *w[10], *w[14]);
        mix(*w[3], *w[7], *w[11], *w[15]);
        mix(*w[0], *w[5], *w[10], *w[15]);
        mix(*w[1], *w[6], *w[11], *w[12]);
        mix(*w[2], *w[7], *w[8],  *w[13]);
        mix(*w[3], *w[4], *w[9],  *w[14]);
    };

    // Apply Blake2 on columns: words 16*i .. 16*i+15.
    for (unsigned group = 0; group < 8; ++group) {
        permute16(blockR.v, 16 * group, 2);
    }

    // Apply Blake2 on rows: word pairs 2*i, 2*i+1 taken every 16 words.
    for (unsigned group = 0; group < 8; ++group) {
        permute16(blockR.v, 2 * group, 16);
    }

    copy_block(next_block, &block_tmp);
    xor_block(next_block, &blockR);
}
|
||||
|
||||
// Assign val to the first count elements of dest.
// count is a number of ELEMENTS of whatever dest points to, not a byte count.
template<typename T, typename ptr_t>
__device__ void c_memset(ptr_t dest, T val, int count) {
    int filled = 0;
    while (filled < count) {
        dest[filled] = val;
        ++filled;
    }
}
|
||||
|
||||
// Fill every byte of block b with the value `in`.
// c_memset counts elements of its destination pointer, so the block is
// addressed as bytes here. Passing b->v (a uint64_t array) together with
// sizeof(b->v) would write 1024 64-bit words and overrun the 1024-byte block.
__device__ void init_block_value(block *b, uint8_t in) {
    c_memset((uint8_t *)b->v, in, (int)sizeof(b->v));
}
|
||||
|
||||
// Produce the next block of addresses: bump the counter in input_block->v[6],
// then run fill_block twice, first on input_block and then on the result,
// each time with zero_block as the previous block.
__device__ void next_addresses(block *address_block, block *input_block,
                               const block *zero_block) {
    input_block->v[6] += 1;

    const int overwrite = 0;  // with_xor disabled for both steps
    fill_block(zero_block, input_block, address_block, overwrite);
    fill_block(zero_block, address_block, address_block, overwrite);
}
|
||||
|
||||
// Blake2b G mixing function used by blake2b_compress. Mixes the message words
// x and y into the state words a, b, c, d, which are updated in place.
__device__ void G1(uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d, uint64_t x, uint64_t y) {
    // First half: inject x, rotations 32 and 24.
    a += b + x;
    d ^= a;
    d = rotr64(d, 32);
    c += d;
    b ^= c;
    b = rotr64(b, 24);

    // Second half: inject y, rotations 16 and 63.
    a += b + y;
    d ^= a;
    d = rotr64(d, 16);
    c += d;
    b ^= c;
    b = rotr64(b, 63);
}
|
||||
|
||||
// Blake2b compression function F.
// Compresses one BLAKE2B_BLOCKBYTES-byte input block into the chaining value
// S->h, using the counter S->t and the finalization flags S->f.
__device__ void blake2b_compress(blake2b_state* S, const uint8_t block[BLAKE2B_BLOCKBYTES]) {
    uint64_t msg[16];
    uint64_t work[16];

    // Decode the input as sixteen little-endian 64-bit words.
    for (int w = 0; w < 16; ++w) {
        const uint8_t* bytes = block + 8 * w;
        uint64_t word = 0;
        for (int b = 7; b >= 0; --b) {
            word = (word << 8) | (uint64_t)bytes[b];
        }
        msg[w] = word;
    }

    // Working vector: chaining value in the lower half, IV in the upper half.
    for (int k = 0; k < 8; ++k) {
        work[k] = S->h[k];
        work[k + 8] = blake2b_IV[k];
    }

    // Fold in the counter and the finalization flags.
    work[12] ^= S->t[0];
    work[13] ^= S->t[1];
    work[14] ^= S->f[0];
    work[15] ^= S->f[1];

    for (int round = 0; round < BLAKE2B_ROUNDS; ++round) {
        const uint8_t* perm = blake2b_sigma[round];

        // Column step: (c, c+4, c+8, c+12).
        for (int c = 0; c < 4; ++c) {
            G1(work[c], work[4 + c], work[8 + c], work[12 + c],
               msg[perm[2 * c]], msg[perm[2 * c + 1]]);
        }

        // Diagonal step: (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14).
        for (int c = 0; c < 4; ++c) {
            G1(work[c],
               work[4 + ((c + 1) & 3)],
               work[8 + ((c + 2) & 3)],
               work[12 + ((c + 3) & 3)],
               msg[perm[8 + 2 * c]], msg[perm[9 + 2 * c]]);
        }
    }

    // Finalization: fold both halves of the working vector into h.
    for (int k = 0; k < 8; ++k) {
        S->h[k] ^= work[k] ^ work[k + 8];
    }
}
|
||||
|
||||
// Helper functions to load/store 64-bit values in little-endian order.

// Read eight bytes at src as a little-endian 64-bit value.
__device__ __forceinline__ uint64_t load64(const void* src) {
    const uint8_t* bytes = (const uint8_t*)src;
    uint64_t value = 0;
    // Start at the most significant byte so each shift makes room for the next.
    for (int k = 7; k >= 0; --k) {
        value = (value << 8) | (uint64_t)bytes[k];
    }
    return value;
}
|
||||
|
||||
// Write the 64-bit value w to dst as eight little-endian bytes.
__device__ __forceinline__ void store64(void* dst, uint64_t w) {
    uint8_t* bytes = (uint8_t*)dst;
    for (int k = 0; k < 8; ++k) {
        bytes[k] = (uint8_t)(w >> (8 * k));
    }
}
|
||||
|
||||
// Decode ARGON2_QWORDS_IN_BLOCK little-endian 64-bit words from input into dst.
__device__ void load_block(block *dst, const void *input) {
    const uint8_t *bytes = (const uint8_t *)input;
    for (unsigned word = 0; word < ARGON2_QWORDS_IN_BLOCK; ++word) {
        dst->v[word] = load64(bytes);
        bytes += sizeof(dst->v[word]);
    }
}
|
||||
|
||||
// Encode the ARGON2_QWORDS_IN_BLOCK words of src into output as little-endian bytes.
__device__ void store_block(void *output, const block *src) {
    uint8_t *bytes = (uint8_t *)output;
    for (unsigned word = 0; word < ARGON2_QWORDS_IN_BLOCK; ++word) {
        store64(bytes, src->v[word]);
        bytes += sizeof(src->v[word]);
    }
}
|
||||
|
||||
// Initialize S for an unkeyed Blake2b hash producing outlen bytes.
//
// Parameters:
//   S      - state to initialize; must not be NULL.
//   outlen - digest length in bytes, 1..BLAKE2B_OUTBYTES.
// Returns 0 on success, -1 on invalid arguments. When outlen is out of range
// the state is zeroed and marked final, so a later blake2b_update on it fails
// instead of hashing with a truncated length.
__device__ int blake2b_init(blake2b_state* S, size_t outlen) {
    blake2b_param P;

    if (S == NULL) {
        return -1;
    }

    if (outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
        // Without this check outlen would be silently truncated to 8 bits and
        // could later make blake2b_final copy past its 64-byte buffer.
        blake2b_state_memset(S);
        blake2b_set_lastblock(S);
        return -1;
    }

    // Clear state using our custom function
    blake2b_state_memset(S);

    // Set parameters according to Blake2b spec
    P.digest_length = (uint8_t)outlen;
    P.key_length = 0;
    P.fanout = 1;
    P.depth = 1;
    P.leaf_length = 0;
    P.node_offset = 0;
    P.node_depth = 0;
    P.inner_length = 0;
    c_memset(P.reserved, 0, sizeof(P.reserved));
    c_memset(P.salt, 0, sizeof(P.salt));
    c_memset(P.personal, 0, sizeof(P.personal));

    // Initialize state vector with IV
    for (int i = 0; i < 8; i++) {
        S->h[i] = blake2b_IV[i];
    }

    const unsigned char *p = (const unsigned char *)(&P);
    /* IV XOR Parameter Block: P is read as eight little-endian 64-bit words */
    for (int i = 0; i < 8; ++i) {
        S->h[i] ^= load64(&p[i * sizeof(S->h[i])]);
    }
    S->outlen = P.digest_length;
    return 0; // Success
}
|
||||
|
||||
// When non-zero, clear_internal_memory wipes the buffers it is given.
// Defaults to 0, in which case clear_internal_memory does nothing.
__device__ int FLAG_clear_internal_memory = 0;

// Zero n bytes at v if wiping is enabled and v is not NULL.
// The write goes through a volatile pointer so the compiler cannot drop the
// stores even though the buffer is not read afterwards.
__device__ void clear_internal_memory(void *v, size_t n) {
    if (FLAG_clear_internal_memory && v) {
        volatile uint8_t *bytes = (volatile uint8_t *)v;
        for (size_t i = 0; i < n; ++i) {
            bytes[i] = 0;
        }
    }
}
|
||||
|
||||
// Blake2b update, following the reference implementation.
// Absorbs inlen bytes from `in` into the state S.
// Returns 0 on success (including inlen == 0), -1 when S or `in` is NULL or
// when S has already been finalized (f[0] != 0).
__device__ int blake2b_update(blake2b_state* S, const uint8_t* in, size_t inlen) {
    if (inlen == 0) {
        return 0;
    }

    /* Sanity check */
    if (S == NULL || in == NULL) {
        return -1;
    }

    /* Is this a reused state? */
    if (S->f[0] != 0) {
        return -1;
    }

    const uint8_t *cursor = in;
    size_t remaining = inlen;

    if (S->buflen + remaining > BLAKE2B_BLOCKBYTES) {
        /* Top up the partially filled buffer and compress it. */
        const size_t buffered = S->buflen;
        const size_t room = BLAKE2B_BLOCKBYTES - buffered;
        c_memcpy(&S->buf[buffered], cursor, room);
        blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
        blake2b_compress(S, S->buf);
        S->buflen = 0;
        cursor += room;
        remaining -= room;

        /* Compress further full blocks straight from the input. The strict
         * '>' leaves the final block (even a full one) for the buffer. */
        for (; remaining > BLAKE2B_BLOCKBYTES;
             remaining -= BLAKE2B_BLOCKBYTES, cursor += BLAKE2B_BLOCKBYTES) {
            blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
            blake2b_compress(S, cursor);
        }
    }

    /* Keep whatever is left for the next update or for finalization. */
    c_memcpy(&S->buf[S->buflen], cursor, remaining);
    S->buflen += (unsigned int)remaining;
    return 0; // Success
}
|
||||
|
||||
// Finish the hash in S and write S->outlen digest bytes to out.
//
// Parameters:
//   S      - state that has been initialized and updated; must not be NULL.
//   out    - destination buffer; must not be NULL.
//   outlen - capacity of out in bytes; must be at least S->outlen.
// Returns 0 on success, -1 on NULL arguments, when out is too small, or when
// the state has already been finalized.
__device__ int blake2b_final(blake2b_state* S, uint8_t* out, size_t outlen) {
    if (!S || !out)
        return -1;

    // Never write more than the caller said the output buffer can hold.
    if (outlen < S->outlen)
        return -1;

    // A non-zero f[0] means the last block was already processed.
    if (S->f[0] != 0)
        return -1;

    uint8_t buffer[BLAKE2B_OUTBYTES] = {0};
    unsigned int i;
    blake2b_increment_counter(S, S->buflen);
    blake2b_set_lastblock(S);
    c_memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
    blake2b_compress(S, S->buf);

    for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
        store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
    }

    c_memcpy(out, buffer, S->outlen);
    return 0;
}
|
||||
|
||||
// Initialize S for a keyed Blake2b hash producing outlen bytes.
//
// Parameters:
//   S      - state to initialize; must not be NULL.
//   outlen - digest length in bytes, 1..BLAKE2B_OUTBYTES.
//   key    - key bytes; must not be NULL.
//   keylen - key length in bytes, 1..BLAKE2B_KEYBYTES.
// Returns 0 on success, -1 on invalid arguments or if absorbing the key block
// fails. On an invalid length the state is zeroed and marked final so that a
// later blake2b_update on it fails.
__device__ int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
                                size_t keylen) {
    blake2b_param P;

    if (S == NULL) {
        return -1;
    }

    if (outlen == 0 || outlen > BLAKE2B_OUTBYTES ||
        key == NULL || keylen == 0 || keylen > BLAKE2B_KEYBYTES) {
        blake2b_state_memset(S);
        blake2b_set_lastblock(S);
        return -1;
    }

    // The counter, flags and buffer length must start at zero. Callers pass an
    // uninitialized state, and blake2b_update below reads f[0] and buflen.
    blake2b_state_memset(S);

    /* Setup Parameter Block for keyed BLAKE2 */
    P.digest_length = (uint8_t)outlen;
    P.key_length = (uint8_t)keylen;
    P.fanout = 1;
    P.depth = 1;
    P.leaf_length = 0;
    P.node_offset = 0;
    P.node_depth = 0;
    P.inner_length = 0;
    c_memset(P.reserved, 0, sizeof(P.reserved));
    c_memset(P.salt, 0, sizeof(P.salt));
    c_memset(P.personal, 0, sizeof(P.personal));

    // Initialize state vector with IV
    for (int i = 0; i < 8; i++) {
        S->h[i] = blake2b_IV[i];
    }

    const unsigned char *p = (const unsigned char *)(&P);
    /* IV XOR Parameter Block: P is read as eight little-endian 64-bit words */
    for (int i = 0; i < 8; ++i) {
        S->h[i] ^= load64(&p[i * sizeof(S->h[i])]);
    }
    S->outlen = P.digest_length;

    // The key is absorbed as one full block, zero-padded on the right.
    uint8_t key_block[BLAKE2B_BLOCKBYTES];
    c_memset(key_block, 0, BLAKE2B_BLOCKBYTES);
    c_memcpy(key_block, key, keylen);
    const int rc = blake2b_update(S, key_block, BLAKE2B_BLOCKBYTES);
    /* Burn the key from stack */
    clear_internal_memory(key_block, BLAKE2B_BLOCKBYTES);
    return (rc < 0) ? -1 : 0;
}
|
||||
|
||||
// One-shot Blake2b: hash inlen bytes at `in` (optionally keyed) into outlen
// bytes at `out`.
// Returns the result of blake2b_final on success, or -1 when an argument is
// invalid or any init/update step fails. The local state is handed to
// clear_internal_memory on every path.
__device__ int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
                       const void *key, size_t keylen) {
    blake2b_state S;
    int ret = -1;

    /* Verify parameters */
    const bool input_ok = !(NULL == in && inlen > 0);
    const bool output_ok = !(NULL == out || outlen == 0 || outlen > BLAKE2B_OUTBYTES);
    const bool key_ok = !((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES);

    if (input_ok && output_ok && key_ok) {
        // Keyed or unkeyed initialization, depending on keylen.
        int init_rc;
        if (keylen > 0) {
            init_rc = blake2b_init_key(&S, outlen, key, keylen);
        } else {
            init_rc = blake2b_init(&S, outlen);
        }

        if (init_rc >= 0 && blake2b_update(&S, (const uint8_t*)in, inlen) >= 0) {
            ret = blake2b_final(&S, (uint8_t*)out, outlen);
        }
    }

    clear_internal_memory(&S, sizeof(S));
    return ret;
}
|
||||
|
||||
// index_alpha, written to match the C reference implementation (including
// the function signature).
//
// Maps pseudo_rand onto the index, within a lane, of the reference block for
// the block at `position`. same_lane is non-zero when the reference lane is
// the lane currently being filled. All 32-bit arithmetic deliberately relies
// on unsigned wrap-around, exactly as in the reference code.
__device__ uint32_t index_alpha(const argon2_instance_t *instance,
                                const argon2_position_t *position, uint32_t pseudo_rand,
                                int same_lane) {
    const uint32_t seg_len = instance->segment_length;
    const uint32_t lane_len = instance->lane_length;

    // Same lane: the current segment up to, but excluding, the previous block.
    const uint32_t same_lane_extra = position->index - 1;
    // Other lane: completed segments only, minus one when at segment start.
    const uint32_t other_lane_extra = (position->index == 0) ? (uint32_t)(-1) : 0u;

    uint32_t reference_area_size;
    if (0 == position->pass && 0 == position->slice) {
        /* First pass, first slice: all but the previous */
        reference_area_size = same_lane_extra;
    } else {
        /* First pass: the slices finished so far. Later passes: the whole
         * lane except the segment being rewritten. */
        const uint32_t finished = (0 == position->pass)
                                      ? position->slice * seg_len
                                      : lane_len - seg_len;
        reference_area_size =
            finished + (same_lane ? same_lane_extra : other_lane_extra);
    }

    /* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
     * relative position */
    uint64_t relative_position = pseudo_rand;
    relative_position = (relative_position * relative_position) >> 32;
    relative_position = reference_area_size - 1 -
                        ((reference_area_size * relative_position) >> 32);

    /* 1.2.5 Computing starting position */
    uint32_t start_position = 0;
    if (0 != position->pass && position->slice != ARGON2_SYNC_POINTS - 1) {
        start_position = (position->slice + 1) * seg_len;
    }

    /* 1.2.6. Computing absolute position */
    return (uint32_t)((start_position + relative_position) % lane_len);
}
|
||||
|
||||
// fill_segment, following the C reference implementation.
//
// Fills one segment (the slice position.slice of lane position.lane during
// pass position.pass) of instance->memory. Addressing is data dependent: the
// pseudo-random value comes from the first word of the previous block. The
// data-independent path is kept but switched off.
__device__ void fill_segment(const argon2_instance_t *instance,
                             argon2_position_t position) {
    block address_block, input_block, zero_block;
    int data_independent_addressing = false;

    // pass and slice never change below; only position.index is updated.
    const bool first_segment = (0 == position.pass) && (0 == position.slice);

    if (data_independent_addressing) {
        init_block_value(&zero_block, 0);
        init_block_value(&input_block, 0);

        input_block.v[0] = position.pass;
        input_block.v[1] = position.lane;
        input_block.v[2] = position.slice;
        input_block.v[3] = instance->memory_blocks;
        input_block.v[4] = instance->passes;
        input_block.v[5] = 0;
    }

    uint32_t starting_index = 0;
    if (first_segment) {
        starting_index = 2; /* we have already generated the first two blocks */

        /* Don't forget to generate the first block of addresses: */
        if (data_independent_addressing) {
            next_addresses(&address_block, &input_block, &zero_block);
        }
    }

    /* Offset of the current block */
    uint32_t curr_offset = position.lane * instance->lane_length +
                           position.slice * instance->segment_length +
                           starting_index;

    /* Previous block; wraps to the last block of the lane at lane start */
    uint32_t prev_offset = (0 == curr_offset % instance->lane_length)
                               ? curr_offset + instance->lane_length - 1
                               : curr_offset - 1;

    /* version 1.2.1 and earlier always overwrite; later versions XOR into the
     * existing block on every pass after the first */
    const int with_xor =
        (ARGON2_VERSION_10 != instance->version && 0 != position.pass) ? 1 : 0;

    for (uint32_t i = starting_index; i < instance->segment_length;
         ++i, ++curr_offset, ++prev_offset) {
        /* 1.1 Rotating prev_offset if needed */
        if (curr_offset % instance->lane_length == 1) {
            prev_offset = curr_offset - 1;
        }

        /* 1.2 Computing the index of the reference block */
        /* 1.2.1 Taking pseudo-random value from the previous block */
        uint64_t pseudo_rand;
        if (data_independent_addressing) {
            if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
                next_addresses(&address_block, &input_block, &zero_block);
            }
            pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
        } else {
            pseudo_rand = instance->memory[prev_offset].v[0];
        }

        /* 1.2.2 Computing the lane of the reference block */
        uint64_t ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
        if (first_segment) {
            /* Can not reference other lanes yet */
            ref_lane = position.lane;
        }

        /* 1.2.3 Computing the number of possible reference block within the
         * lane.
         */
        position.index = i;
        const uint64_t ref_index =
            index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
                        ref_lane == position.lane);

        /* 2 Creating a new block */
        block *ref_block =
            instance->memory + instance->lane_length * ref_lane + ref_index;
        block *curr_block = instance->memory + curr_offset;
        fill_block(instance->memory + prev_offset, ref_block, curr_block,
                   with_xor);
    }
}
|
||||
|
||||
// fill_memory: drives the Argon2 memory-filling phase, structured to follow the
// C reference implementation.
//
// Runs `passes` passes; each pass walks ARGON2_SYNC_POINTS slices and, inside a
// slice, calls fill_segment once per lane, sequentially, from the calling thread.
//
//   memory         : block array holding lanes * lane_length blocks
//   passes         : number of passes over the memory
//   lanes          : number of lanes
//   lane_length    : blocks per lane
//   segment_length : blocks per segment
__device__ void fill_memory(block* memory, uint32_t passes, uint32_t lanes, uint32_t lane_length, uint32_t segment_length) {
    // Value-initialise: any field of the instance that is not assigned below
    // starts at zero instead of holding an indeterminate value when
    // fill_segment reads the struct.
    // NOTE(review): presumably this also covers a `type` field (0 == Argon2d in
    // the reference implementation) -- confirm against the struct definition.
    argon2_instance_t instance = {};
    instance.version = ARGON2_VERSION_13;
    instance.passes = passes;
    instance.memory = memory;
    instance.memory_blocks = lanes * lane_length;
    instance.segment_length = segment_length;
    instance.lane_length = lane_length;
    instance.lanes = lanes;
    instance.threads = lanes;
    instance.print_internals = 0;

    // position.index is left at zero here; the visible part of fill_segment
    // assigns it before computing a reference index.
    argon2_position_t position = {};
    for (uint32_t pass = 0; pass < passes; ++pass) {
        position.pass = pass;
        for (uint32_t slice = 0; slice < ARGON2_SYNC_POINTS; ++slice) {
            position.slice = slice;
            for (uint32_t lane = 0; lane < lanes; ++lane) {
                position.lane = lane;
                fill_segment(&instance, position);
            }
        }
    }
}
|
||||
|
||||
// blake2b_long: variable-length hash built on BLAKE2b, kept in step with the
// C reference implementation.
//
// Writes `outlen` bytes to `pout`, derived from the 32-bit little-endian
// encoding of `outlen` followed by the `inlen` bytes at `in`.
// Returns the last BLAKE2b status on success, or a negative value on failure
// (including outlen > UINT32_MAX). The BLAKE2b state is wiped on every exit path.
__device__ int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
    uint8_t *dst = (uint8_t *)pout;
    blake2b_state hasher;
    uint8_t len_prefix[sizeof(uint32_t)] = {0};
    int ret = -1;

    if (outlen > UINT32_MAX) {
        goto fail;
    }

    /* The length prefix must be little-endian. */
    store32(len_prefix, (uint32_t)outlen);

#define TRY(statement)                                                         \
    do {                                                                       \
        ret = statement;                                                       \
        if (ret < 0) {                                                         \
            goto fail;                                                         \
        }                                                                      \
    } while ((void)0, 0)

    if (outlen > BLAKE2B_OUTBYTES) {
        /* Long output: chain full-size digests, emitting half of each one,
         * then finish with a digest of exactly the remaining length. */
        uint32_t remaining;
        uint8_t digest[BLAKE2B_OUTBYTES];
        uint8_t feed[BLAKE2B_OUTBYTES];

        TRY(blake2b_init(&hasher, BLAKE2B_OUTBYTES));
        TRY(blake2b_update(&hasher, len_prefix, sizeof(len_prefix)));
        TRY(blake2b_update(&hasher, (const uint8_t*)in, inlen));
        TRY(blake2b_final(&hasher, digest, BLAKE2B_OUTBYTES));

        c_memcpy(dst, digest, BLAKE2B_OUTBYTES / 2);
        dst += BLAKE2B_OUTBYTES / 2;
        remaining = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;

        for (; remaining > BLAKE2B_OUTBYTES; remaining -= BLAKE2B_OUTBYTES / 2) {
            c_memcpy(feed, digest, BLAKE2B_OUTBYTES);
            TRY(blake2b(digest, BLAKE2B_OUTBYTES, feed, BLAKE2B_OUTBYTES, NULL, 0));
            c_memcpy(dst, digest, BLAKE2B_OUTBYTES / 2);
            dst += BLAKE2B_OUTBYTES / 2;
        }

        c_memcpy(feed, digest, BLAKE2B_OUTBYTES);
        TRY(blake2b(digest, remaining, feed, BLAKE2B_OUTBYTES, NULL, 0));
        c_memcpy(dst, digest, remaining);
    } else {
        /* Short output: one BLAKE2b call of the requested digest size. */
        TRY(blake2b_init(&hasher, outlen));
        TRY(blake2b_update(&hasher, len_prefix, sizeof(len_prefix)));
        TRY(blake2b_update(&hasher, (const uint8_t*)in, inlen));
        TRY(blake2b_final(&hasher, dst, outlen));
    }
fail:
    clear_internal_memory(&hasher, sizeof(hasher));
    return ret;
#undef TRY
}
|
||||
|
||||
// Absorbs one 32-bit value, little-endian encoded via store32, into a BLAKE2b state.
static __device__ __forceinline__ void argon2_h0_absorb_u32(blake2b_state *state, uint32_t word) {
    uint8_t le[sizeof(uint32_t)];
    store32(le, word);
    blake2b_update(state, le, sizeof(le));
}

// device_argon2d_hash: single-thread Argon2d (version 0x13) producing a 32-byte
// tag, written to follow the C reference implementation.
//
//   output            : receives 32 bytes
//   input / input_len : message (password) bytes
//   t_cost            : number of passes
//   m_cost            : requested memory size in blocks
//   lanes             : number of lanes; a value of 0 is rejected and leaves
//                       `output` untouched
//   memory            : caller-provided block array; must hold at least the
//                       rounded block count computed below
//   salt / salt_len   : salt bytes
//
// No secret and no associated data are used (both lengths are hashed as 0).
__device__ void device_argon2d_hash(
    uint8_t* output,
    const uint8_t* input, size_t input_len,
    uint32_t t_cost, uint32_t m_cost, uint32_t lanes,
    block* memory,
    const uint8_t* salt, size_t salt_len
) {
    // lanes == 0 would divide by zero when the segment length is derived.
    if (lanes == 0) {
        return;
    }

    // 1. Memory layout: raise to the minimum of 2 * ARGON2_SYNC_POINTS blocks
    //    per lane, then round down to a multiple of lanes * ARGON2_SYNC_POINTS.
    uint32_t memory_blocks = m_cost;
    if (memory_blocks < 2 * ARGON2_SYNC_POINTS * lanes) {
        memory_blocks = 2 * ARGON2_SYNC_POINTS * lanes;
    }
    const uint32_t segment_length = memory_blocks / (lanes * ARGON2_SYNC_POINTS);
    const uint32_t lane_length = segment_length * ARGON2_SYNC_POINTS;

    // 2. Initial hash H0.
    //    The memory parameter hashed here is the caller's m_cost, not the
    //    rounded block count: RFC 9106 defines H0 over the parameter m and only
    //    uses the rounded m' for the memory layout. Both agree whenever m_cost
    //    is already a multiple of 4 * lanes and at least 8 * lanes.
    uint8_t blockhash[ARGON2_PREHASH_DIGEST_LENGTH];
    blake2b_state BlakeHash;

    blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH);

    argon2_h0_absorb_u32(&BlakeHash, lanes);
    argon2_h0_absorb_u32(&BlakeHash, 32);                    // tag length in bytes
    argon2_h0_absorb_u32(&BlakeHash, m_cost);
    argon2_h0_absorb_u32(&BlakeHash, t_cost);
    argon2_h0_absorb_u32(&BlakeHash, ARGON2_VERSION_13);
    argon2_h0_absorb_u32(&BlakeHash, 0);                     // type: 0 = Argon2d

    argon2_h0_absorb_u32(&BlakeHash, (uint32_t)input_len);
    blake2b_update(&BlakeHash, (const uint8_t *)input, input_len);

    argon2_h0_absorb_u32(&BlakeHash, (uint32_t)salt_len);
    blake2b_update(&BlakeHash, (const uint8_t *)salt, salt_len);

    argon2_h0_absorb_u32(&BlakeHash, 0);                     // secret length
    argon2_h0_absorb_u32(&BlakeHash, 0);                     // associated-data length

    blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);

    // 3. First two blocks of every lane: H'(H0 || LE32(index) || LE32(lane)).
    uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
    uint8_t initial_hash[ARGON2_PREHASH_SEED_LENGTH];
    c_memcpy(initial_hash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
    c_memset(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 0, ARGON2_PREHASH_SEED_LENGTH - ARGON2_PREHASH_DIGEST_LENGTH);

    for (uint32_t l = 0; l < lanes; ++l) {
        store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
        store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l);

        blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, initial_hash, ARGON2_PREHASH_SEED_LENGTH);
        load_block(&memory[l * lane_length], blockhash_bytes);

        store32(initial_hash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
        blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, initial_hash, ARGON2_PREHASH_SEED_LENGTH);
        load_block(&memory[l * lane_length + 1], blockhash_bytes);
    }

    // 4. Fill memory (fill_memory builds its own instance descriptor).
    fill_memory(memory, t_cost, lanes, lane_length, segment_length);

    // 5. XOR together the last block of every lane.
    block final_block;
    copy_block(&final_block, &memory[0 * lane_length + (lane_length - 1)]);

    for (uint32_t l = 1; l < lanes; ++l) {
        uint32_t last_block_in_lane = l * lane_length + (lane_length - 1);
        xor_block(&final_block, &memory[last_block_in_lane]);
    }

    // 6. Final 32-byte tag.
    uint8_t final_block_bytes[ARGON2_BLOCK_SIZE];
    store_block(final_block_bytes, &final_block);

    blake2b_long(output, 32, final_block_bytes, ARGON2_BLOCK_SIZE);
}
|
||||
|
||||
//=== Example __global__ kernel (explicit-salt variant) ===//
// Assumes the host has already allocated the block memory and passes its
// pointer in as memory_ptr.
//
// Exactly one thread (thread 0 of block 0) computes the hash; every other
// thread in the launch returns immediately.
__global__ void argon2d_hash_device_kernel(
    uint8_t* output,
    const uint8_t* input, size_t input_len,
    uint32_t t_cost, uint32_t m_cost, uint32_t lanes,
    block* memory_ptr, // pointer to the memory region allocated by the host
    const uint8_t* salt, size_t salt_len
) {
    const bool is_worker = (blockIdx.x == 0) && (threadIdx.x == 0);
    if (!is_worker) {
        return;
    }
    device_argon2d_hash(output, input, input_len, t_cost, m_cost, lanes, memory_ptr, salt, salt_len);
}
|
272
rin/miner/hip-output/blake3_device.cuh
Normal file
272
rin/miner/hip-output/blake3_device.cuh
Normal file
@@ -0,0 +1,272 @@
|
||||
#include "blaze3_cpu.cuh"
|
||||
|
||||
// Number of threads per thread block
|
||||
__constant__ const int NUM_THREADS = 16;
|
||||
|
||||
// redefine functions, but for the GPU
|
||||
// all of them are the same but with g_ prefixed
|
||||
__constant__ const u32 g_IV[8] = {
|
||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
|
||||
};
|
||||
|
||||
__constant__ const int g_MSG_PERMUTATION[] = {
|
||||
2, 6, 3, 10, 7, 0, 4, 13,
|
||||
1, 11, 12, 5, 9, 14, 15, 8
|
||||
};
|
||||
|
||||
// Rotates `value` right by `shift` bits (device version of rotr).
// The amount is reduced modulo the word width, so shift == 0 (or any multiple
// of the width) returns `value` unchanged instead of evaluating an undefined
// full-width left shift. usize is the bit width of u32, a power of two, which
// is what makes the mask equivalent to the modulo.
__device__ u32 g_rotr(u32 value, int shift) {
    const int amount = shift & (usize - 1);
    return (value >> amount) | (value << ((usize - amount) & (usize - 1)));
}
|
||||
|
||||
// Quarter-round mixing step (device version of g): mixes the four state words
// selected by indices a, b, c, d with message words mx and my, using right
// rotations by 16, 12, 8 and 7 bits.
__device__ void g_g(u32 state[16], u32 a, u32 b, u32 c, u32 d, u32 mx, u32 my) {
    u32 &wa = state[a];
    u32 &wb = state[b];
    u32 &wc = state[c];
    u32 &wd = state[d];

    // first half: message word mx
    wa += wb + mx;
    wd = g_rotr(wd ^ wa, 16);
    wc += wd;
    wb = g_rotr(wb ^ wc, 12);

    // second half: message word my
    wa += wb + my;
    wd = g_rotr(wd ^ wa, 8);
    wc += wd;
    wb = g_rotr(wb ^ wc, 7);
}
|
||||
|
||||
// One full round (device version of round): four column mixes followed by
// four diagonal mixes over the 16-word state, consuming message words
// m[0..7] and m[8..15] respectively.
__device__ void g_round(u32 state[16], u32 m[16]) {
    // Columns: (i, i+4, i+8, i+12)
    for (u32 col = 0; col < 4; ++col) {
        g_g(state, col, col + 4, col + 8, col + 12, m[2 * col], m[2 * col + 1]);
    }
    // Diagonals: (i, 4+(i+1)%4, 8+(i+2)%4, 12+(i+3)%4)
    for (u32 diag = 0; diag < 4; ++diag) {
        g_g(state,
            diag,
            4 + ((diag + 1) & 3),
            8 + ((diag + 2) & 3),
            12 + ((diag + 3) & 3),
            m[8 + 2 * diag], m[9 + 2 * diag]);
    }
}
|
||||
|
||||
// Reorders the 16 message words in place: new m[i] = old m[g_MSG_PERMUTATION[i]].
__device__ void g_permute(u32 m[16]) {
    u32 snapshot[16];
    for (int idx = 0; idx < 16; ++idx) {
        snapshot[idx] = m[idx];
    }
    for (int idx = 0; idx < 16; ++idx) {
        m[idx] = snapshot[g_MSG_PERMUTATION[idx]];
    }
}
|
||||
|
||||
// Word-wise copy used instead of memcpy inside kernels (the original author
// notes that CUDA's memcpy is slow when called from device code).
// `size` is a byte count; size / 4 whole u32 words are copied (u32 is assumed
// to be 4 bytes) and any remainder bytes are ignored.
__device__ void g_memcpy(u32 *lhs, const u32 *rhs, int size) {
    for (int words_left = size / 4; words_left > 0; --words_left) {
        *lhs++ = *rhs++;
    }
}
|
||||
|
||||
// Element-wise fill: assigns `val` to dest[0 .. count-1].
// Note that `count` is a number of elements, not a number of bytes.
template<typename T, typename ptr_t>
__device__ void g_memset(ptr_t dest, T val, int count) {
    int idx = 0;
    while (idx < count) {
        dest[idx] = val;
        ++idx;
    }
}
|
||||
|
||||
// Compression function (device version of compress).
//
// Builds the 16-word state from the 8-word chaining value, the first four IV
// words, the 64-bit counter (low word, then high word), block_len and flags;
// runs 7 rounds with the message permuted between rounds; then folds the two
// state halves together. The full 16-word result is left in `state`.
//
//   chaining_value : 8 input words (not modified)
//   block_words    : 16 message words (copied; not modified)
//   state          : 16-word output buffer
__device__ void g_compress(
    u32 *chaining_value,
    u32 *block_words,
    u64 counter,
    u32 block_len,
    u32 flags,
    u32 *state
) {
    for (int w = 0; w < 8; ++w) {
        state[w] = chaining_value[w];
    }
    for (int w = 0; w < 4; ++w) {
        state[8 + w] = g_IV[w];
    }
    state[12] = (u32)counter;
    state[13] = (u32)(counter >> 32);
    state[14] = block_len;
    state[15] = flags;

    // Work on a private copy because the permutation is applied in place.
    u32 msg[16];
    for (int w = 0; w < 16; ++w) {
        msg[w] = block_words[w];
    }

    // 7 rounds; the message is permuted before every round except the first.
    for (int round_no = 0; round_no < 7; ++round_no) {
        if (round_no != 0) {
            g_permute(msg);
        }
        g_round(state, msg);
    }

    for (int w = 0; w < 8; ++w) {
        state[w] ^= state[w + 8];
        state[w + 8] ^= chaining_value[w];
    }
}
|
||||
|
||||
// Packs bytes into 32-bit words, little-endian: words[k] is assembled from
// bytes[4k .. 4k+3]. The loop advances four bytes at a time, so callers pass a
// bytes_len that is a multiple of 4.
__device__ void g_words_from_little_endian_bytes(
    u8 *bytes, u32 *words, u32 bytes_len
) {
    for (u32 pos = 0; pos < bytes_len; pos += 4) {
        const u32 b0 = bytes[pos];
        const u32 b1 = bytes[pos + 1];
        const u32 b2 = bytes[pos + 2];
        const u32 b3 = bytes[pos + 3];
        words[pos / 4] = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
    }
}
|
||||
|
||||
// Device version of Chunk::compress_chunk.
//
// Parent node (PARENT flag set): compresses `data` under `key` with counter 0
// and returns. Leaf node: walks leaf_data in BLOCK_LEN-byte message blocks,
// chaining each result into the next; the first block gets CHUNK_START and the
// last gets CHUNK_END | out_flags. The result is left in raw_hash.
//
// An empty leaf is treated as a single all-zero block of length 0; as a side
// effect leaf_len is set to BLOCK_LEN and the first BLOCK_LEN bytes of
// leaf_data are zeroed.
//
// Each block is staged through a zero-padded buffer, as the host-side
// compress_chunk does, so bytes of leaf_data beyond leaf_len never reach the
// hash even when the caller left them uninitialised.
__device__ void Chunk::g_compress_chunk(u32 out_flags) {
    if(flags&PARENT) {
        g_compress(
            key,
            data,
            0,  // counter is always zero for parent nodes
            BLOCK_LEN,
            flags | out_flags,
            raw_hash
        );
        return;
    }

    u32 chaining_value[8];
    g_memcpy(chaining_value, key, 32);

    const bool empty_input = (leaf_len==0);
    if(empty_input) {
        for(u32 i=0; i<BLOCK_LEN; i++)
            leaf_data[i] = 0U;
        leaf_len = BLOCK_LEN;
    }

    // Scratch buffers are declared once, outside the loop.
    u32 block_words[16];
    u8 block_cast[BLOCK_LEN];

    for(u32 i=0; i<leaf_len; i+=BLOCK_LEN) {
        u32 flagger = flags;

        // Only the last message block may be shorter than BLOCK_LEN.
        u32 block_len = (i+BLOCK_LEN > leaf_len) ? (leaf_len%BLOCK_LEN) : BLOCK_LEN;

        // Special case: an empty input is hashed as a block of length 0.
        if(empty_input)
            block_len = 0;

        // Stage the block with explicit zero padding up to BLOCK_LEN.
        for(u32 b=0; b<BLOCK_LEN; b++)
            block_cast[b] = (b < block_len) ? leaf_data[i+b] : (u8)0;

        g_words_from_little_endian_bytes(block_cast, block_words, BLOCK_LEN);

        if(i==0)
            flagger |= CHUNK_START;
        if(i+BLOCK_LEN >= leaf_len)
            flagger |= CHUNK_END | out_flags;

        // raw_hash holds the root output when this is the final block
        g_compress(
            chaining_value,
            block_words,
            counter,
            block_len,
            flagger,
            raw_hash
        );

        g_memcpy(chaining_value, raw_hash, 32);
    }
}
|
||||
|
||||
// Recursive tree hash over data[l .. r) using dynamic parallelism.
//
// n = r - l is expected to be a power of two. A range of one chunk is
// compressed in place; a larger range is split in half, each half is reduced
// by a child launch, and the two resulting chaining values (raw_hash of
// data[l] and of data[l + n/2]) are combined into a parent node stored in
// data[l].
//
// Only global thread 0 of a launch does any work. The merge step mutates
// data[l], so letting every thread with tid < n run it (as before) issued the
// same child launches n times and raced on data[l]. Child launches are
// therefore a single thread each.
//
// NOTE(review): device-side cudaDeviceSynchronize() is deprecated since
// CUDA 11.6 and unavailable in newer toolkits/architectures -- confirm the
// target toolchain still supports it.
__global__ void compute(Chunk *data, int l, int r) {
    // n is always a power of 2
    const int n = r-l;
    const int tid = blockDim.x * blockIdx.x + threadIdx.x;
    if(n <= 0 || tid != 0)
        return;

    if(n==1) {
        data[l].g_compress_chunk();
        return;
    }

    const int mid = l + n/2;

    compute<<<1,1>>>(data, l, mid);
    cudaDeviceSynchronize();
    compute<<<1,1>>>(data, mid, r);
    cudaDeviceSynchronize();

    data[l].flags |= PARENT;

    // Left child's chaining value goes into words 0..7, right child's into 8..15.
    memcpy(data[l].data, data[l].raw_hash, 32);
    memcpy(data[l].data+8, data[mid].raw_hash, 32);
    data[l].g_compress_chunk();
}
|
||||
|
||||
// Host-side launcher for the GPU tree hash.
//
// Copies N chunks from `data` (host) into `memory_bar` (device), runs the
// `compute` kernel over them, and copies the first chunk -- which holds the
// reduced result -- back into `*result`.
//
//   data       : host array of N chunks
//   N          : number of chunks (the kernel expects a power of two)
//   result     : host destination for the reduced chunk
//   memory_bar : device buffer with room for at least N chunks
//
// CUDA failures are reported on stderr. If the upload or the launch fails the
// function returns early and `*result` is left unmodified.
void light_hash(Chunk *data, int N, Chunk *result, Chunk *memory_bar) {
    if(N <= 0 || data == nullptr || result == nullptr || memory_bar == nullptr) {
        std::cerr << "light_hash: invalid arguments" << std::endl;
        return;
    }

    auto succeeded = [](cudaError_t status, const char *what) -> bool {
        if(status == cudaSuccess)
            return true;
        std::cerr << "light_hash: " << what << " failed: "
                  << cudaGetErrorString(status) << std::endl;
        return false;
    };

    // size_t avoids overflowing int for large N * sizeof(Chunk)
    const size_t data_size = static_cast<size_t>(N) * sizeof(Chunk);

    // Device settings
    // Allows device-side synchronisation up to 16 levels of recursion.
    // Treated as best-effort: a failure is logged but does not abort.
    succeeded(cudaDeviceSetLimit(cudaLimitDevRuntimeSyncDepth, 16),
              "cudaDeviceSetLimit");

    // Device vector
    Chunk *g_data = memory_bar;
    if(!succeeded(cudaMemcpy(g_data, data, data_size, cudaMemcpyHostToDevice),
                  "host-to-device copy"))
        return;

    // Actual computation of hash
    compute<<<N,32>>>(g_data, 0, N);
    if(!succeeded(cudaGetLastError(), "compute kernel launch"))
        return;

    // The blocking copy also surfaces errors raised while the kernel ran.
    succeeded(cudaMemcpy(result, g_data, sizeof(Chunk), cudaMemcpyDeviceToHost),
              "device-to-host copy");
}
|
||||
|
||||
// Device-callable single-chunk hash.
//
// Hashes up to sizeof(Chunk::leaf_data) bytes of `input` as one leaf chunk with
// the ROOT flag and writes the first 8 words of the result to `output` as
// 32 little-endian bytes.
//
// NOTE: input longer than the leaf buffer is silently truncated; only the
// first sizeof(leaf_data) bytes contribute to the hash.
__device__ void light_hash_device(const uint8_t* input, size_t input_len, uint8_t* output) {
    // Create a single chunk for processing the input
    Chunk chunk;

    // Key the chunk with the device-constant IV
    for (int i = 0; i < 8; i++) {
        chunk.key[i] = g_IV[i];
    }

    // Copy the input into leaf_data, clamped to the buffer capacity
    const size_t capacity = sizeof(chunk.leaf_data);
    const size_t copy_len = (input_len < capacity) ? input_len : capacity;
    for (size_t i = 0; i < copy_len; i++) {
        chunk.leaf_data[i] = input[i];
    }
    // Chunk's default constructor initialises nothing, so clear the tail:
    // bytes past copy_len must not be indeterminate if the block padding
    // logic ever reads them.
    for (size_t i = copy_len; i < capacity; i++) {
        chunk.leaf_data[i] = 0;
    }

    chunk.leaf_len = (u32)copy_len;
    chunk.counter = 0;
    chunk.flags = 0;  // Default flags

    // Process the chunk directly; ROOT marks the final output
    chunk.g_compress_chunk(ROOT);

    // Serialise the first 8 result words as little-endian bytes
    for (int i = 0; i < 8; i++) {
        const u32 word = chunk.raw_hash[i];
        output[i*4]   = (uint8_t)(word);
        output[i*4+1] = (uint8_t)(word >> 8);
        output[i*4+2] = (uint8_t)(word >> 16);
        output[i*4+3] = (uint8_t)(word >> 24);
    }
}
|
||||
|
||||
// Compatibility alias used by other device code: forwards unchanged to
// light_hash_device, which writes 32 bytes to `output`.
__device__ void blake3_hash_device(const uint8_t* input, size_t input_len, uint8_t* output)
{
    light_hash_device(input, input_len, output);
}
|
420
rin/miner/hip-output/blaze3_cpu.cuh
Normal file
420
rin/miner/hip-output/blaze3_cpu.cuh
Normal file
@@ -0,0 +1,420 @@
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
// Let's use a pinned memory vector!
|
||||
#include <thrust/host_vector.h>
|
||||
#include <thrust/system/cuda/experimental/pinned_allocator.h>
|
||||
|
||||
using u32 = uint32_t;
|
||||
using u64 = uint64_t;
|
||||
using u8 = uint8_t;
|
||||
|
||||
const u32 OUT_LEN = 32;
|
||||
const u32 KEY_LEN = 32;
|
||||
const u32 BLOCK_LEN = 64;
|
||||
const u32 CHUNK_LEN = 1024;
|
||||
// Multiple chunks make a snicker bar :)
|
||||
const u32 SNICKER = 1U << 10;
|
||||
// Factory height and snicker size have an inversely proportional relationship
|
||||
// FACTORY_HT * (log2 SNICKER) + 10 >= 64
|
||||
const u32 FACTORY_HT = 5;
|
||||
|
||||
const u32 CHUNK_START = 1 << 0;
|
||||
const u32 CHUNK_END = 1 << 1;
|
||||
const u32 PARENT = 1 << 2;
|
||||
const u32 ROOT = 1 << 3;
|
||||
const u32 KEYED_HASH = 1 << 4;
|
||||
const u32 DERIVE_KEY_CONTEXT = 1 << 5;
|
||||
const u32 DERIVE_KEY_MATERIAL = 1 << 6;
|
||||
|
||||
const int usize = sizeof(u32) * 8;
|
||||
|
||||
u32 IV[8] = {
|
||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
|
||||
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
|
||||
};
|
||||
|
||||
const int MSG_PERMUTATION[] = {
|
||||
2, 6, 3, 10, 7, 0, 4, 13,
|
||||
1, 11, 12, 5, 9, 14, 15, 8
|
||||
};
|
||||
|
||||
// Rotates `value` right by `shift` bits.
// The amount is reduced modulo the word width, so shift == 0 (or any multiple
// of the width) returns `value` unchanged instead of evaluating an undefined
// full-width left shift. usize is the bit width of u32, a power of two, which
// is what makes the mask equivalent to the modulo.
u32 rotr(u32 value, int shift) {
    const int amount = shift & (usize - 1);
    return (value >> amount) | (value << ((usize - amount) & (usize - 1)));
}
|
||||
|
||||
// Quarter-round mixing step: mixes the four state words selected by indices
// a, b, c, d with message words mx and my, using right rotations by
// 16, 12, 8 and 7 bits.
void g(u32 state[16], u32 a, u32 b, u32 c, u32 d, u32 mx, u32 my) {
    u32 &wa = state[a];
    u32 &wb = state[b];
    u32 &wc = state[c];
    u32 &wd = state[d];

    // first half: message word mx
    wa += wb + mx;
    wd = rotr(wd ^ wa, 16);
    wc += wd;
    wb = rotr(wb ^ wc, 12);

    // second half: message word my
    wa += wb + my;
    wd = rotr(wd ^ wa, 8);
    wc += wd;
    wb = rotr(wb ^ wc, 7);
}
|
||||
|
||||
// One full round: four column mixes followed by four diagonal mixes over the
// 16-word state, consuming message words m[0..7] and m[8..15] respectively.
void round(u32 state[16], u32 m[16]) {
    // Columns: (i, i+4, i+8, i+12)
    for (u32 col = 0; col < 4; ++col) {
        g(state, col, col + 4, col + 8, col + 12, m[2 * col], m[2 * col + 1]);
    }
    // Diagonals: (i, 4+(i+1)%4, 8+(i+2)%4, 12+(i+3)%4)
    for (u32 diag = 0; diag < 4; ++diag) {
        g(state,
          diag,
          4 + ((diag + 1) & 3),
          8 + ((diag + 2) & 3),
          12 + ((diag + 3) & 3),
          m[8 + 2 * diag], m[9 + 2 * diag]);
    }
}
|
||||
|
||||
// Reorders the 16 message words in place: new m[i] = old m[MSG_PERMUTATION[i]].
void permute(u32 m[16]) {
    u32 snapshot[16];
    for (int idx = 0; idx < 16; ++idx) {
        snapshot[idx] = m[idx];
    }
    for (int idx = 0; idx < 16; ++idx) {
        m[idx] = snapshot[MSG_PERMUTATION[idx]];
    }
}
|
||||
|
||||
// Compression function (host version).
//
// Builds the 16-word state from the 8-word chaining value, the first four IV
// words, the 64-bit counter (low word, then high word), block_len and flags;
// runs 7 rounds with the message permuted between rounds; then folds the two
// state halves together. The full 16-word result is left in `state`.
//
//   chaining_value : 8 input words (not modified)
//   block_words    : 16 message words (copied; not modified)
//   state          : 16-word output buffer
void compress(
    u32 *chaining_value,
    u32 *block_words,
    u64 counter,
    u32 block_len,
    u32 flags,
    u32 *state
) {
    std::copy(chaining_value, chaining_value + 8, state);
    std::copy(IV, IV + 4, state + 8);
    state[12] = (u32)counter;
    state[13] = (u32)(counter >> 32);
    state[14] = block_len;
    state[15] = flags;

    // Work on a private copy because the permutation is applied in place.
    u32 msg[16];
    std::copy(block_words, block_words + 16, msg);

    // 7 rounds; the message is permuted before every round except the first.
    for (int round_no = 0; round_no < 7; ++round_no) {
        if (round_no != 0) {
            permute(msg);
        }
        round(state, msg);
    }

    for (int w = 0; w < 8; ++w) {
        state[w] ^= state[w + 8];
        state[w + 8] ^= chaining_value[w];
    }
}
|
||||
|
||||
// Packs bytes into 32-bit words, little-endian: words[k] is assembled from
// bytes[4k .. 4k+3]. The loop advances four bytes at a time, so callers pass a
// bytes_len that is a multiple of 4.
void words_from_little_endian_bytes(u8 *bytes, u32 *words, u32 bytes_len) {
    for (u32 pos = 0; pos < bytes_len; pos += 4) {
        const u32 b0 = bytes[pos];
        const u32 b1 = bytes[pos + 1];
        const u32 b2 = bytes[pos + 2];
        const u32 b3 = bytes[pos + 3];
        words[pos / 4] = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
    }
}
|
||||
|
||||
// One node of the hash tree. The same struct is used on host and device and is
// copied between them byte-for-byte, so the member order must not change.
struct Chunk {
    // Leaf nodes only: raw input bytes. A leaf may hold fewer than 1024 bytes;
    // leaf_len is the number of valid bytes.
    u8 leaf_data[1024];
    u32 leaf_len;
    // Parent nodes: the two children's chaining values (always 64 bytes).
    u32 data[16];
    u32 flags;
    // Output of the last compression (16 words).
    u32 raw_hash[16];
    u32 key[8];
    // only useful for leaf nodes
    u64 counter;

    // Constructor for leaf nodes.
    // Copies `size` bytes from `input`, clamped to [0, sizeof(leaf_data)] so an
    // oversized or negative size cannot overrun the buffer; the remainder of
    // leaf_data is zero-filled.
    __device__ __host__ Chunk(char *input, int size, u32 _flags, u32 *_key, u64 ctr){
        counter = ctr;
        flags = _flags;
        memcpy(key, _key, 8*sizeof(*key));
        memset(leaf_data, 0, sizeof(leaf_data));

        u32 len = (size < 0) ? 0U : (u32)size;
        if(len > (u32)sizeof(leaf_data))
            len = (u32)sizeof(leaf_data);
        if(len)
            memcpy(leaf_data, input, len);
        leaf_len = len;
    }

    // Constructor for an empty node carrying only flags and key.
    // leaf_data, data and raw_hash are left uninitialised.
    __device__ __host__ Chunk(u32 _flags, u32 *_key) {
        counter = 0;
        flags = _flags;
        memcpy(key, _key, 8*sizeof(*key));
        leaf_len = 0;
    }

    // Default constructor: initialises nothing.
    __device__ __host__ Chunk() {}
    // Chunk() : leaf_len(0) {}

    // Process data in message-block-sized pieces and store the chaining value
    // in raw_hash. The argument supplies extra flags for the final block.
    void compress_chunk(u32=0);
    __device__ void g_compress_chunk(u32=0);
};
|
||||
|
||||
// Host-side chunk compression.
//
// Parent node (PARENT flag set): compresses `data` under `key` with counter 0
// and returns. Leaf node: walks leaf_data in BLOCK_LEN-byte message blocks,
// chaining each result into the next; the first block gets CHUNK_START and the
// last gets CHUNK_END | out_flags. The result is left in raw_hash.
//
// An empty leaf is treated as a single all-zero block of length 0; as a side
// effect leaf_len is set to BLOCK_LEN and the first BLOCK_LEN bytes of
// leaf_data are zeroed.
void Chunk::compress_chunk(u32 out_flags) {
    if(flags&PARENT) {
        // counter is always zero for parent nodes
        compress(key, data, 0, BLOCK_LEN, flags | out_flags, raw_hash);
        return;
    }

    u32 cv[8];
    memcpy(cv, key, sizeof(cv));

    const bool was_empty = (leaf_len == 0);
    if(was_empty) {
        memset(leaf_data, 0, BLOCK_LEN);
        leaf_len = BLOCK_LEN;
    }

    for(u32 offset = 0; offset < leaf_len; offset += BLOCK_LEN) {
        const bool is_first = (offset == 0);
        const bool is_last = (offset + BLOCK_LEN >= leaf_len);

        // Only the last message block may be shorter than BLOCK_LEN.
        u32 block_len = (offset + BLOCK_LEN > leaf_len) ? (leaf_len % BLOCK_LEN)
                                                        : BLOCK_LEN;
        // special case: an empty input is hashed as a block of length 0
        if(was_empty)
            block_len = 0;

        // block_len rounded up to the next multiple of 4
        const u32 padded_len = (block_len + 3U) & ~3U;

        // BLOCK_LEN is the maximum possible staged length
        u8 staged[BLOCK_LEN];
        memset(staged, 0, padded_len);
        memcpy(staged, leaf_data + offset, block_len);

        u32 block_words[16] = {};
        words_from_little_endian_bytes(staged, block_words, padded_len);

        u32 block_flags = flags;
        if(is_first)
            block_flags |= CHUNK_START;
        if(is_last)
            block_flags |= CHUNK_END | out_flags;

        // raw_hash holds the root output when this is the final block
        compress(cv, block_words, counter, block_len, block_flags, raw_hash);

        memcpy(cv, raw_hash, sizeof(cv));
    }
}
|
||||
|
||||
using thrust_vector = thrust::host_vector<
|
||||
Chunk,
|
||||
thrust::system::cuda::experimental::pinned_allocator<Chunk>
|
||||
>;
|
||||
|
||||
// The GPU hasher
|
||||
void light_hash(Chunk*, int, Chunk*, Chunk*);
|
||||
|
||||
// Sanity checks
|
||||
// Reduces data[first .. last) to a single chunk.
//
// A one-element range is compressed on the CPU (avoiding GPU call overhead)
// and returned by value; any other range is handed to light_hash, which uses
// `memory_bar` as its device buffer.
//
// A pure-CPU alternative (kept from the original as a note, not compiled):
//   left  = hash_many(data, first, first+n/2);
//   right = hash_many(data, first+n/2, last);
//   Chunk parent(left.flags, left.key);  parent.flags |= PARENT;
//   memcpy(parent.data,   left.raw_hash,  32);
//   memcpy(parent.data+8, right.raw_hash, 32);
//   parent.compress_chunk();  return parent;
Chunk hash_many(Chunk *data, int first, int last, Chunk *memory_bar) {
    // the element count will always be a power of 2
    const int count = last - first;

    if(count != 1) {
        Chunk reduced;
        light_hash(data + first, count, &reduced, memory_bar);
        return reduced;
    }

    Chunk &only = data[first];
    only.compress_chunk();
    return only;
}
|
||||
|
||||
Chunk merge(Chunk &left, Chunk &right);
|
||||
void hash_root(Chunk &node, vector<u8> &out_slice);
|
||||
|
||||
// Incremental hasher that batches leaf chunks and reduces them on the GPU.
//
// The struct is created by aggregate initialisation in new_internal, so the
// member order (key, flags, ctr, file_size, memory_bar, factory) is part of
// its contract; members omitted there are value-initialised.
struct Hasher {
    u32 key[8];
    u32 flags;
    // Counter assigned to the next leaf chunk.
    u64 ctr;
    u64 file_size;
    // Device buffer ("memory bar") for CUDA to use during its computation
    Chunk* memory_bar;
    // Factory is an array of FACTORY_HT levels, each holding up to one
    // SNICKER-sized bar of chunks
    thrust_vector factory[FACTORY_HT];

    // methods
    static Hasher new_internal(u32 key[8], u32 flags, u64 fsize);
    static Hasher _new(u64);
    // initializes cuda memory (if needed)
    void init();
    // Frees the CUDA buffer if one is held. The previous `else free(...)`
    // branch only ever ran with a null pointer, which is a no-op, so it has
    // been dropped.
    ~Hasher() {
        if(memory_bar)
            cudaFree(memory_bar);
    }

    void update(char *input, int size);
    void finalize(vector<u8> &out_slice);
    void propagate();
};
|
||||
|
||||
// Builds a Hasher from an explicit key, flag set and total input size.
// The chunk counter starts at 0; memory_bar and factory are not named in the
// initialiser list and are therefore value-initialised.
Hasher Hasher::new_internal(u32 key[8], u32 flags, u64 fsize) {
    const u64 initial_counter = 0;
    return Hasher{
        { key[0], key[1], key[2], key[3], key[4], key[5], key[6], key[7] },
        flags,
        initial_counter,
        fsize
    };
}
|
||||
|
||||
// Creates an unkeyed Hasher: IV as the key, no flags, for an input of `fsize` bytes.
Hasher Hasher::_new(u64 fsize) {
    const u32 no_flags = 0;
    return new_internal(IV, no_flags, fsize);
}
|
||||
|
||||
// Allocates the device buffer and pre-reserves the two lowest factory levels.
//
// With file_size == 0 no device memory is needed and memory_bar is set to
// nullptr. Otherwise the buffer holds min(number of chunks, SNICKER) + 1
// chunks. If the allocation fails, the error is reported on stderr and
// memory_bar is left as nullptr so the destructor stays safe.
void Hasher::init() {
    if(file_size<1) {
        memory_bar = nullptr;
        return;
    }

    // Ceiling division done in integers: `ceil(file_size / CHUNK_LEN)` divided
    // first (truncating), so the ceil had no effect.
    const u64 num_chunks = file_size / CHUNK_LEN + ((file_size % CHUNK_LEN) ? 1 : 0);
    u32 bar_size = (u32)min(num_chunks, (u64)SNICKER);
    // Just for safety :)
    ++bar_size;

    const cudaError_t status = cudaMalloc(&memory_bar, bar_size*sizeof(Chunk));
    if(status != cudaSuccess) {
        memory_bar = nullptr;
        std::cerr << "Hasher::init: cudaMalloc failed: "
                  << cudaGetErrorString(status) << std::endl;
    }

    // Let the most commonly used places always have memory
    // +1 so that it does not resize when it hits SNICKER elements
    u32 RESERVE = SNICKER + 1;
    factory[0].reserve(RESERVE);
    factory[1].reserve(RESERVE);
}
|
||||
|
||||
// Carries full levels upward: while a level holds exactly SNICKER chunks, it
// is reduced to one subtree chunk, cleared, and the subtree is appended to the
// next level up.
void Hasher::propagate() {
    for(int level = 0; factory[level].size() == SNICKER; ++level) {
        Chunk subtree = hash_many(factory[level].data(), 0, SNICKER, memory_bar);
        factory[level].clear();
        factory[level + 1].push_back(subtree);
    }
}
|
||||
|
||||
// Appends one leaf chunk built from `size` bytes of `input`, tagged with the
// current counter, then advances the counter. When the lowest level reaches
// SNICKER chunks it is propagated upward.
void Hasher::update(char *input, int size) {
    Chunk leaf(input, size, flags, key, ctr);
    factory[0].push_back(leaf);
    ++ctr;

    const bool level_full = (factory[0].size() == SNICKER);
    if(level_full) {
        propagate();
    }
}
|
||||
|
||||
// Reduces everything still held in the factory to a single root node and
// writes the digest into out_slice (out_slice.size() bytes are produced).
//
// For each non-empty level, the chunks are split into power-of-two runs
// following the set bits of the level's size, largest run first. Each run is
// reduced with hash_many, the run results are merged pairwise from the right,
// and the level's result is pushed to the next level (or becomes the root at
// the top level).
void Hasher::finalize(vector<u8> &out_slice) {
    Chunk root(flags, key);
    for(int i=0; i<FACTORY_HT; i++) {
        vector<Chunk> subtrees;
        u32 n = factory[i].size(), divider=SNICKER;
        if(!n)
            continue;
        int start = 0;
        while(divider) {
            // Take a run of `divider` chunks when that bit is set in n.
            // (The source text had this test garbled as "n÷r".)
            if(n&divider) {
                Chunk subtree = hash_many(factory[i].data(), start, start+divider, memory_bar);
                subtrees.push_back(subtree);
                start += divider;
            }
            divider >>= 1;
        }
        while(subtrees.size()>1) {
            Chunk tmp1 = subtrees.back();
            subtrees.pop_back();
            Chunk tmp2 = subtrees.back();
            subtrees.pop_back();
            // tmp2 is the left child
            // tmp1 is the right child
            // that's the order they appear within the array
            Chunk tmp = merge(tmp2, tmp1);
            subtrees.push_back(tmp);
        }
        if(i<FACTORY_HT-1)
            factory[i+1].push_back(subtrees[0]);
        else
            root = subtrees[0];
    }
    hash_root(root, out_slice);
}
|
||||
|
||||
// Combines two subtrees into an (uncompressed) parent node.
// Both children are compressed first; the parent inherits the left child's
// flags (plus PARENT) and key, and its data holds the left chaining value in
// words 0..7 followed by the right chaining value in words 8..15.
Chunk merge(Chunk &left, Chunk &right) {
    left.compress_chunk();
    right.compress_chunk();

    Chunk parent(left.flags | PARENT, left.key);

    // each chaining value is 8 words = 32 bytes
    const size_t cv_bytes = 8 * sizeof(u32);
    memcpy(parent.data, left.raw_hash, cv_bytes);
    memcpy(parent.data + 8, right.raw_hash, cv_bytes);
    return parent;
}
|
||||
|
||||
// Produces the final output bytes from the root node.
//
// The root is compressed with the ROOT flag once per 64-byte output block,
// with node.counter set to the output block index (0, 1, 2, ...). Each
// compression's 16 result words are serialised little-endian into out_slice
// until out_slice.size() bytes have been written.
void hash_root(Chunk &node, vector<u8> &out_slice) {
    const u64 block_bytes = 2*OUT_LEN;
    u64 block_index = 0;
    u32 words[16] = {};

    for(u64 offset = 0; int(out_slice.size()-offset) > 0; offset += block_bytes) {
        node.counter = block_index;
        node.compress_chunk(ROOT);

        // words is u32[16]
        memcpy(words, node.raw_hash, sizeof(words));

        // number of bytes this block contributes (the last one may be short)
        const u64 take = min(block_bytes, (u64)out_slice.size()-offset);
        for(u64 b = 0; b < take; ++b) {
            out_slice[offset + b] = (u8)((words[b / 4] >> (8 * (b % 4))) & 0x000000FF);
        }

        ++block_index;
    }
}
|
91
rin/miner/hip-output/build-hip-linux.sh
Normal file
91
rin/miner/hip-output/build-hip-linux.sh
Normal file
@@ -0,0 +1,91 @@
|
||||
#!/bin/bash
# RinHash HIP Build Script for Linux
# This script builds the HIP implementation of RinHash for AMD GPUs.
#
# Steps: verify hipcc and cmake are on PATH, configure into ./build (with Ninja
# when it is installed, otherwise CMake's default generator), then build.
# Exits with status 1 on any failure.

echo "======================================"
echo "    RinHash HIP Miner Build Script"
echo "======================================"

# Check if hipcc is available
if ! command -v hipcc &> /dev/null; then
    echo "ERROR: hipcc not found in PATH"
    echo "Please install ROCm/HIP toolkit"
    echo "On Ubuntu/Debian: sudo apt install rocm-dev hip-runtime-amd"
    echo "Or download from: https://rocm.docs.amd.com/en/latest/deploy/linux/quick_start.html"
    exit 1
fi

echo "HIP compiler found:"
hipcc --version
echo ""

# Check if cmake is available
if ! command -v cmake &> /dev/null; then
    echo "ERROR: CMake not found in PATH"
    echo "Please install cmake: sudo apt install cmake"
    exit 1
fi

echo "CMake found:"
cmake --version | head -1
echo ""

echo "Building RinHash HIP miner..."
echo ""

# Create and enter the build directory; stop if that is not possible so the
# configure step never runs in the wrong directory.
mkdir -p build
cd build || { echo "ERROR: cannot enter build directory"; exit 1; }

# Only request the Ninja generator when ninja is actually installed.
GENERATOR_ARGS=()
if command -v ninja &> /dev/null; then
    GENERATOR_ARGS=(-G "Ninja")
fi

# Configure with CMake
cmake "${GENERATOR_ARGS[@]}" \
    -DHIP_PLATFORM=amd \
    -DCMAKE_BUILD_TYPE=Release \
    ..

if [ $? -ne 0 ]; then
    echo "CMake configuration failed!"
    echo "Trying without Ninja..."
    # A failed configure can leave a cache that pins the previous generator;
    # CMake refuses to reconfigure with a different one until it is removed.
    rm -rf CMakeCache.txt CMakeFiles
    cmake -DHIP_PLATFORM=amd \
        -DCMAKE_BUILD_TYPE=Release \
        ..

    if [ $? -ne 0 ]; then
        echo "CMake configuration failed completely!"
        exit 1
    fi
fi

# Build
cmake --build . -j$(nproc)

if [ $? -eq 0 ]; then
    echo ""
    echo "======================================"
    echo "    BUILD SUCCESSFUL!"
    echo "======================================"
    echo ""
    echo "Executable created:"
    echo "    build/rinhash-hip-miner"
    echo ""
    echo "To test the miner:"
    echo "    cd build && ./rinhash-hip-miner --help"
    echo ""
    echo "To check AMD GPU availability:"
    echo "    rocm-smi"
    echo ""
else
    echo ""
    echo "======================================"
    echo "    BUILD FAILED!"
    echo "======================================"
    echo ""
    echo "Common issues:"
    echo "1. Missing ROCm development libraries"
    echo "2. Incompatible HIP version"
    echo "3. Missing development tools"
    echo ""
    exit 1
fi

echo "Build completed successfully!"
|
18
rin/miner/hip-output/build-hip.bat
Normal file
18
rin/miner/hip-output/build-hip.bat
Normal file
@@ -0,0 +1,18 @@
|
||||
@echo off
rem Configure and build the HIP miner with CMake + Ninja inside .\build.
setlocal

rem The hipcc compiler driver must be reachable through PATH.
where hipcc >nul 2>nul || (
    echo ERROR: hipcc not found. Please install ROCm/HIP toolchain.
    exit /b 1
)

rem Out-of-tree build; stop at the first step that reports an error.
if not exist build mkdir build
cd build
cmake -G "Ninja" -DHIP_PLATFORM=amd -DCMAKE_BUILD_TYPE=Release .. || exit /b 1
cmake --build . -j || exit /b 1

cd ..
echo Build done. Executable should be at build\rinhash-hip-miner.exe
|
175
rin/miner/hip-output/build/CMakeCache.txt
Normal file
175
rin/miner/hip-output/build/CMakeCache.txt
Normal file
@@ -0,0 +1,175 @@
|
||||
# This is the CMakeCache file.
|
||||
# For build in directory: /tmp/rinhash-hip/build
|
||||
# It was generated by CMake: /usr/bin/cmake
|
||||
# You can edit this file to change values found and used by cmake.
|
||||
# If you do not want to change any of the values, simply exit the editor.
|
||||
# If you do want to change a value, simply edit, save, and exit the editor.
|
||||
# The syntax for the file is as follows:
|
||||
# KEY:TYPE=VALUE
|
||||
# KEY is the name of a variable in the cache.
|
||||
# TYPE is a hint to GUIs for the type of VALUE, DO NOT EDIT TYPE!.
|
||||
# VALUE is the current value for the KEY.
|
||||
|
||||
########################
|
||||
# EXTERNAL cache entries
|
||||
########################
|
||||
|
||||
//Path to a program.
|
||||
CMAKE_ADDR2LINE:FILEPATH=CMAKE_ADDR2LINE-NOTFOUND
|
||||
|
||||
//Path to a program.
|
||||
CMAKE_AR:FILEPATH=CMAKE_AR-NOTFOUND
|
||||
|
||||
//No help, variable specified on the command line.
|
||||
CMAKE_BUILD_TYPE:UNINITIALIZED=Release
|
||||
|
||||
//CXX compiler
|
||||
CMAKE_CXX_COMPILER:FILEPATH=/opt/rocm-7.0/llvm/bin/clang++
|
||||
|
||||
//LLVM archiver
|
||||
CMAKE_CXX_COMPILER_AR:FILEPATH=CMAKE_CXX_COMPILER_AR-NOTFOUND
|
||||
|
||||
//`clang-scan-deps` dependency scanner
|
||||
CMAKE_CXX_COMPILER_CLANG_SCAN_DEPS:FILEPATH=CMAKE_CXX_COMPILER_CLANG_SCAN_DEPS-NOTFOUND
|
||||
|
||||
//Generate index for LLVM archive
|
||||
CMAKE_CXX_COMPILER_RANLIB:FILEPATH=CMAKE_CXX_COMPILER_RANLIB-NOTFOUND
|
||||
|
||||
//Path to a program.
|
||||
CMAKE_DLLTOOL:FILEPATH=CMAKE_DLLTOOL-NOTFOUND
|
||||
|
||||
//Value Computed by CMake.
|
||||
CMAKE_FIND_PACKAGE_REDIRECTS_DIR:STATIC=/tmp/rinhash-hip/build/CMakeFiles/pkgRedirects
|
||||
|
||||
//HIP compiler
|
||||
CMAKE_HIP_COMPILER:FILEPATH=/opt/rocm-7.0/llvm/bin/clang++
|
||||
|
||||
//HIP platform
|
||||
CMAKE_HIP_PLATFORM:STRING=amd
|
||||
|
||||
//Path to a program.
|
||||
CMAKE_LINKER:FILEPATH=/opt/rocm-7.0/llvm/bin/ld.lld
|
||||
|
||||
//Path to a program.
|
||||
CMAKE_MAKE_PROGRAM:FILEPATH=/usr/sbin/gmake
|
||||
|
||||
//Path to a program.
|
||||
CMAKE_NM:FILEPATH=/opt/rocm-7.0/llvm/bin/llvm-nm
|
||||
|
||||
//Path to a program.
|
||||
CMAKE_OBJCOPY:FILEPATH=/opt/rocm-7.0/llvm/bin/llvm-objcopy
|
||||
|
||||
//Path to a program.
|
||||
CMAKE_OBJDUMP:FILEPATH=/opt/rocm-7.0/llvm/bin/llvm-objdump
|
||||
|
||||
//Value Computed by CMake
|
||||
CMAKE_PROJECT_DESCRIPTION:STATIC=
|
||||
|
||||
//Value Computed by CMake
|
||||
CMAKE_PROJECT_HOMEPAGE_URL:STATIC=
|
||||
|
||||
//Value Computed by CMake
|
||||
CMAKE_PROJECT_NAME:STATIC=RinHashHIP
|
||||
|
||||
//Path to a program.
|
||||
CMAKE_READELF:FILEPATH=CMAKE_READELF-NOTFOUND
|
||||
|
||||
//Path to a program.
|
||||
CMAKE_STRIP:FILEPATH=/opt/rocm-7.0/llvm/bin/llvm-strip
|
||||
|
||||
//Path to a program.
|
||||
CMAKE_TAPI:FILEPATH=CMAKE_TAPI-NOTFOUND
|
||||
|
||||
//No help, variable specified on the command line.
|
||||
HIP_PLATFORM:UNINITIALIZED=amd
|
||||
|
||||
//Value Computed by CMake
|
||||
RinHashHIP_BINARY_DIR:STATIC=/tmp/rinhash-hip/build
|
||||
|
||||
//Value Computed by CMake
|
||||
RinHashHIP_IS_TOP_LEVEL:STATIC=ON
|
||||
|
||||
//Value Computed by CMake
|
||||
RinHashHIP_SOURCE_DIR:STATIC=/tmp/rinhash-hip
|
||||
|
||||
|
||||
########################
|
||||
# INTERNAL cache entries
|
||||
########################
|
||||
|
||||
//ADVANCED property for variable: CMAKE_ADDR2LINE
|
||||
CMAKE_ADDR2LINE-ADVANCED:INTERNAL=1
|
||||
//ADVANCED property for variable: CMAKE_AR
|
||||
CMAKE_AR-ADVANCED:INTERNAL=1
|
||||
//This is the directory where this CMakeCache.txt was created
|
||||
CMAKE_CACHEFILE_DIR:INTERNAL=/tmp/rinhash-hip/build
|
||||
//Major version of cmake used to create the current loaded cache
|
||||
CMAKE_CACHE_MAJOR_VERSION:INTERNAL=3
|
||||
//Minor version of cmake used to create the current loaded cache
|
||||
CMAKE_CACHE_MINOR_VERSION:INTERNAL=31
|
||||
//Patch version of cmake used to create the current loaded cache
|
||||
CMAKE_CACHE_PATCH_VERSION:INTERNAL=6
|
||||
//Path to CMake executable.
|
||||
CMAKE_COMMAND:INTERNAL=/usr/bin/cmake
|
||||
//Path to cpack program executable.
|
||||
CMAKE_CPACK_COMMAND:INTERNAL=/usr/bin/cpack
|
||||
//Path to ctest program executable.
|
||||
CMAKE_CTEST_COMMAND:INTERNAL=/usr/bin/ctest
|
||||
//ADVANCED property for variable: CMAKE_CXX_COMPILER
|
||||
CMAKE_CXX_COMPILER-ADVANCED:INTERNAL=1
|
||||
//ADVANCED property for variable: CMAKE_CXX_COMPILER_AR
|
||||
CMAKE_CXX_COMPILER_AR-ADVANCED:INTERNAL=1
|
||||
//ADVANCED property for variable: CMAKE_CXX_COMPILER_CLANG_SCAN_DEPS
|
||||
CMAKE_CXX_COMPILER_CLANG_SCAN_DEPS-ADVANCED:INTERNAL=1
|
||||
//ADVANCED property for variable: CMAKE_CXX_COMPILER_RANLIB
|
||||
CMAKE_CXX_COMPILER_RANLIB-ADVANCED:INTERNAL=1
|
||||
//ADVANCED property for variable: CMAKE_DLLTOOL
|
||||
CMAKE_DLLTOOL-ADVANCED:INTERNAL=1
|
||||
//Path to cache edit program executable.
|
||||
CMAKE_EDIT_COMMAND:INTERNAL=/usr/bin/ccmake
|
||||
//Executable file format
|
||||
CMAKE_EXECUTABLE_FORMAT:INTERNAL=ELF
|
||||
//Name of external makefile project generator.
|
||||
CMAKE_EXTRA_GENERATOR:INTERNAL=
|
||||
//Name of generator.
|
||||
CMAKE_GENERATOR:INTERNAL=Unix Makefiles
|
||||
//Generator instance identifier.
|
||||
CMAKE_GENERATOR_INSTANCE:INTERNAL=
|
||||
//Name of generator platform.
|
||||
CMAKE_GENERATOR_PLATFORM:INTERNAL=
|
||||
//Name of generator toolset.
|
||||
CMAKE_GENERATOR_TOOLSET:INTERNAL=
|
||||
//ADVANCED property for variable: CMAKE_HIP_COMPILER
|
||||
CMAKE_HIP_COMPILER-ADVANCED:INTERNAL=1
|
||||
//Source directory with the top level CMakeLists.txt file for this
|
||||
// project
|
||||
CMAKE_HOME_DIRECTORY:INTERNAL=/tmp/rinhash-hip
|
||||
//ADVANCED property for variable: CMAKE_LINKER
|
||||
CMAKE_LINKER-ADVANCED:INTERNAL=1
|
||||
//ADVANCED property for variable: CMAKE_MAKE_PROGRAM
|
||||
CMAKE_MAKE_PROGRAM-ADVANCED:INTERNAL=1
|
||||
//ADVANCED property for variable: CMAKE_NM
|
||||
CMAKE_NM-ADVANCED:INTERNAL=1
|
||||
//number of local generators
|
||||
CMAKE_NUMBER_OF_MAKEFILES:INTERNAL=1
|
||||
//ADVANCED property for variable: CMAKE_OBJCOPY
|
||||
CMAKE_OBJCOPY-ADVANCED:INTERNAL=1
|
||||
//ADVANCED property for variable: CMAKE_OBJDUMP
|
||||
CMAKE_OBJDUMP-ADVANCED:INTERNAL=1
|
||||
//Platform information initialized
|
||||
CMAKE_PLATFORM_INFO_INITIALIZED:INTERNAL=1
|
||||
//ADVANCED property for variable: CMAKE_RANLIB
|
||||
CMAKE_RANLIB-ADVANCED:INTERNAL=1
|
||||
//noop for ranlib
|
||||
CMAKE_RANLIB:INTERNAL=:
|
||||
//ADVANCED property for variable: CMAKE_READELF
|
||||
CMAKE_READELF-ADVANCED:INTERNAL=1
|
||||
//Path to CMake installation.
|
||||
CMAKE_ROOT:INTERNAL=/usr/share/cmake
|
||||
//ADVANCED property for variable: CMAKE_STRIP
|
||||
CMAKE_STRIP-ADVANCED:INTERNAL=1
|
||||
//ADVANCED property for variable: CMAKE_TAPI
|
||||
CMAKE_TAPI-ADVANCED:INTERNAL=1
|
||||
//uname command
|
||||
CMAKE_UNAME:INTERNAL=/usr/sbin/uname
|
||||
|
@@ -0,0 +1,97 @@
|
||||
set(CMAKE_CXX_COMPILER "/opt/rocm-7.0/llvm/bin/clang++")
|
||||
set(CMAKE_CXX_COMPILER_ARG1 "")
|
||||
set(CMAKE_CXX_COMPILER_ID "Clang")
|
||||
set(CMAKE_CXX_COMPILER_VERSION "20.0.0")
|
||||
set(CMAKE_CXX_COMPILER_VERSION_INTERNAL "")
|
||||
set(CMAKE_CXX_COMPILER_WRAPPER "")
|
||||
set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "17")
|
||||
set(CMAKE_CXX_EXTENSIONS_COMPUTED_DEFAULT "ON")
|
||||
set(CMAKE_CXX_STANDARD_LATEST "")
|
||||
set(CMAKE_CXX_COMPILE_FEATURES "")
|
||||
set(CMAKE_CXX98_COMPILE_FEATURES "")
|
||||
set(CMAKE_CXX11_COMPILE_FEATURES "")
|
||||
set(CMAKE_CXX14_COMPILE_FEATURES "")
|
||||
set(CMAKE_CXX17_COMPILE_FEATURES "")
|
||||
set(CMAKE_CXX20_COMPILE_FEATURES "")
|
||||
set(CMAKE_CXX23_COMPILE_FEATURES "")
|
||||
set(CMAKE_CXX26_COMPILE_FEATURES "")
|
||||
|
||||
set(CMAKE_CXX_PLATFORM_ID "Linux")
|
||||
set(CMAKE_CXX_SIMULATE_ID "")
|
||||
set(CMAKE_CXX_COMPILER_FRONTEND_VARIANT "GNU")
|
||||
set(CMAKE_CXX_SIMULATE_VERSION "")
|
||||
|
||||
|
||||
|
||||
|
||||
set(CMAKE_AR "CMAKE_AR-NOTFOUND")
|
||||
set(CMAKE_CXX_COMPILER_AR "CMAKE_CXX_COMPILER_AR-NOTFOUND")
|
||||
set(CMAKE_RANLIB ":")
|
||||
set(CMAKE_CXX_COMPILER_RANLIB "CMAKE_CXX_COMPILER_RANLIB-NOTFOUND")
|
||||
set(CMAKE_LINKER "/opt/rocm-7.0/llvm/bin/ld.lld")
|
||||
set(CMAKE_LINKER_LINK "")
|
||||
set(CMAKE_LINKER_LLD "")
|
||||
set(CMAKE_CXX_COMPILER_LINKER "")
|
||||
set(CMAKE_CXX_COMPILER_LINKER_ID "")
|
||||
set(CMAKE_CXX_COMPILER_LINKER_VERSION )
|
||||
set(CMAKE_CXX_COMPILER_LINKER_FRONTEND_VARIANT )
|
||||
set(CMAKE_MT "")
|
||||
set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND")
|
||||
set(CMAKE_COMPILER_IS_GNUCXX )
|
||||
set(CMAKE_CXX_COMPILER_LOADED 1)
|
||||
set(CMAKE_CXX_COMPILER_WORKS )
|
||||
set(CMAKE_CXX_ABI_COMPILED )
|
||||
|
||||
set(CMAKE_CXX_COMPILER_ENV_VAR "CXX")
|
||||
|
||||
set(CMAKE_CXX_COMPILER_ID_RUN 1)
|
||||
set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm;mpp;CPP;ixx;cppm;ccm;cxxm;c++m)
|
||||
set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC)
|
||||
|
||||
foreach (lang IN ITEMS C OBJC OBJCXX)
|
||||
if (CMAKE_${lang}_COMPILER_ID_RUN)
|
||||
foreach(extension IN LISTS CMAKE_${lang}_SOURCE_FILE_EXTENSIONS)
|
||||
list(REMOVE_ITEM CMAKE_CXX_SOURCE_FILE_EXTENSIONS ${extension})
|
||||
endforeach()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set(CMAKE_CXX_LINKER_PREFERENCE 30)
|
||||
set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1)
|
||||
set(CMAKE_CXX_LINKER_DEPFILE_SUPPORTED )
|
||||
|
||||
# Save compiler ABI information.
|
||||
set(CMAKE_CXX_SIZEOF_DATA_PTR "")
|
||||
set(CMAKE_CXX_COMPILER_ABI "")
|
||||
set(CMAKE_CXX_BYTE_ORDER "")
|
||||
set(CMAKE_CXX_LIBRARY_ARCHITECTURE "")
|
||||
|
||||
if(CMAKE_CXX_SIZEOF_DATA_PTR)
|
||||
set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}")
|
||||
endif()
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ABI)
|
||||
set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}")
|
||||
endif()
|
||||
|
||||
if(CMAKE_CXX_LIBRARY_ARCHITECTURE)
|
||||
set(CMAKE_LIBRARY_ARCHITECTURE "")
|
||||
endif()
|
||||
|
||||
set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "")
|
||||
if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX)
|
||||
set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}")
|
||||
endif()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "")
|
||||
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "")
|
||||
set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "")
|
||||
set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
|
||||
set(CMAKE_CXX_COMPILER_CLANG_RESOURCE_DIR "/opt/rocm-7.0/lib/llvm/lib/clang/20")
|
||||
|
||||
set(CMAKE_CXX_COMPILER_IMPORT_STD "")
|
||||
|
@@ -0,0 +1,15 @@
|
||||
set(CMAKE_HOST_SYSTEM "Linux-6.8.0-79-generic")
|
||||
set(CMAKE_HOST_SYSTEM_NAME "Linux")
|
||||
set(CMAKE_HOST_SYSTEM_VERSION "6.8.0-79-generic")
|
||||
set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64")
|
||||
|
||||
|
||||
|
||||
set(CMAKE_SYSTEM "Linux-6.8.0-79-generic")
|
||||
set(CMAKE_SYSTEM_NAME "Linux")
|
||||
set(CMAKE_SYSTEM_VERSION "6.8.0-79-generic")
|
||||
set(CMAKE_SYSTEM_PROCESSOR "x86_64")
|
||||
|
||||
set(CMAKE_CROSSCOMPILING "FALSE")
|
||||
|
||||
set(CMAKE_SYSTEM_LOADED 1)
|
@@ -0,0 +1,919 @@
|
||||
/* This source file must have a .cpp extension so that all C++ compilers
|
||||
recognize the extension without flags. Borland does not know .cxx for
|
||||
example. */
|
||||
#ifndef __cplusplus
|
||||
# error "A C compiler has been selected for C++."
|
||||
#endif
|
||||
|
||||
#if !defined(__has_include)
|
||||
/* If the compiler does not have __has_include, pretend the answer is
|
||||
always no. */
|
||||
# define __has_include(x) 0
|
||||
#endif
|
||||
|
||||
|
||||
/* Version number components: V=Version, R=Revision, P=Patch
|
||||
Version date components: YYYY=Year, MM=Month, DD=Day */
|
||||
|
||||
#if defined(__INTEL_COMPILER) || defined(__ICC)
|
||||
# define COMPILER_ID "Intel"
|
||||
# if defined(_MSC_VER)
|
||||
# define SIMULATE_ID "MSVC"
|
||||
# endif
|
||||
# if defined(__GNUC__)
|
||||
# define SIMULATE_ID "GNU"
|
||||
# endif
|
||||
/* __INTEL_COMPILER = VRP prior to 2021, and then VVVV for 2021 and later,
|
||||
except that a few beta releases use the old format with V=2021. */
|
||||
# if __INTEL_COMPILER < 2021 || __INTEL_COMPILER == 202110 || __INTEL_COMPILER == 202111
|
||||
# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100)
|
||||
# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10)
|
||||
# if defined(__INTEL_COMPILER_UPDATE)
|
||||
# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE)
|
||||
# else
|
||||
# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10)
|
||||
# endif
|
||||
# else
|
||||
# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER)
|
||||
# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER_UPDATE)
|
||||
/* The third version component from --version is an update index,
|
||||
but no macro is provided for it. */
|
||||
# define COMPILER_VERSION_PATCH DEC(0)
|
||||
# endif
|
||||
# if defined(__INTEL_COMPILER_BUILD_DATE)
|
||||
/* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */
|
||||
# define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE)
|
||||
# endif
|
||||
# if defined(_MSC_VER)
|
||||
/* _MSC_VER = VVRR */
|
||||
# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
|
||||
# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
|
||||
# endif
|
||||
# if defined(__GNUC__)
|
||||
# define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
|
||||
# elif defined(__GNUG__)
|
||||
# define SIMULATE_VERSION_MAJOR DEC(__GNUG__)
|
||||
# endif
|
||||
# if defined(__GNUC_MINOR__)
|
||||
# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
|
||||
# endif
|
||||
# if defined(__GNUC_PATCHLEVEL__)
|
||||
# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
|
||||
# endif
|
||||
|
||||
#elif (defined(__clang__) && defined(__INTEL_CLANG_COMPILER)) || defined(__INTEL_LLVM_COMPILER)
|
||||
# define COMPILER_ID "IntelLLVM"
|
||||
#if defined(_MSC_VER)
|
||||
# define SIMULATE_ID "MSVC"
|
||||
#endif
|
||||
#if defined(__GNUC__)
|
||||
# define SIMULATE_ID "GNU"
|
||||
#endif
|
||||
/* __INTEL_LLVM_COMPILER = VVVVRP prior to 2021.2.0, VVVVRRPP for 2021.2.0 and
|
||||
* later. Look for 6 digit vs. 8 digit version number to decide encoding.
|
||||
* VVVV is no smaller than the current year when a version is released.
|
||||
*/
|
||||
#if __INTEL_LLVM_COMPILER < 1000000L
|
||||
# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/100)
|
||||
# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/10 % 10)
|
||||
# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 10)
|
||||
#else
|
||||
# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/10000)
|
||||
# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/100 % 100)
|
||||
# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 100)
|
||||
#endif
|
||||
#if defined(_MSC_VER)
|
||||
/* _MSC_VER = VVRR */
|
||||
# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
|
||||
# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
|
||||
#endif
|
||||
#if defined(__GNUC__)
|
||||
# define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
|
||||
#elif defined(__GNUG__)
|
||||
# define SIMULATE_VERSION_MAJOR DEC(__GNUG__)
|
||||
#endif
|
||||
#if defined(__GNUC_MINOR__)
|
||||
# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
|
||||
#endif
|
||||
#if defined(__GNUC_PATCHLEVEL__)
|
||||
# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
|
||||
#endif
|
||||
|
||||
#elif defined(__PATHCC__)
|
||||
# define COMPILER_ID "PathScale"
|
||||
# define COMPILER_VERSION_MAJOR DEC(__PATHCC__)
|
||||
# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__)
|
||||
# if defined(__PATHCC_PATCHLEVEL__)
|
||||
# define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__)
|
||||
# endif
|
||||
|
||||
#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__)
|
||||
# define COMPILER_ID "Embarcadero"
|
||||
# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF)
|
||||
# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF)
|
||||
# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF)
|
||||
|
||||
#elif defined(__BORLANDC__)
|
||||
# define COMPILER_ID "Borland"
|
||||
/* __BORLANDC__ = 0xVRR */
|
||||
# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8)
|
||||
# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF)
|
||||
|
||||
#elif defined(__WATCOMC__) && __WATCOMC__ < 1200
|
||||
# define COMPILER_ID "Watcom"
|
||||
/* __WATCOMC__ = VVRR */
|
||||
# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100)
|
||||
# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
|
||||
# if (__WATCOMC__ % 10) > 0
|
||||
# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
|
||||
# endif
|
||||
|
||||
#elif defined(__WATCOMC__)
|
||||
# define COMPILER_ID "OpenWatcom"
|
||||
/* __WATCOMC__ = VVRP + 1100 */
|
||||
# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100)
|
||||
# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
|
||||
# if (__WATCOMC__ % 10) > 0
|
||||
# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
|
||||
# endif
|
||||
|
||||
#elif defined(__SUNPRO_CC)
|
||||
# define COMPILER_ID "SunPro"
|
||||
# if __SUNPRO_CC >= 0x5100
|
||||
/* __SUNPRO_CC = 0xVRRP */
|
||||
# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>12)
|
||||
# define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xFF)
|
||||
# define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF)
|
||||
# else
|
||||
/* __SUNPRO_CC = 0xVRP */
|
||||
# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>8)
|
||||
# define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xF)
|
||||
# define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF)
|
||||
# endif
|
||||
|
||||
#elif defined(__HP_aCC)
|
||||
# define COMPILER_ID "HP"
|
||||
/* __HP_aCC = VVRRPP */
|
||||
# define COMPILER_VERSION_MAJOR DEC(__HP_aCC/10000)
|
||||
# define COMPILER_VERSION_MINOR DEC(__HP_aCC/100 % 100)
|
||||
# define COMPILER_VERSION_PATCH DEC(__HP_aCC % 100)
|
||||
|
||||
#elif defined(__DECCXX)
|
||||
# define COMPILER_ID "Compaq"
|
||||
/* __DECCXX_VER = VVRRTPPPP */
|
||||
# define COMPILER_VERSION_MAJOR DEC(__DECCXX_VER/10000000)
|
||||
# define COMPILER_VERSION_MINOR DEC(__DECCXX_VER/100000 % 100)
|
||||
# define COMPILER_VERSION_PATCH DEC(__DECCXX_VER % 10000)
|
||||
|
||||
#elif defined(__IBMCPP__) && defined(__COMPILER_VER__)
|
||||
# define COMPILER_ID "zOS"
|
||||
/* __IBMCPP__ = VRP */
|
||||
# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
|
||||
# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
|
||||
# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)
|
||||
|
||||
#elif defined(__open_xl__) && defined(__clang__)
|
||||
# define COMPILER_ID "IBMClang"
|
||||
# define COMPILER_VERSION_MAJOR DEC(__open_xl_version__)
|
||||
# define COMPILER_VERSION_MINOR DEC(__open_xl_release__)
|
||||
# define COMPILER_VERSION_PATCH DEC(__open_xl_modification__)
|
||||
# define COMPILER_VERSION_TWEAK DEC(__open_xl_ptf_fix_level__)
|
||||
|
||||
|
||||
#elif defined(__ibmxl__) && defined(__clang__)
|
||||
# define COMPILER_ID "XLClang"
|
||||
# define COMPILER_VERSION_MAJOR DEC(__ibmxl_version__)
|
||||
# define COMPILER_VERSION_MINOR DEC(__ibmxl_release__)
|
||||
# define COMPILER_VERSION_PATCH DEC(__ibmxl_modification__)
|
||||
# define COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__)
|
||||
|
||||
|
||||
#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ >= 800
|
||||
# define COMPILER_ID "XL"
|
||||
/* __IBMCPP__ = VRP */
|
||||
# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
|
||||
# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
|
||||
# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)
|
||||
|
||||
#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ < 800
|
||||
# define COMPILER_ID "VisualAge"
|
||||
/* __IBMCPP__ = VRP */
|
||||
# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
|
||||
# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
|
||||
# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)
|
||||
|
||||
#elif defined(__NVCOMPILER)
|
||||
# define COMPILER_ID "NVHPC"
|
||||
# define COMPILER_VERSION_MAJOR DEC(__NVCOMPILER_MAJOR__)
|
||||
# define COMPILER_VERSION_MINOR DEC(__NVCOMPILER_MINOR__)
|
||||
# if defined(__NVCOMPILER_PATCHLEVEL__)
|
||||
# define COMPILER_VERSION_PATCH DEC(__NVCOMPILER_PATCHLEVEL__)
|
||||
# endif
|
||||
|
||||
#elif defined(__PGI)
|
||||
# define COMPILER_ID "PGI"
|
||||
# define COMPILER_VERSION_MAJOR DEC(__PGIC__)
|
||||
# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__)
|
||||
# if defined(__PGIC_PATCHLEVEL__)
|
||||
# define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__)
|
||||
# endif
|
||||
|
||||
#elif defined(__clang__) && defined(__cray__)
|
||||
# define COMPILER_ID "CrayClang"
|
||||
# define COMPILER_VERSION_MAJOR DEC(__cray_major__)
|
||||
# define COMPILER_VERSION_MINOR DEC(__cray_minor__)
|
||||
# define COMPILER_VERSION_PATCH DEC(__cray_patchlevel__)
|
||||
# define COMPILER_VERSION_INTERNAL_STR __clang_version__
|
||||
|
||||
|
||||
#elif defined(_CRAYC)
|
||||
# define COMPILER_ID "Cray"
|
||||
# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR)
|
||||
# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR)
|
||||
|
||||
#elif defined(__TI_COMPILER_VERSION__)
|
||||
# define COMPILER_ID "TI"
|
||||
/* __TI_COMPILER_VERSION__ = VVVRRRPPP */
|
||||
# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000)
|
||||
# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000)
|
||||
# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000)
|
||||
|
||||
#elif defined(__CLANG_FUJITSU)
|
||||
# define COMPILER_ID "FujitsuClang"
|
||||
# define COMPILER_VERSION_MAJOR DEC(__FCC_major__)
|
||||
# define COMPILER_VERSION_MINOR DEC(__FCC_minor__)
|
||||
# define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
|
||||
# define COMPILER_VERSION_INTERNAL_STR __clang_version__
|
||||
|
||||
|
||||
#elif defined(__FUJITSU)
|
||||
# define COMPILER_ID "Fujitsu"
|
||||
# if defined(__FCC_version__)
|
||||
# define COMPILER_VERSION __FCC_version__
|
||||
# elif defined(__FCC_major__)
|
||||
# define COMPILER_VERSION_MAJOR DEC(__FCC_major__)
|
||||
# define COMPILER_VERSION_MINOR DEC(__FCC_minor__)
|
||||
# define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
|
||||
# endif
|
||||
# if defined(__fcc_version)
|
||||
# define COMPILER_VERSION_INTERNAL DEC(__fcc_version)
|
||||
# elif defined(__FCC_VERSION)
|
||||
# define COMPILER_VERSION_INTERNAL DEC(__FCC_VERSION)
|
||||
# endif
|
||||
|
||||
|
||||
#elif defined(__ghs__)
|
||||
# define COMPILER_ID "GHS"
|
||||
/* __GHS_VERSION_NUMBER = VVVVRP */
|
||||
# ifdef __GHS_VERSION_NUMBER
|
||||
# define COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100)
|
||||
# define COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10)
|
||||
# define COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER % 10)
|
||||
# endif
|
||||
|
||||
#elif defined(__TASKING__)
|
||||
# define COMPILER_ID "Tasking"
|
||||
# define COMPILER_VERSION_MAJOR DEC(__VERSION__/1000)
|
||||
# define COMPILER_VERSION_MINOR DEC(__VERSION__ % 100)
|
||||
# define COMPILER_VERSION_INTERNAL DEC(__VERSION__)
|
||||
|
||||
#elif defined(__ORANGEC__)
|
||||
# define COMPILER_ID "OrangeC"
|
||||
# define COMPILER_VERSION_MAJOR DEC(__ORANGEC_MAJOR__)
|
||||
# define COMPILER_VERSION_MINOR DEC(__ORANGEC_MINOR__)
|
||||
# define COMPILER_VERSION_PATCH DEC(__ORANGEC_PATCHLEVEL__)
|
||||
|
||||
#elif defined(__SCO_VERSION__)
|
||||
# define COMPILER_ID "SCO"
|
||||
|
||||
#elif defined(__ARMCC_VERSION) && !defined(__clang__)
|
||||
# define COMPILER_ID "ARMCC"
|
||||
#if __ARMCC_VERSION >= 1000000
|
||||
/* __ARMCC_VERSION = VRRPPPP */
|
||||
# define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000)
|
||||
# define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100)
|
||||
# define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000)
|
||||
#else
|
||||
/* __ARMCC_VERSION = VRPPPP */
|
||||
# define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000)
|
||||
# define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10)
|
||||
# define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000)
|
||||
#endif
|
||||
|
||||
|
||||
#elif defined(__clang__) && defined(__apple_build_version__)
|
||||
# define COMPILER_ID "AppleClang"
|
||||
# if defined(_MSC_VER)
|
||||
# define SIMULATE_ID "MSVC"
|
||||
# endif
|
||||
# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
|
||||
# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
|
||||
# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
|
||||
# if defined(_MSC_VER)
|
||||
/* _MSC_VER = VVRR */
|
||||
# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
|
||||
# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
|
||||
# endif
|
||||
# define COMPILER_VERSION_TWEAK DEC(__apple_build_version__)
|
||||
|
||||
#elif defined(__clang__) && defined(__ARMCOMPILER_VERSION)
|
||||
# define COMPILER_ID "ARMClang"
|
||||
# define COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000)
|
||||
# define COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100)
|
||||
# define COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION/100 % 100)
|
||||
# define COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION)
|
||||
|
||||
#elif defined(__clang__) && defined(__ti__)
|
||||
# define COMPILER_ID "TIClang"
|
||||
# define COMPILER_VERSION_MAJOR DEC(__ti_major__)
|
||||
# define COMPILER_VERSION_MINOR DEC(__ti_minor__)
|
||||
# define COMPILER_VERSION_PATCH DEC(__ti_patchlevel__)
|
||||
# define COMPILER_VERSION_INTERNAL DEC(__ti_version__)
|
||||
|
||||
#elif defined(__clang__)
|
||||
# define COMPILER_ID "Clang"
|
||||
# if defined(_MSC_VER)
|
||||
# define SIMULATE_ID "MSVC"
|
||||
# endif
|
||||
# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
|
||||
# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
|
||||
# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
|
||||
# if defined(_MSC_VER)
|
||||
/* _MSC_VER = VVRR */
|
||||
# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
|
||||
# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
|
||||
# endif
|
||||
|
||||
#elif defined(__LCC__) && (defined(__GNUC__) || defined(__GNUG__) || defined(__MCST__))
|
||||
# define COMPILER_ID "LCC"
|
||||
# define COMPILER_VERSION_MAJOR DEC(__LCC__ / 100)
|
||||
# define COMPILER_VERSION_MINOR DEC(__LCC__ % 100)
|
||||
# if defined(__LCC_MINOR__)
|
||||
# define COMPILER_VERSION_PATCH DEC(__LCC_MINOR__)
|
||||
# endif
|
||||
# if defined(__GNUC__) && defined(__GNUC_MINOR__)
|
||||
# define SIMULATE_ID "GNU"
|
||||
# define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
|
||||
# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
|
||||
# if defined(__GNUC_PATCHLEVEL__)
|
||||
# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
|
||||
# endif
|
||||
# endif
|
||||
|
||||
#elif defined(__GNUC__) || defined(__GNUG__)
|
||||
# define COMPILER_ID "GNU"
|
||||
# if defined(__GNUC__)
|
||||
# define COMPILER_VERSION_MAJOR DEC(__GNUC__)
|
||||
# else
|
||||
# define COMPILER_VERSION_MAJOR DEC(__GNUG__)
|
||||
# endif
|
||||
# if defined(__GNUC_MINOR__)
|
||||
# define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__)
|
||||
# endif
|
||||
# if defined(__GNUC_PATCHLEVEL__)
|
||||
# define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
|
||||
# endif
|
||||
|
||||
#elif defined(_MSC_VER)
|
||||
# define COMPILER_ID "MSVC"
|
||||
/* _MSC_VER = VVRR */
|
||||
# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100)
|
||||
# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100)
|
||||
# if defined(_MSC_FULL_VER)
|
||||
# if _MSC_VER >= 1400
|
||||
/* _MSC_FULL_VER = VVRRPPPPP */
|
||||
# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000)
|
||||
# else
|
||||
/* _MSC_FULL_VER = VVRRPPPP */
|
||||
# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000)
|
||||
# endif
|
||||
# endif
|
||||
# if defined(_MSC_BUILD)
|
||||
# define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD)
|
||||
# endif
|
||||
|
||||
#elif defined(_ADI_COMPILER)
|
||||
# define COMPILER_ID "ADSP"
|
||||
#if defined(__VERSIONNUM__)
|
||||
/* __VERSIONNUM__ = 0xVVRRPPTT */
|
||||
# define COMPILER_VERSION_MAJOR DEC(__VERSIONNUM__ >> 24 & 0xFF)
|
||||
# define COMPILER_VERSION_MINOR DEC(__VERSIONNUM__ >> 16 & 0xFF)
|
||||
# define COMPILER_VERSION_PATCH DEC(__VERSIONNUM__ >> 8 & 0xFF)
|
||||
# define COMPILER_VERSION_TWEAK DEC(__VERSIONNUM__ & 0xFF)
|
||||
#endif
|
||||
|
||||
#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
|
||||
# define COMPILER_ID "IAR"
|
||||
# if defined(__VER__) && defined(__ICCARM__)
|
||||
# define COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000)
|
||||
# define COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000)
|
||||
# define COMPILER_VERSION_PATCH DEC((__VER__) % 1000)
|
||||
# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
|
||||
# elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__) || defined(__ICCV850__) || defined(__ICC8051__) || defined(__ICCSTM8__))
|
||||
# define COMPILER_VERSION_MAJOR DEC((__VER__) / 100)
|
||||
# define COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100))
|
||||
# define COMPILER_VERSION_PATCH DEC(__SUBVERSION__)
|
||||
# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
|
||||
# endif
|
||||
|
||||
|
||||
/* These compilers are either not known or too old to define an
|
||||
identification macro. Try to identify the platform and guess that
|
||||
it is the native compiler. */
|
||||
#elif defined(__hpux) || defined(__hpua)
|
||||
# define COMPILER_ID "HP"
|
||||
|
||||
#else /* unknown compiler */
|
||||
# define COMPILER_ID ""
|
||||
#endif
|
||||
|
||||
/* Construct the string literal in pieces to prevent the source from
|
||||
getting matched. Store it in a pointer rather than an array
|
||||
because some compilers will just produce instructions to fill the
|
||||
array rather than assigning a pointer to a static array. */
|
||||
char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]";
|
||||
#ifdef SIMULATE_ID
|
||||
char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]";
|
||||
#endif
|
||||
|
||||
#ifdef __QNXNTO__
|
||||
char const* qnxnto = "INFO" ":" "qnxnto[]";
|
||||
#endif
|
||||
|
||||
#if defined(__CRAYXT_COMPUTE_LINUX_TARGET)
|
||||
char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]";
|
||||
#endif
|
||||
|
||||
#define STRINGIFY_HELPER(X) #X
|
||||
#define STRINGIFY(X) STRINGIFY_HELPER(X)
|
||||
|
||||
/* Identify known platforms by name. */
|
||||
#if defined(__linux) || defined(__linux__) || defined(linux)
|
||||
# define PLATFORM_ID "Linux"
|
||||
|
||||
#elif defined(__MSYS__)
|
||||
# define PLATFORM_ID "MSYS"
|
||||
|
||||
#elif defined(__CYGWIN__)
|
||||
# define PLATFORM_ID "Cygwin"
|
||||
|
||||
#elif defined(__MINGW32__)
|
||||
# define PLATFORM_ID "MinGW"
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
# define PLATFORM_ID "Darwin"
|
||||
|
||||
#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
|
||||
# define PLATFORM_ID "Windows"
|
||||
|
||||
#elif defined(__FreeBSD__) || defined(__FreeBSD)
|
||||
# define PLATFORM_ID "FreeBSD"
|
||||
|
||||
#elif defined(__NetBSD__) || defined(__NetBSD)
|
||||
# define PLATFORM_ID "NetBSD"
|
||||
|
||||
#elif defined(__OpenBSD__) || defined(__OPENBSD)
|
||||
# define PLATFORM_ID "OpenBSD"
|
||||
|
||||
#elif defined(__sun) || defined(sun)
|
||||
# define PLATFORM_ID "SunOS"
|
||||
|
||||
#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__)
|
||||
# define PLATFORM_ID "AIX"
|
||||
|
||||
#elif defined(__hpux) || defined(__hpux__)
|
||||
# define PLATFORM_ID "HP-UX"
|
||||
|
||||
#elif defined(__HAIKU__)
|
||||
# define PLATFORM_ID "Haiku"
|
||||
|
||||
#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS)
|
||||
# define PLATFORM_ID "BeOS"
|
||||
|
||||
#elif defined(__QNX__) || defined(__QNXNTO__)
|
||||
# define PLATFORM_ID "QNX"
|
||||
|
||||
#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__)
|
||||
# define PLATFORM_ID "Tru64"
|
||||
|
||||
#elif defined(__riscos) || defined(__riscos__)
|
||||
# define PLATFORM_ID "RISCos"
|
||||
|
||||
#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__)
|
||||
# define PLATFORM_ID "SINIX"
|
||||
|
||||
#elif defined(__UNIX_SV__)
|
||||
# define PLATFORM_ID "UNIX_SV"
|
||||
|
||||
#elif defined(__bsdos__)
|
||||
# define PLATFORM_ID "BSDOS"
|
||||
|
||||
#elif defined(_MPRAS) || defined(MPRAS)
|
||||
# define PLATFORM_ID "MP-RAS"
|
||||
|
||||
#elif defined(__osf) || defined(__osf__)
|
||||
# define PLATFORM_ID "OSF1"
|
||||
|
||||
#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv)
|
||||
# define PLATFORM_ID "SCO_SV"
|
||||
|
||||
#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX)
|
||||
# define PLATFORM_ID "ULTRIX"
|
||||
|
||||
#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX)
|
||||
# define PLATFORM_ID "Xenix"
|
||||
|
||||
#elif defined(__WATCOMC__)
|
||||
# if defined(__LINUX__)
|
||||
# define PLATFORM_ID "Linux"
|
||||
|
||||
# elif defined(__DOS__)
|
||||
# define PLATFORM_ID "DOS"
|
||||
|
||||
# elif defined(__OS2__)
|
||||
# define PLATFORM_ID "OS2"
|
||||
|
||||
# elif defined(__WINDOWS__)
|
||||
# define PLATFORM_ID "Windows3x"
|
||||
|
||||
# elif defined(__VXWORKS__)
|
||||
# define PLATFORM_ID "VxWorks"
|
||||
|
||||
# else /* unknown platform */
|
||||
# define PLATFORM_ID
|
||||
# endif
|
||||
|
||||
#elif defined(__INTEGRITY)
|
||||
# if defined(INT_178B)
|
||||
# define PLATFORM_ID "Integrity178"
|
||||
|
||||
# else /* regular Integrity */
|
||||
# define PLATFORM_ID "Integrity"
|
||||
# endif
|
||||
|
||||
# elif defined(_ADI_COMPILER)
|
||||
# define PLATFORM_ID "ADSP"
|
||||
|
||||
#else /* unknown platform */
|
||||
# define PLATFORM_ID
|
||||
|
||||
#endif
|
||||
|
||||
/* For windows compilers MSVC and Intel we can determine
|
||||
the architecture of the compiler being used. This is because
|
||||
the compilers do not have flags that can change the architecture,
|
||||
but rather depend on which compiler is being used
|
||||
*/
|
||||
#if defined(_WIN32) && defined(_MSC_VER)
|
||||
# if defined(_M_IA64)
|
||||
# define ARCHITECTURE_ID "IA64"
|
||||
|
||||
# elif defined(_M_ARM64EC)
|
||||
# define ARCHITECTURE_ID "ARM64EC"
|
||||
|
||||
# elif defined(_M_X64) || defined(_M_AMD64)
|
||||
# define ARCHITECTURE_ID "x64"
|
||||
|
||||
# elif defined(_M_IX86)
|
||||
# define ARCHITECTURE_ID "X86"
|
||||
|
||||
# elif defined(_M_ARM64)
|
||||
# define ARCHITECTURE_ID "ARM64"
|
||||
|
||||
# elif defined(_M_ARM)
|
||||
# if _M_ARM == 4
|
||||
# define ARCHITECTURE_ID "ARMV4I"
|
||||
# elif _M_ARM == 5
|
||||
# define ARCHITECTURE_ID "ARMV5I"
|
||||
# else
|
||||
# define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM)
|
||||
# endif
|
||||
|
||||
# elif defined(_M_MIPS)
|
||||
# define ARCHITECTURE_ID "MIPS"
|
||||
|
||||
# elif defined(_M_SH)
|
||||
# define ARCHITECTURE_ID "SHx"
|
||||
|
||||
# else /* unknown architecture */
|
||||
# define ARCHITECTURE_ID ""
|
||||
# endif
|
||||
|
||||
#elif defined(__WATCOMC__)
|
||||
# if defined(_M_I86)
|
||||
# define ARCHITECTURE_ID "I86"
|
||||
|
||||
# elif defined(_M_IX86)
|
||||
# define ARCHITECTURE_ID "X86"
|
||||
|
||||
# else /* unknown architecture */
|
||||
# define ARCHITECTURE_ID ""
|
||||
# endif
|
||||
|
||||
#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
|
||||
# if defined(__ICCARM__)
|
||||
# define ARCHITECTURE_ID "ARM"
|
||||
|
||||
# elif defined(__ICCRX__)
|
||||
# define ARCHITECTURE_ID "RX"
|
||||
|
||||
# elif defined(__ICCRH850__)
|
||||
# define ARCHITECTURE_ID "RH850"
|
||||
|
||||
# elif defined(__ICCRL78__)
|
||||
# define ARCHITECTURE_ID "RL78"
|
||||
|
||||
# elif defined(__ICCRISCV__)
|
||||
# define ARCHITECTURE_ID "RISCV"
|
||||
|
||||
# elif defined(__ICCAVR__)
|
||||
# define ARCHITECTURE_ID "AVR"
|
||||
|
||||
# elif defined(__ICC430__)
|
||||
# define ARCHITECTURE_ID "MSP430"
|
||||
|
||||
# elif defined(__ICCV850__)
|
||||
# define ARCHITECTURE_ID "V850"
|
||||
|
||||
# elif defined(__ICC8051__)
|
||||
# define ARCHITECTURE_ID "8051"
|
||||
|
||||
# elif defined(__ICCSTM8__)
|
||||
# define ARCHITECTURE_ID "STM8"
|
||||
|
||||
# else /* unknown architecture */
|
||||
# define ARCHITECTURE_ID ""
|
||||
# endif
|
||||
|
||||
#elif defined(__ghs__)
|
||||
# if defined(__PPC64__)
|
||||
# define ARCHITECTURE_ID "PPC64"
|
||||
|
||||
# elif defined(__ppc__)
|
||||
# define ARCHITECTURE_ID "PPC"
|
||||
|
||||
# elif defined(__ARM__)
|
||||
# define ARCHITECTURE_ID "ARM"
|
||||
|
||||
# elif defined(__x86_64__)
|
||||
# define ARCHITECTURE_ID "x64"
|
||||
|
||||
# elif defined(__i386__)
|
||||
# define ARCHITECTURE_ID "X86"
|
||||
|
||||
# else /* unknown architecture */
|
||||
# define ARCHITECTURE_ID ""
|
||||
# endif
|
||||
|
||||
#elif defined(__clang__) && defined(__ti__)
|
||||
# if defined(__ARM_ARCH)
|
||||
# define ARCHITECTURE_ID "ARM"
|
||||
|
||||
# else /* unknown architecture */
|
||||
# define ARCHITECTURE_ID ""
|
||||
# endif
|
||||
|
||||
#elif defined(__TI_COMPILER_VERSION__)
|
||||
# if defined(__TI_ARM__)
|
||||
# define ARCHITECTURE_ID "ARM"
|
||||
|
||||
# elif defined(__MSP430__)
|
||||
# define ARCHITECTURE_ID "MSP430"
|
||||
|
||||
# elif defined(__TMS320C28XX__)
|
||||
# define ARCHITECTURE_ID "TMS320C28x"
|
||||
|
||||
# elif defined(__TMS320C6X__) || defined(_TMS320C6X)
|
||||
# define ARCHITECTURE_ID "TMS320C6x"
|
||||
|
||||
# else /* unknown architecture */
|
||||
# define ARCHITECTURE_ID ""
|
||||
# endif
|
||||
|
||||
# elif defined(__ADSPSHARC__)
|
||||
# define ARCHITECTURE_ID "SHARC"
|
||||
|
||||
# elif defined(__ADSPBLACKFIN__)
|
||||
# define ARCHITECTURE_ID "Blackfin"
|
||||
|
||||
#elif defined(__TASKING__)
|
||||
|
||||
# if defined(__CTC__) || defined(__CPTC__)
|
||||
# define ARCHITECTURE_ID "TriCore"
|
||||
|
||||
# elif defined(__CMCS__)
|
||||
# define ARCHITECTURE_ID "MCS"
|
||||
|
||||
# elif defined(__CARM__)
|
||||
# define ARCHITECTURE_ID "ARM"
|
||||
|
||||
# elif defined(__CARC__)
|
||||
# define ARCHITECTURE_ID "ARC"
|
||||
|
||||
# elif defined(__C51__)
|
||||
# define ARCHITECTURE_ID "8051"
|
||||
|
||||
# elif defined(__CPCP__)
|
||||
# define ARCHITECTURE_ID "PCP"
|
||||
|
||||
# else
|
||||
# define ARCHITECTURE_ID ""
|
||||
# endif
|
||||
|
||||
#else
|
||||
# define ARCHITECTURE_ID
|
||||
#endif
|
||||
|
||||
/* Convert integer to decimal digit literals.

   Expands to exactly eight comma-separated character expressions, one per
   decimal place from the ten-millions digit down to the units digit, so the
   result is zero-padded and most-significant-digit first.  The expansion is
   a bare comma list: it is meant to be dropped into a char array
   initializer (see the info_version[] style arrays in this file).  Only
   eight places are produced, so digits above the ten-millions place of n
   are not represented. */
|
||||
#define DEC(n) \
|
||||
('0' + (((n) / 10000000)%10)), \
|
||||
('0' + (((n) / 1000000)%10)), \
|
||||
('0' + (((n) / 100000)%10)), \
|
||||
('0' + (((n) / 10000)%10)), \
|
||||
('0' + (((n) / 1000)%10)), \
|
||||
('0' + (((n) / 100)%10)), \
|
||||
('0' + (((n) / 10)%10)), \
|
||||
('0' + ((n) % 10))
|
||||
|
||||
/* Convert integer to hex digit literals.

   Expands to exactly eight comma-separated character expressions, one per
   4-bit nibble, starting with bits 28..31 and ending with bits 0..3.  Like
   DEC, the expansion is a bare comma list for use in a char array
   initializer.  Each nibble is simply added to '0', so a nibble value of
   10..15 yields whatever characters follow '9' in the execution character
   set, not 'A'..'F'.
   NOTE(review): the version macros passed in elsewhere in this file look
   BCD-style (e.g. __BORLANDC__ = 0xVRR), which would keep every nibble at
   9 or below -- confirm before relying on this for true hex values. */
|
||||
#define HEX(n) \
|
||||
('0' + ((n)>>28 & 0xF)), \
|
||||
('0' + ((n)>>24 & 0xF)), \
|
||||
('0' + ((n)>>20 & 0xF)), \
|
||||
('0' + ((n)>>16 & 0xF)), \
|
||||
('0' + ((n)>>12 & 0xF)), \
|
||||
('0' + ((n)>>8 & 0xF)), \
|
||||
('0' + ((n)>>4 & 0xF)), \
|
||||
('0' + ((n) & 0xF))
|
||||
|
||||
/* Construct a string literal encoding the version number. */
|
||||
#ifdef COMPILER_VERSION
|
||||
char const* info_version = "INFO" ":" "compiler_version[" COMPILER_VERSION "]";
|
||||
|
||||
/* Construct a string literal encoding the version number components. */
|
||||
#elif defined(COMPILER_VERSION_MAJOR)
|
||||
char const info_version[] = {
|
||||
'I', 'N', 'F', 'O', ':',
|
||||
'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[',
|
||||
COMPILER_VERSION_MAJOR,
|
||||
# ifdef COMPILER_VERSION_MINOR
|
||||
'.', COMPILER_VERSION_MINOR,
|
||||
# ifdef COMPILER_VERSION_PATCH
|
||||
'.', COMPILER_VERSION_PATCH,
|
||||
# ifdef COMPILER_VERSION_TWEAK
|
||||
'.', COMPILER_VERSION_TWEAK,
|
||||
# endif
|
||||
# endif
|
||||
# endif
|
||||
']','\0'};
|
||||
#endif
|
||||
|
||||
/* Construct a string literal encoding the internal version number. */
|
||||
#ifdef COMPILER_VERSION_INTERNAL
|
||||
char const info_version_internal[] = {
|
||||
'I', 'N', 'F', 'O', ':',
|
||||
'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_',
|
||||
'i','n','t','e','r','n','a','l','[',
|
||||
COMPILER_VERSION_INTERNAL,']','\0'};
|
||||
#elif defined(COMPILER_VERSION_INTERNAL_STR)
|
||||
char const* info_version_internal = "INFO" ":" "compiler_version_internal[" COMPILER_VERSION_INTERNAL_STR "]";
|
||||
#endif
|
||||
|
||||
/* Construct a string literal encoding the version number components. */
|
||||
#ifdef SIMULATE_VERSION_MAJOR
|
||||
char const info_simulate_version[] = {
|
||||
'I', 'N', 'F', 'O', ':',
|
||||
's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[',
|
||||
SIMULATE_VERSION_MAJOR,
|
||||
# ifdef SIMULATE_VERSION_MINOR
|
||||
'.', SIMULATE_VERSION_MINOR,
|
||||
# ifdef SIMULATE_VERSION_PATCH
|
||||
'.', SIMULATE_VERSION_PATCH,
|
||||
# ifdef SIMULATE_VERSION_TWEAK
|
||||
'.', SIMULATE_VERSION_TWEAK,
|
||||
# endif
|
||||
# endif
|
||||
# endif
|
||||
']','\0'};
|
||||
#endif
|
||||
|
||||
/* Construct the string literal in pieces to prevent the source from
|
||||
getting matched. Store it in a pointer rather than an array
|
||||
because some compilers will just produce instructions to fill the
|
||||
array rather than assigning a pointer to a static array. */
|
||||
char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]";
|
||||
char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]";
|
||||
|
||||
|
||||
|
||||
/* Language-version thresholds, one per C++ standard revision.  They are
   compared against __cplusplus and _MSVC_LANG below to choose CXX_STD and,
   from it, the reported default standard. */
#define CXX_STD_98 199711L
|
||||
#define CXX_STD_11 201103L
|
||||
#define CXX_STD_14 201402L
|
||||
#define CXX_STD_17 201703L
|
||||
#define CXX_STD_20 202002L
|
||||
#define CXX_STD_23 202302L
|
||||
|
||||
#if defined(__INTEL_COMPILER) && defined(_MSVC_LANG)
|
||||
# if _MSVC_LANG > CXX_STD_17
|
||||
# define CXX_STD _MSVC_LANG
|
||||
# elif _MSVC_LANG == CXX_STD_17 && defined(__cpp_aggregate_paren_init)
|
||||
# define CXX_STD CXX_STD_20
|
||||
# elif _MSVC_LANG > CXX_STD_14 && __cplusplus > CXX_STD_17
|
||||
# define CXX_STD CXX_STD_20
|
||||
# elif _MSVC_LANG > CXX_STD_14
|
||||
# define CXX_STD CXX_STD_17
|
||||
# elif defined(__INTEL_CXX11_MODE__) && defined(__cpp_aggregate_nsdmi)
|
||||
# define CXX_STD CXX_STD_14
|
||||
# elif defined(__INTEL_CXX11_MODE__)
|
||||
# define CXX_STD CXX_STD_11
|
||||
# else
|
||||
# define CXX_STD CXX_STD_98
|
||||
# endif
|
||||
#elif defined(_MSC_VER) && defined(_MSVC_LANG)
|
||||
# if _MSVC_LANG > __cplusplus
|
||||
# define CXX_STD _MSVC_LANG
|
||||
# else
|
||||
# define CXX_STD __cplusplus
|
||||
# endif
|
||||
#elif defined(__NVCOMPILER)
|
||||
# if __cplusplus == CXX_STD_17 && defined(__cpp_aggregate_paren_init)
|
||||
# define CXX_STD CXX_STD_20
|
||||
# else
|
||||
# define CXX_STD __cplusplus
|
||||
# endif
|
||||
#elif defined(__INTEL_COMPILER) || defined(__PGI)
|
||||
# if __cplusplus == CXX_STD_11 && defined(__cpp_namespace_attributes)
|
||||
# define CXX_STD CXX_STD_17
|
||||
# elif __cplusplus == CXX_STD_11 && defined(__cpp_aggregate_nsdmi)
|
||||
# define CXX_STD CXX_STD_14
|
||||
# else
|
||||
# define CXX_STD __cplusplus
|
||||
# endif
|
||||
#elif (defined(__IBMCPP__) || defined(__ibmxl__)) && defined(__linux__)
|
||||
# if __cplusplus == CXX_STD_11 && defined(__cpp_aggregate_nsdmi)
|
||||
# define CXX_STD CXX_STD_14
|
||||
# else
|
||||
# define CXX_STD __cplusplus
|
||||
# endif
|
||||
#elif __cplusplus == 1 && defined(__GXX_EXPERIMENTAL_CXX0X__)
|
||||
# define CXX_STD CXX_STD_11
|
||||
#else
|
||||
# define CXX_STD __cplusplus
|
||||
#endif
|
||||
|
||||
/* INFO string naming the compiler's default C++ standard as a two-digit
   tag.  The literal is assembled from adjacent string pieces; the middle
   piece is chosen by comparing CXX_STD against the CXX_STD_xx thresholds
   from newest to oldest.  A value strictly greater than a known threshold
   is reported as the next standard up ("26" for anything above CXX_STD_23),
   a value equal to or just above CXX_STD_11 is "11", and everything below
   CXX_STD_11 falls through to "98". */
const char* info_language_standard_default = "INFO" ":" "standard_default["
|
||||
#if CXX_STD > CXX_STD_23
|
||||
"26"
|
||||
#elif CXX_STD > CXX_STD_20
|
||||
"23"
|
||||
#elif CXX_STD > CXX_STD_17
|
||||
"20"
|
||||
#elif CXX_STD > CXX_STD_14
|
||||
"17"
|
||||
#elif CXX_STD > CXX_STD_11
|
||||
"14"
|
||||
#elif CXX_STD >= CXX_STD_11
|
||||
"11"
|
||||
#else
|
||||
"98"
|
||||
#endif
|
||||
"]";
|
||||
|
||||
/* INFO string recording whether language extensions are on by default.
   "ON" is emitted only when one of __clang__, __GNUC__, __xlC__ or
   __TI_COMPILER_VERSION__ is defined and __STRICT_ANSI__ is not; every
   other combination emits "OFF". */
const char* info_language_extensions_default = "INFO" ":" "extensions_default["
|
||||
#if (defined(__clang__) || defined(__GNUC__) || defined(__xlC__) || \
|
||||
defined(__TI_COMPILER_VERSION__)) && \
|
||||
!defined(__STRICT_ANSI__)
|
||||
"ON"
|
||||
#else
|
||||
"OFF"
|
||||
#endif
|
||||
"]";
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int require = 0;
|
||||
require += info_compiler[argc];
|
||||
require += info_platform[argc];
|
||||
require += info_arch[argc];
|
||||
#ifdef COMPILER_VERSION_MAJOR
|
||||
require += info_version[argc];
|
||||
#endif
|
||||
#ifdef COMPILER_VERSION_INTERNAL
|
||||
require += info_version_internal[argc];
|
||||
#endif
|
||||
#ifdef SIMULATE_ID
|
||||
require += info_simulate[argc];
|
||||
#endif
|
||||
#ifdef SIMULATE_VERSION_MAJOR
|
||||
require += info_simulate_version[argc];
|
||||
#endif
|
||||
#if defined(__CRAYXT_COMPUTE_LINUX_TARGET)
|
||||
require += info_cray[argc];
|
||||
#endif
|
||||
require += info_language_standard_default[argc];
|
||||
require += info_language_extensions_default[argc];
|
||||
(void)argv;
|
||||
return require;
|
||||
}
|
Binary file not shown.
@@ -0,0 +1,926 @@
|
||||
/* Stop the build with an explicit error when neither __HIP__ nor __NVCC__
   is defined, i.e. when this HIP probe is being compiled by a toolchain
   that identifies itself as neither HIP nor NVCC. */
#if !defined(__HIP__) && !defined(__NVCC__)
|
||||
# error "A C or C++ compiler has been selected for HIP"
|
||||
#endif
|
||||
|
||||
|
||||
/* Version number components: V=Version, R=Revision, P=Patch
|
||||
Version date components: YYYY=Year, MM=Month, DD=Day */
|
||||
|
||||
#if defined(__NVCC__)
|
||||
# define COMPILER_ID "NVIDIA"
|
||||
# if defined(_MSC_VER)
|
||||
# define SIMULATE_ID "MSVC"
|
||||
# elif defined(__clang__)
|
||||
# define SIMULATE_ID "Clang"
|
||||
# elif defined(__GNUC__)
|
||||
# define SIMULATE_ID "GNU"
|
||||
# endif
|
||||
# if defined(__CUDACC_VER_MAJOR__)
|
||||
# define COMPILER_VERSION_MAJOR DEC(__CUDACC_VER_MAJOR__)
|
||||
# define COMPILER_VERSION_MINOR DEC(__CUDACC_VER_MINOR__)
|
||||
# define COMPILER_VERSION_PATCH DEC(__CUDACC_VER_BUILD__)
|
||||
# endif
|
||||
# if defined(_MSC_VER)
|
||||
/* _MSC_VER = VVRR */
|
||||
# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
|
||||
# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
|
||||
# elif defined(__clang__)
|
||||
# define SIMULATE_VERSION_MAJOR DEC(__clang_major__)
|
||||
# define SIMULATE_VERSION_MINOR DEC(__clang_minor__)
|
||||
# elif defined(__GNUC__)
|
||||
# define SIMULATE_VERSION_MAJOR DEC(__GNUC__)
|
||||
# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
|
||||
# endif
|
||||
|
||||
#elif defined(__clang__)
|
||||
# define COMPILER_ID "Clang"
|
||||
# if defined(_MSC_VER)
|
||||
# define SIMULATE_ID "MSVC"
|
||||
# endif
|
||||
# define COMPILER_VERSION_MAJOR DEC(__clang_major__)
|
||||
# define COMPILER_VERSION_MINOR DEC(__clang_minor__)
|
||||
# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
|
||||
# if defined(_MSC_VER)
|
||||
/* _MSC_VER = VVRR */
|
||||
# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
|
||||
# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
|
||||
# endif
|
||||
|
||||
|
||||
/* These compilers are either not known or too old to define an
|
||||
identification macro. Try to identify the platform and guess that
|
||||
it is the native compiler. */
|
||||
#elif defined(__hpux) || defined(__hpua)
|
||||
# define COMPILER_ID "HP"
|
||||
|
||||
#else /* unknown compiler */
|
||||
# define COMPILER_ID ""
|
||||
#endif
|
||||
|
||||
/* Detect host compiler used by NVCC. */
|
||||
#ifdef __NVCC__
|
||||
|
||||
/* Version number components: V=Version, R=Revision, P=Patch
|
||||
Version date components: YYYY=Year, MM=Month, DD=Day */
|
||||
|
||||
#if defined(__INTEL_COMPILER) || defined(__ICC)
|
||||
# define HOST_COMPILER_ID "Intel"
|
||||
# if defined(_MSC_VER)
|
||||
# define HOST_SIMULATE_ID "MSVC"
|
||||
# endif
|
||||
# if defined(__GNUC__)
|
||||
# define HOST_SIMULATE_ID "GNU"
|
||||
# endif
|
||||
/* __INTEL_COMPILER = VRP prior to 2021, and then VVVV for 2021 and later,
|
||||
except that a few beta releases use the old format with V=2021. */
|
||||
# if __INTEL_COMPILER < 2021 || __INTEL_COMPILER == 202110 || __INTEL_COMPILER == 202111
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10)
|
||||
# if defined(__INTEL_COMPILER_UPDATE)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE)
|
||||
# else
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10)
|
||||
# endif
|
||||
# else
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER_UPDATE)
|
||||
/* The third version component from --version is an update index,
|
||||
but no macro is provided for it. */
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(0)
|
||||
# endif
|
||||
# if defined(__INTEL_COMPILER_BUILD_DATE)
|
||||
/* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */
|
||||
# define HOST_COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE)
|
||||
# endif
|
||||
# if defined(_MSC_VER)
|
||||
/* _MSC_VER = VVRR */
|
||||
# define HOST_SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
|
||||
# define HOST_SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
|
||||
# endif
|
||||
# if defined(__GNUC__)
|
||||
# define HOST_SIMULATE_VERSION_MAJOR DEC(__GNUC__)
|
||||
# elif defined(__GNUG__)
|
||||
# define HOST_SIMULATE_VERSION_MAJOR DEC(__GNUG__)
|
||||
# endif
|
||||
# if defined(__GNUC_MINOR__)
|
||||
# define HOST_SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
|
||||
# endif
|
||||
# if defined(__GNUC_PATCHLEVEL__)
|
||||
# define HOST_SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
|
||||
# endif
|
||||
|
||||
#elif (defined(__clang__) && defined(__INTEL_CLANG_COMPILER)) || defined(__INTEL_LLVM_COMPILER)
|
||||
# define HOST_COMPILER_ID "IntelLLVM"
|
||||
#if defined(_MSC_VER)
|
||||
# define HOST_SIMULATE_ID "MSVC"
|
||||
#endif
|
||||
#if defined(__GNUC__)
|
||||
# define HOST_SIMULATE_ID "GNU"
|
||||
#endif
|
||||
/* __INTEL_LLVM_COMPILER = VVVVRP prior to 2021.2.0, VVVVRRPP for 2021.2.0 and
|
||||
* later. Look for 6 digit vs. 8 digit version number to decide encoding.
|
||||
* VVVV is no smaller than the current year when a version is released.
|
||||
*/
|
||||
#if __INTEL_LLVM_COMPILER < 1000000L
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/100)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/10 % 10)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 10)
|
||||
#else
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/10000)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/100 % 100)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 100)
|
||||
#endif
|
||||
#if defined(_MSC_VER)
|
||||
/* _MSC_VER = VVRR */
|
||||
# define HOST_SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
|
||||
# define HOST_SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
|
||||
#endif
|
||||
#if defined(__GNUC__)
|
||||
# define HOST_SIMULATE_VERSION_MAJOR DEC(__GNUC__)
|
||||
#elif defined(__GNUG__)
|
||||
# define HOST_SIMULATE_VERSION_MAJOR DEC(__GNUG__)
|
||||
#endif
|
||||
#if defined(__GNUC_MINOR__)
|
||||
# define HOST_SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
|
||||
#endif
|
||||
#if defined(__GNUC_PATCHLEVEL__)
|
||||
# define HOST_SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
|
||||
#endif
|
||||
|
||||
#elif defined(__PATHCC__)
|
||||
# define HOST_COMPILER_ID "PathScale"
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__PATHCC__)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__)
|
||||
# if defined(__PATHCC_PATCHLEVEL__)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__)
|
||||
# endif
|
||||
|
||||
#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__)
|
||||
# define HOST_COMPILER_ID "Embarcadero"
|
||||
# define HOST_COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF)
|
||||
# define HOST_COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF)
|
||||
|
||||
#elif defined(__BORLANDC__)
|
||||
# define HOST_COMPILER_ID "Borland"
|
||||
/* __BORLANDC__ = 0xVRR */
|
||||
# define HOST_COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8)
|
||||
# define HOST_COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF)
|
||||
|
||||
#elif defined(__WATCOMC__) && __WATCOMC__ < 1200
|
||||
# define HOST_COMPILER_ID "Watcom"
|
||||
/* __WATCOMC__ = VVRR */
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
|
||||
# if (__WATCOMC__ % 10) > 0
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
|
||||
# endif
|
||||
|
||||
#elif defined(__WATCOMC__)
|
||||
# define HOST_COMPILER_ID "OpenWatcom"
|
||||
/* __WATCOMC__ = VVRP + 1100 */
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10)
|
||||
# if (__WATCOMC__ % 10) > 0
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10)
|
||||
# endif
|
||||
|
||||
#elif defined(__SUNPRO_CC)
|
||||
# define HOST_COMPILER_ID "SunPro"
|
||||
# if __SUNPRO_CC >= 0x5100
|
||||
/* __SUNPRO_CC = 0xVRRP */
|
||||
# define HOST_COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>12)
|
||||
# define HOST_COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xFF)
|
||||
# define HOST_COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF)
|
||||
# else
|
||||
/* __SUNPRO_CC = 0xVRP */
|
||||
# define HOST_COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>8)
|
||||
# define HOST_COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xF)
|
||||
# define HOST_COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF)
|
||||
# endif
|
||||
|
||||
#elif defined(__HP_aCC)
|
||||
# define HOST_COMPILER_ID "HP"
|
||||
/* __HP_aCC = VVRRPP */
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__HP_aCC/10000)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__HP_aCC/100 % 100)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__HP_aCC % 100)
|
||||
|
||||
#elif defined(__DECCXX)
|
||||
# define HOST_COMPILER_ID "Compaq"
|
||||
/* __DECCXX_VER = VVRRTPPPP */
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__DECCXX_VER/10000000)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__DECCXX_VER/100000 % 100)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__DECCXX_VER % 10000)
|
||||
|
||||
#elif defined(__IBMCPP__) && defined(__COMPILER_VER__)
|
||||
# define HOST_COMPILER_ID "zOS"
|
||||
/* __IBMCPP__ = VRP */
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)
|
||||
|
||||
#elif defined(__open_xl__) && defined(__clang__)
|
||||
# define HOST_COMPILER_ID "IBMClang"
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__open_xl_version__)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__open_xl_release__)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__open_xl_modification__)
|
||||
# define HOST_COMPILER_VERSION_TWEAK DEC(__open_xl_ptf_fix_level__)
|
||||
|
||||
|
||||
#elif defined(__ibmxl__) && defined(__clang__)
|
||||
# define HOST_COMPILER_ID "XLClang"
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__ibmxl_version__)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__ibmxl_release__)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__ibmxl_modification__)
|
||||
# define HOST_COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__)
|
||||
|
||||
|
||||
#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ >= 800
|
||||
# define HOST_COMPILER_ID "XL"
|
||||
/* __IBMCPP__ = VRP */
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)
|
||||
|
||||
#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ < 800
|
||||
# define HOST_COMPILER_ID "VisualAge"
|
||||
/* __IBMCPP__ = VRP */
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10)
|
||||
|
||||
#elif defined(__NVCOMPILER)
|
||||
# define HOST_COMPILER_ID "NVHPC"
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__NVCOMPILER_MAJOR__)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__NVCOMPILER_MINOR__)
|
||||
# if defined(__NVCOMPILER_PATCHLEVEL__)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__NVCOMPILER_PATCHLEVEL__)
|
||||
# endif
|
||||
|
||||
#elif defined(__PGI)
|
||||
# define HOST_COMPILER_ID "PGI"
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__PGIC__)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__)
|
||||
# if defined(__PGIC_PATCHLEVEL__)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__)
|
||||
# endif
|
||||
|
||||
#elif defined(__clang__) && defined(__cray__)
|
||||
# define HOST_COMPILER_ID "CrayClang"
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__cray_major__)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__cray_minor__)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__cray_patchlevel__)
|
||||
# define HOST_COMPILER_VERSION_INTERNAL_STR __clang_version__
|
||||
|
||||
|
||||
#elif defined(_CRAYC)
|
||||
# define HOST_COMPILER_ID "Cray"
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR)
|
||||
|
||||
#elif defined(__TI_COMPILER_VERSION__)
|
||||
# define HOST_COMPILER_ID "TI"
|
||||
/* __TI_COMPILER_VERSION__ = VVVRRRPPP */
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000)
|
||||
|
||||
#elif defined(__CLANG_FUJITSU)
|
||||
# define HOST_COMPILER_ID "FujitsuClang"
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__FCC_major__)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__FCC_minor__)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
|
||||
# define HOST_COMPILER_VERSION_INTERNAL_STR __clang_version__
|
||||
|
||||
|
||||
#elif defined(__FUJITSU)
|
||||
# define HOST_COMPILER_ID "Fujitsu"
|
||||
# if defined(__FCC_version__)
|
||||
# define HOST_COMPILER_VERSION __FCC_version__
|
||||
# elif defined(__FCC_major__)
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__FCC_major__)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__FCC_minor__)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__)
|
||||
# endif
|
||||
# if defined(__fcc_version)
|
||||
# define HOST_COMPILER_VERSION_INTERNAL DEC(__fcc_version)
|
||||
# elif defined(__FCC_VERSION)
|
||||
# define HOST_COMPILER_VERSION_INTERNAL DEC(__FCC_VERSION)
|
||||
# endif
|
||||
|
||||
|
||||
#elif defined(__ghs__)
|
||||
# define HOST_COMPILER_ID "GHS"
|
||||
/* __GHS_VERSION_NUMBER = VVVVRP */
|
||||
# ifdef __GHS_VERSION_NUMBER
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER % 10)
|
||||
# endif
|
||||
|
||||
#elif defined(__TASKING__)
|
||||
# define HOST_COMPILER_ID "Tasking"
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__VERSION__/1000)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__VERSION__ % 100)
|
||||
# define HOST_COMPILER_VERSION_INTERNAL DEC(__VERSION__)
|
||||
|
||||
#elif defined(__ORANGEC__)
|
||||
# define HOST_COMPILER_ID "OrangeC"
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__ORANGEC_MAJOR__)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__ORANGEC_MINOR__)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__ORANGEC_PATCHLEVEL__)
|
||||
|
||||
#elif defined(__SCO_VERSION__)
|
||||
# define HOST_COMPILER_ID "SCO"
|
||||
|
||||
#elif defined(__ARMCC_VERSION) && !defined(__clang__)
|
||||
# define HOST_COMPILER_ID "ARMCC"
|
||||
#if __ARMCC_VERSION >= 1000000
|
||||
/* __ARMCC_VERSION = VRRPPPP */
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000)
|
||||
#else
|
||||
/* __ARMCC_VERSION = VRPPPP */
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000)
|
||||
#endif
|
||||
|
||||
|
||||
#elif defined(__clang__) && defined(__apple_build_version__)
|
||||
# define HOST_COMPILER_ID "AppleClang"
|
||||
# if defined(_MSC_VER)
|
||||
# define HOST_SIMULATE_ID "MSVC"
|
||||
# endif
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__clang_major__)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__clang_minor__)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
|
||||
# if defined(_MSC_VER)
|
||||
/* _MSC_VER = VVRR */
|
||||
# define HOST_SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
|
||||
# define HOST_SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
|
||||
# endif
|
||||
# define HOST_COMPILER_VERSION_TWEAK DEC(__apple_build_version__)
|
||||
|
||||
#elif defined(__clang__) && defined(__ARMCOMPILER_VERSION)
|
||||
# define HOST_COMPILER_ID "ARMClang"
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION/100 % 100)
|
||||
# define HOST_COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION)
|
||||
|
||||
#elif defined(__clang__) && defined(__ti__)
|
||||
# define HOST_COMPILER_ID "TIClang"
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__ti_major__)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__ti_minor__)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__ti_patchlevel__)
|
||||
# define HOST_COMPILER_VERSION_INTERNAL DEC(__ti_version__)
|
||||
|
||||
#elif defined(__clang__)
|
||||
# define HOST_COMPILER_ID "Clang"
|
||||
# if defined(_MSC_VER)
|
||||
# define HOST_SIMULATE_ID "MSVC"
|
||||
# endif
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__clang_major__)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__clang_minor__)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__clang_patchlevel__)
|
||||
# if defined(_MSC_VER)
|
||||
/* _MSC_VER = VVRR */
|
||||
# define HOST_SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100)
|
||||
# define HOST_SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100)
|
||||
# endif
|
||||
|
||||
#elif defined(__LCC__) && (defined(__GNUC__) || defined(__GNUG__) || defined(__MCST__))
|
||||
# define HOST_COMPILER_ID "LCC"
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__LCC__ / 100)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__LCC__ % 100)
|
||||
# if defined(__LCC_MINOR__)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__LCC_MINOR__)
|
||||
# endif
|
||||
# if defined(__GNUC__) && defined(__GNUC_MINOR__)
|
||||
# define HOST_SIMULATE_ID "GNU"
|
||||
# define HOST_SIMULATE_VERSION_MAJOR DEC(__GNUC__)
|
||||
# define HOST_SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__)
|
||||
# if defined(__GNUC_PATCHLEVEL__)
|
||||
# define HOST_SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
|
||||
# endif
|
||||
# endif
|
||||
|
||||
#elif defined(__GNUC__) || defined(__GNUG__)
|
||||
# define HOST_COMPILER_ID "GNU"
|
||||
# if defined(__GNUC__)
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__GNUC__)
|
||||
# else
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__GNUG__)
|
||||
# endif
|
||||
# if defined(__GNUC_MINOR__)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__)
|
||||
# endif
|
||||
# if defined(__GNUC_PATCHLEVEL__)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__)
|
||||
# endif
|
||||
|
||||
#elif defined(_MSC_VER)
|
||||
# define HOST_COMPILER_ID "MSVC"
|
||||
/* _MSC_VER = VVRR */
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(_MSC_VER % 100)
|
||||
# if defined(_MSC_FULL_VER)
|
||||
# if _MSC_VER >= 1400
|
||||
/* _MSC_FULL_VER = VVRRPPPPP */
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000)
|
||||
# else
|
||||
/* _MSC_FULL_VER = VVRRPPPP */
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000)
|
||||
# endif
|
||||
# endif
|
||||
# if defined(_MSC_BUILD)
|
||||
# define HOST_COMPILER_VERSION_TWEAK DEC(_MSC_BUILD)
|
||||
# endif
|
||||
|
||||
#elif defined(_ADI_COMPILER)
|
||||
# define HOST_COMPILER_ID "ADSP"
|
||||
#if defined(__VERSIONNUM__)
|
||||
/* __VERSIONNUM__ = 0xVVRRPPTT */
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC(__VERSIONNUM__ >> 24 & 0xFF)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(__VERSIONNUM__ >> 16 & 0xFF)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__VERSIONNUM__ >> 8 & 0xFF)
|
||||
# define HOST_COMPILER_VERSION_TWEAK DEC(__VERSIONNUM__ & 0xFF)
|
||||
#endif
|
||||
|
||||
#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
|
||||
# define HOST_COMPILER_ID "IAR"
|
||||
# if defined(__VER__) && defined(__ICCARM__)
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000)
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC((__VER__) % 1000)
|
||||
# define HOST_COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
|
||||
# elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__) || defined(__ICCV850__) || defined(__ICC8051__) || defined(__ICCSTM8__))
|
||||
# define HOST_COMPILER_VERSION_MAJOR DEC((__VER__) / 100)
|
||||
# define HOST_COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100))
|
||||
# define HOST_COMPILER_VERSION_PATCH DEC(__SUBVERSION__)
|
||||
# define HOST_COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__)
|
||||
# endif
|
||||
|
||||
|
||||
#endif
|
||||
#endif /* __NVCC__ */
|
||||
|
||||
|
||||
/* Construct the string literal in pieces to prevent the source from
|
||||
getting matched. Store it in a pointer rather than an array
|
||||
because some compilers will just produce instructions to fill the
|
||||
array rather than assigning a pointer to a static array. */
|
||||
char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]";
|
||||
#ifdef SIMULATE_ID
|
||||
char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]";
|
||||
#endif
|
||||
|
||||
/* Two-level stringification: STRINGIFY macro-expands its argument first and
   then hands the result to STRINGIFY_HELPER, which applies the # operator. */
#define STRINGIFY_HELPER(token) #token
#define STRINGIFY(token) STRINGIFY_HELPER(token)
|
||||
|
||||
/* Map well-known predefined macros to a platform name.  The tests are tried
   in order and the first match wins; when nothing matches PLATFORM_ID is
   defined empty so that it still concatenates into "platform[]". */
#if defined(__linux) || defined(__linux__) || defined(linux)
#  define PLATFORM_ID "Linux"

#elif defined(__MSYS__)
#  define PLATFORM_ID "MSYS"

#elif defined(__CYGWIN__)
#  define PLATFORM_ID "Cygwin"

#elif defined(__MINGW32__)
#  define PLATFORM_ID "MinGW"

#elif defined(__APPLE__)
#  define PLATFORM_ID "Darwin"

#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
#  define PLATFORM_ID "Windows"

#elif defined(__FreeBSD__) || defined(__FreeBSD)
#  define PLATFORM_ID "FreeBSD"

#elif defined(__NetBSD__) || defined(__NetBSD)
#  define PLATFORM_ID "NetBSD"

#elif defined(__OpenBSD__) || defined(__OPENBSD)
#  define PLATFORM_ID "OpenBSD"

#elif defined(__sun) || defined(sun)
#  define PLATFORM_ID "SunOS"

#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__)
#  define PLATFORM_ID "AIX"

#elif defined(__hpux) || defined(__hpux__)
#  define PLATFORM_ID "HP-UX"

#elif defined(__HAIKU__)
#  define PLATFORM_ID "Haiku"

#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS)
#  define PLATFORM_ID "BeOS"

#elif defined(__QNX__) || defined(__QNXNTO__)
#  define PLATFORM_ID "QNX"

#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__)
#  define PLATFORM_ID "Tru64"

#elif defined(__riscos) || defined(__riscos__)
#  define PLATFORM_ID "RISCos"

#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__)
#  define PLATFORM_ID "SINIX"

#elif defined(__UNIX_SV__)
#  define PLATFORM_ID "UNIX_SV"

#elif defined(__bsdos__)
#  define PLATFORM_ID "BSDOS"

#elif defined(_MPRAS) || defined(MPRAS)
#  define PLATFORM_ID "MP-RAS"

#elif defined(__osf) || defined(__osf__)
#  define PLATFORM_ID "OSF1"

#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv)
#  define PLATFORM_ID "SCO_SV"

#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX)
#  define PLATFORM_ID "ULTRIX"

#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX)
#  define PLATFORM_ID "Xenix"

#elif defined(__WATCOMC__)
   /* Watcom: the target OS is reported through its own macro family. */
#  if defined(__LINUX__)
#    define PLATFORM_ID "Linux"
#  elif defined(__DOS__)
#    define PLATFORM_ID "DOS"
#  elif defined(__OS2__)
#    define PLATFORM_ID "OS2"
#  elif defined(__WINDOWS__)
#    define PLATFORM_ID "Windows3x"
#  elif defined(__VXWORKS__)
#    define PLATFORM_ID "VxWorks"
#  else /* Watcom target not recognised */
#    define PLATFORM_ID
#  endif

#elif defined(__INTEGRITY)
#  if defined(INT_178B)
#    define PLATFORM_ID "Integrity178"
#  else /* plain Integrity */
#    define PLATFORM_ID "Integrity"
#  endif

#elif defined(_ADI_COMPILER)
#  define PLATFORM_ID "ADSP"

#else /* platform not recognised */
#  define PLATFORM_ID

#endif
|
||||
|
||||
/* Architecture detection.  For Windows compilers such as MSVC and Intel the
   architecture can be read from the compiler itself: those compilers have no
   flag that switches architecture, it is fixed by which compiler binary is
   in use.  Several embedded toolchains are handled the same way below.  The
   first matching toolchain wins; when none matches ARCHITECTURE_ID is
   defined empty. */
#if defined(_WIN32) && defined(_MSC_VER)
#  if defined(_M_IA64)
#    define ARCHITECTURE_ID "IA64"
#  elif defined(_M_ARM64EC)
#    define ARCHITECTURE_ID "ARM64EC"
#  elif defined(_M_X64) || defined(_M_AMD64)
#    define ARCHITECTURE_ID "x64"
#  elif defined(_M_IX86)
#    define ARCHITECTURE_ID "X86"
#  elif defined(_M_ARM64)
#    define ARCHITECTURE_ID "ARM64"
#  elif defined(_M_ARM)
     /* _M_ARM carries a number; 4 and 5 get an "I" suffix, anything else is
        appended verbatim. */
#    if _M_ARM == 4
#      define ARCHITECTURE_ID "ARMV4I"
#    elif _M_ARM == 5
#      define ARCHITECTURE_ID "ARMV5I"
#    else
#      define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM)
#    endif
#  elif defined(_M_MIPS)
#    define ARCHITECTURE_ID "MIPS"
#  elif defined(_M_SH)
#    define ARCHITECTURE_ID "SHx"
#  else /* architecture not recognised */
#    define ARCHITECTURE_ID ""
#  endif

#elif defined(__WATCOMC__)
#  if defined(_M_I86)
#    define ARCHITECTURE_ID "I86"
#  elif defined(_M_IX86)
#    define ARCHITECTURE_ID "X86"
#  else /* architecture not recognised */
#    define ARCHITECTURE_ID ""
#  endif

#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC)
#  if defined(__ICCARM__)
#    define ARCHITECTURE_ID "ARM"
#  elif defined(__ICCRX__)
#    define ARCHITECTURE_ID "RX"
#  elif defined(__ICCRH850__)
#    define ARCHITECTURE_ID "RH850"
#  elif defined(__ICCRL78__)
#    define ARCHITECTURE_ID "RL78"
#  elif defined(__ICCRISCV__)
#    define ARCHITECTURE_ID "RISCV"
#  elif defined(__ICCAVR__)
#    define ARCHITECTURE_ID "AVR"
#  elif defined(__ICC430__)
#    define ARCHITECTURE_ID "MSP430"
#  elif defined(__ICCV850__)
#    define ARCHITECTURE_ID "V850"
#  elif defined(__ICC8051__)
#    define ARCHITECTURE_ID "8051"
#  elif defined(__ICCSTM8__)
#    define ARCHITECTURE_ID "STM8"
#  else /* architecture not recognised */
#    define ARCHITECTURE_ID ""
#  endif

#elif defined(__ghs__)
#  if defined(__PPC64__)
#    define ARCHITECTURE_ID "PPC64"
#  elif defined(__ppc__)
#    define ARCHITECTURE_ID "PPC"
#  elif defined(__ARM__)
#    define ARCHITECTURE_ID "ARM"
#  elif defined(__x86_64__)
#    define ARCHITECTURE_ID "x64"
#  elif defined(__i386__)
#    define ARCHITECTURE_ID "X86"
#  else /* architecture not recognised */
#    define ARCHITECTURE_ID ""
#  endif

#elif defined(__clang__) && defined(__ti__)
#  if defined(__ARM_ARCH)
#    define ARCHITECTURE_ID "ARM"
#  else /* architecture not recognised */
#    define ARCHITECTURE_ID ""
#  endif

#elif defined(__TI_COMPILER_VERSION__)
#  if defined(__TI_ARM__)
#    define ARCHITECTURE_ID "ARM"
#  elif defined(__MSP430__)
#    define ARCHITECTURE_ID "MSP430"
#  elif defined(__TMS320C28XX__)
#    define ARCHITECTURE_ID "TMS320C28x"
#  elif defined(__TMS320C6X__) || defined(_TMS320C6X)
#    define ARCHITECTURE_ID "TMS320C6x"
#  else /* architecture not recognised */
#    define ARCHITECTURE_ID ""
#  endif

#elif defined(__ADSPSHARC__)
#  define ARCHITECTURE_ID "SHARC"

#elif defined(__ADSPBLACKFIN__)
#  define ARCHITECTURE_ID "Blackfin"

#elif defined(__TASKING__)
#  if defined(__CTC__) || defined(__CPTC__)
#    define ARCHITECTURE_ID "TriCore"
#  elif defined(__CMCS__)
#    define ARCHITECTURE_ID "MCS"
#  elif defined(__CARM__)
#    define ARCHITECTURE_ID "ARM"
#  elif defined(__CARC__)
#    define ARCHITECTURE_ID "ARC"
#  elif defined(__C51__)
#    define ARCHITECTURE_ID "8051"
#  elif defined(__CPCP__)
#    define ARCHITECTURE_ID "PCP"
#  else
#    define ARCHITECTURE_ID ""
#  endif

#else /* no toolchain-specific detection available */
#  define ARCHITECTURE_ID
#endif
|
||||
|
||||
/* Expand an integer into eight comma-separated decimal digit characters,
   most significant digit first; leading zeros are kept.  DEC_DIGIT yields
   the single digit of n selected by the power-of-ten divisor. */
#define DEC_DIGIT(n, divisor) ('0' + (((n) / (divisor)) % 10))
#define DEC(n)             \
  DEC_DIGIT(n, 10000000),  \
  DEC_DIGIT(n, 1000000),   \
  DEC_DIGIT(n, 100000),    \
  DEC_DIGIT(n, 10000),     \
  DEC_DIGIT(n, 1000),      \
  DEC_DIGIT(n, 100),       \
  DEC_DIGIT(n, 10),        \
  DEC_DIGIT(n, 1)
|
||||
|
||||
/* Expand an integer into eight comma-separated characters, one per 4-bit
   nibble, highest nibble first.  Each nibble value is simply added to '0',
   so nibbles above 9 produce the characters that follow '9' rather than
   the letters A-F. */
#define HEX_DIGIT(n, shift) ('0' + ((n) >> (shift) & 0xF))
#define HEX(n)       \
  HEX_DIGIT(n, 28),  \
  HEX_DIGIT(n, 24),  \
  HEX_DIGIT(n, 20),  \
  HEX_DIGIT(n, 16),  \
  HEX_DIGIT(n, 12),  \
  HEX_DIGIT(n, 8),   \
  HEX_DIGIT(n, 4),   \
  HEX_DIGIT(n, 0)
|
||||
|
||||
/* Compiler version marker.  A ready-made COMPILER_VERSION string is used
   when available; otherwise the marker is assembled character by character
   from the numeric components.  MINOR, PATCH and TWEAK are nested, so each
   one is emitted only when every preceding component exists. */
#if defined(COMPILER_VERSION)
const char* info_version = "INFO" ":" "compiler_version[" COMPILER_VERSION "]";

#elif defined(COMPILER_VERSION_MAJOR)
const char info_version[] = {
  'I', 'N', 'F', 'O', ':',
  'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[',
  COMPILER_VERSION_MAJOR,
#  if defined(COMPILER_VERSION_MINOR)
  '.', COMPILER_VERSION_MINOR,
#    if defined(COMPILER_VERSION_PATCH)
  '.', COMPILER_VERSION_PATCH,
#      if defined(COMPILER_VERSION_TWEAK)
  '.', COMPILER_VERSION_TWEAK,
#      endif
#    endif
#  endif
  ']', '\0'
};
#endif
|
||||
|
||||
/* Internal (vendor-specific) version marker: spelled out as characters when
   the numeric COMPILER_VERSION_INTERNAL is defined, or concatenated from
   COMPILER_VERSION_INTERNAL_STR when only the string form is defined. */
#if defined(COMPILER_VERSION_INTERNAL)
const char info_version_internal[] = {
  'I', 'N', 'F', 'O', ':',
  'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_',
  'i','n','t','e','r','n','a','l','[',
  COMPILER_VERSION_INTERNAL,
  ']', '\0'
};
#elif defined(COMPILER_VERSION_INTERNAL_STR)
const char* info_version_internal = "INFO" ":" "compiler_version_internal[" COMPILER_VERSION_INTERNAL_STR "]";
#endif
|
||||
|
||||
/* Version marker of the simulated compiler, assembled from its numeric
   components.  MINOR, PATCH and TWEAK are nested, so each one is emitted
   only when every preceding component exists. */
#if defined(SIMULATE_VERSION_MAJOR)
const char info_simulate_version[] = {
  'I', 'N', 'F', 'O', ':',
  's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[',
  SIMULATE_VERSION_MAJOR,
#  if defined(SIMULATE_VERSION_MINOR)
  '.', SIMULATE_VERSION_MINOR,
#    if defined(SIMULATE_VERSION_PATCH)
  '.', SIMULATE_VERSION_PATCH,
#      if defined(SIMULATE_VERSION_TWEAK)
  '.', SIMULATE_VERSION_TWEAK,
#      endif
#    endif
#  endif
  ']', '\0'
};
#endif
|
||||
|
||||
/* Construct the string literal in pieces to prevent the source from
|
||||
getting matched. Store it in a pointer rather than an array
|
||||
because some compilers will just produce instructions to fill the
|
||||
array rather than assigning a pointer to a static array. */
|
||||
char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]";
|
||||
char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]";
|
||||
|
||||
|
||||
|
||||
/* Host compiler name marker, present only when a host compiler was
   identified. */
#if defined(HOST_COMPILER_ID)
const char* info_host_compiler = "INFO" ":" "host_compiler[" HOST_COMPILER_ID "]";
#endif

/* Host compiler version marker: taken from the HOST_COMPILER_VERSION string
   when defined, otherwise assembled from the numeric components, with
   MINOR / PATCH / TWEAK nested so each depends on the one before it. */
#if defined(HOST_COMPILER_VERSION)
const char* info_host_compiler_version = "INFO" ":" "host_compiler_version[" HOST_COMPILER_VERSION "]";
#elif defined(HOST_COMPILER_VERSION_MAJOR)
const char info_host_compiler_version[] = {
  'I', 'N', 'F', 'O', ':',
  'h','o','s','t','_',
  'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[',
  HOST_COMPILER_VERSION_MAJOR,
#  if defined(HOST_COMPILER_VERSION_MINOR)
  '.', HOST_COMPILER_VERSION_MINOR,
#    if defined(HOST_COMPILER_VERSION_PATCH)
  '.', HOST_COMPILER_VERSION_PATCH,
#      if defined(HOST_COMPILER_VERSION_TWEAK)
  '.', HOST_COMPILER_VERSION_TWEAK,
#      endif
#    endif
#  endif
  ']', '\0'
};
#endif
|
||||
#
|
||||
/* __cplusplus values published for each C++ standard revision. */
#define CXX_STD_98 199711L
#define CXX_STD_11 201103L
#define CXX_STD_14 201402L
#define CXX_STD_17 201703L
#define CXX_STD_20 202002L
#define CXX_STD_23 202302L

#define CXX_STD __cplusplus

/* Pick the two-digit tag of the standard the compiler uses by default.  A
   value strictly above one revision's constant is reported as the next
   revision; anything below C++11 is reported as "98". */
#if CXX_STD > CXX_STD_23
#  define INFO_STANDARD_DEFAULT_TAG "26"
#elif CXX_STD > CXX_STD_20
#  define INFO_STANDARD_DEFAULT_TAG "23"
#elif CXX_STD > CXX_STD_17
#  define INFO_STANDARD_DEFAULT_TAG "20"
#elif CXX_STD > CXX_STD_14
#  define INFO_STANDARD_DEFAULT_TAG "17"
#elif CXX_STD > CXX_STD_11
#  define INFO_STANDARD_DEFAULT_TAG "14"
#elif CXX_STD >= CXX_STD_11
#  define INFO_STANDARD_DEFAULT_TAG "11"
#else
#  define INFO_STANDARD_DEFAULT_TAG "98"
#endif

const char* info_language_standard_default =
  "INFO" ":" "standard_default[" INFO_STANDARD_DEFAULT_TAG "]";

/* The tag macro is only needed for the declaration above. */
#undef INFO_STANDARD_DEFAULT_TAG
|
||||
|
||||
/* Report whether language extensions are on by default: "ON" for Clang or a
   GNU-compatible compiler unless __STRICT_ANSI__ is defined, else "OFF". */
#if (defined(__clang__) || defined(__GNUC__)) && !defined(__STRICT_ANSI__)
#  define INFO_EXTENSIONS_DEFAULT_TAG "ON"
#else
#  define INFO_EXTENSIONS_DEFAULT_TAG "OFF"
#endif

const char* info_language_extensions_default =
  "INFO" ":" "extensions_default[" INFO_EXTENSIONS_DEFAULT_TAG "]";

/* The tag macro is only needed for the declaration above. */
#undef INFO_EXTENSIONS_DEFAULT_TAG
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int require = 0;
|
||||
require += info_compiler[argc];
|
||||
require += info_platform[argc];
|
||||
#ifdef COMPILER_VERSION_MAJOR
|
||||
require += info_version[argc];
|
||||
#endif
|
||||
#ifdef SIMULATE_ID
|
||||
require += info_simulate[argc];
|
||||
#endif
|
||||
#ifdef SIMULATE_VERSION_MAJOR
|
||||
require += info_simulate_version[argc];
|
||||
#endif
|
||||
#ifdef HOST_COMPILER_ID
|
||||
require += info_host_compiler[argc];
|
||||
#endif
|
||||
#ifdef HOST_COMPILER_VERSION_MAJOR
|
||||
require += info_host_compiler_version[argc];
|
||||
#endif
|
||||
require += info_language_standard_default[argc];
|
||||
require += info_language_extensions_default[argc];
|
||||
(void)argv;
|
||||
return require;
|
||||
}
|
337
rin/miner/hip-output/build/CMakeFiles/CMakeConfigureLog.yaml
Normal file
337
rin/miner/hip-output/build/CMakeFiles/CMakeConfigureLog.yaml
Normal file
@@ -0,0 +1,337 @@
|
||||
|
||||
---
|
||||
events:
|
||||
-
|
||||
kind: "message-v1"
|
||||
backtrace:
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineSystem.cmake:205 (message)"
|
||||
- "CMakeLists.txt:2 (project)"
|
||||
message: |
|
||||
The system is: Linux - 6.8.0-79-generic - x86_64
|
||||
-
|
||||
kind: "message-v1"
|
||||
backtrace:
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:17 (message)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:64 (__determine_compiler_id_test)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCXXCompiler.cmake:126 (CMAKE_DETERMINE_COMPILER_ID)"
|
||||
- "CMakeLists.txt:2 (project)"
|
||||
message: |
|
||||
Compiling the CXX compiler identification source file "CMakeCXXCompilerId.cpp" failed.
|
||||
Compiler: /opt/rocm-7.0/llvm/bin/clang++
|
||||
Build flags:
|
||||
Id flags:
|
||||
|
||||
The output was:
|
||||
1
|
||||
ld.lld: error: cannot open Scrt1.o: No such file or directory
|
||||
ld.lld: error: cannot open crti.o: No such file or directory
|
||||
ld.lld: error: unable to find library -lstdc++
|
||||
ld.lld: error: unable to find library -lm
|
||||
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
|
||||
ld.lld: error: unable to find library -lgcc_s
|
||||
ld.lld: error: unable to find library -lc
|
||||
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
|
||||
ld.lld: error: unable to find library -lgcc_s
|
||||
ld.lld: error: cannot open crtn.o: No such file or directory
|
||||
clang++: error: linker command failed with exit code 1 (use -v to see invocation)
|
||||
|
||||
|
||||
-
|
||||
kind: "message-v1"
|
||||
backtrace:
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:17 (message)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:64 (__determine_compiler_id_test)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCXXCompiler.cmake:126 (CMAKE_DETERMINE_COMPILER_ID)"
|
||||
- "CMakeLists.txt:2 (project)"
|
||||
message: |
|
||||
Compiling the CXX compiler identification source file "CMakeCXXCompilerId.cpp" succeeded.
|
||||
Compiler: /opt/rocm-7.0/llvm/bin/clang++
|
||||
Build flags:
|
||||
Id flags: -c
|
||||
|
||||
The output was:
|
||||
0
|
||||
|
||||
|
||||
Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" produced "CMakeCXXCompilerId.o"
|
||||
|
||||
The CXX compiler identification is Clang, found in:
|
||||
/tmp/rinhash-hip/build/CMakeFiles/3.31.6/CompilerIdCXX/CMakeCXXCompilerId.o
|
||||
|
||||
-
|
||||
kind: "message-v1"
|
||||
backtrace:
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:1250 (message)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:95 (CMAKE_DETERMINE_COMPILER_ID_VENDOR)"
|
||||
- "CMakeLists.txt:2 (project)"
|
||||
message: |
|
||||
Checking whether the HIP compiler is NVIDIA using "" did not match "nvcc: NVIDIA \\(R\\) Cuda compiler driver":
|
||||
AMD clang version 20.0.0git (https://github.com/ROCm/llvm-project.git 32697402bdd2c9b01f45d53f123dc646206d3eb5+PATCHED:6509c030a655df7073b63a5b3d705e00f4f461ca)
|
||||
Target: x86_64-unknown-linux-gnu
|
||||
Thread model: posix
|
||||
InstalledDir: /opt/rocm-7.0/lib/llvm/bin
|
||||
-
|
||||
kind: "message-v1"
|
||||
backtrace:
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:1237 (message)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:95 (CMAKE_DETERMINE_COMPILER_ID_VENDOR)"
|
||||
- "CMakeLists.txt:2 (project)"
|
||||
message: |
|
||||
Checking whether the HIP compiler is Clang using "" matched "(clang version)":
|
||||
AMD clang version 20.0.0git (https://github.com/ROCm/llvm-project.git 32697402bdd2c9b01f45d53f123dc646206d3eb5+PATCHED:6509c030a655df7073b63a5b3d705e00f4f461ca)
|
||||
Target: x86_64-unknown-linux-gnu
|
||||
Thread model: posix
|
||||
InstalledDir: /opt/rocm-7.0/lib/llvm/bin
|
||||
-
|
||||
kind: "message-v1"
|
||||
backtrace:
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:17 (message)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:64 (__determine_compiler_id_test)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:136 (CMAKE_DETERMINE_COMPILER_ID)"
|
||||
- "CMakeLists.txt:2 (project)"
|
||||
message: |
|
||||
Compiling the HIP compiler identification source file "CMakeHIPCompilerId.hip" failed.
|
||||
Compiler: /opt/rocm-7.0/llvm/bin/clang++
|
||||
Build flags:
|
||||
Id flags: -v
|
||||
|
||||
The output was:
|
||||
1
|
||||
AMD clang version 20.0.0git (https://github.com/ROCm/llvm-project.git 32697402bdd2c9b01f45d53f123dc646206d3eb5+PATCHED:6509c030a655df7073b63a5b3d705e00f4f461ca)
|
||||
Target: x86_64-unknown-linux-gnu
|
||||
Thread model: posix
|
||||
InstalledDir: /opt/rocm-7.0/lib/llvm/bin
|
||||
Found HIP installation: /opt/rocm-7.0, version 3.5.0
|
||||
"/opt/rocm-7.0/lib/llvm/bin/clang-20" -cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -Werror=atomic-alignment -emit-obj -dumpdir a- -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name CMakeHIPCompilerId.hip -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fno-rounding-math -mconstructor-aliases -aux-target-cpu x86-64 -fcuda-is-device -fno-threadsafe-statics -mllvm -amdgpu-internalize-symbols -fcuda-allow-variadic-functions -fvisibility=hidden -fapply-global-visibility-to-externs -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/hip.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/ocml.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/ockl.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_daz_opt_off.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_unsafe_math_off.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_finite_only_off.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_wavefrontsize64_on.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_isa_version_906.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_abi_version_600.bc -target-cpu gfx906 -debugger-tuning=gdb -fdebug-compilation-dir=/tmp/rinhash-hip/build/CMakeFiles/3.31.6/CompilerIdHIP -v -resource-dir /opt/rocm-7.0/lib/llvm/lib/clang/20 -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20 -idirafter /opt/rocm-7.0/include -I/opt/rocm-7.0/include -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-externc-isystem /include 
-internal-externc-isystem /usr/include -fdeprecated-macro -fno-autolink -ferror-limit 19 -fhip-new-launch-api -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -cuid=7bec080eaa19a2fa -fcuda-allow-variadic-functions -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/CMakeHIPCompilerId-gfx906-ca7e26.o -x hip CMakeHIPCompilerId.hip
|
||||
clang -cc1 version 20.0.0git based upon LLVM 20.0.0git default target x86_64-unknown-linux-gnu
|
||||
ignoring nonexistent directory "/opt/rocm-7.0/include"
|
||||
ignoring nonexistent directory "/opt/rocm-7.0/include"
|
||||
ignoring nonexistent directory "/include"
|
||||
ignoring nonexistent directory "/include"
|
||||
ignoring duplicate directory "/opt/rocm-7.0/lib/llvm/lib/clang/20/include"
|
||||
ignoring duplicate directory "/usr/local/include"
|
||||
ignoring duplicate directory "/usr/include"
|
||||
ignoring duplicate directory "/usr/local/include"
|
||||
ignoring duplicate directory "/opt/rocm-7.0/lib/llvm/lib/clang/20/include"
|
||||
ignoring duplicate directory "/usr/include"
|
||||
#include "..." search starts here:
|
||||
#include <...> search starts here:
|
||||
/opt/rocm-7.0/lib/llvm/lib/clang/20
|
||||
/opt/rocm-7.0/lib/llvm/lib/clang/20/include
|
||||
/usr/local/include
|
||||
/usr/include
|
||||
End of search list.
|
||||
"/opt/rocm-7.0/lib/llvm/bin/lld" -flavor gnu -m elf64_amdgpu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols --lto-partitions=8 -plugin-opt=mcpu=gfx906 --whole-archive -o /tmp/CMakeHIPCompilerId-gfx906-df2108.out /tmp/CMakeHIPCompilerId-gfx906-ca7e26.o --no-whole-archive
|
||||
"/opt/rocm-7.0/lib/llvm/bin/clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx906 -input=/dev/null -input=/tmp/CMakeHIPCompilerId-gfx906-df2108.out -output=/tmp/CMakeHIPCompilerId-36bb2a.hipfb -verbose
|
||||
"/opt/rocm-7.0/lib/llvm/bin/clang-20" -cc1 -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-obj -dumpdir a- -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name CMakeHIPCompilerId.hip -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/tmp/rinhash-hip/build/CMakeFiles/3.31.6/CompilerIdHIP -v -fcoverage-compilation-dir=/tmp/rinhash-hip/build/CMakeFiles/3.31.6/CompilerIdHIP -resource-dir /opt/rocm-7.0/lib/llvm/lib/clang/20 -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20 -idirafter /opt/rocm-7.0/include -I/opt/rocm-7.0/include -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -fdeprecated-macro -ferror-limit 19 -fhip-new-launch-api -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -fcuda-include-gpubinary /tmp/CMakeHIPCompilerId-36bb2a.hipfb -cuid=7bec080eaa19a2fa -fcuda-allow-variadic-functions -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/CMakeHIPCompilerId-23fde8.o -x hip CMakeHIPCompilerId.hip
|
||||
clang -cc1 version 20.0.0git based upon LLVM 20.0.0git default target x86_64-unknown-linux-gnu
|
||||
ignoring nonexistent directory "/opt/rocm-7.0/include"
|
||||
ignoring nonexistent directory "/opt/rocm-7.0/include"
|
||||
ignoring nonexistent directory "/include"
|
||||
ignoring nonexistent directory "/include"
|
||||
ignoring duplicate directory "/opt/rocm-7.0/lib/llvm/lib/clang/20/include"
|
||||
ignoring duplicate directory "/usr/local/include"
|
||||
ignoring duplicate directory "/usr/include"
|
||||
#include "..." search starts here:
|
||||
#include <...> search starts here:
|
||||
/opt/rocm-7.0/lib/llvm/lib/clang/20
|
||||
/opt/rocm-7.0/lib/llvm/lib/clang/20/include
|
||||
/usr/local/include
|
||||
/usr/include
|
||||
End of search list.
|
||||
"/opt/rocm-7.0/lib/llvm/bin/ld.lld" --hash-style=gnu --eh-frame-hdr -m elf_x86_64 -pie -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o a.out Scrt1.o crti.o /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/linux/clang_rt.crtbegin-x86_64.o -L/lib/../lib64 -L/usr/lib/../lib64 -L/lib -L/usr/lib -L/opt/rocm-7.0/lib -L/opt/rocm-7.0/lib64 /tmp/CMakeHIPCompilerId-23fde8.o -L/opt/rocm-7.0/lib -l:libamdhip64.so.7 -lstdc++ -lm /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a -lgcc_s -lc /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a -lgcc_s /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/linux/clang_rt.crtend-x86_64.o crtn.o
|
||||
ld.lld: error: cannot open Scrt1.o: No such file or directory
|
||||
ld.lld: error: cannot open crti.o: No such file or directory
|
||||
ld.lld: error: unable to find library -lstdc++
|
||||
ld.lld: error: unable to find library -lm
|
||||
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
|
||||
ld.lld: error: unable to find library -lgcc_s
|
||||
ld.lld: error: unable to find library -lc
|
||||
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
|
||||
ld.lld: error: unable to find library -lgcc_s
|
||||
ld.lld: error: cannot open crtn.o: No such file or directory
|
||||
clang++: error: linker command failed with exit code 1 (use -v to see invocation)
|
||||
|
||||
|
||||
-
|
||||
kind: "message-v1"
|
||||
backtrace:
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:17 (message)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:64 (__determine_compiler_id_test)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:136 (CMAKE_DETERMINE_COMPILER_ID)"
|
||||
- "CMakeLists.txt:2 (project)"
|
||||
message: |
|
||||
Compiling the HIP compiler identification source file "CMakeHIPCompilerId.hip" failed.
|
||||
Compiler: /opt/rocm-7.0/llvm/bin/clang++
|
||||
Build flags:
|
||||
Id flags:
|
||||
|
||||
The output was:
|
||||
1
|
||||
ld.lld: error: cannot open Scrt1.o: No such file or directory
|
||||
ld.lld: error: cannot open crti.o: No such file or directory
|
||||
ld.lld: error: unable to find library -lstdc++
|
||||
ld.lld: error: unable to find library -lm
|
||||
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
|
||||
ld.lld: error: unable to find library -lgcc_s
|
||||
ld.lld: error: unable to find library -lc
|
||||
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
|
||||
ld.lld: error: unable to find library -lgcc_s
|
||||
ld.lld: error: cannot open crtn.o: No such file or directory
|
||||
clang++: error: linker command failed with exit code 1 (use -v to see invocation)
|
||||
|
||||
|
||||
-
|
||||
kind: "message-v1"
|
||||
backtrace:
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:17 (message)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:64 (__determine_compiler_id_test)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:136 (CMAKE_DETERMINE_COMPILER_ID)"
|
||||
- "CMakeLists.txt:2 (project)"
|
||||
message: |
|
||||
Compiling the HIP compiler identification source file "CMakeHIPCompilerId.hip" failed.
|
||||
Compiler: /opt/rocm-7.0/llvm/bin/clang++
|
||||
Build flags:
|
||||
Id flags: -v
|
||||
|
||||
The output was:
|
||||
1
|
||||
AMD clang version 20.0.0git (https://github.com/ROCm/llvm-project.git 32697402bdd2c9b01f45d53f123dc646206d3eb5+PATCHED:6509c030a655df7073b63a5b3d705e00f4f461ca)
|
||||
Target: x86_64-unknown-linux-gnu
|
||||
Thread model: posix
|
||||
InstalledDir: /opt/rocm-7.0/lib/llvm/bin
|
||||
Found HIP installation: /opt/rocm-7.0, version 3.5.0
|
||||
"/opt/rocm-7.0/lib/llvm/bin/clang-20" -cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -Werror=atomic-alignment -emit-obj -dumpdir a- -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name CMakeHIPCompilerId.hip -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fno-rounding-math -mconstructor-aliases -aux-target-cpu x86-64 -fcuda-is-device -fno-threadsafe-statics -mllvm -amdgpu-internalize-symbols -fcuda-allow-variadic-functions -fvisibility=hidden -fapply-global-visibility-to-externs -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/hip.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/ocml.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/ockl.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_daz_opt_off.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_unsafe_math_off.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_finite_only_off.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_wavefrontsize64_on.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_isa_version_906.bc -mlink-builtin-bitcode /opt/rocm-7.0/lib/llvm/amdgcn/bitcode/oclc_abi_version_600.bc -target-cpu gfx906 -debugger-tuning=gdb -fdebug-compilation-dir=/tmp/rinhash-hip/build/CMakeFiles/3.31.6/CompilerIdHIP -v -resource-dir /opt/rocm-7.0/lib/llvm/lib/clang/20 -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20 -idirafter /opt/rocm-7.0/include -I/opt/rocm-7.0/include -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-externc-isystem /include 
-internal-externc-isystem /usr/include -fdeprecated-macro -fno-autolink -ferror-limit 19 -fhip-new-launch-api -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -cuid=7bec080eaa19a2fa -fcuda-allow-variadic-functions -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/CMakeHIPCompilerId-gfx906-a5723d.o -x hip CMakeHIPCompilerId.hip
|
||||
clang -cc1 version 20.0.0git based upon LLVM 20.0.0git default target x86_64-unknown-linux-gnu
|
||||
ignoring nonexistent directory "/opt/rocm-7.0/include"
|
||||
ignoring nonexistent directory "/opt/rocm-7.0/include"
|
||||
ignoring nonexistent directory "/include"
|
||||
ignoring nonexistent directory "/include"
|
||||
ignoring duplicate directory "/opt/rocm-7.0/lib/llvm/lib/clang/20/include"
|
||||
ignoring duplicate directory "/usr/local/include"
|
||||
ignoring duplicate directory "/usr/include"
|
||||
ignoring duplicate directory "/usr/local/include"
|
||||
ignoring duplicate directory "/opt/rocm-7.0/lib/llvm/lib/clang/20/include"
|
||||
ignoring duplicate directory "/usr/include"
|
||||
#include "..." search starts here:
|
||||
#include <...> search starts here:
|
||||
/opt/rocm-7.0/lib/llvm/lib/clang/20
|
||||
/opt/rocm-7.0/lib/llvm/lib/clang/20/include
|
||||
/usr/local/include
|
||||
/usr/include
|
||||
End of search list.
|
||||
"/opt/rocm-7.0/lib/llvm/bin/lld" -flavor gnu -m elf64_amdgpu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols --lto-partitions=8 -plugin-opt=mcpu=gfx906 --whole-archive -o /tmp/CMakeHIPCompilerId-gfx906-73cd59.out /tmp/CMakeHIPCompilerId-gfx906-a5723d.o --no-whole-archive
|
||||
"/opt/rocm-7.0/lib/llvm/bin/clang-offload-bundler" -type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx906 -input=/dev/null -input=/tmp/CMakeHIPCompilerId-gfx906-73cd59.out -output=/tmp/CMakeHIPCompilerId-af3c23.hipfb -verbose
|
||||
"/opt/rocm-7.0/lib/llvm/bin/clang-20" -cc1 -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-obj -dumpdir a- -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name CMakeHIPCompilerId.hip -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/tmp/rinhash-hip/build/CMakeFiles/3.31.6/CompilerIdHIP -v -fcoverage-compilation-dir=/tmp/rinhash-hip/build/CMakeFiles/3.31.6/CompilerIdHIP -resource-dir /opt/rocm-7.0/lib/llvm/lib/clang/20 -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20 -idirafter /opt/rocm-7.0/include -I/opt/rocm-7.0/include -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -internal-isystem /opt/rocm-7.0/lib/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -fdeprecated-macro -ferror-limit 19 -fhip-new-launch-api -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -fcuda-include-gpubinary /tmp/CMakeHIPCompilerId-af3c23.hipfb -cuid=7bec080eaa19a2fa -fcuda-allow-variadic-functions -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/CMakeHIPCompilerId-fc89bb.o -x hip CMakeHIPCompilerId.hip
|
||||
clang -cc1 version 20.0.0git based upon LLVM 20.0.0git default target x86_64-unknown-linux-gnu
|
||||
ignoring nonexistent directory "/opt/rocm-7.0/include"
|
||||
ignoring nonexistent directory "/opt/rocm-7.0/include"
|
||||
ignoring nonexistent directory "/include"
|
||||
ignoring nonexistent directory "/include"
|
||||
ignoring duplicate directory "/opt/rocm-7.0/lib/llvm/lib/clang/20/include"
|
||||
ignoring duplicate directory "/usr/local/include"
|
||||
ignoring duplicate directory "/usr/include"
|
||||
#include "..." search starts here:
|
||||
#include <...> search starts here:
|
||||
/opt/rocm-7.0/lib/llvm/lib/clang/20
|
||||
/opt/rocm-7.0/lib/llvm/lib/clang/20/include
|
||||
/usr/local/include
|
||||
/usr/include
|
||||
End of search list.
|
||||
"/opt/rocm-7.0/lib/llvm/bin/ld.lld" --hash-style=gnu --eh-frame-hdr -m elf_x86_64 -pie -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o a.out Scrt1.o crti.o /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/linux/clang_rt.crtbegin-x86_64.o -L/lib/../lib64 -L/usr/lib/../lib64 -L/lib -L/usr/lib -L/opt/rocm-7.0/lib -L/opt/rocm-7.0/lib64 /tmp/CMakeHIPCompilerId-fc89bb.o -L/opt/rocm-7.0/lib -l:libamdhip64.so.7 -lstdc++ -lm /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a -lgcc_s -lc /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a -lgcc_s /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/linux/clang_rt.crtend-x86_64.o crtn.o
|
||||
ld.lld: error: cannot open Scrt1.o: No such file or directory
|
||||
ld.lld: error: cannot open crti.o: No such file or directory
|
||||
ld.lld: error: unable to find library -lstdc++
|
||||
ld.lld: error: unable to find library -lm
|
||||
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
|
||||
ld.lld: error: unable to find library -lgcc_s
|
||||
ld.lld: error: unable to find library -lc
|
||||
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
|
||||
ld.lld: error: unable to find library -lgcc_s
|
||||
ld.lld: error: cannot open crtn.o: No such file or directory
|
||||
clang++: error: linker command failed with exit code 1 (use -v to see invocation)
|
||||
|
||||
|
||||
-
|
||||
kind: "message-v1"
|
||||
backtrace:
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:17 (message)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:64 (__determine_compiler_id_test)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:136 (CMAKE_DETERMINE_COMPILER_ID)"
|
||||
- "CMakeLists.txt:2 (project)"
|
||||
message: |
|
||||
Compiling the HIP compiler identification source file "CMakeHIPCompilerId.hip" failed.
|
||||
Compiler: /opt/rocm-7.0/llvm/bin/clang++
|
||||
Build flags:
|
||||
Id flags:
|
||||
|
||||
The output was:
|
||||
1
|
||||
ld.lld: error: cannot open Scrt1.o: No such file or directory
|
||||
ld.lld: error: cannot open crti.o: No such file or directory
|
||||
ld.lld: error: unable to find library -lstdc++
|
||||
ld.lld: error: unable to find library -lm
|
||||
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
|
||||
ld.lld: error: unable to find library -lgcc_s
|
||||
ld.lld: error: unable to find library -lc
|
||||
ld.lld: error: cannot open /opt/rocm-7.0/lib/llvm/lib/clang/20/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a: No such file or directory
|
||||
ld.lld: error: unable to find library -lgcc_s
|
||||
ld.lld: error: cannot open crtn.o: No such file or directory
|
||||
clang++: error: linker command failed with exit code 1 (use -v to see invocation)
|
||||
|
||||
|
||||
-
|
||||
kind: "message-v1"
|
||||
backtrace:
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:1250 (message)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:86 (CMAKE_DETERMINE_COMPILER_ID_VENDOR)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:136 (CMAKE_DETERMINE_COMPILER_ID)"
|
||||
- "CMakeLists.txt:2 (project)"
|
||||
message: |
|
||||
Checking whether the HIP compiler is NVIDIA using "" did not match "nvcc: NVIDIA \\(R\\) Cuda compiler driver":
|
||||
clang++: error: no input files
|
||||
-
|
||||
kind: "message-v1"
|
||||
backtrace:
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:1250 (message)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:86 (CMAKE_DETERMINE_COMPILER_ID_VENDOR)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:136 (CMAKE_DETERMINE_COMPILER_ID)"
|
||||
- "CMakeLists.txt:2 (project)"
|
||||
message: |
|
||||
Checking whether the HIP compiler is Clang using "" did not match "(clang version)":
|
||||
clang++: error: no input files
|
||||
-
|
||||
kind: "message-v1"
|
||||
backtrace:
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:1250 (message)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:86 (CMAKE_DETERMINE_COMPILER_ID_VENDOR)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:136 (CMAKE_DETERMINE_COMPILER_ID)"
|
||||
- "CMakeLists.txt:2 (project)"
|
||||
message: |
|
||||
Checking whether the HIP compiler is NVIDIA using "" did not match "nvcc: NVIDIA \\(R\\) Cuda compiler driver":
|
||||
clang++: error: no input files
|
||||
-
|
||||
kind: "message-v1"
|
||||
backtrace:
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:1250 (message)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake:86 (CMAKE_DETERMINE_COMPILER_ID_VENDOR)"
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:136 (CMAKE_DETERMINE_COMPILER_ID)"
|
||||
- "CMakeLists.txt:2 (project)"
|
||||
message: |
|
||||
Checking whether the HIP compiler is Clang using "" did not match "(clang version)":
|
||||
clang++: error: no input files
|
||||
-
|
||||
kind: "message-v1"
|
||||
backtrace:
|
||||
- "/usr/share/cmake/Modules/CMakeDetermineHIPCompiler.cmake:188 (message)"
|
||||
- "CMakeLists.txt:2 (project)"
|
||||
message: |
|
||||
Parsed HIP implicit link information from compiler id output:
|
||||
link line regex: [^( *|.*[/\\])(ld[0-9]*(\\.[a-z]+)?|CMAKE_LINK_STARTFILE-NOTFOUND|([^/\\]+-)?ld|collect2)[^/\\]*( |$)]
|
||||
implicit libs: []
|
||||
implicit objs: []
|
||||
implicit dirs: []
|
||||
implicit fwks: []
|
||||
|
||||
|
||||
...
|
1
rin/miner/hip-output/build/CMakeFiles/cmake.check_cache
Normal file
1
rin/miner/hip-output/build/CMakeFiles/cmake.check_cache
Normal file
@@ -0,0 +1 @@
|
||||
# This file is generated by cmake for dependency checking of the CMakeCache.txt file
|
34
rin/miner/hip-output/hip_runtime_shim.h
Normal file
34
rin/miner/hip-output/hip_runtime_shim.h
Normal file
@@ -0,0 +1,34 @@
|
||||
#pragma once

// Compatibility shim for sources written against the CUDA runtime API.
//
// When building for AMD GPUs (__HIP_PLATFORM_AMD__ defined) the HIP runtime
// headers are included and the cuda* names used by this project are mapped to
// their hip* counterparts. On every other platform the regular CUDA runtime
// headers are included and no names are remapped.
#ifdef __HIP_PLATFORM_AMD__
#include <hip/hip_runtime.h>
#include <hip/hip_runtime_api.h>

// Runtime API: error type, status codes, memory management and device control.
#define cudaError_t hipError_t
#define cudaSuccess hipSuccess
#define cudaMalloc hipMalloc
#define cudaFree hipFree
#define cudaMemcpy hipMemcpy
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
#define cudaDeviceSynchronize hipDeviceSynchronize
#define cudaGetErrorString hipGetErrorString
#define cudaGetLastError hipGetLastError
#define cudaMemGetInfo hipMemGetInfo
#define cudaDeviceReset hipDeviceReset

// Deliberately NOT redefined here:
//  * __global__, __device__, __host__, __shared__, __constant__,
//    __forceinline__ and __syncthreads: a self-referential
//    "#define X X" would replace whatever the HIP headers provide for X with
//    the bare identifier.
//  * blockIdx, threadIdx, blockDim, gridDim: mapping them to the
//    hip*_x names turns an expression such as threadIdx.x into
//    hipThreadIdx_x.x, which is not valid.
//  * hipLaunchKernelGGL: redefining it in terms of itself adds nothing.

// Only supply __align__ when the toolchain headers have not already done so.
#ifndef __align__
#define __align__(x) __attribute__((aligned(x)))
#endif

#else
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#endif
|
283
rin/miner/hip-output/rinhash.hip.cu
Normal file
283
rin/miner/hip-output/rinhash.hip.cu
Normal file
@@ -0,0 +1,283 @@
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
#include <stdexcept>
|
||||
|
||||
// Include shared device functions
|
||||
#include "rinhash_device.cuh"
|
||||
#include "argon2d_device.cuh"
|
||||
#include "sha3-256.hip.cu"
|
||||
#include "blake3_device.cuh"
|
||||
|
||||
// RinHash kernel: chains BLAKE3 -> Argon2d -> SHA3-256 over one input buffer
// and writes the final 32-byte digest to `output`.
//
//   input          bytes to hash
//   input_len      number of bytes in `input`
//   output         receives 32 bytes
//   argon2_memory  scratch blocks handed to device_argon2d_hash
//
// Only the thread with threadIdx.x == 0 performs the computation; every thread
// of the block then meets at the closing barrier.
extern "C" __global__ void rinhash_cuda_kernel(
    const uint8_t* input,
    size_t input_len,
    uint8_t* output,
    block* argon2_memory
) {
    __shared__ uint8_t blake3_out[32];
    __shared__ uint8_t argon2_out[32];

    const bool is_worker = (threadIdx.x == 0);
    if (is_worker) {
        // Stage 1: BLAKE3 over the raw input.
        light_hash_device(input, input_len, blake3_out);

        // Stage 2: Argon2d over the BLAKE3 digest (t_cost=2, m_cost=64, lanes=1).
        uint8_t salt[11] = { 'R','i','n','C','o','i','n','S','a','l','t' };
        device_argon2d_hash(argon2_out, blake3_out, 32, 2, 64, 1, argon2_memory, salt, 11);

        // Stage 3: SHA3-256 over the Argon2d digest.
        uint8_t digest[32];
        sha3_256_device(argon2_out, 32, digest);

        // Publish the final digest.
        int k = 0;
        while (k < 32) {
            output[k] = digest[k];
            ++k;
        }
    }

    __syncthreads();
}
|
||||
|
||||
// Computes the RinHash of a single header on the GPU.
//
//   input      host buffer holding the bytes to hash
//   input_len  number of bytes in `input`
//   output     host buffer that receives the 32-byte digest
//
// Device buffers are allocated for this call and released before returning.
// On any runtime error a message is written to stderr and the function
// returns; `output` is only written when the final device-to-host copy runs.
extern "C" void rinhash_cuda(const uint8_t* input, size_t input_len, uint8_t* output) {
    // Argon2 parameters
    const uint32_t m_cost = 64; // number of Argon2 blocks in the scratch area

    uint8_t *d_input = nullptr;
    uint8_t *d_output = nullptr;
    block *d_memory = nullptr;

    // Single cleanup path; pointers that were never allocated are still null.
    auto release_device_buffers = [&]() {
        cudaFree(d_memory);
        cudaFree(d_input);
        cudaFree(d_output);
    };

    cudaError_t err;

    // Allocate device buffers
    err = cudaMalloc(&d_input, input_len);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate input memory: %s\n", cudaGetErrorString(err));
        return;
    }

    err = cudaMalloc(&d_output, 32);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate output memory: %s\n", cudaGetErrorString(err));
        release_device_buffers();
        return;
    }

    // Allocate Argon2 memory once per hash
    err = cudaMalloc(&d_memory, m_cost * sizeof(block));
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate argon2 memory: %s\n", cudaGetErrorString(err));
        release_device_buffers();
        return;
    }

    // Copy input header
    err = cudaMemcpy(d_input, input, input_len, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to copy input to device: %s\n", cudaGetErrorString(err));
        release_device_buffers();
        return;
    }

    // Launch the kernel (single thread is fine for single hash)
    rinhash_cuda_kernel<<<1, 1>>>(d_input, input_len, d_output, d_memory);

    // A launch does not return a status itself; query it explicitly so a
    // rejected launch is reported as such instead of yielding a stale digest.
    err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to launch kernel: %s\n", cudaGetErrorString(err));
        release_device_buffers();
        return;
    }

    // Wait for the kernel; errors raised while it runs surface here.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error during kernel execution: %s\n", cudaGetErrorString(err));
        release_device_buffers();
        return;
    }

    // Copy result
    err = cudaMemcpy(output, d_output, 32, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to copy output from device: %s\n", cudaGetErrorString(err));
    }

    release_device_buffers();
}
|
||||
|
||||
// Serialises the six block-header fields into one contiguous byte buffer.
//
// Fields are copied verbatim (no byte-order conversion) in this order:
//   version (4) | prev_block (32) | merkle_root (32) |
//   timestamp (4) | bits (4) | nonce (4)
//
//   output      destination buffer; 80 bytes are written
//   output_len  receives the number of bytes written
extern "C" void blockheader_to_bytes(
    const uint32_t* version,
    const uint32_t* prev_block,
    const uint32_t* merkle_root,
    const uint32_t* timestamp,
    const uint32_t* bits,
    const uint32_t* nonce,
    uint8_t* output,
    size_t* output_len
) {
    uint8_t* cursor = output;

    // Copies `size` bytes of one field and advances the write position.
    const auto append = [&cursor](const void* field, size_t size) {
        memcpy(cursor, field, size);
        cursor += size;
    };

    append(version, 4);
    append(prev_block, 32);
    append(merkle_root, 32);
    append(timestamp, 4);
    append(bits, 4);
    append(nonce, 4);

    *output_len = static_cast<size_t>(cursor - output);
}
|
||||
|
||||
// Batch version for mining: hashes `num_blocks` headers one after another,
// reusing a single set of device buffers for the whole batch.
//
//   block_headers     host buffer with the headers stored back to back
//   block_header_len  size in bytes of each header
//   outputs           host buffer receiving 32 bytes per header, in order
//   num_blocks        number of headers in `block_headers`
//
// The first runtime error is logged to stderr and stops the batch; output
// slots from that header onwards are not written.
extern "C" void rinhash_cuda_batch(
    const uint8_t* block_headers,
    size_t block_header_len,
    uint8_t* outputs,
    uint32_t num_blocks
) {
    // Nothing to hash: skip the device allocations entirely.
    if (num_blocks == 0) {
        return;
    }

    // Argon2 parameters
    const uint32_t m_cost = 64;
    const size_t hash_len = 32;

    // Allocate reusable device buffers
    uint8_t *d_input = nullptr;
    uint8_t *d_output = nullptr;
    block *d_memory = nullptr;

    cudaError_t err;

    err = cudaMalloc(&d_input, block_header_len);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate header buffer: %s\n", cudaGetErrorString(err));
        return;
    }

    err = cudaMalloc(&d_output, hash_len);
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate output buffer: %s\n", cudaGetErrorString(err));
        cudaFree(d_input);
        return;
    }

    err = cudaMalloc(&d_memory, m_cost * sizeof(block));
    if (err != cudaSuccess) {
        fprintf(stderr, "HIP error: Failed to allocate argon2 memory: %s\n", cudaGetErrorString(err));
        cudaFree(d_input);
        cudaFree(d_output);
        return;
    }

    for (uint32_t i = 0; i < num_blocks; i++) {
        // Offsets are computed in size_t so they cannot wrap at 32 bits.
        const uint8_t* header = block_headers + static_cast<size_t>(i) * block_header_len;
        uint8_t* out = outputs + static_cast<size_t>(i) * hash_len;

        err = cudaMemcpy(d_input, header, block_header_len, cudaMemcpyHostToDevice);
        if (err != cudaSuccess) {
            fprintf(stderr, "HIP error: copy header %u failed: %s\n", i, cudaGetErrorString(err));
            break;
        }

        rinhash_cuda_kernel<<<1, 1>>>(d_input, block_header_len, d_output, d_memory);

        // A launch reports no status by itself; ask for it explicitly.
        err = cudaGetLastError();
        if (err != cudaSuccess) {
            fprintf(stderr, "HIP error: launch of kernel %u failed: %s\n", i, cudaGetErrorString(err));
            break;
        }

        err = cudaDeviceSynchronize();
        if (err != cudaSuccess) {
            fprintf(stderr, "HIP error in kernel %u: %s\n", i, cudaGetErrorString(err));
            break;
        }

        err = cudaMemcpy(out, d_output, hash_len, cudaMemcpyDeviceToHost);
        if (err != cudaSuccess) {
            fprintf(stderr, "HIP error: copy out %u failed: %s\n", i, cudaGetErrorString(err));
            break;
        }
    }

    cudaFree(d_memory);
    cudaFree(d_output);
    cudaFree(d_input);
}
|
||||
|
||||
// Public single-hash entry point: packs the header fields into a byte buffer
// with blockheader_to_bytes and hashes that buffer with rinhash_cuda.
//
//   version .. nonce  header fields, passed by pointer
//   output            forwarded to rinhash_cuda as the digest destination
extern "C" void RinHash(
    const uint32_t* version,
    const uint32_t* prev_block,
    const uint32_t* merkle_root,
    const uint32_t* timestamp,
    const uint32_t* bits,
    const uint32_t* nonce,
    uint8_t* output
) {
    uint8_t header_bytes[80];
    size_t header_size;

    // Serialise the fields; header_size receives the number of bytes written.
    blockheader_to_bytes(version, prev_block, merkle_root, timestamp, bits, nonce,
                         header_bytes, &header_size);

    rinhash_cuda(header_bytes, header_size, output);
}
|
||||
|
||||
// Mining helper: hashes the header for `num_nonces` consecutive nonces starting
// at `start_nonce` and reports the nonce whose hash is smallest when the 32
// hash bytes are compared lexicographically from index 0.
//
//   version .. bits  header fields shared by every candidate
//   start_nonce      first nonce to try
//   num_nonces       how many consecutive nonces to try
//   found_nonce      receives the nonce of the smallest hash
//   target_hash      not consulted by this function
//   best_hash        receives the smallest hash (32 bytes)
//
// When num_nonces is 0 there is nothing to compare, so the function returns
// without writing found_nonce or best_hash.
//
// NOTE(review): rinhash_cuda_batch returns no status. If it stops early, the
// affected entries of `hashes` keep their zero initialisation and will compare
// as the smallest hash — callers should confirm this cannot be mistaken for a
// real result.
extern "C" void RinHash_mine(
    const uint32_t* version,
    const uint32_t* prev_block,
    const uint32_t* merkle_root,
    const uint32_t* timestamp,
    const uint32_t* bits,
    uint32_t start_nonce,
    uint32_t num_nonces,
    uint32_t* found_nonce,
    uint8_t* target_hash,
    uint8_t* best_hash
) {
    (void)target_hash; // accepted for interface compatibility only

    // An empty range would otherwise read 32 bytes from an empty vector.
    if (num_nonces == 0) {
        return;
    }

    const size_t block_header_len = 80;
    const size_t hash_len = 32;
    std::vector<uint8_t> block_headers(block_header_len * num_nonces);
    std::vector<uint8_t> hashes(hash_len * num_nonces);

    // Build one serialised header per candidate nonce.
    for (uint32_t i = 0; i < num_nonces; i++) {
        uint32_t current_nonce = start_nonce + i;
        uint8_t* header = block_headers.data() + static_cast<size_t>(i) * block_header_len;
        size_t header_len = 0;

        blockheader_to_bytes(
            version,
            prev_block,
            merkle_root,
            timestamp,
            bits,
            &current_nonce,
            header,
            &header_len
        );
    }

    rinhash_cuda_batch(block_headers.data(), block_header_len, hashes.data(), num_nonces);

    // Start with the first candidate, then keep whichever hash is smaller.
    memcpy(best_hash, hashes.data(), hash_len);
    *found_nonce = start_nonce;

    for (uint32_t i = 1; i < num_nonces; i++) {
        const uint8_t* current_hash = hashes.data() + static_cast<size_t>(i) * hash_len;
        // memcmp orders by the first differing byte, compared as unsigned.
        if (memcmp(current_hash, best_hash, hash_len) < 0) {
            memcpy(best_hash, current_hash, hash_len);
            *found_nonce = start_nonce + i;
        }
    }
}
|
8
rin/miner/hip-output/rinhash_device.cuh
Normal file
8
rin/miner/hip-output/rinhash_device.cuh
Normal file
@@ -0,0 +1,8 @@
|
||||
#ifndef RINHASH_DEVICE_CUH
|
||||
#define RINHASH_DEVICE_CUH
|
||||
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#endif // RINHASH_DEVICE_CUH
|
140
rin/miner/hip-output/sha3-256.hip.cu
Normal file
140
rin/miner/hip-output/sha3-256.hip.cu
Normal file
@@ -0,0 +1,140 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#define KECCAKF_ROUNDS 24
|
||||
|
||||
|
||||
// Rotates the 64-bit value `x` left by `n` bit positions.
// The count is reduced modulo 64, so n == 0 (or any multiple of 64) returns x
// unchanged instead of evaluating an out-of-range 64-bit shift.
__device__ inline uint64_t rotate(uint64_t x, int n) {
    const unsigned s = (unsigned)n & 63u;
    return (x << s) | (x >> ((64u - s) & 63u));
}
|
||||
|
||||
// Left-rotation of a 64-bit lane by `n` bits, used by the Keccak-f[1600]
// permutation below. The count is reduced modulo 64, so n == 0 returns x
// unchanged instead of evaluating an out-of-range 64-bit shift.
__device__ inline uint64_t ROTL64(uint64_t x, int n) {
    const unsigned s = (unsigned)n & 63u;
    return (x << s) | (x >> ((64u - s) & 63u));
}
|
||||
|
||||
// Keccak-f[1600]: applies the 24-round permutation in place to the 25-lane
// state `st`. Each round runs Theta, Rho+Pi, Chi and Iota in that order.
__device__ void keccakf(uint64_t st[25]) {
    // Rotation amount applied at each step of the Rho/Pi lane walk.
    const int rho_offsets[24] = {
        1,  3,  6,  10, 15, 21,
        28, 36, 45, 55, 2,  14,
        27, 41, 56, 8,  25, 43,
        62, 18, 39, 61, 20, 44
    };

    // Destination lane index for each step of the Rho/Pi lane walk.
    const int pi_lanes[24] = {
        10, 7,  11, 17, 18, 3,
        5,  16, 8,  21, 24, 4,
        15, 23, 19, 13, 12, 2,
        20, 14, 22, 9,  6,  1
    };

    // Constant XORed into lane 0 at the end of each round (Iota).
    const uint64_t round_constants[24] = {
        0x0000000000000001ULL, 0x0000000000008082ULL,
        0x800000000000808aULL, 0x8000000080008000ULL,
        0x000000000000808bULL, 0x0000000080000001ULL,
        0x8000000080008081ULL, 0x8000000000008009ULL,
        0x000000000000008aULL, 0x0000000000000088ULL,
        0x0000000080008009ULL, 0x000000008000000aULL,
        0x000000008000808bULL, 0x800000000000008bULL,
        0x8000000000008089ULL, 0x8000000000008003ULL,
        0x8000000000008002ULL, 0x8000000000000080ULL,
        0x000000000000800aULL, 0x800000008000000aULL,
        0x8000000080008081ULL, 0x8000000000008080ULL,
        0x0000000080000001ULL, 0x8000000080008008ULL
    };

    for (int r = 0; r < 24; ++r) {
        // Theta: XOR every lane with a mix of two neighbouring column parities.
        uint64_t parity[5];
        for (int x = 0; x < 5; ++x) {
            parity[x] = st[x] ^ st[x + 5] ^ st[x + 10] ^ st[x + 15] ^ st[x + 20];
        }
        for (int x = 0; x < 5; ++x) {
            const uint64_t next = parity[(x + 1) % 5];
            const uint64_t mix = parity[(x + 4) % 5] ^ ((next << 1) | (next >> 63));
            for (int row = 0; row < 25; row += 5) {
                st[row + x] ^= mix;
            }
        }

        // Rho and Pi: walk the lanes starting from lane 1; each visited lane
        // receives the rotated value carried over from the previous one.
        uint64_t carried = st[1];
        for (int k = 0; k < 24; ++k) {
            const int dest = pi_lanes[k];
            const int shift = rho_offsets[k]; // always in 1..62, never 0
            const uint64_t displaced = st[dest];
            st[dest] = (carried << shift) | (carried >> (64 - shift));
            carried = displaced;
        }

        // Chi: non-linear mix within each row of five lanes.
        for (int row = 0; row < 25; row += 5) {
            uint64_t lane[5];
            for (int x = 0; x < 5; ++x) {
                lane[x] = st[row + x];
            }
            for (int x = 0; x < 5; ++x) {
                st[row + x] ^= (~lane[(x + 1) % 5]) & lane[(x + 2) % 5];
            }
        }

        // Iota
        st[0] ^= round_constants[r];
    }
}
|
||||
|
||||
|
||||
// Reads a 64-bit value stored in little-endian order from the 8 bytes at `src`
// (src[0] is the least significant byte).
__device__ inline uint64_t load64_le(const uint8_t *src) {
    uint64_t value = 0;
    // Fold from the most significant byte down so each step is shift-then-OR.
    #pragma unroll
    for (int k = 7; k >= 0; --k) {
        value = (value << 8) | (uint64_t)src[k];
    }
    return value;
}
|
||||
|
||||
// Writes the 64-bit value `x` to the 8 bytes at `dst` in little-endian order
// (dst[0] receives the least significant byte).
__device__ inline void store64_le(uint8_t *dst, uint64_t x) {
    uint64_t rest = x;
    #pragma unroll
    for (int k = 0; k < 8; ++k) {
        dst[k] = (uint8_t)(rest & 0xFF);
        rest >>= 8;
    }
}
|
||||
|
||||
/*
  __device__ function sha3_256_device

  - Absorbs the `inlen` bytes at `input`, applies the SHA3-256 padding and the
    Keccak-f[1600] permutation.
  - Writes the first 32 bytes (4 lanes) of the final state to `hash_out` in
    little-endian order.
  - The SHA3-256 rate (absorbing block size) is 136 bytes.

  Any input length is accepted. The trailing partial block is absorbed byte by
  byte and the padding is placed right after the last input byte, so the
  function no longer assumes that exactly 32 bytes remain after the full
  blocks.
*/
__device__ void sha3_256_device(const uint8_t *input, size_t inlen, uint8_t *hash_out) {
    const size_t rate = 136;   // SHA3-256 absorbing block size in bytes
    uint64_t st[25] = {0};     // sponge state: 25 lanes = 1600 bits

    // Absorb every complete rate-sized block.
    while (inlen >= rate) {
        for (size_t i = 0; i < rate / 8; i++) {
            st[i] ^= load64_le(input + i * 8);
        }
        keccakf(st);
        input += rate;
        inlen -= rate;
    }

    // Absorb the final partial block (0 .. rate-1 bytes): whole lanes first,
    // then the remaining bytes one at a time.
    const size_t full_lanes = inlen / 8;
    for (size_t i = 0; i < full_lanes; i++) {
        st[i] ^= load64_le(input + i * 8);
    }
    for (size_t i = full_lanes * 8; i < inlen; i++) {
        st[i / 8] ^= (uint64_t)input[i] << (8 * (i % 8));
    }

    // Padding: 0x06 right after the message, 0x80 on the last byte of the
    // block. Expressed as lane shifts so it does not depend on how the lanes
    // are laid out in memory. When inlen == rate - 1 both land on one byte.
    st[inlen / 8] ^= (uint64_t)0x06 << (8 * (inlen % 8));
    st[(rate - 1) / 8] ^= (uint64_t)0x80 << (8 * ((rate - 1) % 8));
    keccakf(st);

    // Squeeze: emit the first 32 bytes of the state.
    for (int i = 0; i < 4; i++) {
        store64_le(hash_out + i * 8, st[i]);
    }
}
|
Reference in New Issue
Block a user