This commit is contained in:
Dobromir Popov
2025-09-06 20:55:10 +03:00
parent b475590b61
commit 00cda24e71
5 changed files with 78 additions and 37 deletions

Submodule rin/miner/cpuminer/cpuminer-opt-rin deleted from 65c11e57f8

View File

@@ -162,48 +162,65 @@ extern "C" void rinhash_hip(const uint8_t* input, size_t input_len, uint8_t* out
// Memory is kept allocated for reuse - NO hipFree() calls here! // Memory is kept allocated for reuse - NO hipFree() calls here!
} }
// GPU batch processing - the KEY to real GPU performance! // PERSISTENT GPU MEMORY - Allocate once, reuse forever! (MASSIVE PERFORMANCE BOOST)
// This processes 1024 different nonces simultaneously (like 1024 CPU threads) static uint8_t *d_input_persistent = nullptr;
static uint8_t *d_output_persistent = nullptr;
static block *d_memory_persistent = nullptr;
static uint32_t persistent_max_batch = 0;
static bool persistent_memory_initialized = false;
// HIGH-PERFORMANCE batch processing with PERSISTENT memory reuse
extern "C" void rinhash_hip_batch(const uint8_t* input_template, size_t input_len, uint8_t* output_batch, uint32_t start_nonce, uint32_t batch_size) { extern "C" void rinhash_hip_batch(const uint8_t* input_template, size_t input_len, uint8_t* output_batch, uint32_t start_nonce, uint32_t batch_size) {
// Ensure we have enough memory for batch processing
const uint32_t max_batch = 1024;
if (batch_size > max_batch) batch_size = max_batch;
// Initialize memory for batch size
static uint8_t *d_input_batch = nullptr;
static uint8_t *d_output_batch = nullptr;
static block *d_memory_batch = nullptr;
static bool batch_memory_initialized = false;
if (!batch_memory_initialized) {
hipError_t err; hipError_t err;
// Allocate batch input buffer (1024 × 80 bytes) // SMART MEMORY MANAGEMENT: Only reallocate if we need MORE memory
err = hipMalloc(&d_input_batch, max_batch * 80); if (!persistent_memory_initialized || batch_size > persistent_max_batch) {
// Free old memory if we're expanding
if (persistent_memory_initialized) {
printf("RinHashGPU: Expanding memory from %u to %u nonces\n", persistent_max_batch, batch_size);
hipFree(d_input_persistent);
hipFree(d_output_persistent);
hipFree(d_memory_persistent);
}
// Allocate with some HEADROOM for future batches (reduce reallocations)
persistent_max_batch = batch_size * 2; // 2x headroom for growth
const size_t input_size = persistent_max_batch * 80;
const size_t output_size = persistent_max_batch * 32;
const size_t memory_size = persistent_max_batch * 64 * sizeof(block);
printf("RinHashGPU: PERSISTENT ALLOCATION: %zu MB input + %zu MB output + %zu MB Argon2 = %zu MB total (capacity: %u nonces)\n",
input_size / (1024*1024), output_size / (1024*1024), memory_size / (1024*1024),
(input_size + output_size + memory_size) / (1024*1024), persistent_max_batch);
// Allocate PERSISTENT buffers with headroom
err = hipMalloc(&d_input_persistent, input_size);
if (err != hipSuccess) { if (err != hipSuccess) {
fprintf(stderr, "HIP error: Failed to allocate batch input: %s\n", hipGetErrorString(err)); fprintf(stderr, "HIP error: Failed to allocate persistent input (%zu MB): %s\n", input_size / (1024*1024), hipGetErrorString(err));
persistent_memory_initialized = false;
return; return;
} }
// Allocate batch output buffer (1024 × 32 bytes) err = hipMalloc(&d_output_persistent, output_size);
err = hipMalloc(&d_output_batch, max_batch * 32);
if (err != hipSuccess) { if (err != hipSuccess) {
fprintf(stderr, "HIP error: Failed to allocate batch output: %s\n", hipGetErrorString(err)); fprintf(stderr, "HIP error: Failed to allocate persistent output (%zu MB): %s\n", output_size / (1024*1024), hipGetErrorString(err));
hipFree(d_input_batch); hipFree(d_input_persistent);
persistent_memory_initialized = false;
return; return;
} }
// Allocate batch Argon2 memory (1024 × 64 blocks) err = hipMalloc(&d_memory_persistent, memory_size);
err = hipMalloc(&d_memory_batch, max_batch * 64 * sizeof(block));
if (err != hipSuccess) { if (err != hipSuccess) {
fprintf(stderr, "HIP error: Failed to allocate batch memory: %s\n", hipGetErrorString(err)); fprintf(stderr, "HIP error: Failed to allocate persistent Argon2 memory (%zu MB): %s\n", memory_size / (1024*1024), hipGetErrorString(err));
hipFree(d_input_batch); hipFree(d_input_persistent);
hipFree(d_output_batch); hipFree(d_output_persistent);
persistent_memory_initialized = false;
return; return;
} }
batch_memory_initialized = true; persistent_memory_initialized = true;
printf("RinHashGPU: Batch memory initialized for %d concurrent hashes\n", max_batch); printf("RinHashGPU: PERSISTENT MEMORY initialized - NO MORE ALLOCATIONS until expansion needed!\n");
} }
// Prepare batch input data on host // Prepare batch input data on host
@@ -215,19 +232,26 @@ extern "C" void rinhash_hip_batch(const uint8_t* input_template, size_t input_le
memcpy(&host_batch[i * 80 + 76], &nonce, 4); memcpy(&host_batch[i * 80 + 76], &nonce, 4);
} }
// Copy batch input to GPU // ULTRA-FAST memory transfer using persistent buffers (NO ALLOCATION OVERHEAD)
hipError_t err = hipMemcpy(d_input_batch, host_batch, batch_size * 80, hipMemcpyHostToDevice); err = hipMemcpyAsync(d_input_persistent, host_batch, batch_size * 80, hipMemcpyHostToDevice, 0);
if (err != hipSuccess) { if (err != hipSuccess) {
fprintf(stderr, "HIP error: Failed to copy batch input: %s\n", hipGetErrorString(err)); fprintf(stderr, "HIP error: Failed to copy batch input: %s\n", hipGetErrorString(err));
free(host_batch); free(host_batch);
return; return;
} }
// Launch batch kernel - NOW EACH THREAD PROCESSES ONE NONCE! // Launch DYNAMIC INDEPENDENT MINING kernel - Each thread = independent miner!
dim3 blocks((batch_size + 255) / 256); // Enough blocks for all threads const uint32_t miners_per_block = 1024; // 1024 independent miners per block
dim3 threads_per_block(256); const uint32_t total_blocks = (batch_size + miners_per_block - 1) / miners_per_block;
dim3 blocks(total_blocks);
dim3 threads_per_block(miners_per_block);
printf("RinHashGPU: Launching %u blocks × %u threads = %u independent miners processing %u nonces\n",
total_blocks, miners_per_block, total_blocks * miners_per_block, batch_size);
rinhash_hip_kernel_batch<<<blocks, threads_per_block>>>( rinhash_hip_kernel_batch<<<blocks, threads_per_block>>>(
d_input_batch, input_len, d_output_batch, d_memory_batch, start_nonce, batch_size d_input_persistent, input_len, d_output_persistent, d_memory_persistent, start_nonce, batch_size
); );
// Wait for completion // Wait for completion
@@ -238,17 +262,22 @@ extern "C" void rinhash_hip_batch(const uint8_t* input_template, size_t input_le
return; return;
} }
// Copy results back to host // BLAZING-FAST result transfer using persistent output buffer
err = hipMemcpy(output_batch, d_output_batch, batch_size * 32, hipMemcpyDeviceToHost); err = hipMemcpyAsync(output_batch, d_output_persistent, batch_size * 32, hipMemcpyDeviceToHost, 0);
if (err != hipSuccess) { if (err != hipSuccess) {
fprintf(stderr, "HIP error: Failed to copy batch output: %s\n", hipGetErrorString(err)); fprintf(stderr, "HIP error: Failed to copy batch output: %s\n", hipGetErrorString(err));
} }
// Synchronize for completion (no GPU memory cleanup - PERSISTENT REUSE!)
hipDeviceSynchronize();
// Only free HOST memory (GPU memory stays allocated for maximum performance)
free(host_batch); free(host_batch);
} }
// Cleanup function to free GPU memory cache when miner shuts down // Cleanup function to free GPU memory when miner shuts down
extern "C" void rinhash_hip_cleanup() { extern "C" void rinhash_hip_cleanup() {
// Clean up old cache system
if (gpu_memory_initialized) { if (gpu_memory_initialized) {
hipFree(d_input_cache); hipFree(d_input_cache);
hipFree(d_output_cache); hipFree(d_output_cache);
@@ -259,6 +288,19 @@ extern "C" void rinhash_hip_cleanup() {
gpu_memory_initialized = false; gpu_memory_initialized = false;
cached_input_size = 0; cached_input_size = 0;
} }
// Clean up new persistent system
if (persistent_memory_initialized) {
printf("RinHashGPU: Cleaning up persistent memory on shutdown\n");
hipFree(d_input_persistent);
hipFree(d_output_persistent);
hipFree(d_memory_persistent);
d_input_persistent = nullptr;
d_output_persistent = nullptr;
d_memory_persistent = nullptr;
persistent_memory_initialized = false;
persistent_max_batch = 0;
}
} }
// Helper function to convert a block header to bytes // Helper function to convert a block header to bytes