wip
This commit is contained in:
Submodule rin/miner/cpuminer/cpuminer-opt-rin deleted from 65c11e57f8
Binary file not shown.
Binary file not shown.
@@ -162,48 +162,65 @@ extern "C" void rinhash_hip(const uint8_t* input, size_t input_len, uint8_t* out
|
||||
// Memory is kept allocated for reuse - NO hipFree() calls here!
|
||||
}
|
||||
|
||||
// GPU batch processing - the KEY to real GPU performance!
|
||||
// This processes 1024 different nonces simultaneously (like 1024 CPU threads)
|
||||
// PERSISTENT GPU MEMORY - Allocate once, reuse forever! (MASSIVE PERFORMANCE BOOST)
|
||||
static uint8_t *d_input_persistent = nullptr;
|
||||
static uint8_t *d_output_persistent = nullptr;
|
||||
static block *d_memory_persistent = nullptr;
|
||||
static uint32_t persistent_max_batch = 0;
|
||||
static bool persistent_memory_initialized = false;
|
||||
|
||||
// HIGH-PERFORMANCE batch processing with PERSISTENT memory reuse
|
||||
extern "C" void rinhash_hip_batch(const uint8_t* input_template, size_t input_len, uint8_t* output_batch, uint32_t start_nonce, uint32_t batch_size) {
|
||||
// Ensure we have enough memory for batch processing
|
||||
const uint32_t max_batch = 1024;
|
||||
if (batch_size > max_batch) batch_size = max_batch;
|
||||
hipError_t err;
|
||||
|
||||
// Initialize memory for batch size
|
||||
static uint8_t *d_input_batch = nullptr;
|
||||
static uint8_t *d_output_batch = nullptr;
|
||||
static block *d_memory_batch = nullptr;
|
||||
static bool batch_memory_initialized = false;
|
||||
|
||||
if (!batch_memory_initialized) {
|
||||
hipError_t err;
|
||||
// SMART MEMORY MANAGEMENT: Only reallocate if we need MORE memory
|
||||
if (!persistent_memory_initialized || batch_size > persistent_max_batch) {
|
||||
// Free old memory if we're expanding
|
||||
if (persistent_memory_initialized) {
|
||||
printf("RinHashGPU: Expanding memory from %u to %u nonces\n", persistent_max_batch, batch_size);
|
||||
hipFree(d_input_persistent);
|
||||
hipFree(d_output_persistent);
|
||||
hipFree(d_memory_persistent);
|
||||
}
|
||||
|
||||
// Allocate batch input buffer (1024 × 80 bytes)
|
||||
err = hipMalloc(&d_input_batch, max_batch * 80);
|
||||
// Allocate with some HEADROOM for future batches (reduce reallocations)
|
||||
persistent_max_batch = batch_size * 2; // 2x headroom for growth
|
||||
|
||||
const size_t input_size = persistent_max_batch * 80;
|
||||
const size_t output_size = persistent_max_batch * 32;
|
||||
const size_t memory_size = persistent_max_batch * 64 * sizeof(block);
|
||||
|
||||
printf("RinHashGPU: PERSISTENT ALLOCATION: %zu MB input + %zu MB output + %zu MB Argon2 = %zu MB total (capacity: %u nonces)\n",
|
||||
input_size / (1024*1024), output_size / (1024*1024), memory_size / (1024*1024),
|
||||
(input_size + output_size + memory_size) / (1024*1024), persistent_max_batch);
|
||||
|
||||
// Allocate PERSISTENT buffers with headroom
|
||||
err = hipMalloc(&d_input_persistent, input_size);
|
||||
if (err != hipSuccess) {
|
||||
fprintf(stderr, "HIP error: Failed to allocate batch input: %s\n", hipGetErrorString(err));
|
||||
fprintf(stderr, "HIP error: Failed to allocate persistent input (%zu MB): %s\n", input_size / (1024*1024), hipGetErrorString(err));
|
||||
persistent_memory_initialized = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// Allocate batch output buffer (1024 × 32 bytes)
|
||||
err = hipMalloc(&d_output_batch, max_batch * 32);
|
||||
err = hipMalloc(&d_output_persistent, output_size);
|
||||
if (err != hipSuccess) {
|
||||
fprintf(stderr, "HIP error: Failed to allocate batch output: %s\n", hipGetErrorString(err));
|
||||
hipFree(d_input_batch);
|
||||
fprintf(stderr, "HIP error: Failed to allocate persistent output (%zu MB): %s\n", output_size / (1024*1024), hipGetErrorString(err));
|
||||
hipFree(d_input_persistent);
|
||||
persistent_memory_initialized = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// Allocate batch Argon2 memory (1024 × 64 blocks)
|
||||
err = hipMalloc(&d_memory_batch, max_batch * 64 * sizeof(block));
|
||||
err = hipMalloc(&d_memory_persistent, memory_size);
|
||||
if (err != hipSuccess) {
|
||||
fprintf(stderr, "HIP error: Failed to allocate batch memory: %s\n", hipGetErrorString(err));
|
||||
hipFree(d_input_batch);
|
||||
hipFree(d_output_batch);
|
||||
fprintf(stderr, "HIP error: Failed to allocate persistent Argon2 memory (%zu MB): %s\n", memory_size / (1024*1024), hipGetErrorString(err));
|
||||
hipFree(d_input_persistent);
|
||||
hipFree(d_output_persistent);
|
||||
persistent_memory_initialized = false;
|
||||
return;
|
||||
}
|
||||
|
||||
batch_memory_initialized = true;
|
||||
printf("RinHashGPU: Batch memory initialized for %d concurrent hashes\n", max_batch);
|
||||
persistent_memory_initialized = true;
|
||||
printf("RinHashGPU: PERSISTENT MEMORY initialized - NO MORE ALLOCATIONS until expansion needed!\n");
|
||||
}
|
||||
|
||||
// Prepare batch input data on host
|
||||
@@ -215,19 +232,26 @@ extern "C" void rinhash_hip_batch(const uint8_t* input_template, size_t input_le
|
||||
memcpy(&host_batch[i * 80 + 76], &nonce, 4);
|
||||
}
|
||||
|
||||
// Copy batch input to GPU
|
||||
hipError_t err = hipMemcpy(d_input_batch, host_batch, batch_size * 80, hipMemcpyHostToDevice);
|
||||
// ULTRA-FAST memory transfer using persistent buffers (NO ALLOCATION OVERHEAD)
|
||||
err = hipMemcpyAsync(d_input_persistent, host_batch, batch_size * 80, hipMemcpyHostToDevice, 0);
|
||||
if (err != hipSuccess) {
|
||||
fprintf(stderr, "HIP error: Failed to copy batch input: %s\n", hipGetErrorString(err));
|
||||
free(host_batch);
|
||||
return;
|
||||
}
|
||||
|
||||
// Launch batch kernel - NOW EACH THREAD PROCESSES ONE NONCE!
|
||||
dim3 blocks((batch_size + 255) / 256); // Enough blocks for all threads
|
||||
dim3 threads_per_block(256);
|
||||
// Launch DYNAMIC INDEPENDENT MINING kernel - Each thread = independent miner!
|
||||
const uint32_t miners_per_block = 1024; // 1024 independent miners per block
|
||||
const uint32_t total_blocks = (batch_size + miners_per_block - 1) / miners_per_block;
|
||||
|
||||
dim3 blocks(total_blocks);
|
||||
dim3 threads_per_block(miners_per_block);
|
||||
|
||||
printf("RinHashGPU: Launching %u blocks × %u threads = %u independent miners processing %u nonces\n",
|
||||
total_blocks, miners_per_block, total_blocks * miners_per_block, batch_size);
|
||||
|
||||
rinhash_hip_kernel_batch<<<blocks, threads_per_block>>>(
|
||||
d_input_batch, input_len, d_output_batch, d_memory_batch, start_nonce, batch_size
|
||||
d_input_persistent, input_len, d_output_persistent, d_memory_persistent, start_nonce, batch_size
|
||||
);
|
||||
|
||||
// Wait for completion
|
||||
@@ -238,17 +262,22 @@ extern "C" void rinhash_hip_batch(const uint8_t* input_template, size_t input_le
|
||||
return;
|
||||
}
|
||||
|
||||
// Copy results back to host
|
||||
err = hipMemcpy(output_batch, d_output_batch, batch_size * 32, hipMemcpyDeviceToHost);
|
||||
// BLAZING-FAST result transfer using persistent output buffer
|
||||
err = hipMemcpyAsync(output_batch, d_output_persistent, batch_size * 32, hipMemcpyDeviceToHost, 0);
|
||||
if (err != hipSuccess) {
|
||||
fprintf(stderr, "HIP error: Failed to copy batch output: %s\n", hipGetErrorString(err));
|
||||
}
|
||||
|
||||
// Synchronize for completion (no GPU memory cleanup - PERSISTENT REUSE!)
|
||||
hipDeviceSynchronize();
|
||||
|
||||
// Only free HOST memory (GPU memory stays allocated for maximum performance)
|
||||
free(host_batch);
|
||||
}
|
||||
|
||||
// Cleanup function to free GPU memory cache when miner shuts down
|
||||
// Cleanup function to free GPU memory when miner shuts down
|
||||
extern "C" void rinhash_hip_cleanup() {
|
||||
// Clean up old cache system
|
||||
if (gpu_memory_initialized) {
|
||||
hipFree(d_input_cache);
|
||||
hipFree(d_output_cache);
|
||||
@@ -259,6 +288,19 @@ extern "C" void rinhash_hip_cleanup() {
|
||||
gpu_memory_initialized = false;
|
||||
cached_input_size = 0;
|
||||
}
|
||||
|
||||
// Clean up new persistent system
|
||||
if (persistent_memory_initialized) {
|
||||
printf("RinHashGPU: Cleaning up persistent memory on shutdown\n");
|
||||
hipFree(d_input_persistent);
|
||||
hipFree(d_output_persistent);
|
||||
hipFree(d_memory_persistent);
|
||||
d_input_persistent = nullptr;
|
||||
d_output_persistent = nullptr;
|
||||
d_memory_persistent = nullptr;
|
||||
persistent_memory_initialized = false;
|
||||
persistent_max_batch = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to convert a block header to bytes
|
||||
|
Binary file not shown.
Reference in New Issue
Block a user