new backtesting feature
This commit is contained in:
@@ -1789,35 +1789,25 @@ class RealTrainingAdapter:
|
||||
|
||||
import torch
|
||||
|
||||
# OPTIMIZATION: Pre-convert batches ONCE and move to GPU immediately
|
||||
# This avoids CPU→GPU transfer bottleneck during training
|
||||
logger.info(" Pre-converting batches and moving to GPU (one-time operation)...")
|
||||
# OPTIMIZATION: Pre-convert batches ONCE
|
||||
# NOTE: Using CPU for batch storage to avoid ROCm/HIP kernel issues
|
||||
# GPU will be used during forward/backward passes in trainer
|
||||
logger.info(" Pre-converting batches (one-time operation)...")
|
||||
|
||||
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
device = torch.device('cpu') # Store batches on CPU
|
||||
use_gpu = torch.cuda.is_available()
|
||||
|
||||
if use_gpu:
|
||||
logger.info(f" GPU: {torch.cuda.get_device_name(0)}")
|
||||
logger.info(f" GPU available: {torch.cuda.get_device_name(0)}")
|
||||
logger.info(f" GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
|
||||
logger.info(f" Batches will be stored on CPU, moved to GPU during training")
|
||||
|
||||
cached_batches = []
|
||||
for i, data in enumerate(training_data):
|
||||
batch = self._convert_annotation_to_transformer_batch(data)
|
||||
if batch is not None:
|
||||
# OPTIMIZATION: Move batch to GPU immediately with pinned memory
|
||||
if use_gpu:
|
||||
batch_gpu = {}
|
||||
for k, v in batch.items():
|
||||
if isinstance(v, torch.Tensor):
|
||||
# Use pin_memory() for faster CPU→GPU transfer
|
||||
# Then move to GPU with non_blocking=True
|
||||
batch_gpu[k] = v.pin_memory().to(device, non_blocking=True)
|
||||
else:
|
||||
batch_gpu[k] = v
|
||||
cached_batches.append(batch_gpu)
|
||||
del batch # Free CPU memory immediately
|
||||
else:
|
||||
cached_batches.append(batch)
|
||||
# Store batches on CPU (trainer will move to GPU)
|
||||
cached_batches.append(batch)
|
||||
|
||||
# Show progress every 10 batches
|
||||
if (i + 1) % 10 == 0 or i == 0:
|
||||
@@ -1825,11 +1815,6 @@ class RealTrainingAdapter:
|
||||
else:
|
||||
logger.warning(f" Failed to convert sample {i+1}")
|
||||
|
||||
# Synchronize GPU operations
|
||||
if use_gpu:
|
||||
torch.cuda.synchronize()
|
||||
logger.info(f" All {len(cached_batches)} batches now on GPU")
|
||||
|
||||
# Clear training_data to free memory
|
||||
training_data.clear()
|
||||
del training_data
|
||||
|
||||
Reference in New Issue
Block a user