training progress

Dobromir Popov
2025-11-10 20:01:07 +02:00
parent a2d34c6d7c
commit 999dea9eb0
2 changed files with 116 additions and 38 deletions

@@ -337,8 +337,8 @@ class RealTrainingAdapter:
# Get training config
training_config = test_case.get('training_config', {})
timeframes = training_config.get('timeframes', ['1s', '1m', '1h', '1d'])
# Reduce sequence length to avoid OOM - 200 candles is more reasonable
# With 5 timeframes, this gives 1000 total positions vs 3000 with 600 candles
# RESTORED: 200 candles per timeframe (memory leak fixed)
# With 5 timeframes * 200 candles = 1000 total positions
candles_per_timeframe = training_config.get('candles_per_timeframe', 200) # 200 candles per batch
# Determine secondary symbol based on primary symbol
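For scale, the restored setting works out as follows. This is a sketch, not code from this commit; the five-stream count is an assumption inferred from the "5 timeframes" comments (four primary timeframes plus one secondary-symbol stream):

    # Back-of-the-envelope sizing (illustration only).
    candles_per_timeframe = 200
    num_streams = 5  # assumption: 4 primary timeframes + 1 secondary-symbol stream
    total_positions = num_streams * candles_per_timeframe  # 1000, versus 3000 at 600 candles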
@@ -586,20 +586,24 @@ class RealTrainingAdapter:
logger.info(f" Test case {i+1}: Added {len(hold_samples)} HOLD samples (during position)")
# Create EXIT sample (where model SHOULD exit trade)
exit_timestamp = test_case.get('annotation_metadata', {}).get('exit_timestamp')
if exit_timestamp:
# Exit info is in expected_outcome, not annotation_metadata
exit_price = expected_outcome.get('exit_price')
if exit_price:
# For now, use same market state (TODO: fetch market state at exit time)
# The model will learn to exit based on profit_loss_pct and position state
exit_sample = {
'market_state': market_state, # TODO: Get market state at exit time
'market_state': market_state, # Using entry market state as proxy
'action': 'CLOSE',
'direction': expected_outcome.get('direction'),
'profit_loss_pct': expected_outcome.get('profit_loss_pct'),
'entry_price': expected_outcome.get('entry_price'),
'exit_price': expected_outcome.get('exit_price'),
'timestamp': exit_timestamp,
'label': 'EXIT' # Exit signal
'exit_price': exit_price,
'timestamp': test_case.get('timestamp'), # Entry timestamp (exit time not stored separately)
'label': 'EXIT', # Exit signal
'in_position': True # Model is in position when deciding to exit
}
training_data.append(exit_sample)
logger.info(f" Test case {i+1}: EXIT sample @ {exit_sample['exit_price']} ({exit_sample['profit_loss_pct']:.2f}%)")
logger.info(f" Test case {i+1}: EXIT sample @ {exit_price} ({expected_outcome.get('profit_loss_pct', 0):.2f}%)")
# Create NEGATIVE samples (where model should NOT trade)
# These are candles before and after the signal (±15 candles)
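Because the diff interleaves old and new lines, the new EXIT-sample path is easier to read consolidated. A minimal sketch with illustrative placeholder values (not real data, and not the adapter's exact surrounding code):

    # Consolidated reading of the new EXIT-sample construction (illustrative values).
    expected_outcome = {'direction': 'LONG', 'profit_loss_pct': 1.25,
                        'entry_price': 2500.0, 'exit_price': 2531.25}
    test_case = {'timestamp': '2025-11-10T18:00:00Z'}
    market_state = {}   # entry-time market state, reused as a proxy for exit time
    training_data = []

    exit_price = expected_outcome.get('exit_price')
    if exit_price:
        exit_sample = {
            'market_state': market_state,
            'action': 'CLOSE',
            'direction': expected_outcome.get('direction'),
            'profit_loss_pct': expected_outcome.get('profit_loss_pct'),
            'entry_price': expected_outcome.get('entry_price'),
            'exit_price': exit_price,
            'timestamp': test_case.get('timestamp'),  # entry timestamp; exit time not stored separately
            'label': 'EXIT',
            'in_position': True,  # the model is holding a position when deciding to exit
        }
        training_data.append(exit_sample)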
@@ -1186,12 +1190,13 @@ class RealTrainingAdapter:
timeframes = market_state.get('timeframes', {})
secondary_timeframes = market_state.get('secondary_timeframes', {})
# Target sequence length - use actual data length (typically 200 candles)
# Find the first available timeframe to determine sequence length
target_seq_len = 200 # Default
# Target sequence length - RESTORED to 200 (memory leak fixed)
# With 5 timeframes * 200 candles = 1000 sequence positions
# Memory management fixes allow full sequence length
target_seq_len = 200 # Restored to original
for tf_data in timeframes.values():
if tf_data and 'close' in tf_data and len(tf_data['close']) > 0:
target_seq_len = min(len(tf_data['close']), 200) # Cap at 200 to avoid OOM
target_seq_len = min(len(tf_data['close']), 200) # Cap at 200
break
# Extract each timeframe (returns None if not available)
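Consolidated, the new sequence-length logic reads roughly as below (sketch with illustrative data shapes; `timeframes` is assumed to be a dict of OHLCV arrays as in the adapter):

    # The first available timeframe sets the length, capped at the restored 200.
    timeframes = {'1m': {'close': [0.0] * 180}, '1h': {'close': [0.0] * 200}}

    target_seq_len = 200  # restored default now that the memory leak is fixed
    for tf_data in timeframes.values():
        if tf_data and 'close' in tf_data and len(tf_data['close']) > 0:
            target_seq_len = min(len(tf_data['close']), 200)  # cap at 200
            break
    # target_seq_len == 180 here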
@@ -1409,12 +1414,14 @@ class RealTrainingAdapter:
# For HOLD samples, expect no price change
future_price_ratio = 0.0
future_prices = torch.tensor([future_price_ratio], dtype=torch.float32)
# FIXED: Shape must be [batch, 1] to match price_head output
future_prices = torch.tensor([[future_price_ratio]], dtype=torch.float32) # [1, 1]
# Trade success (1.0 if profitable, 0.0 otherwise)
# Shape must be [batch_size, 1] to match confidence head output
# Shape must be [batch_size, 1] to match confidence head output [batch, 1]
profit_loss_pct = training_sample.get('profit_loss_pct', 0.0)
trade_success = torch.tensor([[1.0 if profit_loss_pct > 0 else 0.0]], dtype=torch.float32)
# FIXED: Ensure shape is [1, 1] not [1] to match BCELoss requirements
trade_success = torch.tensor([[1.0 if profit_loss_pct > 0 else 0.0]], dtype=torch.float32) # [1, 1]
# Return batch dictionary with ALL timeframes
batch = {
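The [1, 1] shape fixes matter because nn.BCELoss requires the target to have exactly the same size as the prediction, and the confidence head emits [batch, 1]. A standalone sketch (not from the commit; the random tensor stands in for the model's confidence output):

    import torch
    import torch.nn as nn

    profit_loss_pct = 1.7
    future_price_ratio = 0.004

    # Targets shaped [1, 1] to match the heads' [batch, 1] outputs.
    future_prices = torch.tensor([[future_price_ratio]], dtype=torch.float32)
    trade_success = torch.tensor([[1.0 if profit_loss_pct > 0 else 0.0]], dtype=torch.float32)

    confidence_pred = torch.sigmoid(torch.randn(1, 1))       # stand-in for the confidence head
    loss = nn.BCELoss()(confidence_pred, trade_success)      # works: both sides are [1, 1]
    # nn.BCELoss()(confidence_pred, trade_success.squeeze(1))  # a [1] target raises a size-mismatch error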
@@ -1580,8 +1587,8 @@ class RealTrainingAdapter:
logger.info(f" Converted {len(training_data)} samples to {len(converted_batches)} training batches")
# Use batch size of 1 to avoid OOM with large sequence lengths
# With 5 timeframes * 600 candles = 3000 sequence positions per sample,
# even batch_size=5 causes 15,000 positions which is too large for GPU
# With 5 timeframes * 100 candles = 500 sequence positions per sample
# Batch size of 1 ensures we don't exceed GPU memory (8GB)
mini_batch_size = 1 # Process one sample at a time to avoid OOM
def _combine_batches(batch_list: List[Dict[str, 'torch.Tensor']]) -> Dict[str, 'torch.Tensor']:
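With mini_batch_size = 1, a larger effective batch is typically recovered through gradient accumulation; the generic pattern looks roughly like the self-contained sketch below (toy model and data, not the adapter's actual trainer):

    import torch
    import torch.nn as nn

    model = nn.Linear(8, 1)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    batches = [(torch.randn(1, 8), torch.randn(1, 1)) for _ in range(10)]  # batch_size = 1 each
    accumulation_steps = 5  # effective batch of 5 without holding 5 samples in memory at once

    optimizer.zero_grad(set_to_none=True)
    for i, (x, y) in enumerate(batches):
        loss = nn.functional.mse_loss(model(x), y) / accumulation_steps  # scale so gradients average
        loss.backward()
        if (i + 1) % accumulation_steps == 0:  # optimizer step only every N micro-batches
            optimizer.step()
            optimizer.zero_grad(set_to_none=True)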
@@ -1623,6 +1630,10 @@ class RealTrainingAdapter:
epoch_accuracy = 0.0
num_batches = 0
# Clear CUDA cache before epoch
if torch.cuda.is_available():
torch.cuda.empty_cache()
for i, batch in enumerate(grouped_batches):
try:
# Determine if this is an accumulation step or optimizer step
@@ -1639,16 +1650,41 @@ class RealTrainingAdapter:
epoch_accuracy += batch_accuracy
num_batches += 1
# Log first batch and every 10th batch for debugging
if (i + 1) == 1 or (i + 1) % 10 == 0:
# Log first batch and every 5th batch for debugging
if (i + 1) == 1 or (i + 1) % 5 == 0:
logger.info(f" Batch {i + 1}/{len(grouped_batches)}, Loss: {batch_loss:.6f}, Action Acc: {batch_accuracy:.2%}, Candle Acc: {batch_candle_accuracy:.2%}")
else:
logger.warning(f" Batch {i + 1} returned None result - skipping")
# Clear CUDA cache after optimizer step (not accumulation step)
if torch.cuda.is_available() and not is_accumulation_step:
# CRITICAL FIX: Delete batch tensors immediately to free GPU memory
# This prevents memory accumulation during gradient accumulation
for key in list(batch.keys()):
if isinstance(batch[key], torch.Tensor):
del batch[key]
del batch
# CRITICAL: Clear CUDA cache after EVERY batch to prevent memory accumulation
# This is essential with large models and limited GPU memory
if torch.cuda.is_available():
torch.cuda.empty_cache()
# After optimizer step (not accumulation), force garbage collection
if not is_accumulation_step:
import gc
gc.collect()
if torch.cuda.is_available():
torch.cuda.synchronize()
except torch.cuda.OutOfMemoryError as oom_error:
logger.error(f" CUDA OOM in batch {i + 1}: {oom_error}")
# Aggressive memory cleanup on OOM
if torch.cuda.is_available():
torch.cuda.empty_cache()
torch.cuda.synchronize()
# Reset optimizer state to prevent corruption
trainer.optimizer.zero_grad(set_to_none=True)
logger.warning(f" Skipping batch {i + 1} due to OOM, optimizer state reset")
continue
except Exception as e:
logger.error(f" Error in batch {i + 1}: {e}")
import traceback
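Taken together, the per-batch tensor cleanup, cache clearing, and OOM recovery added in this hunk follow a pattern roughly like the sketch below. It is an illustration only: run_epoch, train_step, and the trainer interface are hypothetical stand-ins, not the adapter's exact code.

    import gc
    import torch

    def run_epoch(trainer, grouped_batches, accumulation_steps=5):
        """Sketch of the memory-management pattern (hypothetical trainer interface)."""
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # start the epoch with a clean CUDA cache

        for i, batch in enumerate(grouped_batches):
            is_accumulation_step = (i + 1) % accumulation_steps != 0
            try:
                # Assumed to run forward/backward, stepping the optimizer on non-accumulation steps.
                trainer.train_step(batch)
            except torch.cuda.OutOfMemoryError:
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                    torch.cuda.synchronize()
                trainer.optimizer.zero_grad(set_to_none=True)  # drop partial grads so state is not corrupted
                continue  # skip the offending batch and keep training
            finally:
                # Free the batch tensors immediately so they cannot accumulate across iterations.
                for key in list(batch.keys()):
                    if isinstance(batch[key], torch.Tensor):
                        del batch[key]
                del batch
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()  # release cached blocks after every batch
                if not is_accumulation_step:
                    gc.collect()  # full garbage collection only after optimizer steps
                    if torch.cuda.is_available():
                        torch.cuda.synchronize()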