From 0d08339d98f1af659b9b3e4e191e11079bb200e7 Mon Sep 17 00:00:00 2001
From: Dobromir Popov
Date: Wed, 1 Oct 2025 00:08:46 +0300
Subject: [PATCH] docs: Add comprehensive training fix implementation plan

- Document critical issues and fixes applied
- Detail proper training loop architecture
- Outline signal-position linking system
- Define comprehensive reward calculation
- List implementation phases and next steps
---
 TRAINING_FIX_IMPLEMENTATION.md | 286 +++++++++++++++++++++++++++++++++
 1 file changed, 286 insertions(+)
 create mode 100644 TRAINING_FIX_IMPLEMENTATION.md

diff --git a/TRAINING_FIX_IMPLEMENTATION.md b/TRAINING_FIX_IMPLEMENTATION.md
new file mode 100644
index 0000000..f2aa3d3
--- /dev/null
+++ b/TRAINING_FIX_IMPLEMENTATION.md
@@ -0,0 +1,286 @@

# Trading System Training Fix Implementation

**Date**: September 30, 2025
**Status**: In Progress

---

## Critical Issues Identified

### 1. Division by Zero ✅ FIXED
**Problem**: The trading executor crashed when the price was 0 or invalid.
**Solution**: Added price validation before division in `core/trading_executor.py`:
```python
if current_price <= 0:
    logger.error(f"Invalid price {current_price} for {symbol}")
    return False
```

### 2. Mock Predictions ✅ FIXED
**Problem**: The system fell back to "mock predictions" when training was unavailable (POLICY VIOLATION!).
**Solution**: Removed the mock fallback; the system now fails gracefully:
```python
logger.error("CRITICAL: Enhanced training system not available - predictions disabled. NEVER use mock data.")
```

### 3. Torch Import ✅ ALREADY FIXED
**Problem**: "cannot access local variable 'torch'" error.
**Status**: The module already falls back to a `None` placeholder when the import fails.

---

## Training Loop Issues

### Current State (BROKEN):
1. **Immediate Training on Next Tick**
   - Training happens on `next_price - current_price` (≈0.00)
   - No actual position tracking
   - Rewards are meaningless noise

2. **No Position Close Training**
   - Positions open/close but NO training is triggered
   - Real PnL is calculated but unused for training
   - Models never learn from actual trade outcomes

3. **Manual Trades Only**
   - Only manual trades trigger model training
   - Automated trades don't train models

---

## Proper Training Loop Implementation

### Required Components:

#### 1. Signal-Position Linking
```python
from datetime import datetime

class SignalPositionTracker:
    """Links trading signals to positions for outcome-based training"""

    def __init__(self):
        self.active_trades = {}  # position_id -> signal_data

    def register_signal(self, signal_id, signal_data, position_id):
        """Store signal context when position opens"""
        self.active_trades[position_id] = {
            'signal_id': signal_id,
            'signal': signal_data,
            'entry_time': datetime.now(),
            'market_state': signal_data.get('market_state'),
            'models_used': {
                'cnn': signal_data.get('cnn_contribution', 0),
                'dqn': signal_data.get('dqn_contribution', 0),
                'cob_rl': signal_data.get('cob_contribution', 0)
            }
        }

    def get_signal_for_position(self, position_id):
        """Retrieve signal when position closes"""
        return self.active_trades.pop(position_id, None)
```
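
A hypothetical usage sketch of the tracker above. The module-level `position_tracker`, the `signal` dict keys, and `position.id` are illustrative names, not existing APIs:

```python
# Illustrative wiring only: `signal` and `position` are assumed objects, not existing code.
position_tracker = SignalPositionTracker()

def on_signal_executed(signal, position):
    """Call right after a signal is executed and a position is opened."""
    position_tracker.register_signal(
        signal_id=signal['signal_id'],   # assumed key
        signal_data=signal,              # carries market_state and per-model contributions
        position_id=position.id,
    )
```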
#### 2. Position Close Hook
```python
# In core/trading_executor.py, after trade_record is created:

def _on_position_close(self, trade_record, position):
    """Called when a position closes - trigger training"""

    # Get the original signal
    signal_data = self.position_tracker.get_signal_for_position(position.id)
    if not signal_data:
        logger.warning(f"No signal found for position {position.id}")
        return

    # Calculate comprehensive reward
    reward = self._calculate_training_reward(trade_record, signal_data)

    # Train all models that contributed
    if self.orchestrator:
        self.orchestrator.train_on_trade_outcome(
            signal_data=signal_data,
            trade_record=trade_record,
            reward=reward
        )
```

#### 3. Comprehensive Reward Function
```python
import numpy as np  # module-level import, needed for np.std below

def _calculate_training_reward(self, trade_record, signal_data):
    """Calculate sophisticated reward from a closed trade"""

    # Base PnL (already includes fees)
    pnl = trade_record.pnl

    # Time penalty (encourage faster trades)
    hold_time_minutes = trade_record.hold_time_seconds / 60
    time_penalty = -0.001 * max(0, hold_time_minutes - 5)  # Penalty after 5min

    # Risk-adjusted reward
    position_risk = trade_record.quantity * trade_record.entry_price / self.balance
    risk_adjusted = pnl / (position_risk + 0.01)

    # Consistency bonus/penalty
    recent_pnls = [t.pnl for t in self.trade_history[-20:]]
    if len(recent_pnls) > 1:
        pnl_std = np.std(recent_pnls)
        consistency = pnl / (pnl_std + 0.001)
    else:
        consistency = 0

    # Win/loss streak adjustment
    if pnl > 0:
        streak_bonus = min(0.1, self.winning_streak * 0.02)
    else:
        streak_bonus = -min(0.2, self.losing_streak * 0.05)

    # Final reward (scaled for model learning)
    final_reward = (
        pnl * 10.0 +            # Base PnL (scaled)
        time_penalty +          # Efficiency
        risk_adjusted * 2.0 +   # Risk management
        consistency * 0.5 +     # Volatility
        streak_bonus            # Consistency
    )

    logger.info(f"REWARD CALC: PnL={pnl:.4f}, Time={time_penalty:.4f}, "
                f"Risk={risk_adjusted:.4f}, Final={final_reward:.4f}")

    return final_reward
```
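
To make the scaling concrete, here is a quick sanity check of the formula with purely hypothetical inputs: a $2.00 win held for 8 minutes, position risk assumed at 10% of balance, and illustrative consistency and streak values. None of these numbers come from the live system:

```python
# Hypothetical inputs only: a sanity check of the reward scaling, not real trade data.
pnl = 2.00                               # closed-trade PnL in USD, after fees
time_penalty = -0.001 * max(0, 8 - 5)    # 8-minute hold -> -0.003
risk_adjusted = pnl / (0.10 + 0.01)      # position risk assumed to be 10% of balance
consistency = 1.2                        # assumed pnl / (std of recent PnLs)
streak_bonus = 0.04                      # assumed 2-win streak * 0.02

final_reward = (
    pnl * 10.0 +
    time_penalty +
    risk_adjusted * 2.0 +
    consistency * 0.5 +
    streak_bonus
)
print(round(final_reward, 2))  # 57.0
```

Under these assumptions the risk-adjusted term dominates the scaled PnL term, which is worth keeping in mind when tuning the weights.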
#### 4. Multi-Model Training
```python
# In core/orchestrator.py

def train_on_trade_outcome(self, signal_data, trade_record, reward):
    """Train all models that contributed to the signal"""

    market_state = signal_data.get('market_state')
    action = self._action_to_index(trade_record.side)  # BUY=0, SELL=1

    # Train CNN
    if self.cnn_model and signal_data['models_used']['cnn'] > 0:
        weight = signal_data['models_used']['cnn']
        self._train_cnn_on_outcome(market_state, action, reward, weight)
        logger.info(f"CNN trained with weight {weight:.2f}")

    # Train DQN
    if self.dqn_agent and signal_data['models_used']['dqn'] > 0:
        weight = signal_data['models_used']['dqn']
        next_state = self._extract_current_state()
        self.dqn_agent.remember(market_state, action, reward * weight, next_state, done=True)

        if len(self.dqn_agent.memory) > 32:
            loss = self.dqn_agent.replay(batch_size=32)
            logger.info(f"DQN trained with weight {weight:.2f}, loss={loss:.4f}")

    # Train COB RL
    if self.cob_rl_model and signal_data['models_used']['cob_rl'] > 0:
        weight = signal_data['models_used']['cob_rl']
        cob_data = signal_data.get('cob_data', {})
        self._train_cob_on_outcome(cob_data, action, reward, weight)
        logger.info(f"COB RL trained with weight {weight:.2f}")

    logger.info(f"✅ TRAINED ALL MODELS: PnL=${trade_record.pnl:.2f}, Reward={reward:.4f}")
```

---

## Implementation Steps

### Phase 1: Core Infrastructure (Priority 1) ✅
- [x] Fix division by zero
- [x] Remove mock predictions
- [x] Fix torch imports

### Phase 2: Training Loop (Priority 2) - IN PROGRESS
- [ ] Create SignalPositionTracker class
- [ ] Add position close hook in trading_executor
- [ ] Implement comprehensive reward function
- [ ] Add train_on_trade_outcome to orchestrator
- [ ] Remove immediate training on next tick

### Phase 3: Reward Improvements (Priority 3)
- [ ] Multi-timeframe rewards (1m, 5m, 15m outcomes; sketched below)
- [ ] Selective training (skip tiny movements; sketched below)
- [ ] Better feature engineering
- [ ] Prioritized experience replay

### Phase 4: Testing & Validation
- [ ] Test with paper trading
- [ ] Validate rewards are non-zero
- [ ] Confirm models are training
- [ ] Monitor training metrics
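
As a starting point for the first two Phase 3 items, here is a minimal sketch of multi-timeframe rewards and selective training. Everything in it is an assumption for illustration: the `price_at` lookup helper, the 0.50 threshold, and the horizon weights are placeholders, not existing code or tuned values.

```python
# Hypothetical Phase 3 sketch: thresholds, weights, and price_at() are assumptions.
MIN_ABS_PNL = 0.50                                # selective training: skip moves under $0.50
HORIZON_WEIGHTS = {60: 0.5, 300: 0.3, 900: 0.2}   # 1m / 5m / 15m outcome blend

def multi_timeframe_reward(entry_price, side, price_at):
    """Blend percentage moves at several horizons; price_at(seconds) is an assumed lookup."""
    direction = 1.0 if side == 'BUY' else -1.0
    reward = 0.0
    for horizon_s, weight in HORIZON_WEIGHTS.items():
        move_pct = (price_at(horizon_s) - entry_price) / entry_price
        reward += weight * direction * move_pct
    return reward

def should_train(trade_pnl):
    """Selective training gate: ignore noise-sized outcomes."""
    return abs(trade_pnl) >= MIN_ABS_PNL
```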
---

## Expected Improvements

### Before:
- Rewards: ~0.00 (next-tick noise)
- Training: Only on next-tick price
- Learning: Models see no real outcomes
- Effectiveness: 1/10

### After:
- Rewards: Real PnL-based (-$5 to +$10 range)
- Training: On actual position close
- Learning: Models see real trade results
- Effectiveness: 9/10

---

## Files to Modify

1. **core/trading_executor.py**
   - Add position close hook
   - Create SignalPositionTracker
   - Implement reward calculation

2. **core/orchestrator.py**
   - Add train_on_trade_outcome method
   - Implement multi-model training

3. **web/clean_dashboard.py**
   - Remove immediate training
   - Add signal registration on execution
   - Link signals to positions

4. **core/training_integration.py** (optional)
   - May need updates for consistency

---

## Monitoring & Validation

### Log Messages to Watch:
```
✅ TRAINED ALL MODELS: PnL=$2.35, Reward=25.40
REWARD CALC: PnL=0.0235, Time=-0.002, Risk=1.15, Final=25.40
CNN trained with weight 0.35
DQN trained with weight 0.45, loss=0.0123
COB RL trained with weight 0.20
```

### Metrics to Track:
- Average reward per trade (should be >> 0.01; see the addendum below)
- Training frequency (should match trade close frequency)
- Model convergence (loss decreasing over time)
- Win rate improvement (should increase with training)

---

## Next Steps

1. Implement SignalPositionTracker
2. Add position close hook
3. Create reward calculation
4. Test with 10 manual trades
5. Validate rewards are meaningful
6. Deploy to automated trading

---

**Status**: Phase 1 Complete, Phase 2 In Progress
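
---

### Addendum: Measuring Average Reward per Trade

A minimal sketch for the first "Metrics to Track" item. The `rewards` list is assumed to be collected wherever `_calculate_training_reward` is called; nothing here exists in the codebase yet.

```python
from statistics import mean

def summarize_rewards(rewards):
    """Summarize how often training fired and the average reward it used."""
    if not rewards:
        return {'trades_trained': 0, 'avg_reward': 0.0}
    return {
        'trades_trained': len(rewards),   # should match the number of closed trades
        'avg_reward': mean(rewards),      # should sit well above 0.01 once the fix lands
    }

# Example: summarize_rewards([25.4, -3.1, 12.0]) -> {'trades_trained': 3, 'avg_reward': 11.43...}
```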