wip old MISC fix

2025-12-08 16:56:37 +02:00
parent 81e7e6bfe6
commit 03888b6200
5 changed files with 719 additions and 343 deletions
--- a/NN/models/dqn_agent.py
+++ b/NN/models/dqn_agent.py
@@ -270,9 +270,9 @@ class DQNAgent:
        self.batch_size = batch_size
        self.target_update = target_update
        
-        # Set device for computation (default to GPU if available)
+        # Set device for computation (read from config.yaml if available)
        if device is None:
-            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+            self.device = self._get_device_from_config()
        else:
            self.device = device
        
@@ -282,10 +282,6 @@ class DQNAgent:
        self.policy_net = DQNNetwork(self.state_dim, self.n_actions, config).to(self.device)
        self.target_net = DQNNetwork(self.state_dim, self.n_actions, config).to(self.device)
        
-        # Ensure models are on the correct device
-        self.policy_net = self.policy_net.to(self.device)
-        self.target_net = self.target_net.to(self.device)
-        
        # Initialize the target network with the same weights as the policy network
        self.target_net.load_state_dict(self.policy_net.state_dict())
        
@@ -317,13 +313,92 @@ class DQNAgent:
        
        # Market regime adaptation weights
        self.market_regime_weights = {
-            'trending': 1.0,
-            'sideways': 0.8,
-            'volatile': 1.2,
-            'bullish': 1.1,
-            'bearish': 1.1
+            'trending': 1.2,    # Higher confidence in trending markets
+            'ranging': 0.8,     # Lower confidence in ranging markets
+            'volatile': 0.6     # Much lower confidence in volatile markets
        }
        
+        # Additional initialization
+        self.recent_actions = deque(maxlen=10)
+        self.recent_prices = deque(maxlen=20)
+        self.recent_rewards = deque(maxlen=100)
+        
+        # Price direction tracking
+        self.last_price_direction = {
+            'direction': 0.0,
+            'confidence': 0.0
+        }
+        
+        self.price_movement_memory = []
+        self.losses = []
+        self.no_improvement_count = 0
+        self.confidence_history = []
+        self.avg_confidence = 0.0
+        self.max_confidence = 0.0
+        self.min_confidence = 1.0
+        
+        # Enhanced training features
+        self.use_dueling = True
+        self.use_prioritized_replay = priority_memory
+        self.alpha = 0.6
+        self.beta = 0.4
+        self.beta_increment = 0.001
+        self.use_double_dqn = True
+        self.target_update_freq = target_update
+        self.training_steps = 0
+        self.gradient_clip_norm = 1.0
+        self.epsilon_history = []
+        self.td_errors = []
+        
+        # Trade settings
+        self.trade_action_fee = 0.0005
+        self.minimum_action_confidence = 0.3
+        
+        # Violent move detection
+        self.price_history = []
+        self.volatility_window = 20
+        self.volatility_threshold = 0.0015
+        self.post_violent_move = False
+        self.violent_move_cooldown = 0
+        
+        # Feature integration
+        self.last_hidden_features = None
+        self.feature_history = []
+        self.realtime_tick_features = None
+        self.tick_feature_weight = 0.3
+        
+        # Mixed precision training
+        if torch.cuda.is_available() and hasattr(torch.cuda, 'amp') and 'DISABLE_MIXED_PRECISION' not in os.environ:
+            self.use_mixed_precision = True
+            self.scaler = torch.amp.GradScaler('cuda')
+            logger.info("Mixed precision training enabled")
+        else:
+            self.use_mixed_precision = False
+            logger.info("Mixed precision training disabled")
+        
+        self.training = True
+        
+        # Compatibility
+        self.state_size = np.prod(state_shape)
+        self.action_size = n_actions
+        self.memory_size = buffer_size
+        self.timeframes = ["1m", "5m", "15m"][:self.state_dim[0] if isinstance(self.state_dim, tuple) else 3]
+        
+        logger.info(f"DQN Agent using Enhanced CNN with device: {self.device}")
+        logger.info(f"Trade action fee set to {self.trade_action_fee}, minimum confidence: {self.minimum_action_confidence}")
+        logger.info(f"Real-time tick feature integration enabled with weight: {self.tick_feature_weight}")
+        
+        total_params = sum(p.numel() for p in self.policy_net.parameters())
+        logger.info(f"Enhanced CNN Policy Network: {total_params:,} parameters")
+        
+        # Position management
+        self.current_position = 0.0
+        self.position_entry_price = 0.0
+        self.position_entry_time = None
+        self.entry_confidence_threshold = 0.35
+        self.exit_confidence_threshold = 0.15
+        self.uncertainty_threshold = 0.1
+        
        # Load best checkpoint if available
        if self.enable_checkpoints:
            self.load_best_checkpoint()
@@ -331,114 +406,47 @@ class DQNAgent:
        logger.info(f"DQN Agent initialized with checkpoint management: {enable_checkpoints}")
        if enable_checkpoints:
            logger.info(f"Model name: {model_name}, Checkpoint frequency: {self.checkpoint_frequency}")
-        
-        # Add this line to the __init__ method
-        self.recent_actions = deque(maxlen=10)
-        self.recent_prices = deque(maxlen=20)
-        self.recent_rewards = deque(maxlen=100)
-        
-        # Price direction tracking - stores direction and confidence
-        self.last_price_direction = {
-            'direction': 0.0,    # Single value between -1 and 1
-            'confidence': 0.0    # Single value between 0 and 1
-        }
-        
-        # Store separate memory for price direction examples
-        self.price_movement_memory = []  # For storing examples of clear price movements
-        
-        # Performance tracking
-        self.losses = []
-        self.no_improvement_count = 0
-        
-        # Confidence tracking
-        self.confidence_history = []
-        self.avg_confidence = 0.0
-        self.max_confidence = 0.0
-        self.min_confidence = 1.0
-        
-        # Enhanced features from EnhancedDQNAgent
-        # Market adaptation capabilities
-        self.market_regime_weights = {
-            'trending': 1.2,    # Higher confidence in trending markets
-            'ranging': 0.8,     # Lower confidence in ranging markets
-            'volatile': 0.6     # Much lower confidence in volatile markets
-        }
-        
-        # Dueling network support (requires enhanced network architecture)
-        self.use_dueling = True
-        
-        # Prioritized experience replay parameters
-        self.use_prioritized_replay = priority_memory
-        self.alpha = 0.6  # Priority exponent
-        self.beta = 0.4   # Importance sampling exponent
-        self.beta_increment = 0.001
-        
-        # Double DQN support
-        self.use_double_dqn = True
-        
-        # Enhanced training features from EnhancedDQNAgent
-        self.target_update_freq = target_update  # More descriptive name
-        self.training_steps = 0
-        self.gradient_clip_norm = 1.0  # Gradient clipping
-        
-        # Enhanced statistics tracking
-        self.epsilon_history = []
-        self.td_errors = []  # Track TD errors for analysis
-        
-        # Trade action fee and confidence thresholds
-        self.trade_action_fee = 0.0005  # Small fee to discourage unnecessary trading
-        self.minimum_action_confidence = 0.3  # Minimum confidence to consider trading (lowered from 0.5)
-        
-        # Violent move detection
-        self.price_history = []
-        self.volatility_window = 20  # Window size for volatility calculation
-        self.volatility_threshold = 0.0015  # Threshold for considering a move "violent"
-        self.post_violent_move = False  # Flag for recent violent move
-        self.violent_move_cooldown = 0  # Cooldown after violent move
-        
-        # Feature integration
-        self.last_hidden_features = None  # Store last extracted features
-        self.feature_history = []  # Store history of features for analysis
-        
-        # Real-time tick features integration
-        self.realtime_tick_features = None  # Latest tick features from tick processor
-        self.tick_feature_weight = 0.3  # Weight for tick features in decision making
-        
-        # Check if mixed precision training should be used
-        if torch.cuda.is_available() and hasattr(torch.cuda, 'amp') and 'DISABLE_MIXED_PRECISION' not in os.environ:
-            self.use_mixed_precision = True
-            self.scaler = torch.amp.GradScaler('cuda')
-            logger.info("Mixed precision training enabled")
-        else:
-            self.use_mixed_precision = False
-            logger.info("Mixed precision training disabled")
+    
+    def _get_device_from_config(self) -> torch.device:
+        """Resolve the compute device from the ``gpu`` section of config.yaml.
+
+        Falls back to auto-detection (CUDA if available, else CPU) when the
+        config cannot be read or names an unknown device.
+
+        Raises:
+            RuntimeError: CUDA was requested but is unavailable and
+                ``fallback_to_cpu`` is False.
+        """
+        try:
+            from core.config import get_config
+            gpu_config = get_config()._config.get('gpu', {})
             
-        # Track if we're in training mode
-        self.training = True
-        
-        # For compatibility with old code
-        self.state_size = np.prod(state_shape)
-        self.action_size = n_actions
-        self.memory_size = buffer_size
-        self.timeframes = ["1m", "5m", "15m"][:self.state_dim[0] if isinstance(self.state_dim, tuple) else 3]  # Default timeframes
-        
-        logger.info(f"DQN Agent using Enhanced CNN with device: {self.device}")
-        logger.info(f"Trade action fee set to {self.trade_action_fee}, minimum confidence: {self.minimum_action_confidence}")
-        logger.info(f"Real-time tick feature integration enabled with weight: {self.tick_feature_weight}")
-        
-        # Log model parameters
-        total_params = sum(p.numel() for p in self.policy_net.parameters())
-        logger.info(f"Enhanced CNN Policy Network: {total_params:,} parameters")
-        
-        # Position management for 2-action system
-        self.current_position = 0.0  # -1 (short), 0 (neutral), 1 (long)
-        self.position_entry_price = 0.0
-        self.position_entry_time = None
-        
-        # Different thresholds for entry vs exit decisions - AGGRESSIVE for more training data
-        self.entry_confidence_threshold = 0.35  # Lower threshold for new positions (was 0.7)
-        self.exit_confidence_threshold = 0.15   # Very low threshold for closing positions (was 0.3)
-        self.uncertainty_threshold = 0.1        # When to stay neutral
+            device_setting = gpu_config.get('device', 'auto')
+            fallback_to_cpu = gpu_config.get('fallback_to_cpu', True)
+            gpu_enabled = gpu_config.get('enabled', True)
+        except Exception as e:
+            # Only config loading/parsing is best-effort; decisions happen below
+            logger.warning(f"Error reading device config: {e}, using auto-detection")
+            return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+        if not gpu_enabled:
+            logger.info("GPU disabled in config.yaml, using CPU")
+            return torch.device('cpu')
+        if device_setting == 'cpu':
+            logger.info("Device set to CPU in config.yaml")
+            return torch.device('cpu')
+        if device_setting not in ('cuda', 'auto'):
+            logger.warning(f"Unknown device setting '{device_setting}', using auto-detection")
+            return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+        if torch.cuda.is_available():
+            logger.info("Using GPU (CUDA available)")
+            return torch.device('cuda')
+        if fallback_to_cpu:
+            logger.warning("CUDA not available, falling back to CPU")
+            return torch.device('cpu')
+        # Raised outside the try above so the broad handler cannot swallow it
+        raise RuntimeError("CUDA not available and fallback_to_cpu is False")
        
    def load_best_checkpoint(self):
        """Load the best checkpoint for this DQN agent"""
@@ -512,104 +520,7 @@ class DQNAgent:
        except Exception as e:
            logger.error(f"Error saving DQN checkpoint: {e}")
            return False
-        
-        # Store separate memory for price direction examples
-        self.price_movement_memory = []  # For storing examples of clear price movements
-        
-        # Performance tracking
-        self.losses = []
-        self.no_improvement_count = 0
-        
-        # Confidence tracking
-        self.confidence_history = []
-        self.avg_confidence = 0.0
-        self.max_confidence = 0.0
-        self.min_confidence = 1.0
-        
-        # Enhanced features from EnhancedDQNAgent
-        # Market adaptation capabilities
-        self.market_regime_weights = {
-            'trending': 1.2,    # Higher confidence in trending markets
-            'ranging': 0.8,     # Lower confidence in ranging markets
-            'volatile': 0.6     # Much lower confidence in volatile markets
-        }
-        
-        # Dueling network support (requires enhanced network architecture)
-        self.use_dueling = True
-        
-        # Prioritized experience replay parameters
-        self.use_prioritized_replay = priority_memory
-        self.alpha = 0.6  # Priority exponent
-        self.beta = 0.4   # Importance sampling exponent
-        self.beta_increment = 0.001
-        
-        # Double DQN support
-        self.use_double_dqn = True
-        
-        # Enhanced training features from EnhancedDQNAgent
-        self.target_update_freq = target_update  # More descriptive name
-        self.training_steps = 0
-        self.gradient_clip_norm = 1.0  # Gradient clipping
-        
-        # Enhanced statistics tracking
-        self.epsilon_history = []
-        self.td_errors = []  # Track TD errors for analysis
-        
-        # Trade action fee and confidence thresholds
-        self.trade_action_fee = 0.0005  # Small fee to discourage unnecessary trading
-        self.minimum_action_confidence = 0.3  # Minimum confidence to consider trading (lowered from 0.5)
-        
-        # Violent move detection
-        self.price_history = []
-        self.volatility_window = 20  # Window size for volatility calculation
-        self.volatility_threshold = 0.0015  # Threshold for considering a move "violent"
-        self.post_violent_move = False  # Flag for recent violent move
-        self.violent_move_cooldown = 0  # Cooldown after violent move
-        
-        # Feature integration
-        self.last_hidden_features = None  # Store last extracted features
-        self.feature_history = []  # Store history of features for analysis
-        
-        # Real-time tick features integration
-        self.realtime_tick_features = None  # Latest tick features from tick processor
-        self.tick_feature_weight = 0.3  # Weight for tick features in decision making
-        
-        # Check if mixed precision training should be used
-        if torch.cuda.is_available() and hasattr(torch.cuda, 'amp') and 'DISABLE_MIXED_PRECISION' not in os.environ:
-            self.use_mixed_precision = True
-            self.scaler = torch.amp.GradScaler('cuda')
-            logger.info("Mixed precision training enabled")
-        else:
-            self.use_mixed_precision = False
-            logger.info("Mixed precision training disabled")
-            
-        # Track if we're in training mode
-        self.training = True
-        
-        # For compatibility with old code
-        self.state_size = np.prod(state_shape)
-        self.action_size = n_actions
-        self.memory_size = buffer_size
-        self.timeframes = ["1m", "5m", "15m"][:self.state_dim[0] if isinstance(self.state_dim, tuple) else 3]  # Default timeframes
-        
-        logger.info(f"DQN Agent using Enhanced CNN with device: {self.device}")
-        logger.info(f"Trade action fee set to {self.trade_action_fee}, minimum confidence: {self.minimum_action_confidence}")
-        logger.info(f"Real-time tick feature integration enabled with weight: {self.tick_feature_weight}")
-        
-        # Log model parameters
-        total_params = sum(p.numel() for p in self.policy_net.parameters())
-        logger.info(f"Enhanced CNN Policy Network: {total_params:,} parameters")
-        
-        # Position management for 2-action system
-        self.current_position = 0.0  # -1 (short), 0 (neutral), 1 (long)
-        self.position_entry_price = 0.0
-        self.position_entry_time = None
-        
-        # Different thresholds for entry vs exit decisions - AGGRESSIVE for more training data
-        self.entry_confidence_threshold = 0.35  # Lower threshold for new positions (was 0.7)
-        self.exit_confidence_threshold = 0.15   # Very low threshold for closing positions (was 0.3)
-        self.uncertainty_threshold = 0.1        # When to stay neutral
-        
+    
    def move_models_to_device(self, device=None):
        """Move models to the specified device (GPU/CPU)"""
        if device is not None: