improve training and model data

2025-07-07 15:48:25 +03:00
parent 271e7d59b5
commit 2d8f763eeb
16 changed files with 2047 additions and 1699 deletions
--- a/NN/models/dqn_agent.py
+++ b/NN/models/dqn_agent.py
@@ -5,7 +5,7 @@ import numpy as np
 from collections import deque
 import random
 from typing import Tuple, List
-import os
+import os
 import sys
 import logging
 import torch.nn.functional as F
@@ -129,7 +129,128 @@ class DQNAgent:
        logger.info(f"DQN Agent initialized with checkpoint management: {enable_checkpoints}")
        if enable_checkpoints:
            logger.info(f"Model name: {model_name}, Checkpoint frequency: {self.checkpoint_frequency}")
-    
+        
+        # Bounded rolling windows of recent actions, prices, and rewards
+        self.recent_actions = deque(maxlen=10)
+        self.recent_prices = deque(maxlen=20)
+        self.recent_rewards = deque(maxlen=100)
+        
+        # Price prediction tracking
+        self.last_price_pred = {
+            'immediate': {
+                'direction': 1,  # Default to "sideways"
+                'confidence': 0.0,
+                'change': 0.0
+            },
+            'midterm': {
+                'direction': 1,  # Default to "sideways"
+                'confidence': 0.0,
+                'change': 0.0
+            },
+            'longterm': {
+                'direction': 1,  # Default to "sideways"
+                'confidence': 0.0,
+                'change': 0.0
+            }
+        }
+        
+        # Store separate memory for price direction examples
+        self.price_movement_memory = []  # For storing examples of clear price movements
+        
+        # Performance tracking
+        self.losses = []
+        self.no_improvement_count = 0
+        
+        # Confidence tracking
+        self.confidence_history = []
+        self.avg_confidence = 0.0
+        self.max_confidence = 0.0
+        self.min_confidence = 1.0
+        
+        # Enhanced features from EnhancedDQNAgent
+        # Market adaptation capabilities
+        self.market_regime_weights = {
+            'trending': 1.2,    # Higher confidence in trending markets
+            'ranging': 0.8,     # Lower confidence in ranging markets
+            'volatile': 0.6     # Much lower confidence in volatile markets
+        }
+        
+        # Dueling network support (requires enhanced network architecture)
+        self.use_dueling = True
+        
+        # Prioritized experience replay parameters
+        self.use_prioritized_replay = priority_memory
+        self.alpha = 0.6  # Priority exponent
+        self.beta = 0.4   # Importance sampling exponent
+        self.beta_increment = 0.001
+        
+        # Double DQN support
+        self.use_double_dqn = True
+        
+        # Enhanced training features from EnhancedDQNAgent
+        self.target_update_freq = target_update  # More descriptive name
+        self.training_steps = 0
+        self.gradient_clip_norm = 1.0  # Gradient clipping
+        
+        # Enhanced statistics tracking
+        self.epsilon_history = []
+        self.td_errors = []  # Track TD errors for analysis
+        
+        # Trade action fee and confidence thresholds
+        self.trade_action_fee = 0.0005  # Small fee to discourage unnecessary trading
+        self.minimum_action_confidence = 0.3  # Minimum confidence to consider trading (lowered from 0.5)
+        
+        # Violent move detection
+        self.price_history = []
+        self.volatility_window = 20  # Window size for volatility calculation
+        self.volatility_threshold = 0.0015  # Threshold for considering a move "violent"
+        self.post_violent_move = False  # Flag for recent violent move
+        self.violent_move_cooldown = 0  # Cooldown after violent move
+        
+        # Feature integration
+        self.last_hidden_features = None  # Store last extracted features
+        self.feature_history = []  # Store history of features for analysis
+        
+        # Real-time tick features integration
+        self.realtime_tick_features = None  # Latest tick features from tick processor
+        self.tick_feature_weight = 0.3  # Weight for tick features in decision making
+        
+        # Check if mixed precision training should be used
+        self.use_mixed_precision = False
+        if torch.cuda.is_available() and hasattr(torch.cuda, 'amp') and 'DISABLE_MIXED_PRECISION' not in os.environ:
+            self.use_mixed_precision = True
+            self.scaler = torch.cuda.amp.GradScaler()
+            logger.info("Mixed precision training enabled")
+        else:
+            logger.info("Mixed precision training disabled")
+            
+        # Track if we're in training mode
+        self.training = True
+        
+        # For compatibility with old code
+        self.state_size = np.prod(state_shape)
+        self.action_size = n_actions
+        self.memory_size = buffer_size
+        self.timeframes = ["1m", "5m", "15m"][:self.state_dim[0] if isinstance(self.state_dim, tuple) else 3]  # Default timeframes
+        
+        logger.info(f"DQN Agent using Enhanced CNN with device: {self.device}")
+        logger.info(f"Trade action fee set to {self.trade_action_fee}, minimum confidence: {self.minimum_action_confidence}")
+        logger.info(f"Real-time tick feature integration enabled with weight: {self.tick_feature_weight}")
+        
+        # Log model parameters
+        total_params = sum(p.numel() for p in self.policy_net.parameters())
+        logger.info(f"Enhanced CNN Policy Network: {total_params:,} parameters")
+        
+        # Position management for 2-action system
+        self.current_position = 0.0  # -1 (short), 0 (neutral), 1 (long)
+        self.position_entry_price = 0.0
+        self.position_entry_time = None
+        
+        # Different thresholds for entry vs exit decisions - AGGRESSIVE for more training data
+        self.entry_confidence_threshold = 0.35  # Lower threshold for new positions (was 0.7)
+        self.exit_confidence_threshold = 0.15   # Very low threshold for closing positions (was 0.3)
+        self.uncertainty_threshold = 0.1        # When to stay neutral
+        
    def load_best_checkpoint(self):
        """Load the best checkpoint for this DQN agent"""
        try:
@@ -267,9 +388,6 @@ class DQNAgent:
        # Trade action fee and confidence thresholds
        self.trade_action_fee = 0.0005  # Small fee to discourage unnecessary trading
        self.minimum_action_confidence = 0.3  # Minimum confidence to consider trading (lowered from 0.5)
-        self.recent_actions = deque(maxlen=10)
-        self.recent_prices = deque(maxlen=20)
-        self.recent_rewards = deque(maxlen=100)
        
        # Violent move detection
        self.price_history = []