wip old MISC fix
This commit is contained in:
@@ -270,9 +270,9 @@ class DQNAgent:
|
||||
self.batch_size = batch_size
|
||||
self.target_update = target_update
|
||||
|
||||
# Set device for computation (default to GPU if available)
|
||||
# Set device for computation (read from config.yaml if available)
|
||||
if device is None:
|
||||
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
self.device = self._get_device_from_config()
|
||||
else:
|
||||
self.device = device
|
||||
|
||||
@@ -282,10 +282,6 @@ class DQNAgent:
|
||||
self.policy_net = DQNNetwork(self.state_dim, self.n_actions, config).to(self.device)
|
||||
self.target_net = DQNNetwork(self.state_dim, self.n_actions, config).to(self.device)
|
||||
|
||||
# Ensure models are on the correct device
|
||||
self.policy_net = self.policy_net.to(self.device)
|
||||
self.target_net = self.target_net.to(self.device)
|
||||
|
||||
# Initialize the target network with the same weights as the policy network
|
||||
self.target_net.load_state_dict(self.policy_net.state_dict())
|
||||
|
||||
@@ -317,13 +313,92 @@ class DQNAgent:
|
||||
|
||||
# Market regime adaptation weights
|
||||
self.market_regime_weights = {
|
||||
'trending': 1.0,
|
||||
'sideways': 0.8,
|
||||
'volatile': 1.2,
|
||||
'bullish': 1.1,
|
||||
'bearish': 1.1
|
||||
'trending': 1.2, # Higher confidence in trending markets
|
||||
'ranging': 0.8, # Lower confidence in ranging markets
|
||||
'volatile': 0.6 # Much lower confidence in volatile markets
|
||||
}
|
||||
|
||||
# Additional initialization
|
||||
self.recent_actions = deque(maxlen=10)
|
||||
self.recent_prices = deque(maxlen=20)
|
||||
self.recent_rewards = deque(maxlen=100)
|
||||
|
||||
# Price direction tracking
|
||||
self.last_price_direction = {
|
||||
'direction': 0.0,
|
||||
'confidence': 0.0
|
||||
}
|
||||
|
||||
self.price_movement_memory = []
|
||||
self.losses = []
|
||||
self.no_improvement_count = 0
|
||||
self.confidence_history = []
|
||||
self.avg_confidence = 0.0
|
||||
self.max_confidence = 0.0
|
||||
self.min_confidence = 1.0
|
||||
|
||||
# Enhanced training features
|
||||
self.use_dueling = True
|
||||
self.use_prioritized_replay = priority_memory
|
||||
self.alpha = 0.6
|
||||
self.beta = 0.4
|
||||
self.beta_increment = 0.001
|
||||
self.use_double_dqn = True
|
||||
self.target_update_freq = target_update
|
||||
self.training_steps = 0
|
||||
self.gradient_clip_norm = 1.0
|
||||
self.epsilon_history = []
|
||||
self.td_errors = []
|
||||
|
||||
# Trade settings
|
||||
self.trade_action_fee = 0.0005
|
||||
self.minimum_action_confidence = 0.3
|
||||
|
||||
# Violent move detection
|
||||
self.price_history = []
|
||||
self.volatility_window = 20
|
||||
self.volatility_threshold = 0.0015
|
||||
self.post_violent_move = False
|
||||
self.violent_move_cooldown = 0
|
||||
|
||||
# Feature integration
|
||||
self.last_hidden_features = None
|
||||
self.feature_history = []
|
||||
self.realtime_tick_features = None
|
||||
self.tick_feature_weight = 0.3
|
||||
|
||||
# Mixed precision training
|
||||
if torch.cuda.is_available() and hasattr(torch.cuda, 'amp') and 'DISABLE_MIXED_PRECISION' not in os.environ:
|
||||
self.use_mixed_precision = True
|
||||
self.scaler = torch.amp.GradScaler('cuda')
|
||||
logger.info("Mixed precision training enabled")
|
||||
else:
|
||||
self.use_mixed_precision = False
|
||||
logger.info("Mixed precision training disabled")
|
||||
|
||||
self.training = True
|
||||
|
||||
# Compatibility
|
||||
self.state_size = np.prod(state_shape)
|
||||
self.action_size = n_actions
|
||||
self.memory_size = buffer_size
|
||||
self.timeframes = ["1m", "5m", "15m"][:self.state_dim[0] if isinstance(self.state_dim, tuple) else 3]
|
||||
|
||||
logger.info(f"DQN Agent using Enhanced CNN with device: {self.device}")
|
||||
logger.info(f"Trade action fee set to {self.trade_action_fee}, minimum confidence: {self.minimum_action_confidence}")
|
||||
logger.info(f"Real-time tick feature integration enabled with weight: {self.tick_feature_weight}")
|
||||
|
||||
total_params = sum(p.numel() for p in self.policy_net.parameters())
|
||||
logger.info(f"Enhanced CNN Policy Network: {total_params:,} parameters")
|
||||
|
||||
# Position management
|
||||
self.current_position = 0.0
|
||||
self.position_entry_price = 0.0
|
||||
self.position_entry_time = None
|
||||
self.entry_confidence_threshold = 0.35
|
||||
self.exit_confidence_threshold = 0.15
|
||||
self.uncertainty_threshold = 0.1
|
||||
|
||||
# Load best checkpoint if available
|
||||
if self.enable_checkpoints:
|
||||
self.load_best_checkpoint()
|
||||
@@ -331,114 +406,47 @@ class DQNAgent:
|
||||
logger.info(f"DQN Agent initialized with checkpoint management: {enable_checkpoints}")
|
||||
if enable_checkpoints:
|
||||
logger.info(f"Model name: {model_name}, Checkpoint frequency: {self.checkpoint_frequency}")
|
||||
|
||||
# Add this line to the __init__ method
|
||||
self.recent_actions = deque(maxlen=10)
|
||||
self.recent_prices = deque(maxlen=20)
|
||||
self.recent_rewards = deque(maxlen=100)
|
||||
|
||||
# Price direction tracking - stores direction and confidence
|
||||
self.last_price_direction = {
|
||||
'direction': 0.0, # Single value between -1 and 1
|
||||
'confidence': 0.0 # Single value between 0 and 1
|
||||
}
|
||||
|
||||
# Store separate memory for price direction examples
|
||||
self.price_movement_memory = [] # For storing examples of clear price movements
|
||||
|
||||
# Performance tracking
|
||||
self.losses = []
|
||||
self.no_improvement_count = 0
|
||||
|
||||
# Confidence tracking
|
||||
self.confidence_history = []
|
||||
self.avg_confidence = 0.0
|
||||
self.max_confidence = 0.0
|
||||
self.min_confidence = 1.0
|
||||
|
||||
# Enhanced features from EnhancedDQNAgent
|
||||
# Market adaptation capabilities
|
||||
self.market_regime_weights = {
|
||||
'trending': 1.2, # Higher confidence in trending markets
|
||||
'ranging': 0.8, # Lower confidence in ranging markets
|
||||
'volatile': 0.6 # Much lower confidence in volatile markets
|
||||
}
|
||||
|
||||
# Dueling network support (requires enhanced network architecture)
|
||||
self.use_dueling = True
|
||||
|
||||
# Prioritized experience replay parameters
|
||||
self.use_prioritized_replay = priority_memory
|
||||
self.alpha = 0.6 # Priority exponent
|
||||
self.beta = 0.4 # Importance sampling exponent
|
||||
self.beta_increment = 0.001
|
||||
|
||||
# Double DQN support
|
||||
self.use_double_dqn = True
|
||||
|
||||
# Enhanced training features from EnhancedDQNAgent
|
||||
self.target_update_freq = target_update # More descriptive name
|
||||
self.training_steps = 0
|
||||
self.gradient_clip_norm = 1.0 # Gradient clipping
|
||||
|
||||
# Enhanced statistics tracking
|
||||
self.epsilon_history = []
|
||||
self.td_errors = [] # Track TD errors for analysis
|
||||
|
||||
# Trade action fee and confidence thresholds
|
||||
self.trade_action_fee = 0.0005 # Small fee to discourage unnecessary trading
|
||||
self.minimum_action_confidence = 0.3 # Minimum confidence to consider trading (lowered from 0.5)
|
||||
|
||||
# Violent move detection
|
||||
self.price_history = []
|
||||
self.volatility_window = 20 # Window size for volatility calculation
|
||||
self.volatility_threshold = 0.0015 # Threshold for considering a move "violent"
|
||||
self.post_violent_move = False # Flag for recent violent move
|
||||
self.violent_move_cooldown = 0 # Cooldown after violent move
|
||||
|
||||
# Feature integration
|
||||
self.last_hidden_features = None # Store last extracted features
|
||||
self.feature_history = [] # Store history of features for analysis
|
||||
|
||||
# Real-time tick features integration
|
||||
self.realtime_tick_features = None # Latest tick features from tick processor
|
||||
self.tick_feature_weight = 0.3 # Weight for tick features in decision making
|
||||
|
||||
# Check if mixed precision training should be used
|
||||
if torch.cuda.is_available() and hasattr(torch.cuda, 'amp') and 'DISABLE_MIXED_PRECISION' not in os.environ:
|
||||
self.use_mixed_precision = True
|
||||
self.scaler = torch.amp.GradScaler('cuda')
|
||||
logger.info("Mixed precision training enabled")
|
||||
else:
|
||||
self.use_mixed_precision = False
|
||||
logger.info("Mixed precision training disabled")
|
||||
|
||||
def _get_device_from_config(self) -> torch.device:
|
||||
"""Get device from config.yaml or auto-detect"""
|
||||
try:
|
||||
# Try to load config
|
||||
from core.config import get_config
|
||||
config = get_config()
|
||||
gpu_config = config._config.get('gpu', {})
|
||||
|
||||
# Track if we're in training mode
|
||||
self.training = True
|
||||
|
||||
# For compatibility with old code
|
||||
self.state_size = np.prod(state_shape)
|
||||
self.action_size = n_actions
|
||||
self.memory_size = buffer_size
|
||||
self.timeframes = ["1m", "5m", "15m"][:self.state_dim[0] if isinstance(self.state_dim, tuple) else 3] # Default timeframes
|
||||
|
||||
logger.info(f"DQN Agent using Enhanced CNN with device: {self.device}")
|
||||
logger.info(f"Trade action fee set to {self.trade_action_fee}, minimum confidence: {self.minimum_action_confidence}")
|
||||
logger.info(f"Real-time tick feature integration enabled with weight: {self.tick_feature_weight}")
|
||||
|
||||
# Log model parameters
|
||||
total_params = sum(p.numel() for p in self.policy_net.parameters())
|
||||
logger.info(f"Enhanced CNN Policy Network: {total_params:,} parameters")
|
||||
|
||||
# Position management for 2-action system
|
||||
self.current_position = 0.0 # -1 (short), 0 (neutral), 1 (long)
|
||||
self.position_entry_price = 0.0
|
||||
self.position_entry_time = None
|
||||
|
||||
# Different thresholds for entry vs exit decisions - AGGRESSIVE for more training data
|
||||
self.entry_confidence_threshold = 0.35 # Lower threshold for new positions (was 0.7)
|
||||
self.exit_confidence_threshold = 0.15 # Very low threshold for closing positions (was 0.3)
|
||||
self.uncertainty_threshold = 0.1 # When to stay neutral
|
||||
device_setting = gpu_config.get('device', 'auto')
|
||||
fallback_to_cpu = gpu_config.get('fallback_to_cpu', True)
|
||||
gpu_enabled = gpu_config.get('enabled', True)
|
||||
|
||||
# If GPU is disabled in config, use CPU
|
||||
if not gpu_enabled:
|
||||
logger.info("GPU disabled in config.yaml, using CPU")
|
||||
return torch.device('cpu')
|
||||
|
||||
# Handle device selection
|
||||
if device_setting == 'cpu':
|
||||
logger.info("Device set to CPU in config.yaml")
|
||||
return torch.device('cpu')
|
||||
elif device_setting == 'cuda' or device_setting == 'auto':
|
||||
# Try GPU first
|
||||
if torch.cuda.is_available():
|
||||
logger.info("Using GPU (CUDA available)")
|
||||
return torch.device('cuda')
|
||||
else:
|
||||
if fallback_to_cpu:
|
||||
logger.warning("CUDA not available, falling back to CPU")
|
||||
return torch.device('cpu')
|
||||
else:
|
||||
raise RuntimeError("CUDA not available and fallback_to_cpu is False")
|
||||
else:
|
||||
logger.warning(f"Unknown device setting '{device_setting}', using auto-detection")
|
||||
return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error reading device config: {e}, using auto-detection")
|
||||
# Fallback to auto-detection
|
||||
return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
|
||||
def load_best_checkpoint(self):
|
||||
"""Load the best checkpoint for this DQN agent"""
|
||||
@@ -512,104 +520,7 @@ class DQNAgent:
|
||||
except Exception as e:
|
||||
logger.error(f"Error saving DQN checkpoint: {e}")
|
||||
return False
|
||||
|
||||
# Store separate memory for price direction examples
|
||||
self.price_movement_memory = [] # For storing examples of clear price movements
|
||||
|
||||
# Performance tracking
|
||||
self.losses = []
|
||||
self.no_improvement_count = 0
|
||||
|
||||
# Confidence tracking
|
||||
self.confidence_history = []
|
||||
self.avg_confidence = 0.0
|
||||
self.max_confidence = 0.0
|
||||
self.min_confidence = 1.0
|
||||
|
||||
# Enhanced features from EnhancedDQNAgent
|
||||
# Market adaptation capabilities
|
||||
self.market_regime_weights = {
|
||||
'trending': 1.2, # Higher confidence in trending markets
|
||||
'ranging': 0.8, # Lower confidence in ranging markets
|
||||
'volatile': 0.6 # Much lower confidence in volatile markets
|
||||
}
|
||||
|
||||
# Dueling network support (requires enhanced network architecture)
|
||||
self.use_dueling = True
|
||||
|
||||
# Prioritized experience replay parameters
|
||||
self.use_prioritized_replay = priority_memory
|
||||
self.alpha = 0.6 # Priority exponent
|
||||
self.beta = 0.4 # Importance sampling exponent
|
||||
self.beta_increment = 0.001
|
||||
|
||||
# Double DQN support
|
||||
self.use_double_dqn = True
|
||||
|
||||
# Enhanced training features from EnhancedDQNAgent
|
||||
self.target_update_freq = target_update # More descriptive name
|
||||
self.training_steps = 0
|
||||
self.gradient_clip_norm = 1.0 # Gradient clipping
|
||||
|
||||
# Enhanced statistics tracking
|
||||
self.epsilon_history = []
|
||||
self.td_errors = [] # Track TD errors for analysis
|
||||
|
||||
# Trade action fee and confidence thresholds
|
||||
self.trade_action_fee = 0.0005 # Small fee to discourage unnecessary trading
|
||||
self.minimum_action_confidence = 0.3 # Minimum confidence to consider trading (lowered from 0.5)
|
||||
|
||||
# Violent move detection
|
||||
self.price_history = []
|
||||
self.volatility_window = 20 # Window size for volatility calculation
|
||||
self.volatility_threshold = 0.0015 # Threshold for considering a move "violent"
|
||||
self.post_violent_move = False # Flag for recent violent move
|
||||
self.violent_move_cooldown = 0 # Cooldown after violent move
|
||||
|
||||
# Feature integration
|
||||
self.last_hidden_features = None # Store last extracted features
|
||||
self.feature_history = [] # Store history of features for analysis
|
||||
|
||||
# Real-time tick features integration
|
||||
self.realtime_tick_features = None # Latest tick features from tick processor
|
||||
self.tick_feature_weight = 0.3 # Weight for tick features in decision making
|
||||
|
||||
# Check if mixed precision training should be used
|
||||
if torch.cuda.is_available() and hasattr(torch.cuda, 'amp') and 'DISABLE_MIXED_PRECISION' not in os.environ:
|
||||
self.use_mixed_precision = True
|
||||
self.scaler = torch.amp.GradScaler('cuda')
|
||||
logger.info("Mixed precision training enabled")
|
||||
else:
|
||||
self.use_mixed_precision = False
|
||||
logger.info("Mixed precision training disabled")
|
||||
|
||||
# Track if we're in training mode
|
||||
self.training = True
|
||||
|
||||
# For compatibility with old code
|
||||
self.state_size = np.prod(state_shape)
|
||||
self.action_size = n_actions
|
||||
self.memory_size = buffer_size
|
||||
self.timeframes = ["1m", "5m", "15m"][:self.state_dim[0] if isinstance(self.state_dim, tuple) else 3] # Default timeframes
|
||||
|
||||
logger.info(f"DQN Agent using Enhanced CNN with device: {self.device}")
|
||||
logger.info(f"Trade action fee set to {self.trade_action_fee}, minimum confidence: {self.minimum_action_confidence}")
|
||||
logger.info(f"Real-time tick feature integration enabled with weight: {self.tick_feature_weight}")
|
||||
|
||||
# Log model parameters
|
||||
total_params = sum(p.numel() for p in self.policy_net.parameters())
|
||||
logger.info(f"Enhanced CNN Policy Network: {total_params:,} parameters")
|
||||
|
||||
# Position management for 2-action system
|
||||
self.current_position = 0.0 # -1 (short), 0 (neutral), 1 (long)
|
||||
self.position_entry_price = 0.0
|
||||
self.position_entry_time = None
|
||||
|
||||
# Different thresholds for entry vs exit decisions - AGGRESSIVE for more training data
|
||||
self.entry_confidence_threshold = 0.35 # Lower threshold for new positions (was 0.7)
|
||||
self.exit_confidence_threshold = 0.15 # Very low threshold for closing positions (was 0.3)
|
||||
self.uncertainty_threshold = 0.1 # When to stay neutral
|
||||
|
||||
|
||||
def move_models_to_device(self, device=None):
|
||||
"""Move models to the specified device (GPU/CPU)"""
|
||||
if device is not None:
|
||||
|
||||
Reference in New Issue
Block a user