improve training and model data
This commit is contained in:
@ -5,7 +5,7 @@ import numpy as np
|
||||
from collections import deque
|
||||
import random
|
||||
from typing import Tuple, List
|
||||
import os
|
||||
import osvu
|
||||
import sys
|
||||
import logging
|
||||
import torch.nn.functional as F
|
||||
@ -129,7 +129,128 @@ class DQNAgent:
|
||||
logger.info(f"DQN Agent initialized with checkpoint management: {enable_checkpoints}")
|
||||
if enable_checkpoints:
|
||||
logger.info(f"Model name: {model_name}, Checkpoint frequency: {self.checkpoint_frequency}")
|
||||
|
||||
|
||||
# Add this line to the __init__ method
|
||||
self.recent_actions = deque(maxlen=10)
|
||||
self.recent_prices = deque(maxlen=20)
|
||||
self.recent_rewards = deque(maxlen=100)
|
||||
|
||||
# Price prediction tracking
|
||||
self.last_price_pred = {
|
||||
'immediate': {
|
||||
'direction': 1, # Default to "sideways"
|
||||
'confidence': 0.0,
|
||||
'change': 0.0
|
||||
},
|
||||
'midterm': {
|
||||
'direction': 1, # Default to "sideways"
|
||||
'confidence': 0.0,
|
||||
'change': 0.0
|
||||
},
|
||||
'longterm': {
|
||||
'direction': 1, # Default to "sideways"
|
||||
'confidence': 0.0,
|
||||
'change': 0.0
|
||||
}
|
||||
}
|
||||
|
||||
# Store separate memory for price direction examples
|
||||
self.price_movement_memory = [] # For storing examples of clear price movements
|
||||
|
||||
# Performance tracking
|
||||
self.losses = []
|
||||
self.no_improvement_count = 0
|
||||
|
||||
# Confidence tracking
|
||||
self.confidence_history = []
|
||||
self.avg_confidence = 0.0
|
||||
self.max_confidence = 0.0
|
||||
self.min_confidence = 1.0
|
||||
|
||||
# Enhanced features from EnhancedDQNAgent
|
||||
# Market adaptation capabilities
|
||||
self.market_regime_weights = {
|
||||
'trending': 1.2, # Higher confidence in trending markets
|
||||
'ranging': 0.8, # Lower confidence in ranging markets
|
||||
'volatile': 0.6 # Much lower confidence in volatile markets
|
||||
}
|
||||
|
||||
# Dueling network support (requires enhanced network architecture)
|
||||
self.use_dueling = True
|
||||
|
||||
# Prioritized experience replay parameters
|
||||
self.use_prioritized_replay = priority_memory
|
||||
self.alpha = 0.6 # Priority exponent
|
||||
self.beta = 0.4 # Importance sampling exponent
|
||||
self.beta_increment = 0.001
|
||||
|
||||
# Double DQN support
|
||||
self.use_double_dqn = True
|
||||
|
||||
# Enhanced training features from EnhancedDQNAgent
|
||||
self.target_update_freq = target_update # More descriptive name
|
||||
self.training_steps = 0
|
||||
self.gradient_clip_norm = 1.0 # Gradient clipping
|
||||
|
||||
# Enhanced statistics tracking
|
||||
self.epsilon_history = []
|
||||
self.td_errors = [] # Track TD errors for analysis
|
||||
|
||||
# Trade action fee and confidence thresholds
|
||||
self.trade_action_fee = 0.0005 # Small fee to discourage unnecessary trading
|
||||
self.minimum_action_confidence = 0.3 # Minimum confidence to consider trading (lowered from 0.5)
|
||||
|
||||
# Violent move detection
|
||||
self.price_history = []
|
||||
self.volatility_window = 20 # Window size for volatility calculation
|
||||
self.volatility_threshold = 0.0015 # Threshold for considering a move "violent"
|
||||
self.post_violent_move = False # Flag for recent violent move
|
||||
self.violent_move_cooldown = 0 # Cooldown after violent move
|
||||
|
||||
# Feature integration
|
||||
self.last_hidden_features = None # Store last extracted features
|
||||
self.feature_history = [] # Store history of features for analysis
|
||||
|
||||
# Real-time tick features integration
|
||||
self.realtime_tick_features = None # Latest tick features from tick processor
|
||||
self.tick_feature_weight = 0.3 # Weight for tick features in decision making
|
||||
|
||||
# Check if mixed precision training should be used
|
||||
self.use_mixed_precision = False
|
||||
if torch.cuda.is_available() and hasattr(torch.cuda, 'amp') and 'DISABLE_MIXED_PRECISION' not in os.environ:
|
||||
self.use_mixed_precision = True
|
||||
self.scaler = torch.cuda.amp.GradScaler()
|
||||
logger.info("Mixed precision training enabled")
|
||||
else:
|
||||
logger.info("Mixed precision training disabled")
|
||||
|
||||
# Track if we're in training mode
|
||||
self.training = True
|
||||
|
||||
# For compatibility with old code
|
||||
self.state_size = np.prod(state_shape)
|
||||
self.action_size = n_actions
|
||||
self.memory_size = buffer_size
|
||||
self.timeframes = ["1m", "5m", "15m"][:self.state_dim[0] if isinstance(self.state_dim, tuple) else 3] # Default timeframes
|
||||
|
||||
logger.info(f"DQN Agent using Enhanced CNN with device: {self.device}")
|
||||
logger.info(f"Trade action fee set to {self.trade_action_fee}, minimum confidence: {self.minimum_action_confidence}")
|
||||
logger.info(f"Real-time tick feature integration enabled with weight: {self.tick_feature_weight}")
|
||||
|
||||
# Log model parameters
|
||||
total_params = sum(p.numel() for p in self.policy_net.parameters())
|
||||
logger.info(f"Enhanced CNN Policy Network: {total_params:,} parameters")
|
||||
|
||||
# Position management for 2-action system
|
||||
self.current_position = 0.0 # -1 (short), 0 (neutral), 1 (long)
|
||||
self.position_entry_price = 0.0
|
||||
self.position_entry_time = None
|
||||
|
||||
# Different thresholds for entry vs exit decisions - AGGRESSIVE for more training data
|
||||
self.entry_confidence_threshold = 0.35 # Lower threshold for new positions (was 0.7)
|
||||
self.exit_confidence_threshold = 0.15 # Very low threshold for closing positions (was 0.3)
|
||||
self.uncertainty_threshold = 0.1 # When to stay neutral
|
||||
|
||||
def load_best_checkpoint(self):
|
||||
"""Load the best checkpoint for this DQN agent"""
|
||||
try:
|
||||
@ -267,9 +388,6 @@ class DQNAgent:
|
||||
# Trade action fee and confidence thresholds
|
||||
self.trade_action_fee = 0.0005 # Small fee to discourage unnecessary trading
|
||||
self.minimum_action_confidence = 0.3 # Minimum confidence to consider trading (lowered from 0.5)
|
||||
self.recent_actions = deque(maxlen=10)
|
||||
self.recent_prices = deque(maxlen=20)
|
||||
self.recent_rewards = deque(maxlen=100)
|
||||
|
||||
# Violent move detection
|
||||
self.price_history = []
|
||||
|
Reference in New Issue
Block a user