fix model mappings, dash updates, trading
@@ -1,76 +1,87 @@
-# #!/usr/bin/env python3
-# """
-# Enhanced RL Training Launcher with Real Data Integration
+#!/usr/bin/env python3
+"""
+Enhanced RL Training Launcher with Real Data Integration

-# This script launches the comprehensive RL training system that uses:
-# - Real-time tick data (300s window for momentum detection)
-# - Multi-timeframe OHLCV data (1s, 1m, 1h, 1d)
-# - BTC reference data for correlation
-# - CNN hidden features and predictions
-# - Williams Market Structure pivot points
-# - Market microstructure analysis
+This script launches the comprehensive RL training system that uses:
+- Real-time tick data (300s window for momentum detection)
+- Multi-timeframe OHLCV data (1s, 1m, 1h, 1d)
+- BTC reference data for correlation
+- CNN hidden features and predictions
+- Williams Market Structure pivot points
+- Market microstructure analysis

-# The RL model will receive ~13,400 features instead of the previous ~100 basic features.
-# """
+The RL model will receive ~13,400 features instead of the previous ~100 basic features.
+Training metrics are automatically logged to TensorBoard for visualization.
+"""

-# import asyncio
-# import logging
-# import time
-# import signal
-# import sys
-# from datetime import datetime, timedelta
-# from pathlib import Path
-# from typing import Dict, List, Optional
+import asyncio
+import logging
+import time
+import signal
+import sys
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Dict, List, Optional

-# # Configure logging
-# logging.basicConfig(
-# level=logging.INFO,
-# format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-# handlers=[
-# logging.FileHandler('enhanced_rl_training.log'),
-# logging.StreamHandler(sys.stdout)
-# ]
-# )
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler('enhanced_rl_training.log'),
+        logging.StreamHandler(sys.stdout)
+    ]
+)

-# logger = logging.getLogger(__name__)
+logger = logging.getLogger(__name__)

-# # Import our enhanced components
-# from core.config import get_config
-# from core.data_provider import DataProvider
-# from core.enhanced_orchestrator import EnhancedTradingOrchestrator
-# from training.enhanced_rl_trainer import EnhancedRLTrainer
-# from training.enhanced_rl_state_builder import EnhancedRLStateBuilder
-# from training.williams_market_structure import WilliamsMarketStructure
-# from training.cnn_rl_bridge import CNNRLBridge
+# Import our enhanced components
+from core.config import get_config
+from core.data_provider import DataProvider
+from core.enhanced_orchestrator import EnhancedTradingOrchestrator
+from training.enhanced_rl_trainer import EnhancedRLTrainer
+from training.enhanced_rl_state_builder import EnhancedRLStateBuilder
+from training.williams_market_structure import WilliamsMarketStructure
+from training.cnn_rl_bridge import CNNRLBridge
+from utils.tensorboard_logger import TensorBoardLogger

-# class EnhancedRLTrainingSystem:
-# """Comprehensive RL training system with real data integration"""
+class EnhancedRLTrainingSystem:
+    """Comprehensive RL training system with real data integration"""

-# def __init__(self):
-# """Initialize the enhanced RL training system"""
-# self.config = get_config()
-# self.running = False
-# self.data_provider = None
-# self.orchestrator = None
-# self.rl_trainer = None
+    def __init__(self):
+        """Initialize the enhanced RL training system"""
+        self.config = get_config()
+        self.running = False
+        self.data_provider = None
+        self.orchestrator = None
+        self.rl_trainer = None

-# # Performance tracking
-# self.training_stats = {
-# 'training_sessions': 0,
-# 'total_experiences': 0,
-# 'avg_state_size': 0,
-# 'data_quality_score': 0.0,
-# 'last_training_time': None
-# }
+        # Performance tracking
+        self.training_stats = {
+            'training_sessions': 0,
+            'total_experiences': 0,
+            'avg_state_size': 0,
+            'data_quality_score': 0.0,
+            'last_training_time': None
+        }

-# logger.info("Enhanced RL Training System initialized")
-# logger.info("Features:")
-# logger.info("- Real-time tick data processing (300s window)")
-# logger.info("- Multi-timeframe OHLCV analysis (1s, 1m, 1h, 1d)")
-# logger.info("- BTC correlation analysis")
-# logger.info("- CNN feature integration")
-# logger.info("- Williams Market Structure pivot points")
-# logger.info("- ~13,400 feature state vector (vs previous ~100)")
+        # Initialize TensorBoard logger
+        experiment_name = f"enhanced_rl_training_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+        self.tb_logger = TensorBoardLogger(
+            log_dir="runs",
+            experiment_name=experiment_name,
+            enabled=True
+        )
+
+        logger.info("Enhanced RL Training System initialized")
+        logger.info(f"TensorBoard logging enabled for experiment: {experiment_name}")
+        logger.info("Features:")
+        logger.info("- Real-time tick data processing (300s window)")
+        logger.info("- Multi-timeframe OHLCV analysis (1s, 1m, 1h, 1d)")
+        logger.info("- BTC correlation analysis")
+        logger.info("- CNN feature integration")
+        logger.info("- Williams Market Structure pivot points")
+        logger.info("- ~13,400 feature state vector (vs previous ~100)")

 # async def initialize(self):
 # """Initialize all components"""
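Note: the TensorBoardLogger constructed in __init__ above comes from utils.tensorboard_logger, which is not part of this diff, so its interface is only inferred from the calls made in this commit (the log_dir/experiment_name/enabled constructor arguments plus log_scalar and log_scalars). A minimal sketch of such a wrapper, assuming it delegates to torch.utils.tensorboard.SummaryWriter:

# Hypothetical sketch of utils/tensorboard_logger.py; the real module is not shown
# in this diff, so names and behavior here are assumptions.
from pathlib import Path
from typing import Dict, Optional

from torch.utils.tensorboard import SummaryWriter


class TensorBoardLogger:
    """Thin wrapper around SummaryWriter that can be disabled cleanly."""

    def __init__(self, log_dir: str = "runs", experiment_name: str = "default", enabled: bool = True):
        self.enabled = enabled
        self.writer: Optional[SummaryWriter] = None
        if enabled:
            # One subdirectory per experiment, e.g. runs/enhanced_rl_training_20240101_120000
            run_dir = Path(log_dir) / experiment_name
            self.writer = SummaryWriter(log_dir=str(run_dir))

    def log_scalar(self, tag: str, value: float, step: int) -> None:
        """Log a single scalar, e.g. a per-symbol loss or state size."""
        if self.writer is not None:
            self.writer.add_scalar(tag, value, step)

    def log_scalars(self, main_tag: str, values: Dict[str, float], step: int) -> None:
        """Log a group of related scalars under one main tag."""
        if self.writer is not None:
            self.writer.add_scalars(main_tag, values, step)

    def close(self) -> None:
        if self.writer is not None:
            self.writer.close()

Keeping the SummaryWriter behind an enabled flag lets the training loop call the logger unconditionally even when TensorBoard output is switched off.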
@@ -274,69 +285,106 @@
 # logger.warning(f"Error calculating data quality: {e}")
 # return 0.5 # Default to medium quality

-# async def _train_rl_agents(self, market_states: Dict[str, any]) -> Dict[str, any]:
-# """Train RL agents with comprehensive market states"""
-# try:
-# training_results = {
-# 'symbols_trained': [],
-# 'total_experiences': 0,
-# 'avg_state_size': 0,
-# 'training_errors': []
-# }
+    async def _train_rl_agents(self, market_states: Dict[str, any]) -> Dict[str, any]:
+        """Train RL agents with comprehensive market states"""
+        try:
+            training_results = {
+                'symbols_trained': [],
+                'total_experiences': 0,
+                'avg_state_size': 0,
+                'training_errors': [],
+                'losses': {},
+                'rewards': {}
+            }

-# for symbol, market_state in market_states.items():
-# try:
-# # Convert market state to comprehensive RL state
-# rl_state = self.rl_trainer._market_state_to_rl_state(market_state)
+            for symbol, market_state in market_states.items():
+                try:
+                    # Convert market state to comprehensive RL state
+                    rl_state = self.rl_trainer._market_state_to_rl_state(market_state)

-# if rl_state is not None and len(rl_state) > 0:
-# # Record state size
-# training_results['avg_state_size'] += len(rl_state)
+                    if rl_state is not None and len(rl_state) > 0:
+                        # Record state size
+                        state_size = len(rl_state)
+                        training_results['avg_state_size'] += state_size
+
+                        # Log state size to TensorBoard
+                        self.tb_logger.log_scalar(
+                            f'State/{symbol}/Size',
+                            state_size,
+                            self.training_stats['training_sessions']
+                        )

-# # Simulate trading action for experience generation
-# # In real implementation, this would be actual trading decisions
-# action = self._simulate_trading_action(symbol, rl_state)
+                        # Simulate trading action for experience generation
+                        # In real implementation, this would be actual trading decisions
+                        action = self._simulate_trading_action(symbol, rl_state)

-# # Generate reward based on market outcome
-# reward = self._calculate_training_reward(symbol, market_state, action)
+                        # Generate reward based on market outcome
+                        reward = self._calculate_training_reward(symbol, market_state, action)
+
+                        # Store reward for TensorBoard logging
+                        training_results['rewards'][symbol] = reward
+
+                        # Log action and reward to TensorBoard
+                        self.tb_logger.log_scalars(f'Actions/{symbol}', {
+                            'action': action,
+                            'reward': reward
+                        }, self.training_stats['training_sessions'])

-# # Add experience to RL agent
-# agent = self.rl_trainer.agents.get(symbol)
-# if agent:
-# # Create next state (would be actual next market state in real scenario)
-# next_state = rl_state # Simplified for now
+                        # Add experience to RL agent
+                        agent = self.rl_trainer.agents.get(symbol)
+                        if agent:
+                            # Create next state (would be actual next market state in real scenario)
+                            next_state = rl_state  # Simplified for now

-# agent.remember(
-# state=rl_state,
-# action=action,
-# reward=reward,
-# next_state=next_state,
-# done=False
-# )
+                            agent.remember(
+                                state=rl_state,
+                                action=action,
+                                reward=reward,
+                                next_state=next_state,
+                                done=False
+                            )

-# # Train agent if enough experiences
-# if len(agent.replay_buffer) >= agent.batch_size:
-# loss = agent.replay()
-# if loss is not None:
-# logger.debug(f"Agent {symbol} training loss: {loss:.4f}")
+                            # Train agent if enough experiences
+                            if len(agent.replay_buffer) >= agent.batch_size:
+                                loss = agent.replay()
+                                if loss is not None:
+                                    logger.debug(f"Agent {symbol} training loss: {loss:.4f}")
+
+                                    # Store loss for TensorBoard logging
+                                    training_results['losses'][symbol] = loss
+
+                                    # Log loss to TensorBoard
+                                    self.tb_logger.log_scalar(
+                                        f'Training/{symbol}/Loss',
+                                        loss,
+                                        self.training_stats['training_sessions']
+                                    )

-# training_results['symbols_trained'].append(symbol)
-# training_results['total_experiences'] += 1
+                        training_results['symbols_trained'].append(symbol)
+                        training_results['total_experiences'] += 1

-# except Exception as e:
-# error_msg = f"Error training {symbol}: {e}"
-# logger.warning(error_msg)
-# training_results['training_errors'].append(error_msg)
+                except Exception as e:
+                    error_msg = f"Error training {symbol}: {e}"
+                    logger.warning(error_msg)
+                    training_results['training_errors'].append(error_msg)

-# # Calculate average state size
-# if len(training_results['symbols_trained']) > 0:
-# training_results['avg_state_size'] /= len(training_results['symbols_trained'])
+            # Calculate average state size
+            if len(training_results['symbols_trained']) > 0:
+                training_results['avg_state_size'] /= len(training_results['symbols_trained'])
+
+            # Log overall training metrics to TensorBoard
+            self.tb_logger.log_scalars('Training/Overall', {
+                'symbols_trained': len(training_results['symbols_trained']),
+                'experiences': training_results['total_experiences'],
+                'avg_state_size': training_results['avg_state_size'],
+                'errors': len(training_results['training_errors'])
+            }, self.training_stats['training_sessions'])

-# return training_results
+            return training_results

-# except Exception as e:
-# logger.error(f"Error training RL agents: {e}")
-# return {'error': str(e)}
+        except Exception as e:
+            logger.error(f"Error training RL agents: {e}")
+            return {'error': str(e)}

 # def _simulate_trading_action(self, symbol: str, rl_state) -> int:
 # """Simulate trading action for training (would be real decision in production)"""
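The per-symbol and overall metrics logged in _train_rl_agents above end up as TensorBoard scalars, presumably under runs/enhanced_rl_training_<timestamp> given the constructor arguments in __init__, and can be inspected by pointing the standard TensorBoard CLI at that directory (tensorboard --logdir runs). A small usage sketch of the assumed logger interface showing the tag layout these calls produce; the symbol name and numeric values below are illustrative only, not taken from this commit:

# Hypothetical usage sketch; the TensorBoardLogger API is assumed from the calls above.
from utils.tensorboard_logger import TensorBoardLogger

tb_logger = TensorBoardLogger(log_dir="runs", experiment_name="enhanced_rl_training_demo", enabled=True)

step = 0                # stands in for self.training_stats['training_sessions']
symbol = "ETH/USDT"     # illustrative symbol, not taken from this commit

tb_logger.log_scalar(f'State/{symbol}/Size', 13400, step)                        # per-symbol state size
tb_logger.log_scalars(f'Actions/{symbol}', {'action': 1, 'reward': 0.25}, step)  # simulated action and reward
tb_logger.log_scalar(f'Training/{symbol}/Loss', 0.0421, step)                    # replay loss, when available
tb_logger.log_scalars('Training/Overall', {
    'symbols_trained': 1,
    'experiences': 1,
    'avg_state_size': 13400,
    'errors': 0
}, step)
tb_logger.close()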