replay system

This commit is contained in:
Dobromir Popov
2025-07-20 12:37:02 +03:00
parent 469269e809
commit 12865fd3ef
13 changed files with 6132 additions and 465 deletions

View File

@ -0,0 +1,400 @@
#!/usr/bin/env python3
"""
Test Training Data Collection System
This script demonstrates and tests the comprehensive training data collection
system with data validation, rapid change detection, and profitable setup replay.
"""
import asyncio
import logging
import numpy as np
import pandas as pd
import time
from datetime import datetime, timedelta
from pathlib import Path
# Setup logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Import our training system components
from core.training_data_collector import (
TrainingDataCollector,
RapidChangeDetector,
ModelInputPackage,
TrainingOutcome,
TrainingEpisode
)
from core.cnn_training_pipeline import (
CNNPivotPredictor,
CNNTrainer
)
from core.data_provider import DataProvider
def create_sample_ohlcv_data() -> Dict[str, pd.DataFrame]:
"""Create sample OHLCV data for testing"""
timeframes = ['1s', '1m', '5m', '15m', '1h']
ohlcv_data = {}
for timeframe in timeframes:
# Create sample data
dates = pd.date_range(start='2024-01-01', periods=300, freq='1min')
# Generate realistic price data
base_price = 3000.0 # ETH price
price_data = []
current_price = base_price
for i in range(300):
# Add some randomness
change = np.random.normal(0, 0.002) # 0.2% std dev
current_price *= (1 + change)
# OHLCV for this period
open_price = current_price
high_price = current_price * (1 + abs(np.random.normal(0, 0.001)))
low_price = current_price * (1 - abs(np.random.normal(0, 0.001)))
close_price = current_price * (1 + np.random.normal(0, 0.0005))
volume = np.random.uniform(100, 1000)
price_data.append({
'timestamp': dates[i],
'open': open_price,
'high': high_price,
'low': low_price,
'close': close_price,
'volume': volume
})
current_price = close_price
df = pd.DataFrame(price_data)
df.set_index('timestamp', inplace=True)
ohlcv_data[timeframe] = df
return ohlcv_data
def create_sample_tick_data() -> List[Dict[str, Any]]:
"""Create sample tick data for testing"""
tick_data = []
base_price = 3000.0
for i in range(100):
tick_data.append({
'timestamp': datetime.now() - timedelta(seconds=100-i),
'price': base_price + np.random.normal(0, 5),
'volume': np.random.uniform(0.1, 10.0),
'side': 'buy' if np.random.random() > 0.5 else 'sell',
'trade_id': f'trade_{i}',
'quantity': np.random.uniform(0.1, 5.0)
})
return tick_data
def create_sample_cob_data() -> Dict[str, Any]:
"""Create sample COB data for testing"""
return {
'timestamp': datetime.now(),
'bid_levels': [3000 - i for i in range(10)],
'ask_levels': [3000 + i for i in range(10)],
'bid_volumes': [np.random.uniform(1, 10) for _ in range(10)],
'ask_volumes': [np.random.uniform(1, 10) for _ in range(10)],
'spread': 1.0,
'depth': 100.0
}
def test_rapid_change_detector():
"""Test the rapid change detection system"""
logger.info("=== Testing Rapid Change Detector ===")
detector = RapidChangeDetector(
velocity_threshold=0.5,
volatility_multiplier=3.0,
lookback_minutes=5
)
symbol = 'ETHUSDT'
base_price = 3000.0
# Add normal price points
for i in range(120): # 2 minutes of data
timestamp = datetime.now() - timedelta(seconds=120-i)
price = base_price + np.random.normal(0, 1) # Small changes
detector.add_price_point(symbol, timestamp, price)
# Check for rapid change (should be False)
is_rapid, velocity, volatility_spike = detector.detect_rapid_change(symbol)
logger.info(f"Normal conditions - Rapid change: {is_rapid}, Velocity: {velocity:.3f}")
# Add rapid price change
for i in range(60): # 1 minute of rapid changes
timestamp = datetime.now() - timedelta(seconds=60-i)
price = base_price + 50 + i * 0.5 # Rapid increase
detector.add_price_point(symbol, timestamp, price)
# Check for rapid change (should be True)
is_rapid, velocity, volatility_spike = detector.detect_rapid_change(symbol)
logger.info(f"Rapid change conditions - Rapid change: {is_rapid}, Velocity: {velocity:.3f}")
return detector
def test_training_data_collector():
"""Test the training data collection system"""
logger.info("=== Testing Training Data Collector ===")
# Initialize collector
collector = TrainingDataCollector(
storage_dir="test_training_data",
max_episodes_per_symbol=100
)
collector.start_collection()
symbol = 'ETHUSDT'
# Create sample data
ohlcv_data = create_sample_ohlcv_data()
tick_data = create_sample_tick_data()
cob_data = create_sample_cob_data()
technical_indicators = {
'rsi_14': 65.5,
'macd': 0.5,
'sma_20': 3000.0,
'ema_12': 3005.0,
'bollinger_upper': 3050.0,
'bollinger_lower': 2950.0
}
pivot_points = [
{'timestamp': datetime.now(), 'price': 3020.0, 'type': 'high'},
{'timestamp': datetime.now() - timedelta(minutes=30), 'price': 2980.0, 'type': 'low'}
]
# Create CNN and RL features
cnn_features = np.random.randn(2000).astype(np.float32)
rl_state = np.random.randn(2000).astype(np.float32)
orchestrator_context = {
'market_session': 'european',
'volatility_regime': 'medium',
'trend_direction': 'uptrend'
}
# Collect training data
episode_id = collector.collect_training_data(
symbol=symbol,
ohlcv_data=ohlcv_data,
tick_data=tick_data,
cob_data=cob_data,
technical_indicators=technical_indicators,
pivot_points=pivot_points,
cnn_features=cnn_features,
rl_state=rl_state,
orchestrator_context=orchestrator_context
)
logger.info(f"Created training episode: {episode_id}")
# Test data validation
validation_results = collector.validate_data_integrity()
logger.info(f"Data integrity validation: {validation_results}")
# Get statistics
stats = collector.get_collection_statistics()
logger.info(f"Collection statistics: {stats}")
collector.stop_collection()
return collector
def test_cnn_training_pipeline():
"""Test the CNN training pipeline"""
logger.info("=== Testing CNN Training Pipeline ===")
# Initialize CNN model and trainer
model = CNNPivotPredictor(
input_channels=10,
sequence_length=300,
hidden_dim=128, # Smaller for testing
num_pivot_classes=3
)
trainer = CNNTrainer(
model=model,
device='cpu', # Use CPU for testing
learning_rate=0.001,
storage_dir="test_cnn_training"
)
# Create sample training episodes
episodes = []
for i in range(50): # Create 50 sample episodes
# Create sample input package
input_package = ModelInputPackage(
timestamp=datetime.now() - timedelta(minutes=i),
symbol='ETHUSDT',
ohlcv_data=create_sample_ohlcv_data(),
tick_data=create_sample_tick_data(),
cob_data=create_sample_cob_data(),
technical_indicators={'rsi': 50.0, 'macd': 0.0},
pivot_points=[],
cnn_features=np.random.randn(2000).astype(np.float32),
rl_state=np.random.randn(2000).astype(np.float32),
orchestrator_context={}
)
# Create sample outcome
outcome = TrainingOutcome(
input_package_hash=input_package.data_hash,
timestamp=input_package.timestamp,
symbol='ETHUSDT',
price_change_1m=np.random.normal(0, 0.01),
price_change_5m=np.random.normal(0, 0.02),
price_change_15m=np.random.normal(0, 0.03),
price_change_1h=np.random.normal(0, 0.05),
max_profit_potential=abs(np.random.normal(0, 0.02)),
max_loss_potential=abs(np.random.normal(0, 0.015)),
optimal_entry_price=3000.0,
optimal_exit_price=3000.0 + np.random.normal(0, 10),
optimal_holding_time=timedelta(minutes=np.random.randint(5, 60)),
is_profitable=np.random.random() > 0.4, # 60% profitable
profitability_score=np.random.uniform(0.3, 1.0),
risk_reward_ratio=np.random.uniform(1.0, 3.0),
is_rapid_change=np.random.random() > 0.8, # 20% rapid changes
change_velocity=np.random.uniform(0.1, 2.0),
volatility_spike=np.random.random() > 0.9,
outcome_validated=True
)
# Create training episode
episode = TrainingEpisode(
episode_id=f"test_episode_{i}",
input_package=input_package,
model_predictions={},
actual_outcome=outcome,
episode_type='normal'
)
episodes.append(episode)
# Test training on episodes
results = trainer._train_on_episodes(episodes, training_mode='test_batch')
logger.info(f"Training results: {results}")
# Test profitable episode training
profitable_results = trainer.train_on_profitable_episodes(
symbol='ETHUSDT',
min_profitability=0.7,
max_episodes=20
)
logger.info(f"Profitable training results: {profitable_results}")
# Get training statistics
stats = trainer.get_training_statistics()
logger.info(f"Training statistics: {stats}")
return trainer
def test_integration():
"""Test the complete integration"""
logger.info("=== Testing Complete Integration ===")
try:
# Test individual components
detector = test_rapid_change_detector()
collector = test_training_data_collector()
trainer = test_cnn_training_pipeline()
logger.info("✅ All components tested successfully!")
# Test data flow
logger.info("Testing data flow integration...")
# Simulate real-time data collection and training
symbol = 'ETHUSDT'
# Collect multiple data points
for i in range(10):
ohlcv_data = create_sample_ohlcv_data()
tick_data = create_sample_tick_data()
cob_data = create_sample_cob_data()
episode_id = collector.collect_training_data(
symbol=symbol,
ohlcv_data=ohlcv_data,
tick_data=tick_data,
cob_data=cob_data,
technical_indicators={'rsi': 50.0 + i},
pivot_points=[],
cnn_features=np.random.randn(2000).astype(np.float32),
rl_state=np.random.randn(2000).astype(np.float32),
orchestrator_context={}
)
logger.info(f"Collected episode {i+1}: {episode_id}")
time.sleep(0.1) # Small delay
# Get final statistics
final_stats = collector.get_collection_statistics()
logger.info(f"Final collection statistics: {final_stats}")
logger.info("✅ Integration test completed successfully!")
return True
except Exception as e:
logger.error(f"❌ Integration test failed: {e}")
import traceback
logger.error(traceback.format_exc())
return False
def main():
"""Main test function"""
logger.info("=" * 80)
logger.info("COMPREHENSIVE TRAINING DATA COLLECTION SYSTEM TEST")
logger.info("=" * 80)
start_time = time.time()
try:
# Run integration test
success = test_integration()
end_time = time.time()
duration = end_time - start_time
logger.info("=" * 80)
if success:
logger.info("✅ ALL TESTS PASSED!")
else:
logger.info("❌ SOME TESTS FAILED!")
logger.info(f"Test duration: {duration:.2f} seconds")
logger.info("=" * 80)
# Display summary
logger.info("\n📊 SYSTEM CAPABILITIES DEMONSTRATED:")
logger.info("✓ Comprehensive training data collection with validation")
logger.info("✓ Rapid price change detection for premium training examples")
logger.info("✓ Data integrity validation and completeness checking")
logger.info("✓ CNN training pipeline with backpropagation data storage")
logger.info("✓ Profitable episode prioritization and replay")
logger.info("✓ Training session value calculation and ranking")
logger.info("✓ Real-time data integration capabilities")
logger.info("\n🎯 NEXT STEPS:")
logger.info("1. Integrate with existing DataProvider for real market data")
logger.info("2. Connect with actual CNN and RL models")
logger.info("3. Implement outcome validation with real price data")
logger.info("4. Add dashboard integration for monitoring")
logger.info("5. Scale up for production deployment")
except Exception as e:
logger.error(f"❌ Test execution failed: {e}")
import traceback
logger.error(traceback.format_exc())
if __name__ == "__main__":
main()