From a3828c708cd69108722a020fcff347f7e0735232 Mon Sep 17 00:00:00 2001
From: Dobromir Popov
Date: Fri, 25 Jul 2025 23:59:51 +0300
Subject: [PATCH] fix network rebuild

---
 NN/models/enhanced_cnn.py |  32 +++----
 core/data_models.py       |  80 ++++++++++++----
 test_fixed_input_size.py  | 187 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 264 insertions(+), 35 deletions(-)
 create mode 100644 test_fixed_input_size.py

diff --git a/NN/models/enhanced_cnn.py b/NN/models/enhanced_cnn.py
index 6f09af2..b5084f0 100644
--- a/NN/models/enhanced_cnn.py
+++ b/NN/models/enhanced_cnn.py
@@ -376,20 +376,12 @@ class EnhancedCNN(nn.Module):
         return tensor.detach().clone().requires_grad_(tensor.requires_grad)
 
     def _check_rebuild_network(self, features):
-        """Check if network needs to be rebuilt for different feature dimensions"""
-        # Prevent rebuilding with zero or invalid dimensions
-        if features <= 0:
-            logger.error(f"Invalid feature dimension: {features}. Cannot rebuild network with zero or negative dimensions.")
-            logger.error(f"Current feature_dim: {self.feature_dim}. Keeping existing network.")
-            return False
-
+        """DEPRECATED: Network should have fixed architecture - no runtime rebuilding"""
         if features != self.feature_dim:
-            logger.info(f"Rebuilding network for new feature dimension: {features} (was {self.feature_dim})")
-            self.feature_dim = features
-            self._build_network()
-            # Move to device after rebuilding
-            self.to(self.device)
-            return True
+            logger.error(f"CRITICAL: Input feature dimension mismatch! Expected {self.feature_dim}, got {features}")
+            logger.error("This indicates a bug in data preprocessing - input should be fixed size!")
+            logger.error("Network architecture should NOT change at runtime!")
+            raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {features}")
         return False
 
     def forward(self, x):
@@ -429,10 +421,11 @@ class EnhancedCNN(nn.Module):
             # Now x is 3D: [batch, timeframes, features]
             x_reshaped = x
 
-            # Check if the feature dimension has changed and rebuild if necessary
-            if x_reshaped.size(1) * x_reshaped.size(2) != self.feature_dim:
-                total_features = x_reshaped.size(1) * x_reshaped.size(2)
-                self._check_rebuild_network(total_features)
+            # Validate input dimensions (should be fixed)
+            total_features = x_reshaped.size(1) * x_reshaped.size(2)
+            if total_features != self.feature_dim:
+                logger.error(f"Input dimension mismatch: expected {self.feature_dim}, got {total_features}")
+                raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {total_features}")
 
             # Apply ultra massive convolutions
             x_conv = self.conv_layers(x_reshaped)
@@ -445,9 +438,10 @@ class EnhancedCNN(nn.Module):
             # For 2D input [batch, features]
             x_flat = x
 
-            # Check if dimensions have changed
+            # Validate input dimensions (should be fixed)
             if x_flat.size(1) != self.feature_dim:
-                self._check_rebuild_network(x_flat.size(1))
+                logger.error(f"Input dimension mismatch: expected {self.feature_dim}, got {x_flat.size(1)}")
+                raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {x_flat.size(1)}")
 
             # Apply ULTRA MASSIVE FC layers to get base features
             features = self.fc_layers(x_flat)  # [batch, 1024]
diff --git a/core/data_models.py b/core/data_models.py
index 3a3643a..ae2ecd6 100644
--- a/core/data_models.py
+++ b/core/data_models.py
@@ -108,43 +108,83 @@ class BaseDataInput:
         Convert BaseDataInput to standardized feature vector for models
 
         Returns:
-            np.ndarray: Standardized feature vector combining all data sources
+            np.ndarray: FIXED SIZE standardized feature vector (7850 features)
         """
+        # FIXED FEATURE SIZE - this should NEVER change at runtime
+        FIXED_FEATURE_SIZE = 7850
         features = []
 
         # OHLCV features for ETH (300 frames x 4 timeframes x 5 features = 6000 features)
         for ohlcv_list in [self.ohlcv_1s, self.ohlcv_1m, self.ohlcv_1h, self.ohlcv_1d]:
-            for bar in ohlcv_list[-300:]:  # Ensure exactly 300 frames
+            # Ensure exactly 300 frames (a slice always copies, so padding below never mutates the source list)
+            ohlcv_frames = ohlcv_list[-300:]
+
+            # Pad with zeros if not enough data
+            while len(ohlcv_frames) < 300:
+                # Create a dummy OHLCV bar with zeros
+                dummy_bar = OHLCVBar(
+                    symbol="ETH/USDT",
+                    timestamp=datetime.now(),
+                    open=0.0, high=0.0, low=0.0, close=0.0, volume=0.0,
+                    timeframe="1s"
+                )
+                ohlcv_frames.insert(0, dummy_bar)
+
+            # Extract features from exactly 300 frames
+            for bar in ohlcv_frames:
                 features.extend([bar.open, bar.high, bar.low, bar.close, bar.volume])
 
         # BTC OHLCV features (300 frames x 5 features = 1500 features)
-        for bar in self.btc_ohlcv_1s[-300:]:  # Ensure exactly 300 frames
+        btc_frames = self.btc_ohlcv_1s[-300:]  # slice copies; padding below never mutates the source
+
+        # Pad BTC data if needed
+        while len(btc_frames) < 300:
+            dummy_bar = OHLCVBar(
+                symbol="BTC/USDT",
+                timestamp=datetime.now(),
+                open=0.0, high=0.0, low=0.0, close=0.0, volume=0.0,
+                timeframe="1s"
+            )
+            btc_frames.insert(0, dummy_bar)
+
+        for bar in btc_frames:
             features.extend([bar.open, bar.high, bar.low, bar.close, bar.volume])
 
-        # COB features (±20 buckets x multiple metrics ≈ 800 features)
+        # COB features (FIXED SIZE: 200 features)
+        cob_features = []
         if self.cob_data:
-            # Price bucket features
-            for price in sorted(self.cob_data.price_buckets.keys()):
+            # Price bucket features (up to 40 buckets x 4 metrics = 160 features)
+            price_keys = sorted(self.cob_data.price_buckets.keys())[:40]  # Max 40 buckets
+            for price in price_keys:
                 bucket_data = self.cob_data.price_buckets[price]
-                features.extend([
+                cob_features.extend([
                     bucket_data.get('bid_volume', 0.0),
                     bucket_data.get('ask_volume', 0.0),
                     bucket_data.get('total_volume', 0.0),
                     bucket_data.get('imbalance', 0.0)
                 ])
 
-            # Moving averages of imbalance for ±5 buckets (5 buckets x 4 MAs x 2 sides = 40 features)
-            for ma_dict in [self.cob_data.ma_1s_imbalance, self.cob_data.ma_5s_imbalance,
-                            self.cob_data.ma_15s_imbalance, self.cob_data.ma_60s_imbalance]:
-                for price in sorted(list(ma_dict.keys())[:5]):  # ±5 buckets
-                    features.append(ma_dict[price])
+            # Moving averages (up to 10 features)
+            ma_features = []
+            for ma_dict in [self.cob_data.ma_1s_imbalance, self.cob_data.ma_5s_imbalance]:
+                for price in sorted(ma_dict.keys())[:5]:  # 5 lowest-price buckets per MA, sorted first for determinism
+                    ma_features.append(ma_dict[price])
+                    if len(ma_features) >= 10:
+                        break
+                if len(ma_features) >= 10:
+                    break
+            cob_features.extend(ma_features)
 
-        # Technical indicators (variable, pad to 100 features)
+        # Pad COB features to exactly 200
+        cob_features.extend([0.0] * max(0, 200 - len(cob_features)))
+        features.extend(cob_features[:200])  # Ensure exactly 200 COB features
+
+        # Technical indicators (FIXED SIZE: 100 features)
         indicator_values = list(self.technical_indicators.values())
         features.extend(indicator_values[:100])  # Take first 100 indicators
-        features.extend([0.0] * max(0, 100 - len(indicator_values)))  # Pad if needed
+        features.extend([0.0] * max(0, 100 - len(indicator_values)))  # Pad to exactly 100
 
-        # Last predictions from other models (variable, pad to 50 features)
+        # Last predictions from other models (FIXED SIZE: 50 features)
         prediction_features = []
         for model_output in self.last_predictions.values():
             prediction_features.extend([
@@ -155,7 +195,15 @@ class BaseDataInput:
                 model_output.predictions.get('expected_reward', 0.0)
             ])
         features.extend(prediction_features[:50])  # Take first 50 prediction features
-        features.extend([0.0] * max(0, 50 - len(prediction_features)))  # Pad if needed
+        features.extend([0.0] * max(0, 50 - len(prediction_features)))  # Pad to exactly 50
+
+        # CRITICAL: Ensure EXACTLY the fixed feature size
+        if len(features) > FIXED_FEATURE_SIZE:
+            features = features[:FIXED_FEATURE_SIZE]  # Truncate if too long
+        elif len(features) < FIXED_FEATURE_SIZE:
+            features.extend([0.0] * (FIXED_FEATURE_SIZE - len(features)))  # Pad if too short
+
+        assert len(features) == FIXED_FEATURE_SIZE, f"Feature vector size mismatch: {len(features)} != {FIXED_FEATURE_SIZE}"
 
         return np.array(features, dtype=np.float32)
 
diff --git a/test_fixed_input_size.py b/test_fixed_input_size.py
new file mode 100644
index 0000000..c1dc3ef
--- /dev/null
+++ b/test_fixed_input_size.py
@@ -0,0 +1,187 @@
+#!/usr/bin/env python3
+"""
+Test Fixed Input Size
+
+Verify that the CNN model now receives consistent input dimensions
+"""
+
+import numpy as np
+from datetime import datetime
+from core.data_models import BaseDataInput, OHLCVBar
+from core.enhanced_cnn_adapter import EnhancedCNNAdapter
+
+def create_test_data(with_cob=True, with_indicators=True):
+    """Create test BaseDataInput with varying data completeness"""
+
+    # Create basic OHLCV data
+    ohlcv_bars = []
+    for i in range(100):  # Less than 300 to test padding
+        bar = OHLCVBar(
+            symbol="ETH/USDT",
+            timestamp=datetime.now(),
+            open=100.0 + i,
+            high=101.0 + i,
+            low=99.0 + i,
+            close=100.5 + i,
+            volume=1000 + i,
+            timeframe="1s"
+        )
+        ohlcv_bars.append(bar)
+
+    # Create test data
+    base_data = BaseDataInput(
+        symbol="ETH/USDT",
+        timestamp=datetime.now(),
+        ohlcv_1s=ohlcv_bars,
+        ohlcv_1m=ohlcv_bars[:50],  # Even less data
+        ohlcv_1h=ohlcv_bars[:20],
+        ohlcv_1d=ohlcv_bars[:10],
+        btc_ohlcv_1s=ohlcv_bars[:80],  # Incomplete BTC data
+        technical_indicators={'rsi': 50.0, 'macd': 0.1} if with_indicators else {},
+        last_predictions={}
+    )
+
+    # Add COB data if requested (simplified for testing)
+    if with_cob:
+        # Create a simple mock COB data object
+        class MockCOBData:
+            def __init__(self):
+                self.price_buckets = {
+                    2500.0: {'bid_volume': 100, 'ask_volume': 90, 'total_volume': 190, 'imbalance': 0.05},
+                    2501.0: {'bid_volume': 80, 'ask_volume': 120, 'total_volume': 200, 'imbalance': -0.2}
+                }
+                self.ma_1s_imbalance = {2500.0: 0.1, 2501.0: -0.1}
+                self.ma_5s_imbalance = {2500.0: 0.05, 2501.0: -0.05}
+
+        base_data.cob_data = MockCOBData()
+
+    return base_data
+
+def test_consistent_feature_size():
+    """Test that feature vectors are always the same size"""
+    print("=== Testing Consistent Feature Size ===")
+
+    # Test different data scenarios
+    scenarios = [
+        ("Full data", True, True),
+        ("No COB data", False, True),
+        ("No indicators", True, False),
+        ("Minimal data", False, False)
+    ]
+
+    feature_sizes = []
+
+    for name, with_cob, with_indicators in scenarios:
+        base_data = create_test_data(with_cob, with_indicators)
+        features = base_data.get_feature_vector()
+
+        print(f"{name}: {len(features)} features")
+        feature_sizes.append(len(features))
+
+    # Check if all sizes are the same
+    if len(set(feature_sizes)) == 1:
+        print(f"✅ All feature vectors have consistent size: {feature_sizes[0]}")
+        return feature_sizes[0]
+    else:
+        print(f"❌ Inconsistent feature sizes: {feature_sizes}")
+        return None
+
+def test_cnn_adapter():
+    """Test that CNN adapter works with fixed input size"""
+    print("\n=== Testing CNN Adapter ===")
+
+    try:
+        # Create CNN adapter
+        adapter = EnhancedCNNAdapter()
+        print(f"CNN model initialized with feature_dim: {adapter.model.feature_dim}")
+
+        # Test with different data scenarios
+        scenarios = [
+            ("Full data", True, True),
+            ("No COB data", False, True),
+            ("Minimal data", False, False)
+        ]
+
+        for name, with_cob, with_indicators in scenarios:
+            try:
+                base_data = create_test_data(with_cob, with_indicators)
+
+                # Make prediction
+                result = adapter.predict(base_data)
+
+                print(f"✅ {name}: Prediction successful - {result.action} (conf={result.confidence:.3f})")
+
+            except Exception as e:
+                print(f"❌ {name}: Prediction failed - {e}")
+
+        return True
+
+    except Exception as e:
+        print(f"❌ CNN adapter initialization failed: {e}")
+        return False
+
+def test_no_network_rebuilding():
+    """Test that network doesn't rebuild during runtime"""
+    print("\n=== Testing No Network Rebuilding ===")
+
+    try:
+        adapter = EnhancedCNNAdapter()
+        original_feature_dim = adapter.model.feature_dim
+
+        print(f"Original feature_dim: {original_feature_dim}")
+
+        # Make multiple predictions with different data
+        for i in range(5):
+            base_data = create_test_data(with_cob=(i % 2 == 0), with_indicators=(i % 3 == 0))
+
+            try:
+                result = adapter.predict(base_data)
+                current_feature_dim = adapter.model.feature_dim
+
+                if current_feature_dim != original_feature_dim:
+                    print(f"❌ Network was rebuilt! Original: {original_feature_dim}, Current: {current_feature_dim}")
+                    return False
+
+                print(f"✅ Prediction {i+1}: No rebuilding, feature_dim stable at {current_feature_dim}")
+
+            except Exception as e:
+                print(f"❌ Prediction {i+1} failed: {e}")
+                return False
+
+        print("✅ Network architecture remained stable throughout all predictions")
+        return True
+
+    except Exception as e:
+        print(f"❌ Test failed: {e}")
+        return False
+
+def main():
+    """Run all tests"""
+    print("=== Fixed Input Size Test Suite ===\n")
+
+    # Test 1: Consistent feature size
+    fixed_size = test_consistent_feature_size()
+
+    if fixed_size:
+        # Test 2: CNN adapter works
+        adapter_works = test_cnn_adapter()
+
+        if adapter_works:
+            # Test 3: No network rebuilding
+            no_rebuilding = test_no_network_rebuilding()
+
+            if no_rebuilding:
+                print("\n✅ ALL TESTS PASSED!")
+                print("✅ Feature vectors have consistent size")
+                print("✅ CNN adapter works with fixed input")
+                print("✅ No runtime network rebuilding")
+                print(f"✅ Fixed feature size: {fixed_size}")
+            else:
+                print("\n❌ Network rebuilding test failed")
+        else:
+            print("\n❌ CNN adapter test failed")
+    else:
+        print("\n❌ Feature size consistency test failed")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
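
Note on the fixed feature budget: the 7850 total is the sum of the per-section
sizes named in the get_feature_vector() comments. A minimal arithmetic check
(reviewer sketch in plain Python, not part of the patch):

    ETH_OHLCV   = 300 * 4 * 5   # 6000: 300 bars x 4 timeframes x OHLCV fields
    BTC_OHLCV   = 300 * 5       # 1500: 300 bars x OHLCV fields
    COB         = 200           # 160 bucket metrics + 10 MAs, zero-padded to 200
    INDICATORS  = 100           # first 100 technical indicators, zero-padded
    PREDICTIONS = 50            # first 50 prediction values, zero-padded
    assert ETH_OHLCV + BTC_OHLCV + COB + INDICATORS + PREDICTIONS == 7850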
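
Every section of the feature vector applies the same pad-or-truncate pattern.
A hypothetical helper showing the technique in isolation (fix_length does not
exist in the codebase; it is a sketch assuming plain Python lists of floats):

    from typing import List

    def fix_length(values: List[float], size: int, pad: float = 0.0) -> List[float]:
        """Truncate or zero-pad values to exactly size elements."""
        out = list(values[:size])                # slicing copies and truncates
        out.extend([pad] * (size - len(out)))    # no-op when already full
        return out

    assert len(fix_length([1.0] * 37, 100)) == 100   # padded up
    assert len(fix_length([1.0] * 250, 100)) == 100  # truncated down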
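
On the model side, the patch replaces rebuild-on-mismatch with fail-fast
validation. A toy module illustrating that contract (FixedDimNet is a made-up
stand-in for EnhancedCNN, assuming only the behavior shown in the diff):

    import torch
    import torch.nn as nn

    class FixedDimNet(nn.Module):
        def __init__(self, feature_dim: int = 7850):
            super().__init__()
            self.feature_dim = feature_dim  # fixed at construction, never rebuilt
            self.fc = nn.Linear(feature_dim, 3)

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            if x.size(1) != self.feature_dim:
                raise ValueError(f"Input dimension mismatch: expected {self.feature_dim}, got {x.size(1)}")
            return self.fc(x)

    net = FixedDimNet()
    net(torch.zeros(1, 7850))      # OK: matches the fixed size
    try:
        net(torch.zeros(1, 7851))  # wrong size -> raises instead of rebuilding
    except ValueError as e:
        print(e)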