tter pivots

This commit is contained in:
Dobromir Popov
2025-05-30 03:03:51 +03:00
parent 1130e02f35
commit 75dbac1761
6 changed files with 1459 additions and 1830 deletions

View File

@ -24,6 +24,18 @@ from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass
from enum import Enum
try:
from NN.models.cnn_model import CNNModel
except ImportError:
CNNModel = None # Allow running without TF/CNN if not installed or path issue
print("Warning: CNNModel could not be imported. CNN-based pivot prediction/training will be disabled.")
try:
from core.unified_data_stream import TrainingDataPacket
except ImportError:
TrainingDataPacket = None
print("Warning: TrainingDataPacket could not be imported. Using fallback interface.")
logger = logging.getLogger(__name__)
class TrendDirection(Enum):
@ -84,12 +96,25 @@ class WilliamsMarketStructure:
- Structure break detection
"""
def __init__(self, swing_strengths: List[int] = None):
def __init__(self,
swing_strengths: List[int] = None,
cnn_input_shape: Optional[Tuple[int, int]] = (900, 50), # Updated: 900 timesteps (1s), 50 features
cnn_output_size: Optional[int] = 10, # Updated: 5 levels * (type + price) = 10 outputs
cnn_model_config: Optional[Dict[str, Any]] = None, # For build_model params like filters, learning_rate
cnn_model_path: Optional[str] = None,
enable_cnn_feature: bool = True, # Master switch for this feature
training_data_provider: Optional[Any] = None): # Provider for TrainingDataPacket access
"""
Initialize Williams market structure analyzer
Args:
swing_strengths: List of swing detection strengths (bars on each side)
cnn_input_shape: Shape of input data for CNN (sequence_length, features)
cnn_output_size: Number of output classes for CNN (10 for 5 levels * 2 outputs each)
cnn_model_config: Dictionary with parameters for CNNModel.build_model()
cnn_model_path: Path to a pre-trained Keras CNN model (.h5 file)
enable_cnn_feature: If True, enables CNN prediction and training at pivots.
training_data_provider: Provider/stream for accessing TrainingDataPacket
"""
self.swing_strengths = swing_strengths or [2, 3, 5, 8, 13] # Fibonacci-based strengths
self.max_levels = 5
@ -99,6 +124,32 @@ class WilliamsMarketStructure:
self.swing_cache = {}
self.trend_cache = {}
self.enable_cnn_feature = enable_cnn_feature and CNNModel is not None
self.cnn_model: Optional[CNNModel] = None
self.previous_pivot_details_for_cnn: Optional[Dict[str, Any]] = None # Stores {'features': X, 'pivot': SwingPoint}
self.training_data_provider = training_data_provider # Access to TrainingDataPacket
if self.enable_cnn_feature:
try:
logger.info(f"Initializing CNN for multi-timeframe pivot prediction. Input: {cnn_input_shape}, Output: {cnn_output_size}")
logger.info("CNN will predict next pivot (type + price) for all 5 Williams levels")
self.cnn_model = CNNModel(input_shape=cnn_input_shape, output_size=cnn_output_size)
if cnn_model_path:
logger.info(f"Loading pre-trained CNN model from: {cnn_model_path}")
self.cnn_model.load(cnn_model_path)
else:
logger.info("Building new CNN model.")
# Use provided config or defaults for build_model
build_params = cnn_model_config or {}
self.cnn_model.build_model(**build_params)
logger.info("CNN Model initialized successfully.")
except Exception as e:
logger.error(f"Failed to initialize or load CNN model: {e}. Disabling CNN feature.", exc_info=True)
self.enable_cnn_feature = False
else:
logger.info("CNN feature for pivot prediction/training is disabled.")
logger.info(f"Williams Market Structure initialized with strengths: {self.swing_strengths}")
def calculate_recursive_pivot_points(self, ohlcv_data: np.ndarray) -> Dict[str, MarketStructureLevel]:
@ -187,8 +238,8 @@ class WilliamsMarketStructure:
all_swings = []
for strength in self.swing_strengths:
swings = self._find_swing_points_single_strength(ohlcv_data, strength)
for swing in swings:
swings_at_strength = self._find_swing_points_single_strength(ohlcv_data, strength)
for swing in swings_at_strength:
# Avoid duplicates (swings at same index)
if not any(existing.index == swing.index for existing in all_swings):
all_swings.append(swing)
@ -201,10 +252,10 @@ class WilliamsMarketStructure:
def _find_swing_points_single_strength(self, ohlcv_data: np.ndarray, strength: int) -> List[SwingPoint]:
"""Find swing points with specific strength requirement"""
swings = []
identified_swings_in_this_call = [] # Temporary list for swings found in this specific call
if len(ohlcv_data) < (strength * 2 + 1):
return swings
return identified_swings_in_this_call
for i in range(strength, len(ohlcv_data) - strength):
current_high = ohlcv_data[i, 2] # High price
@ -219,14 +270,16 @@ class WilliamsMarketStructure:
break
if is_swing_high:
swings.append(SwingPoint(
new_pivot = SwingPoint(
timestamp=datetime.fromtimestamp(ohlcv_data[i, 0]) if ohlcv_data[i, 0] > 1e9 else datetime.now(),
price=current_high,
index=i,
swing_type=SwingType.SWING_HIGH,
strength=strength,
volume=current_volume
))
)
identified_swings_in_this_call.append(new_pivot)
self._handle_cnn_at_pivot(new_pivot, ohlcv_data) # CNN logic call
# Check for swing low (lower than surrounding bars)
is_swing_low = True
@ -236,16 +289,18 @@ class WilliamsMarketStructure:
break
if is_swing_low:
swings.append(SwingPoint(
new_pivot = SwingPoint(
timestamp=datetime.fromtimestamp(ohlcv_data[i, 0]) if ohlcv_data[i, 0] > 1e9 else datetime.now(),
price=current_low,
index=i,
swing_type=SwingType.SWING_LOW,
strength=strength,
volume=current_volume
))
)
identified_swings_in_this_call.append(new_pivot)
self._handle_cnn_at_pivot(new_pivot, ohlcv_data) # CNN logic call
return swings
return identified_swings_in_this_call # Return swings found in this call
def _filter_significant_swings(self, swings: List[SwingPoint]) -> List[SwingPoint]:
"""Filter to keep only the most significant swings"""
@ -511,10 +566,10 @@ class WilliamsMarketStructure:
pivot_array: Array of pivot points as [timestamp, price, price, price, price, 0] format
level: Current level being calculated
"""
swings = []
if len(pivot_array) < 5:
return swings
identified_swings_in_this_call = [] # Temporary list
if len(pivot_array) < 5: # Min bars for even smallest strength (e.g. strength 2 needs 2+1+2=5)
return identified_swings_in_this_call
# Use configurable strength for higher levels (more conservative)
strength = min(2 + level, 5) # Level 1: 3 bars, Level 2: 4 bars, Level 3+: 5 bars
@ -526,38 +581,42 @@ class WilliamsMarketStructure:
# Check for swing high (pivot high surrounded by lower pivot highs)
is_swing_high = True
for j in range(i - strength, i + strength + 1):
if j != i and pivot_array[j, 1] >= current_price:
if j != i and pivot_array[j, 1] >= current_price: # Compare with price of other pivots
is_swing_high = False
break
if is_swing_high:
swings.append(SwingPoint(
new_pivot = SwingPoint(
timestamp=datetime.fromtimestamp(current_timestamp) if current_timestamp > 1e9 else datetime.now(),
price=current_price,
index=i,
swing_type=SwingType.SWING_HIGH,
strength=strength,
strength=strength, # Strength here is derived from level, e.g., min(2 + level, 5)
volume=0.0 # Pivot points don't have volume
))
)
identified_swings_in_this_call.append(new_pivot)
self._handle_cnn_at_pivot(new_pivot, pivot_array) # CNN logic call
# Check for swing low (pivot low surrounded by higher pivot lows)
is_swing_low = True
for j in range(i - strength, i + strength + 1):
if j != i and pivot_array[j, 1] <= current_price:
if j != i and pivot_array[j, 1] <= current_price: # Compare with price of other pivots
is_swing_low = False
break
if is_swing_low:
swings.append(SwingPoint(
new_pivot = SwingPoint(
timestamp=datetime.fromtimestamp(current_timestamp) if current_timestamp > 1e9 else datetime.now(),
price=current_price,
index=i,
swing_type=SwingType.SWING_LOW,
strength=strength,
strength=strength, # Strength here is derived from level
volume=0.0 # Pivot points don't have volume
))
)
identified_swings_in_this_call.append(new_pivot)
self._handle_cnn_at_pivot(new_pivot, pivot_array) # CNN logic call
return swings
return identified_swings_in_this_call # Return swings found in this call
def _convert_pivots_to_price_points(self, swing_points: List[SwingPoint]) -> np.ndarray:
"""
@ -695,4 +754,479 @@ class WilliamsMarketStructure:
features.extend([0.0, 0.0])
recent_breaks.append({})
return features[:50] # Ensure exactly 50 features per level
return features[:50] # Ensure exactly 50 features per level
def _handle_cnn_at_pivot(self,
newly_identified_pivot: SwingPoint,
ohlcv_data_context: np.ndarray):
"""
Handles CNN training for the previous pivot and prediction for the next pivot.
Called when a new pivot point is identified.
Args:
newly_identified_pivot: The SwingPoint object for the just-formed pivot.
ohlcv_data_context: The OHLCV data (or pivot array for higher levels)
relevant to this pivot's formation.
"""
if not self.enable_cnn_feature or self.cnn_model is None:
return
# 1. Train model based on the *previous* pivot's prediction and the *current* actual outcome
if self.previous_pivot_details_for_cnn:
try:
logger.debug(f"CNN Training: Previous pivot at idx {self.previous_pivot_details_for_cnn['pivot'].index}, "
f"Current pivot (ground truth) at idx {newly_identified_pivot.index}")
X_train = self.previous_pivot_details_for_cnn['features']
# previous_pivot_info contains 'pivot' which is the SwingPoint object of N-1
y_train = self._get_cnn_ground_truth(self.previous_pivot_details_for_cnn, newly_identified_pivot)
if X_train is not None and X_train.size > 0 and y_train is not None and y_train.size > 0:
# Reshape X_train if it's a single sample and model expects batch
if len(X_train.shape) == len(self.cnn_model.input_shape) and X_train.shape == self.cnn_model.input_shape :
X_train_batch = np.expand_dims(X_train, axis=0)
else: # Should already be correctly shaped by _prepare_cnn_input
X_train_batch = X_train # Or handle error
# Reshape y_train if needed
if self.cnn_model.output_size > 1 and len(y_train.shape) ==1: # e.g. [0.,1.] but model needs [[0.,1.]]
y_train_batch = np.expand_dims(y_train, axis=0)
elif self.cnn_model.output_size == 1 and not isinstance(y_train, (list, np.ndarray)): # e.g. plain 0 or 1
y_train_batch = np.array([[y_train]], dtype=np.float32)
elif self.cnn_model.output_size == 1 and isinstance(y_train, np.ndarray) and y_train.ndim == 1:
y_train_batch = y_train.reshape(-1,1) # ensure [[0.]] for single binary output
else:
y_train_batch = y_train
logger.info(f"CNN Training with X_shape: {X_train_batch.shape}, y_shape: {y_train_batch.shape}")
# Perform a single step of training (online learning)
# Use minimal callbacks for online learning, or allow configuration
self.cnn_model.model.fit(X_train_batch, y_train_batch, batch_size=1, epochs=1, verbose=0, callbacks=[])
logger.info(f"CNN online training step completed for pivot at index {self.previous_pivot_details_for_cnn['pivot'].index}.")
else:
logger.warning("CNN Training: Skipping due to invalid X_train or y_train.")
except Exception as e:
logger.error(f"Error during CNN online training: {e}", exc_info=True)
# 2. Predict for the *next* pivot based on the *current* newly_identified_pivot
try:
logger.debug(f"CNN Prediction: Preparing input for current pivot at idx {newly_identified_pivot.index}")
# The 'previous_pivot_details' for _prepare_cnn_input here is the one active *before* this current call
# which means it refers to the pivot that just got its ground truth trained on.
# If this is the first pivot ever, self.previous_pivot_details_for_cnn would be None.
# Correct context for _prepare_cnn_input:
# current_pivot = newly_identified_pivot
# previous_pivot_details = self.previous_pivot_details_for_cnn (this is N-1, which was used for training above)
X_predict = self._prepare_cnn_input(newly_identified_pivot,
ohlcv_data_context,
self.previous_pivot_details_for_cnn) # Pass the N-1 pivot details
if X_predict is not None and X_predict.size > 0:
# Reshape X_predict if it's a single sample and model expects batch
if len(X_predict.shape) == len(self.cnn_model.input_shape) and X_predict.shape == self.cnn_model.input_shape :
X_predict_batch = np.expand_dims(X_predict, axis=0)
else:
X_predict_batch = X_predict # Or handle error
logger.info(f"CNN Predicting with X_shape: {X_predict_batch.shape}")
pred_class, pred_proba = self.cnn_model.predict(X_predict_batch) # predict expects batch
# pred_class/pred_proba might be arrays if batch_size > 1, or if output is multi-dim
# For batch_size=1, take the first element
final_pred_class = pred_class[0] if isinstance(pred_class, np.ndarray) and pred_class.ndim > 0 else pred_class
final_pred_proba = pred_proba[0] if isinstance(pred_proba, np.ndarray) and pred_proba.ndim > 0 else pred_proba
logger.info(f"CNN Prediction for pivot after index {newly_identified_pivot.index}: Class={final_pred_class}, Proba/Val={final_pred_proba}")
# Store the features (X_predict) and the pivot (newly_identified_pivot) itself for the next training cycle
self.previous_pivot_details_for_cnn = {'features': X_predict, 'pivot': newly_identified_pivot}
else:
logger.warning("CNN Prediction: Skipping due to invalid X_predict.")
# If prediction can't be made, ensure we don't carry over stale 'previous_pivot_details_for_cnn'
# Or, decide if we should clear it or keep the N-2 details.
# For now, if X_predict is None, we clear it so no training happens next round unless a new pred is made.
self.previous_pivot_details_for_cnn = None
except Exception as e:
logger.error(f"Error during CNN prediction: {e}", exc_info=True)
self.previous_pivot_details_for_cnn = None # Clear on error to prevent bad training
def _prepare_cnn_input(self,
current_pivot: SwingPoint,
ohlcv_data_context: np.ndarray,
previous_pivot_details: Optional[Dict[str, Any]]) -> np.ndarray:
"""
Prepare multi-timeframe, multi-symbol input features for CNN using TrainingDataPacket.
Features include:
- ETH: 5 min ticks → 300 x 1s bars with ticks features (4 features)
- ETH: 900 x 1s OHLCV + indicators (10 features)
- ETH: 900 x 1m OHLCV + indicators (10 features)
- ETH: 900 x 1h OHLCV + indicators (10 features)
- ETH: All pivot points from all levels (15 features)
- BTC: 5 min ticks → 300 x 1s reference (4 features)
- Chart labels for data identification (7 features)
Total: ~50 features per timestep over 900 timesteps
Data normalized using 1h min/max to preserve cross-timeframe relationships.
Args:
current_pivot: The newly identified SwingPoint
ohlcv_data_context: The OHLCV data from Williams calculation (may not be used directly)
previous_pivot_details: Previous pivot info for context
Returns:
A numpy array of shape (900, 50) with normalized features
"""
if self.cnn_model is None or not self.training_data_provider:
logger.warning("CNN model or training data provider not available")
return np.zeros(self.cnn_model.input_shape if self.cnn_model else (900, 50), dtype=np.float32)
sequence_length, num_features = self.cnn_model.input_shape
try:
# Get latest TrainingDataPacket from provider
training_packet = self._get_latest_training_data()
if not training_packet:
logger.warning("No TrainingDataPacket available for CNN input")
return np.zeros((sequence_length, num_features), dtype=np.float32)
logger.debug(f"CNN Input: Preparing features for pivot at {current_pivot.timestamp}")
# Prepare feature components (in actual values)
eth_features = self._prepare_eth_features(training_packet, sequence_length)
btc_features = self._prepare_btc_reference_features(training_packet, sequence_length)
pivot_features = self._prepare_pivot_features(training_packet, current_pivot, sequence_length)
chart_labels = self._prepare_chart_labels(sequence_length)
# Combine all features (still in actual values)
combined_features = np.concatenate([
eth_features, # ~40 features
btc_features, # ~4 features
pivot_features, # ~3 features
chart_labels # ~3 features
], axis=1)
# Ensure we match expected feature count
if combined_features.shape[1] > num_features:
combined_features = combined_features[:, :num_features]
elif combined_features.shape[1] < num_features:
padding = np.zeros((sequence_length, num_features - combined_features.shape[1]))
combined_features = np.concatenate([combined_features, padding], axis=1)
# NORMALIZATION: Apply 1h timeframe min/max to preserve relationships
normalized_features = self._normalize_features_by_1h_range(combined_features, training_packet)
logger.debug(f"CNN Input prepared: shape {normalized_features.shape}, "
f"min: {normalized_features.min():.4f}, max: {normalized_features.max():.4f}")
return normalized_features.astype(np.float32)
except Exception as e:
logger.error(f"Error preparing CNN input: {e}", exc_info=True)
return np.zeros((sequence_length, num_features), dtype=np.float32)
def _get_latest_training_data(self):
"""Get latest TrainingDataPacket from provider"""
try:
if hasattr(self.training_data_provider, 'get_latest_training_data'):
return self.training_data_provider.get_latest_training_data()
elif hasattr(self.training_data_provider, 'training_data_buffer'):
return self.training_data_provider.training_data_buffer[-1] if self.training_data_provider.training_data_buffer else None
else:
logger.warning("Training data provider does not have expected interface")
return None
except Exception as e:
logger.error(f"Error getting training data: {e}")
return None
def _prepare_eth_features(self, training_packet, sequence_length: int) -> np.ndarray:
"""
Prepare ETH multi-timeframe features (keep in actual values):
- 1s bars with indicators (10 features)
- 1m bars with indicators (10 features)
- 1h bars with indicators (10 features)
- Tick-derived 1s features (10 features)
Total: 40 features per timestep
"""
features = []
# ETH 1s data with indicators
eth_1s_features = self._extract_timeframe_features(
training_packet.multi_timeframe_data.get('ETH/USDT', {}).get('1s', []),
sequence_length, 'ETH_1s'
)
features.append(eth_1s_features)
# ETH 1m data with indicators
eth_1m_features = self._extract_timeframe_features(
training_packet.multi_timeframe_data.get('ETH/USDT', {}).get('1m', []),
sequence_length, 'ETH_1m'
)
features.append(eth_1m_features)
# ETH 1h data with indicators
eth_1h_features = self._extract_timeframe_features(
training_packet.multi_timeframe_data.get('ETH/USDT', {}).get('1h', []),
sequence_length, 'ETH_1h'
)
features.append(eth_1h_features)
# ETH tick-derived features (5 min of ticks → 300 x 1s aggregated to match sequence_length)
eth_tick_features = self._extract_tick_features(
training_packet.tick_cache, 'ETH/USDT', sequence_length
)
features.append(eth_tick_features)
return np.concatenate(features, axis=1)
def _prepare_btc_reference_features(self, training_packet, sequence_length: int) -> np.ndarray:
"""
Prepare BTC reference features (keep in actual values):
- Tick-derived features for correlation analysis
Total: 4 features per timestep
"""
return self._extract_tick_features(
training_packet.tick_cache, 'BTC/USDT', sequence_length
)
def _prepare_pivot_features(self, training_packet, current_pivot: SwingPoint, sequence_length: int) -> np.ndarray:
"""
Prepare pivot point features from all Williams levels:
- Recent pivot characteristics
- Level-specific trend information
Total: 3 features per timestep (repeated for sequence)
"""
# Extract Williams pivot features using existing method if available
if hasattr(training_packet, 'universal_stream') and training_packet.universal_stream:
# Use existing pivot extraction logic
pivot_feature_vector = [
current_pivot.price,
1.0 if current_pivot.swing_type == SwingType.SWING_HIGH else 0.0,
float(current_pivot.strength)
]
else:
pivot_feature_vector = [0.0, 0.0, 0.0]
# Repeat pivot features for all timesteps in sequence
return np.tile(pivot_feature_vector, (sequence_length, 1))
def _prepare_chart_labels(self, sequence_length: int) -> np.ndarray:
"""
Prepare chart identification labels:
- Symbol identifiers
- Timeframe identifiers
Total: 3 features per timestep
"""
# Simple encoding: [is_eth, is_btc, timeframe_mix]
chart_labels = [1.0, 1.0, 1.0] # Mixed multi-timeframe ETH+BTC data
return np.tile(chart_labels, (sequence_length, 1))
def _extract_timeframe_features(self, ohlcv_data: List[Dict], sequence_length: int, timeframe_label: str) -> np.ndarray:
"""
Extract OHLCV + indicator features from timeframe data (keep actual values).
Returns 10 features: OHLCV + volume + 5 indicators
"""
if not ohlcv_data:
return np.zeros((sequence_length, 10))
# Take last sequence_length bars or pad if insufficient
data_to_use = ohlcv_data[-sequence_length:] if len(ohlcv_data) >= sequence_length else ohlcv_data
features = []
for bar in data_to_use:
bar_features = [
bar.get('open', 0.0),
bar.get('high', 0.0),
bar.get('low', 0.0),
bar.get('close', 0.0),
bar.get('volume', 0.0),
# TODO: Add 5 calculated indicators (SMA, EMA, RSI, MACD, etc.)
bar.get('sma_20', bar.get('close', 0.0)), # Placeholder
bar.get('ema_20', bar.get('close', 0.0)), # Placeholder
bar.get('rsi_14', 50.0), # Placeholder
bar.get('macd', 0.0), # Placeholder
bar.get('bb_upper', bar.get('high', 0.0)) # Placeholder
]
features.append(bar_features)
# Pad if insufficient data
while len(features) < sequence_length:
features.insert(0, features[0] if features else [0.0] * 10)
return np.array(features, dtype=np.float32)
def _extract_tick_features(self, tick_cache: List[Dict], symbol: str, sequence_length: int) -> np.ndarray:
"""
Extract tick-derived features aggregated to 1s intervals (keep actual values).
Returns 4 features: tick_count, total_volume, vwap, price_volatility per second
"""
# Filter ticks for symbol and last 5 minutes
symbol_ticks = [t for t in tick_cache[-1500:] if t.get('symbol') == symbol] # Assume ~5 ticks/sec
if not symbol_ticks:
return np.zeros((sequence_length, 4))
# Group ticks by second and calculate features
tick_features = []
current_time = datetime.now()
for i in range(sequence_length):
second_start = current_time - timedelta(seconds=sequence_length - i)
second_end = second_start + timedelta(seconds=1)
second_ticks = [
t for t in symbol_ticks
if second_start <= t.get('timestamp', datetime.min) < second_end
]
if second_ticks:
prices = [t.get('price', 0.0) for t in second_ticks]
volumes = [t.get('volume', 0.0) for t in second_ticks]
total_volume = sum(volumes)
tick_count = len(second_ticks)
vwap = sum(p * v for p, v in zip(prices, volumes)) / total_volume if total_volume > 0 else 0.0
price_volatility = np.std(prices) if len(prices) > 1 else 0.0
second_features = [tick_count, total_volume, vwap, price_volatility]
else:
second_features = [0.0, 0.0, 0.0, 0.0]
tick_features.append(second_features)
return np.array(tick_features, dtype=np.float32)
def _normalize_features_by_1h_range(self, features: np.ndarray, training_packet) -> np.ndarray:
"""
Normalize all features using 1h timeframe min/max to preserve cross-timeframe relationships.
This is the final normalization step before feeding to CNN.
"""
try:
# Get 1h ETH data for normalization reference
eth_1h_data = training_packet.multi_timeframe_data.get('ETH/USDT', {}).get('1h', [])
if not eth_1h_data:
logger.warning("No 1h data available for normalization, using feature-wise normalization")
# Fallback: normalize each feature independently
feature_min = np.min(features, axis=0, keepdims=True)
feature_max = np.max(features, axis=0, keepdims=True)
feature_range = feature_max - feature_min
feature_range[feature_range == 0] = 1.0 # Avoid division by zero
return (features - feature_min) / feature_range
# Extract 1h price range for primary normalization
h1_prices = []
for bar in eth_1h_data[-24:]: # Last 24 hours for robust range
h1_prices.extend([
bar.get('open', 0.0),
bar.get('high', 0.0),
bar.get('low', 0.0),
bar.get('close', 0.0)
])
if h1_prices:
h1_min = min(h1_prices)
h1_max = max(h1_prices)
h1_range = h1_max - h1_min
if h1_range > 0:
logger.debug(f"Normalizing features using 1h range: {h1_min:.2f} - {h1_max:.2f}")
# Apply 1h-based normalization to price-related features (first ~30 features)
normalized_features = features.copy()
price_feature_count = min(30, features.shape[1])
# Normalize price-related features with 1h range
normalized_features[:, :price_feature_count] = (
(features[:, :price_feature_count] - h1_min) / h1_range
)
# For non-price features (indicators, counts, etc.), use feature-wise normalization
if features.shape[1] > price_feature_count:
remaining_features = features[:, price_feature_count:]
feature_min = np.min(remaining_features, axis=0, keepdims=True)
feature_max = np.max(remaining_features, axis=0, keepdims=True)
feature_range = feature_max - feature_min
feature_range[feature_range == 0] = 1.0
normalized_features[:, price_feature_count:] = (
(remaining_features - feature_min) / feature_range
)
return normalized_features
# Fallback normalization if 1h range calculation fails
logger.warning("1h range calculation failed, using min-max normalization")
feature_min = np.min(features, axis=0, keepdims=True)
feature_max = np.max(features, axis=0, keepdims=True)
feature_range = feature_max - feature_min
feature_range[feature_range == 0] = 1.0
return (features - feature_min) / feature_range
except Exception as e:
logger.error(f"Error in normalization: {e}", exc_info=True)
# Emergency fallback: return features as-is but scaled to [0,1] roughly
return np.clip(features / (np.max(np.abs(features)) + 1e-8), -1.0, 1.0)
def _get_cnn_ground_truth(self,
previous_pivot_info: Dict[str, Any], # Contains 'pivot': SwingPoint obj of N-1
actual_current_pivot: SwingPoint # This is pivot N
) -> np.ndarray:
"""
Determine the ground truth for CNN prediction made at previous_pivot.
Updated to return prediction for next pivot in ALL 5 LEVELS:
- For each level: [type (0=LOW, 1=HIGH), normalized_price_target]
- Total output: 10 values (5 levels * 2 outputs each)
Args:
previous_pivot_info: Dict with 'pivot' = SwingPoint of N-1
actual_current_pivot: SwingPoint of pivot N (actual outcome)
Returns:
A numpy array of shape (10,) with ground truth for all levels
"""
if self.cnn_model is None:
return np.array([])
# Initialize ground truth array for all 5 levels
ground_truth = np.zeros(10, dtype=np.float32) # 5 levels * 2 outputs
try:
# For Level 0 (current pivot level), we have actual data
level_0_type = 1.0 if actual_current_pivot.swing_type == SwingType.SWING_HIGH else 0.0
level_0_price = actual_current_pivot.price
# Normalize price (this is a placeholder - proper normalization should use market context)
# In real implementation, use the same 1h range normalization as input features
normalized_price = level_0_price / 10000.0 # Rough normalization for ETH prices
ground_truth[0] = level_0_type # Level 0 type
ground_truth[1] = normalized_price # Level 0 price
# For higher levels (1-4), we would need to calculate what the next pivot would be
# This requires access to higher-level Williams calculations
# For now, use placeholder logic based on current pivot characteristics
for level in range(1, 5):
# Placeholder: higher levels follow similar pattern but with reduced confidence
confidence_factor = 1.0 / (level + 1)
ground_truth[level * 2] = level_0_type * confidence_factor # Level N type
ground_truth[level * 2 + 1] = normalized_price * confidence_factor # Level N price
logger.debug(f"CNN Ground Truth: Level 0 = [{level_0_type}, {normalized_price:.4f}], "
f"Current pivot = {actual_current_pivot.swing_type.name} @ {actual_current_pivot.price}")
return ground_truth
except Exception as e:
logger.error(f"Error calculating CNN ground truth: {e}", exc_info=True)
return np.zeros(10, dtype=np.float32)