tter pivots
@@ -24,6 +24,18 @@ from typing import Dict, List, Optional, Tuple, Any
 from dataclasses import dataclass
 from enum import Enum

+try:
+    from NN.models.cnn_model import CNNModel
+except ImportError:
+    CNNModel = None  # Allow running without TF/CNN if it is not installed or the import path is wrong
+    print("Warning: CNNModel could not be imported. CNN-based pivot prediction/training will be disabled.")
+
+try:
+    from core.unified_data_stream import TrainingDataPacket
+except ImportError:
+    TrainingDataPacket = None
+    print("Warning: TrainingDataPacket could not be imported. Using fallback interface.")
+
 logger = logging.getLogger(__name__)

 class TrendDirection(Enum):
@@ -84,12 +96,25 @@ class WilliamsMarketStructure:
     - Structure break detection
     """

-    def __init__(self, swing_strengths: List[int] = None):
+    def __init__(self,
+                 swing_strengths: List[int] = None,
+                 cnn_input_shape: Optional[Tuple[int, int]] = (900, 50),  # Updated: 900 timesteps (1s), 50 features
+                 cnn_output_size: Optional[int] = 10,  # Updated: 5 levels * (type + price) = 10 outputs
+                 cnn_model_config: Optional[Dict[str, Any]] = None,  # For build_model params, e.g. filters, learning_rate
+                 cnn_model_path: Optional[str] = None,
+                 enable_cnn_feature: bool = True,  # Master switch for this feature
+                 training_data_provider: Optional[Any] = None):  # Provider for TrainingDataPacket access
        """
        Initialize Williams market structure analyzer

        Args:
            swing_strengths: List of swing detection strengths (bars on each side)
+            cnn_input_shape: Shape of input data for the CNN (sequence_length, features)
+            cnn_output_size: Number of CNN outputs (10 = 5 levels * 2 outputs each)
+            cnn_model_config: Dictionary with parameters for CNNModel.build_model()
+            cnn_model_path: Path to a pre-trained Keras CNN model (.h5 file)
+            enable_cnn_feature: If True, enables CNN prediction and training at pivots
+            training_data_provider: Provider/stream for accessing TrainingDataPacket
        """
        self.swing_strengths = swing_strengths or [2, 3, 5, 8, 13]  # Fibonacci-based strengths
        self.max_levels = 5
@@ -99,6 +124,32 @@ class WilliamsMarketStructure:
         self.swing_cache = {}
         self.trend_cache = {}

+        self.enable_cnn_feature = enable_cnn_feature and CNNModel is not None
+        self.cnn_model: Optional[CNNModel] = None
+        self.previous_pivot_details_for_cnn: Optional[Dict[str, Any]] = None  # Stores {'features': X, 'pivot': SwingPoint}
+        self.training_data_provider = training_data_provider  # Access to TrainingDataPacket
+
+        if self.enable_cnn_feature:
+            try:
+                logger.info(f"Initializing CNN for multi-timeframe pivot prediction. Input: {cnn_input_shape}, Output: {cnn_output_size}")
+                logger.info("CNN will predict the next pivot (type + price) for all 5 Williams levels")
+
+                self.cnn_model = CNNModel(input_shape=cnn_input_shape, output_size=cnn_output_size)
+                if cnn_model_path:
+                    logger.info(f"Loading pre-trained CNN model from: {cnn_model_path}")
+                    self.cnn_model.load(cnn_model_path)
+                else:
+                    logger.info("Building new CNN model.")
+                    # Use the provided config or defaults for build_model
+                    build_params = cnn_model_config or {}
+                    self.cnn_model.build_model(**build_params)
+                logger.info("CNN model initialized successfully.")
+            except Exception as e:
+                logger.error(f"Failed to initialize or load CNN model: {e}. Disabling CNN feature.", exc_info=True)
+                self.enable_cnn_feature = False
+        else:
+            logger.info("CNN feature for pivot prediction/training is disabled.")
+
         logger.info(f"Williams Market Structure initialized with strengths: {self.swing_strengths}")

     def calculate_recursive_pivot_points(self, ohlcv_data: np.ndarray) -> Dict[str, MarketStructureLevel]:
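
(Editorial sketch, not part of the commit: how the new constructor wiring might be exercised. DummyProvider and the random OHLCV array are hypothetical stand-ins; the provider only needs one of the two interfaces that _get_latest_training_data probes for.)

import numpy as np

class DummyProvider:
    """Hypothetical provider exposing the buffer interface probed by _get_latest_training_data."""
    def __init__(self):
        self.training_data_buffer = []  # would hold TrainingDataPacket instances

ws = WilliamsMarketStructure(
    swing_strengths=[2, 3, 5],
    cnn_input_shape=(900, 50),   # 900 x 1s timesteps, 50 features per timestep
    cnn_output_size=10,          # 5 levels * (type + price)
    cnn_model_path=None,         # build a fresh model rather than loading a .h5 file
    enable_cnn_feature=True,     # silently downgraded to False if CNNModel failed to import
    training_data_provider=DummyProvider(),
)

# Columns assumed: [timestamp, open, high, low, close, volume]
ohlcv = np.random.rand(500, 6)
levels = ws.calculate_recursive_pivot_points(ohlcv)
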
@@ -187,8 +238,8 @@ class WilliamsMarketStructure:
         all_swings = []

         for strength in self.swing_strengths:
-            swings = self._find_swing_points_single_strength(ohlcv_data, strength)
-            for swing in swings:
+            swings_at_strength = self._find_swing_points_single_strength(ohlcv_data, strength)
+            for swing in swings_at_strength:
                 # Avoid duplicates (swings at same index)
                 if not any(existing.index == swing.index for existing in all_swings):
                     all_swings.append(swing)
@@ -201,10 +252,10 @@ class WilliamsMarketStructure:

     def _find_swing_points_single_strength(self, ohlcv_data: np.ndarray, strength: int) -> List[SwingPoint]:
         """Find swing points with specific strength requirement"""
-        swings = []
+        identified_swings_in_this_call = []  # Temporary list for swings found in this specific call

         if len(ohlcv_data) < (strength * 2 + 1):
-            return swings
+            return identified_swings_in_this_call

         for i in range(strength, len(ohlcv_data) - strength):
             current_high = ohlcv_data[i, 2]  # High price
@@ -219,14 +270,16 @@ class WilliamsMarketStructure:
                     break

             if is_swing_high:
-                swings.append(SwingPoint(
+                new_pivot = SwingPoint(
                     timestamp=datetime.fromtimestamp(ohlcv_data[i, 0]) if ohlcv_data[i, 0] > 1e9 else datetime.now(),
                     price=current_high,
                     index=i,
                     swing_type=SwingType.SWING_HIGH,
                     strength=strength,
                     volume=current_volume
-                ))
+                )
+                identified_swings_in_this_call.append(new_pivot)
+                self._handle_cnn_at_pivot(new_pivot, ohlcv_data)  # CNN logic call

             # Check for swing low (lower than surrounding bars)
             is_swing_low = True
@@ -236,16 +289,18 @@ class WilliamsMarketStructure:
                     break

             if is_swing_low:
-                swings.append(SwingPoint(
+                new_pivot = SwingPoint(
                     timestamp=datetime.fromtimestamp(ohlcv_data[i, 0]) if ohlcv_data[i, 0] > 1e9 else datetime.now(),
                     price=current_low,
                     index=i,
                     swing_type=SwingType.SWING_LOW,
                     strength=strength,
                     volume=current_volume
-                ))
+                )
+                identified_swings_in_this_call.append(new_pivot)
+                self._handle_cnn_at_pivot(new_pivot, ohlcv_data)  # CNN logic call

-        return swings
+        return identified_swings_in_this_call  # Return swings found in this call

     def _filter_significant_swings(self, swings: List[SwingPoint]) -> List[SwingPoint]:
         """Filter to keep only the most significant swings"""
@@ -511,10 +566,10 @@ class WilliamsMarketStructure:
             pivot_array: Array of pivot points in [timestamp, price, price, price, price, 0] format
             level: Current level being calculated
         """
-        swings = []
+        identified_swings_in_this_call = []  # Temporary list

-        if len(pivot_array) < 5:
-            return swings
+        if len(pivot_array) < 5:  # Min bars for even the smallest strength (e.g. strength 2 needs 2+1+2=5)
+            return identified_swings_in_this_call

         # Use configurable strength for higher levels (more conservative)
         strength = min(2 + level, 5)  # Level 1: 3 bars, Level 2: 4 bars, Level 3+: 5 bars
@@ -526,38 +581,42 @@ class WilliamsMarketStructure:
             # Check for swing high (pivot high surrounded by lower pivot highs)
             is_swing_high = True
             for j in range(i - strength, i + strength + 1):
-                if j != i and pivot_array[j, 1] >= current_price:
+                if j != i and pivot_array[j, 1] >= current_price:  # Compare with the prices of other pivots
                     is_swing_high = False
                     break

             if is_swing_high:
-                swings.append(SwingPoint(
+                new_pivot = SwingPoint(
                     timestamp=datetime.fromtimestamp(current_timestamp) if current_timestamp > 1e9 else datetime.now(),
                     price=current_price,
                     index=i,
                     swing_type=SwingType.SWING_HIGH,
-                    strength=strength,
+                    strength=strength,  # Strength here is derived from level, e.g. min(2 + level, 5)
                     volume=0.0  # Pivot points don't have volume
-                ))
+                )
+                identified_swings_in_this_call.append(new_pivot)
+                self._handle_cnn_at_pivot(new_pivot, pivot_array)  # CNN logic call

             # Check for swing low (pivot low surrounded by higher pivot lows)
             is_swing_low = True
             for j in range(i - strength, i + strength + 1):
-                if j != i and pivot_array[j, 1] <= current_price:
+                if j != i and pivot_array[j, 1] <= current_price:  # Compare with the prices of other pivots
                     is_swing_low = False
                     break

             if is_swing_low:
-                swings.append(SwingPoint(
+                new_pivot = SwingPoint(
                     timestamp=datetime.fromtimestamp(current_timestamp) if current_timestamp > 1e9 else datetime.now(),
                     price=current_price,
                     index=i,
                     swing_type=SwingType.SWING_LOW,
-                    strength=strength,
+                    strength=strength,  # Strength here is derived from level
                     volume=0.0  # Pivot points don't have volume
-                ))
+                )
+                identified_swings_in_this_call.append(new_pivot)
+                self._handle_cnn_at_pivot(new_pivot, pivot_array)  # CNN logic call

-        return swings
+        return identified_swings_in_this_call  # Return swings found in this call

     def _convert_pivots_to_price_points(self, swing_points: List[SwingPoint]) -> np.ndarray:
         """
@@ -695,4 +754,479 @@ class WilliamsMarketStructure:
                 features.extend([0.0, 0.0])
                 recent_breaks.append({})

-        return features[:50]  # Ensure exactly 50 features per level
+        return features[:50]  # Ensure exactly 50 features per level
+
+    def _handle_cnn_at_pivot(self,
+                             newly_identified_pivot: SwingPoint,
+                             ohlcv_data_context: np.ndarray):
+        """
+        Handles CNN training for the previous pivot and prediction for the next pivot.
+        Called when a new pivot point is identified.
+
+        Args:
+            newly_identified_pivot: The SwingPoint object for the just-formed pivot.
+            ohlcv_data_context: The OHLCV data (or pivot array for higher levels)
+                                relevant to this pivot's formation.
+        """
+        if not self.enable_cnn_feature or self.cnn_model is None:
+            return
+
+        # 1. Train the model on the *previous* pivot's prediction against the *current* actual outcome
+        if self.previous_pivot_details_for_cnn:
+            try:
+                logger.debug(f"CNN Training: Previous pivot at idx {self.previous_pivot_details_for_cnn['pivot'].index}, "
+                             f"current pivot (ground truth) at idx {newly_identified_pivot.index}")
+
+                X_train = self.previous_pivot_details_for_cnn['features']
+                # previous_pivot_details_for_cnn contains 'pivot', the SwingPoint object of pivot N-1
+                y_train = self._get_cnn_ground_truth(self.previous_pivot_details_for_cnn, newly_identified_pivot)
+
+                if X_train is not None and X_train.size > 0 and y_train is not None and y_train.size > 0:
+                    # Add a batch dimension if X_train is a single sample shaped like the model input
+                    if X_train.shape == self.cnn_model.input_shape:
+                        X_train_batch = np.expand_dims(X_train, axis=0)
+                    else:  # Should already be correctly shaped by _prepare_cnn_input
+                        X_train_batch = X_train
+
+                    # Reshape y_train if needed
+                    if self.cnn_model.output_size > 1 and len(y_train.shape) == 1:  # e.g. [0., 1.] but the model needs [[0., 1.]]
+                        y_train_batch = np.expand_dims(y_train, axis=0)
+                    elif self.cnn_model.output_size == 1 and not isinstance(y_train, (list, np.ndarray)):  # e.g. plain 0 or 1
+                        y_train_batch = np.array([[y_train]], dtype=np.float32)
+                    elif self.cnn_model.output_size == 1 and isinstance(y_train, np.ndarray) and y_train.ndim == 1:
+                        y_train_batch = y_train.reshape(-1, 1)  # ensure [[0.]] for a single binary output
+                    else:
+                        y_train_batch = y_train
+
+                    logger.info(f"CNN Training with X_shape: {X_train_batch.shape}, y_shape: {y_train_batch.shape}")
+                    # Perform a single training step (online learning) with minimal callbacks
+                    self.cnn_model.model.fit(X_train_batch, y_train_batch, batch_size=1, epochs=1, verbose=0, callbacks=[])
+                    logger.info(f"CNN online training step completed for pivot at index {self.previous_pivot_details_for_cnn['pivot'].index}.")
+                else:
+                    logger.warning("CNN Training: Skipping due to invalid X_train or y_train.")
+
+            except Exception as e:
+                logger.error(f"Error during CNN online training: {e}", exc_info=True)
+
+        # 2. Predict the *next* pivot from the *current* newly_identified_pivot
+        try:
+            logger.debug(f"CNN Prediction: Preparing input for current pivot at idx {newly_identified_pivot.index}")
+
+            # The previous_pivot_details passed to _prepare_cnn_input is the one active *before*
+            # this call, i.e. the pivot (N-1) whose ground truth was just trained on above.
+            # If this is the first pivot ever, self.previous_pivot_details_for_cnn is None.
+            X_predict = self._prepare_cnn_input(newly_identified_pivot,
+                                                ohlcv_data_context,
+                                                self.previous_pivot_details_for_cnn)  # Pass the N-1 pivot details
+
+            if X_predict is not None and X_predict.size > 0:
+                # Add a batch dimension if X_predict is a single sample shaped like the model input
+                if X_predict.shape == self.cnn_model.input_shape:
+                    X_predict_batch = np.expand_dims(X_predict, axis=0)
+                else:
+                    X_predict_batch = X_predict
+
+                logger.info(f"CNN Predicting with X_shape: {X_predict_batch.shape}")
+                pred_class, pred_proba = self.cnn_model.predict(X_predict_batch)  # predict expects a batch
+
+                # pred_class/pred_proba may be arrays (batch or multi-dim output); for batch_size=1 take the first element
+                final_pred_class = pred_class[0] if isinstance(pred_class, np.ndarray) and pred_class.ndim > 0 else pred_class
+                final_pred_proba = pred_proba[0] if isinstance(pred_proba, np.ndarray) and pred_proba.ndim > 0 else pred_proba
+
+                logger.info(f"CNN Prediction for pivot after index {newly_identified_pivot.index}: Class={final_pred_class}, Proba/Val={final_pred_proba}")
+
+                # Store the features (X_predict) and the pivot itself for the next training cycle
+                self.previous_pivot_details_for_cnn = {'features': X_predict, 'pivot': newly_identified_pivot}
+            else:
+                logger.warning("CNN Prediction: Skipping due to invalid X_predict.")
+                # If no prediction could be made, clear the stale details so no training
+                # happens next round unless a new prediction is made first.
+                self.previous_pivot_details_for_cnn = None
+
+        except Exception as e:
+            logger.error(f"Error during CNN prediction: {e}", exc_info=True)
+            self.previous_pivot_details_for_cnn = None  # Clear on error to prevent bad training
+
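
(Editorial sketch, not part of the commit: the per-pivot cadence of _handle_cnn_at_pivot reduced to a stub, to make the one-step-behind online loop explicit. StubModel and the shapes are illustrative only.)

import numpy as np

class StubModel:
    def fit(self, X, y):   # train on the (N-1) features vs. the truth observed at N
        pass
    def predict(self, X):  # predict the next pivot from the features built at N
        return np.zeros((1, 10))

model, previous = StubModel(), None  # 'previous' mirrors self.previous_pivot_details_for_cnn

def on_new_pivot(features_n, truth_vs_previous):
    global previous
    if previous is not None:                                       # step 1: delayed training
        model.fit(previous['features'][None, ...], truth_vs_previous[None, ...])
    model.predict(features_n[None, ...])                           # step 2: predict the next pivot
    previous = {'features': features_n}                            # carried to the next pivot

on_new_pivot(np.zeros((900, 50)), np.zeros(10))  # first pivot: predict only
on_new_pivot(np.zeros((900, 50)), np.zeros(10))  # second pivot: train, then predict
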
+    def _prepare_cnn_input(self,
+                           current_pivot: SwingPoint,
+                           ohlcv_data_context: np.ndarray,
+                           previous_pivot_details: Optional[Dict[str, Any]]) -> np.ndarray:
+        """
+        Prepare multi-timeframe, multi-symbol input features for the CNN using TrainingDataPacket.
+
+        Features include:
+        - ETH: 5 min of ticks aggregated to 1s bars (4 tick-derived features)
+        - ETH: 900 x 1s OHLCV + indicators (10 features)
+        - ETH: 900 x 1m OHLCV + indicators (10 features)
+        - ETH: 900 x 1h OHLCV + indicators (10 features)
+        - ETH: pivot-point context from the Williams levels (3 features)
+        - BTC: 5 min of ticks aggregated to 1s as a reference (4 features)
+        - Chart labels for data identification (3 features)
+
+        Total: 44 features per timestep, zero-padded to 50, over 900 timesteps.
+        Data is normalized with the 1h min/max to preserve cross-timeframe relationships.
+
+        Args:
+            current_pivot: The newly identified SwingPoint
+            ohlcv_data_context: The OHLCV data from the Williams calculation (may not be used directly)
+            previous_pivot_details: Previous pivot info for context
+
+        Returns:
+            A numpy array of shape (900, 50) with normalized features
+        """
+        if self.cnn_model is None or not self.training_data_provider:
+            logger.warning("CNN model or training data provider not available")
+            return np.zeros(self.cnn_model.input_shape if self.cnn_model else (900, 50), dtype=np.float32)
+
+        sequence_length, num_features = self.cnn_model.input_shape
+
+        try:
+            # Get the latest TrainingDataPacket from the provider
+            training_packet = self._get_latest_training_data()
+            if not training_packet:
+                logger.warning("No TrainingDataPacket available for CNN input")
+                return np.zeros((sequence_length, num_features), dtype=np.float32)
+
+            logger.debug(f"CNN Input: Preparing features for pivot at {current_pivot.timestamp}")
+
+            # Prepare feature components (still in actual values)
+            eth_features = self._prepare_eth_features(training_packet, sequence_length)
+            btc_features = self._prepare_btc_reference_features(training_packet, sequence_length)
+            pivot_features = self._prepare_pivot_features(training_packet, current_pivot, sequence_length)
+            chart_labels = self._prepare_chart_labels(sequence_length)
+
+            # Combine all features (still in actual values)
+            combined_features = np.concatenate([
+                eth_features,    # 34 features
+                btc_features,    # 4 features
+                pivot_features,  # 3 features
+                chart_labels     # 3 features
+            ], axis=1)
+
+            # Ensure we match the expected feature count (truncate or zero-pad)
+            if combined_features.shape[1] > num_features:
+                combined_features = combined_features[:, :num_features]
+            elif combined_features.shape[1] < num_features:
+                padding = np.zeros((sequence_length, num_features - combined_features.shape[1]))
+                combined_features = np.concatenate([combined_features, padding], axis=1)
+
+            # NORMALIZATION: apply the 1h timeframe min/max to preserve relationships
+            normalized_features = self._normalize_features_by_1h_range(combined_features, training_packet)
+
+            logger.debug(f"CNN Input prepared: shape {normalized_features.shape}, "
+                         f"min: {normalized_features.min():.4f}, max: {normalized_features.max():.4f}")
+
+            return normalized_features.astype(np.float32)
+
+        except Exception as e:
+            logger.error(f"Error preparing CNN input: {e}", exc_info=True)
+            return np.zeros((sequence_length, num_features), dtype=np.float32)
+
+    def _get_latest_training_data(self):
+        """Get the latest TrainingDataPacket from the provider"""
+        try:
+            if hasattr(self.training_data_provider, 'get_latest_training_data'):
+                return self.training_data_provider.get_latest_training_data()
+            elif hasattr(self.training_data_provider, 'training_data_buffer'):
+                return self.training_data_provider.training_data_buffer[-1] if self.training_data_provider.training_data_buffer else None
+            else:
+                logger.warning("Training data provider does not have the expected interface")
+                return None
+        except Exception as e:
+            logger.error(f"Error getting training data: {e}")
+            return None
+
+    def _prepare_eth_features(self, training_packet, sequence_length: int) -> np.ndarray:
+        """
+        Prepare ETH multi-timeframe features (kept in actual values):
+        - 1s bars with indicators (10 features)
+        - 1m bars with indicators (10 features)
+        - 1h bars with indicators (10 features)
+        - Tick-derived 1s features (4 features)
+        Total: 34 features per timestep
+        """
+        features = []
+
+        # ETH 1s data with indicators
+        eth_1s_features = self._extract_timeframe_features(
+            training_packet.multi_timeframe_data.get('ETH/USDT', {}).get('1s', []),
+            sequence_length, 'ETH_1s'
+        )
+        features.append(eth_1s_features)
+
+        # ETH 1m data with indicators
+        eth_1m_features = self._extract_timeframe_features(
+            training_packet.multi_timeframe_data.get('ETH/USDT', {}).get('1m', []),
+            sequence_length, 'ETH_1m'
+        )
+        features.append(eth_1m_features)
+
+        # ETH 1h data with indicators
+        eth_1h_features = self._extract_timeframe_features(
+            training_packet.multi_timeframe_data.get('ETH/USDT', {}).get('1h', []),
+            sequence_length, 'ETH_1h'
+        )
+        features.append(eth_1h_features)
+
+        # ETH tick-derived features (5 min of ticks aggregated to 1s, matched to sequence_length)
+        eth_tick_features = self._extract_tick_features(
+            training_packet.tick_cache, 'ETH/USDT', sequence_length
+        )
+        features.append(eth_tick_features)
+
+        return np.concatenate(features, axis=1)
+
+    def _prepare_btc_reference_features(self, training_packet, sequence_length: int) -> np.ndarray:
+        """
+        Prepare BTC reference features (kept in actual values):
+        - Tick-derived features for correlation analysis
+        Total: 4 features per timestep
+        """
+        return self._extract_tick_features(
+            training_packet.tick_cache, 'BTC/USDT', sequence_length
+        )
+
+    def _prepare_pivot_features(self, training_packet, current_pivot: SwingPoint, sequence_length: int) -> np.ndarray:
+        """
+        Prepare pivot point features from the Williams levels:
+        - Recent pivot characteristics
+        - Level-specific trend information
+        Total: 3 features per timestep (repeated across the sequence)
+        """
+        # Extract Williams pivot features from the universal stream when available
+        if hasattr(training_packet, 'universal_stream') and training_packet.universal_stream:
+            pivot_feature_vector = [
+                current_pivot.price,
+                1.0 if current_pivot.swing_type == SwingType.SWING_HIGH else 0.0,
+                float(current_pivot.strength)
+            ]
+        else:
+            pivot_feature_vector = [0.0, 0.0, 0.0]
+
+        # Repeat the pivot features for every timestep in the sequence
+        return np.tile(pivot_feature_vector, (sequence_length, 1))
+
+    def _prepare_chart_labels(self, sequence_length: int) -> np.ndarray:
+        """
+        Prepare chart identification labels:
+        - Symbol identifiers
+        - Timeframe identifiers
+        Total: 3 features per timestep
+        """
+        # Simple encoding: [is_eth, is_btc, timeframe_mix]
+        chart_labels = [1.0, 1.0, 1.0]  # Mixed multi-timeframe ETH+BTC data
+        return np.tile(chart_labels, (sequence_length, 1))
+
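
(Editorial sketch, not part of the commit: np.tile repeats a per-pivot vector across every timestep, which is how the 3 pivot features and 3 chart labels reach shape (sequence_length, 3). The values are made up.)

import numpy as np

pivot_vec = [3250.0, 1.0, 3.0]      # hypothetical [price, is_high flag, strength]
seq = np.tile(pivot_vec, (900, 1))  # repeated for 900 timesteps
print(seq.shape)                    # (900, 3)
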
+    def _extract_timeframe_features(self, ohlcv_data: List[Dict], sequence_length: int, timeframe_label: str) -> np.ndarray:
+        """
+        Extract OHLCV + indicator features from timeframe data (kept in actual values).
+        Returns 10 features: OHLC + volume + 5 indicators
+        """
+        if not ohlcv_data:
+            return np.zeros((sequence_length, 10))
+
+        # Take the last sequence_length bars, padding below if there are not enough
+        data_to_use = ohlcv_data[-sequence_length:] if len(ohlcv_data) >= sequence_length else ohlcv_data
+
+        features = []
+        for bar in data_to_use:
+            bar_features = [
+                bar.get('open', 0.0),
+                bar.get('high', 0.0),
+                bar.get('low', 0.0),
+                bar.get('close', 0.0),
+                bar.get('volume', 0.0),
+                # TODO: Add 5 calculated indicators (SMA, EMA, RSI, MACD, etc.)
+                bar.get('sma_20', bar.get('close', 0.0)),   # Placeholder
+                bar.get('ema_20', bar.get('close', 0.0)),   # Placeholder
+                bar.get('rsi_14', 50.0),                    # Placeholder
+                bar.get('macd', 0.0),                       # Placeholder
+                bar.get('bb_upper', bar.get('high', 0.0))   # Placeholder
+            ]
+            features.append(bar_features)
+
+        # Pad at the front if there is insufficient data
+        while len(features) < sequence_length:
+            features.insert(0, features[0] if features else [0.0] * 10)
+
+        return np.array(features, dtype=np.float32)
+
+    def _extract_tick_features(self, tick_cache: List[Dict], symbol: str, sequence_length: int) -> np.ndarray:
+        """
+        Extract tick-derived features aggregated to 1s intervals (kept in actual values).
+        Returns 4 features per second: tick_count, total_volume, vwap, price_volatility
+        """
+        # Filter ticks for the symbol over roughly the last 5 minutes (assumes ~5 ticks/sec)
+        symbol_ticks = [t for t in tick_cache[-1500:] if t.get('symbol') == symbol]
+
+        if not symbol_ticks:
+            return np.zeros((sequence_length, 4))
+
+        # Group ticks by second and calculate the features
+        tick_features = []
+        current_time = datetime.now()
+
+        for i in range(sequence_length):
+            second_start = current_time - timedelta(seconds=sequence_length - i)
+            second_end = second_start + timedelta(seconds=1)
+
+            second_ticks = [
+                t for t in symbol_ticks
+                if second_start <= t.get('timestamp', datetime.min) < second_end
+            ]
+
+            if second_ticks:
+                prices = [t.get('price', 0.0) for t in second_ticks]
+                volumes = [t.get('volume', 0.0) for t in second_ticks]
+                total_volume = sum(volumes)
+
+                tick_count = len(second_ticks)
+                vwap = sum(p * v for p, v in zip(prices, volumes)) / total_volume if total_volume > 0 else 0.0
+                price_volatility = np.std(prices) if len(prices) > 1 else 0.0
+
+                second_features = [tick_count, total_volume, vwap, price_volatility]
+            else:
+                second_features = [0.0, 0.0, 0.0, 0.0]
+
+            tick_features.append(second_features)
+
+        return np.array(tick_features, dtype=np.float32)
+
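
(Editorial sketch, not part of the commit: one 1s bucket of hypothetical ticks worked through the same arithmetic as _extract_tick_features.)

import numpy as np

ticks = [{'price': 3050.0, 'volume': 0.5}, {'price': 3050.5, 'volume': 1.5}]
prices = [t['price'] for t in ticks]
volumes = [t['volume'] for t in ticks]
total_volume = sum(volumes)                                        # 2.0
vwap = sum(p * v for p, v in zip(prices, volumes)) / total_volume  # 3050.375
volatility = float(np.std(prices))                                 # 0.25
features = [len(ticks), total_volume, vwap, volatility]            # [2, 2.0, 3050.375, 0.25]
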
+    def _normalize_features_by_1h_range(self, features: np.ndarray, training_packet) -> np.ndarray:
+        """
+        Normalize all features using the 1h timeframe min/max to preserve cross-timeframe relationships.
+        This is the final normalization step before feeding the CNN.
+        """
+        try:
+            # Get 1h ETH data as the normalization reference
+            eth_1h_data = training_packet.multi_timeframe_data.get('ETH/USDT', {}).get('1h', [])
+
+            if not eth_1h_data:
+                logger.warning("No 1h data available for normalization, using feature-wise normalization")
+                # Fallback: normalize each feature independently
+                feature_min = np.min(features, axis=0, keepdims=True)
+                feature_max = np.max(features, axis=0, keepdims=True)
+                feature_range = feature_max - feature_min
+                feature_range[feature_range == 0] = 1.0  # Avoid division by zero
+                return (features - feature_min) / feature_range
+
+            # Extract the 1h price range for the primary normalization
+            h1_prices = []
+            for bar in eth_1h_data[-24:]:  # Last 24 hours for a robust range
+                h1_prices.extend([
+                    bar.get('open', 0.0),
+                    bar.get('high', 0.0),
+                    bar.get('low', 0.0),
+                    bar.get('close', 0.0)
+                ])
+
+            if h1_prices:
+                h1_min = min(h1_prices)
+                h1_max = max(h1_prices)
+                h1_range = h1_max - h1_min
+
+                if h1_range > 0:
+                    logger.debug(f"Normalizing features using 1h range: {h1_min:.2f} - {h1_max:.2f}")
+
+                    # Apply 1h-based normalization to the price-related features (first ~30 columns)
+                    normalized_features = features.copy()
+                    price_feature_count = min(30, features.shape[1])
+
+                    # Normalize price-related features with the 1h range
+                    normalized_features[:, :price_feature_count] = (
+                        (features[:, :price_feature_count] - h1_min) / h1_range
+                    )
+
+                    # For non-price features (indicators, counts, etc.), use feature-wise normalization
+                    if features.shape[1] > price_feature_count:
+                        remaining_features = features[:, price_feature_count:]
+                        feature_min = np.min(remaining_features, axis=0, keepdims=True)
+                        feature_max = np.max(remaining_features, axis=0, keepdims=True)
+                        feature_range = feature_max - feature_min
+                        feature_range[feature_range == 0] = 1.0
+
+                        normalized_features[:, price_feature_count:] = (
+                            (remaining_features - feature_min) / feature_range
+                        )
+
+                    return normalized_features
+
+            # Fallback normalization if the 1h range calculation fails
+            logger.warning("1h range calculation failed, using min-max normalization")
+            feature_min = np.min(features, axis=0, keepdims=True)
+            feature_max = np.max(features, axis=0, keepdims=True)
+            feature_range = feature_max - feature_min
+            feature_range[feature_range == 0] = 1.0
+            return (features - feature_min) / feature_range
+
+        except Exception as e:
+            logger.error(f"Error in normalization: {e}", exc_info=True)
+            # Emergency fallback: return the features roughly scaled into [-1, 1]
+            return np.clip(features / (np.max(np.abs(features)) + 1e-8), -1.0, 1.0)
+
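
(Editorial sketch, not part of the commit: why a shared 1h range keeps timeframes comparable, with made-up prices. Per-feature min-max would rescale each column independently and destroy the dollar-level alignment.)

import numpy as np

h1_min, h1_max = 3000.0, 3100.0        # assumed 24h ETH 1h price range
h1_range = h1_max - h1_min

close_1s = np.array([3050.0, 3052.0])  # hypothetical 1s closes
close_1h = np.array([3010.0, 3090.0])  # hypothetical 1h closes

# Both timeframes are scaled with the SAME min/max, so equal dollar prices
# stay equal after normalization, across timeframes.
print((close_1s - h1_min) / h1_range)  # [0.50 0.52]
print((close_1h - h1_min) / h1_range)  # [0.10 0.90]
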
+    def _get_cnn_ground_truth(self,
+                              previous_pivot_info: Dict[str, Any],  # Contains 'pivot': the SwingPoint of N-1
+                              actual_current_pivot: SwingPoint      # This is pivot N
+                              ) -> np.ndarray:
+        """
+        Determine the ground truth for the CNN prediction made at the previous pivot.
+
+        Updated to return the prediction target for the next pivot in ALL 5 LEVELS:
+        - For each level: [type (0=LOW, 1=HIGH), normalized_price_target]
+        - Total output: 10 values (5 levels * 2 outputs each)
+
+        Args:
+            previous_pivot_info: Dict with 'pivot' = the SwingPoint of N-1
+            actual_current_pivot: The SwingPoint of pivot N (actual outcome)
+
+        Returns:
+            A numpy array of shape (10,) with the ground truth for all levels
+        """
+        if self.cnn_model is None:
+            return np.array([])
+
+        # Initialize the ground truth array for all 5 levels
+        ground_truth = np.zeros(10, dtype=np.float32)  # 5 levels * 2 outputs
+
+        try:
+            # For level 0 (the current pivot level) we have actual data
+            level_0_type = 1.0 if actual_current_pivot.swing_type == SwingType.SWING_HIGH else 0.0
+            level_0_price = actual_current_pivot.price
+
+            # Normalize the price (placeholder: proper normalization should use market context,
+            # i.e. the same 1h range normalization applied to the input features)
+            normalized_price = level_0_price / 10000.0  # Rough normalization for ETH prices
+
+            ground_truth[0] = level_0_type      # Level 0 type
+            ground_truth[1] = normalized_price  # Level 0 price
+
+            # For the higher levels (1-4) we would need to calculate what the next pivot would be,
+            # which requires access to the higher-level Williams calculations.
+            # For now, use placeholder logic based on the current pivot's characteristics.
+            for level in range(1, 5):
+                # Placeholder: higher levels follow a similar pattern with reduced confidence
+                confidence_factor = 1.0 / (level + 1)
+
+                ground_truth[level * 2] = level_0_type * confidence_factor          # Level N type
+                ground_truth[level * 2 + 1] = normalized_price * confidence_factor  # Level N price
+
+            logger.debug(f"CNN Ground Truth: Level 0 = [{level_0_type}, {normalized_price:.4f}], "
+                         f"Current pivot = {actual_current_pivot.swing_type.name} @ {actual_current_pivot.price}")
+
+            return ground_truth
+
+        except Exception as e:
+            logger.error(f"Error calculating CNN ground truth: {e}", exc_info=True)
+            return np.zeros(10, dtype=np.float32)
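
(Editorial sketch, not part of the commit: the 10-element ground-truth layout, shown with a made-up SWING_HIGH at 3250.0 using the same rough 1/10000 price scaling as above.)

import numpy as np

# Layout: [type_L0, price_L0, type_L1, price_L1, ..., type_L4, price_L4]
gt = np.zeros(10, dtype=np.float32)
pivot_type, price = 1.0, 3250.0        # hypothetical SWING_HIGH at $3250
norm_price = price / 10000.0           # 0.325

gt[0], gt[1] = pivot_type, norm_price  # level 0: the actual outcome
for level in range(1, 5):              # levels 1-4: placeholder, damped by 1/(level+1)
    conf = 1.0 / (level + 1)
    gt[level * 2] = pivot_type * conf
    gt[level * 2 + 1] = norm_price * conf
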