Multi-pair inputs (WIP); training now working?

This commit is contained in:
Dobromir Popov
2025-03-29 01:56:45 +02:00
parent b313f70cd7
commit 0b2000e3e7
9 changed files with 686 additions and 37 deletions

View File

@@ -17,16 +17,15 @@ logger = logging.getLogger(__name__)
class DataInterface:
"""
Handles data collection, processing, and preparation for neural network models.
- This class is responsible for:
- 1. Fetching historical data
- 2. Preprocessing data for neural network input
- 3. Generating training datasets
- 4. Handling real-time data integration
+ Enhanced Data Interface supporting:
+ - Multiple trading pairs (up to 3)
+ - Multiple timeframes per pair (1s, 1m, 1h, 1d + custom)
+ - Technical indicators (up to 20)
+ - Cross-timeframe normalization
+ - Real-time tick streaming
"""
- def __init__(self, symbol="BTC/USDT", timeframes=None, data_dir="NN/data"):
+ def __init__(self, symbol=None, timeframes=None, data_dir="NN/data"):
"""
Initialize the data interface.
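For orientation, a minimal usage sketch of the enhanced interface. The import path is a guess (the diff does not show the module's location), and the multi-timeframe feature builder is called here by a placeholder name:

# Hypothetical import path, inferred from the NN/data default:
# from NN.utils.data_interface import DataInterface

di = DataInterface(symbol="BTC/USDT", timeframes=["1m", "1h", "1d"])
print(di.get_feature_count())  # 5 OHLCV features per timeframe -> 15

# The builder shown below returns X shaped [samples, window, 5 * n_timeframes].
# Its public name is outside this hunk, so this call is illustrative only:
# X, y, ts = di.prepare_nn_input(timeframes=di.timeframes, window_size=20)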
@@ -157,9 +156,9 @@ class DataInterface:
else:
cycle = np.sin(i / 24 * np.pi) * 0.01 # Daily cycle
- # Calculate price change with random walk + cycles
- price_change = price * (drift + volatility * np.random.randn() + cycle)
- price += price_change
+ # Calculate price change with random walk + cycles (clamped to prevent overflow)
+ price_change = price * np.clip(drift + volatility * np.random.randn() + cycle, -0.1, 0.1)
+ price = np.clip(price + price_change, 1000, 100000) # Keep price in reasonable range
# Generate OHLC from the price
open_price = price
@@ -171,8 +170,8 @@
high_price = max(high_price, open_price, close_price)
low_price = min(low_price, open_price, close_price)
- # Generate volume (higher for larger price movements)
- volume = abs(price_change) * (10000 + 5000 * np.random.rand())
+ # Generate volume (higher for larger price movements) with safe calculation
+ volume = 10000 + 5000 * np.random.rand() + abs(price_change) / price * 10000
prices.append((open_price, high_price, low_price, close_price))
volumes.append(volume)
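The two np.clip calls above bound both the per-step return and the price itself, which is what keeps the synthetic walk from overflowing. A self-contained sketch of the same technique (toy parameters, not the repo's defaults):

import numpy as np

rng = np.random.default_rng(0)
price, drift, volatility = 50000.0, 0.0001, 0.01
prices = []
for i in range(10000):
    cycle = np.sin(i / 24 * np.pi) * 0.01                # daily cycle, as in the diff
    step = drift + volatility * rng.standard_normal() + cycle
    price_change = price * np.clip(step, -0.1, 0.1)      # cap any single move at 10%
    price = np.clip(price + price_change, 1000, 100000)  # keep price in a sane band
    prices.append(price)
assert np.isfinite(prices).all()  # an unclamped walk can blow up; this one cannot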
@@ -217,19 +216,41 @@ class DataInterface:
logger.error("No data available for feature creation")
return None, None, None
- # For simplicity, we'll use just one timeframe for now
- # In a more complex implementation, we would merge multiple timeframes
- primary_tf = timeframes[0]
- if primary_tf not in dfs:
- logger.error(f"Primary timeframe {primary_tf} not available")
+ # Create features for each timeframe
+ features = []
+ targets = []
+ timestamps = []
+ for tf in timeframes:
+ if tf in dfs:
+ X, y, ts = self._create_features(dfs[tf], window_size)
+ features.append(X)
+ if len(targets) == 0: # Only need targets from one timeframe
+ targets = y
+ timestamps = ts
+ if not features:
return None, None, None
+ # Stack features from all timeframes along the channel (feature) dimension
+ # Reshape each timeframe's features to [samples, window, 1, features]
+ reshaped_features = [f.reshape(f.shape[0], f.shape[1], 1, f.shape[2])
+ for f in features]
+ # Concatenate along the channel dimension
+ X = np.concatenate(reshaped_features, axis=2)
+ # Reshape to [samples, window, features*timeframes]
+ X = X.reshape(X.shape[0], X.shape[1], -1)
- df = dfs[primary_tf]
+ # Validate data
+ if np.any(np.isnan(X)) or np.any(np.isinf(X)):
+ logger.error("Generated features contain NaN or infinite values")
+ return None, None, None
+ # Ensure all values are finite and normalized
+ X = np.nan_to_num(X, nan=0.0, posinf=1.0, neginf=-1.0)
+ X = np.clip(X, -1e6, 1e6) # Clip extreme values
- # Create features
- X, y, timestamps = self._create_features(df, window_size)
- return X, y, timestamps
+ return X, targets, timestamps
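The reshape-and-concatenate above interleaves per-timeframe channels into the feature axis. A toy shape check makes the layout concrete (synthetic arrays; note it assumes every timeframe yields the same number of samples, which the loop above does not guarantee when the timeframes' histories differ in length):

import numpy as np

# 3 timeframes, 4 samples, window 20, 5 OHLCV features each
features = [np.random.rand(4, 20, 5) for _ in range(3)]
reshaped = [f.reshape(f.shape[0], f.shape[1], 1, f.shape[2]) for f in features]
X = np.concatenate(reshaped, axis=2)       # -> (4, 20, 3, 5)
X = X.reshape(X.shape[0], X.shape[1], -1)  # -> (4, 20, 15): [samples, window, features*timeframes]
print(X.shape)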
def _create_features(self, df, window_size):
"""
@@ -248,9 +269,28 @@ class DataInterface:
# Extract OHLCV columns
ohlcv = df[['open', 'high', 'low', 'close', 'volume']].values
- # Scale the data
- scaler = MinMaxScaler()
- ohlcv_scaled = scaler.fit_transform(ohlcv)
+ # Validate data before scaling
+ if np.any(np.isnan(ohlcv)) or np.any(np.isinf(ohlcv)):
+ logger.error("Input data contains NaN or infinite values")
+ return None, None, None
+ # Handle potential constant columns (avoid division by zero in the scaler)
+ ohlcv = np.nan_to_num(ohlcv, nan=0.0)
+ ranges = np.ptp(ohlcv, axis=0)
+ for i in range(len(ranges)):
+ if ranges[i] == 0: # Constant column
+ ohlcv[:, i] = 1 if i == 3 else 0 # Set close to 1, others to 0
+ # Scale the data with safety checks
+ try:
+ scaler = MinMaxScaler()
+ ohlcv_scaled = scaler.fit_transform(ohlcv)
+ if np.any(np.isnan(ohlcv_scaled)) or np.any(np.isinf(ohlcv_scaled)):
+ logger.error("Scaling produced invalid values")
+ return None, None, None
+ except Exception as e:
+ logger.error(f"Scaling failed: {str(e)}")
+ return None, None, None
# Store the scaler for later use
timeframe = next((tf for tf in self.timeframes if self.dataframes.get(tf) is not None and
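In isolation, the constant-column guard above looks like this on toy data (note that recent scikit-learn versions already map zero-range columns safely inside MinMaxScaler, so the guard is belt-and-braces rather than strictly required):

import numpy as np
from sklearn.preprocessing import MinMaxScaler

ohlcv = np.random.rand(100, 5) * 100
ohlcv[:, 4] = 42.0                        # constant volume column
ranges = np.ptp(ohlcv, axis=0)            # peak-to-peak range per column
for i in range(len(ranges)):
    if ranges[i] == 0:                    # constant column
        ohlcv[:, i] = 1 if i == 3 else 0  # close -> 1, others -> 0, as in the diff
scaled = MinMaxScaler().fit_transform(ohlcv)
assert np.isfinite(scaled).all()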
@@ -343,6 +383,11 @@ class DataInterface:
logger.info(f"Dataset generated and saved: {dataset_name}")
return dataset_info
+ def get_feature_count(self):
+ """Get the number of features per input sample"""
+ # OHLCV (5 features) per timeframe
+ return 5 * len(self.timeframes)
def prepare_realtime_input(self, timeframe='1h', n_candles=30, window_size=20):
"""
Prepare a single input sample from the most recent data for real-time inference.
@@ -387,4 +432,4 @@ class DataInterface:
# Get timestamp of the most recent candle
timestamp = df['timestamp'].iloc[-1]
- return X, timestamp
+ return X, timestamp

View File

@@ -0,0 +1,123 @@
"""
Enhanced Data Interface with additional NN trading parameters
"""
from typing import List, Optional, Tuple
import numpy as np
import pandas as pd
from datetime import datetime
from .data_interface import DataInterface
class MultiDataInterface(DataInterface):
"""
Enhanced data interface that supports window_size and output_size parameters
for neural network trading models.
"""
def __init__(self, symbol: str,
timeframes: List[str],
window_size: int = 20,
output_size: int = 3,
data_dir: str = "NN/data"):
"""
Initialize with window_size and output_size for NN predictions.
"""
super().__init__(symbol, timeframes, data_dir)
self.window_size = window_size
self.output_size = output_size
self.scalers = {} # Store scalers for each timeframe
self.min_window_threshold = 100 # Minimum candles needed for training
def get_feature_count(self) -> int:
"""
Get number of features (OHLCV) for NN input.
Note: deliberately overrides the parent's 5 * len(self.timeframes),
since this interface trains on single-timeframe windows (see
prepare_training_data below).
"""
return 5 # open, high, low, close, volume
def prepare_training_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""Prepare training data with windowed sequences"""
# Get historical data for primary timeframe
primary_tf = self.timeframes[0]
df = self.get_historical_data(timeframe=primary_tf,
n_candles=self.min_window_threshold + 1000)
if df is None or len(df) < self.min_window_threshold:
raise ValueError(f"Insufficient data for training. Need at least {self.min_window_threshold} candles")
# Prepare OHLCV sequences
ohlcv = df[['open', 'high', 'low', 'close', 'volume']].values
# Create sequences and labels
X = []
y = []
for i in range(len(ohlcv) - self.window_size - self.output_size):
# Input sequence
seq = ohlcv[i:i+self.window_size]
X.append(seq)
# Output target (price movement direction); start the slice one candle
# early so np.diff always yields output_size changes (a slice of length
# output_size gives an empty diff when output_size == 1)
close_prices = ohlcv[i+self.window_size-1:i+self.window_size+self.output_size, 3] # Close prices
price_changes = np.diff(close_prices) / close_prices[:-1] # Relative changes
if self.output_size == 1:
# Binary classification (up/down)
label = 1 if price_changes[0] > 0 else 0
elif self.output_size == 3:
# 3-class classification (buy/hold/sell); the 0.002 threshold is a 0.2%
# move, which is only meaningful against relative price changes
if price_changes[0] > 0.002: # Significant rise
label = 0 # Buy
elif price_changes[0] < -0.002: # Significant drop
label = 2 # Sell
else:
label = 1 # Hold
else:
raise ValueError(f"Unsupported output_size: {self.output_size}")
y.append(label)
# Convert to numpy arrays
X = np.array(X)
y = np.array(y)
# Split into train/validation (80/20)
split_idx = int(0.8 * len(X))
X_train, y_train = X[:split_idx], y[:split_idx]
X_val, y_val = X[split_idx:], y[split_idx:]
return X_train, y_train, X_val, y_val
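A toy run of the windowing and labeling on synthetic candles (uses the off-by-one-safe slice from above; data and seed are arbitrary):

import numpy as np

rng = np.random.default_rng(1)
ohlcv = np.abs(rng.normal(100, 1, size=(50, 5)))  # 50 fake candles
window_size, output_size = 20, 3
X, y = [], []
for i in range(len(ohlcv) - window_size - output_size):
    X.append(ohlcv[i:i + window_size])
    closes = ohlcv[i + window_size - 1:i + window_size + output_size, 3]
    rel = np.diff(closes) / closes[:-1]  # relative changes
    y.append(0 if rel[0] > 0.002 else 2 if rel[0] < -0.002 else 1)
print(np.array(X).shape, np.bincount(y, minlength=3))  # (27, 20, 5) and buy/hold/sell counts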
def prepare_prediction_data(self) -> np.ndarray:
"""Prepare most recent window for predictions"""
primary_tf = self.timeframes[0]
df = self.get_historical_data(timeframe=primary_tf,
n_candles=self.window_size,
use_cache=False)
if df is None or len(df) < self.window_size:
raise ValueError(f"Need at least {self.window_size} candles for prediction")
ohlcv = df[['open', 'high', 'low', 'close', 'volume']].values[-self.window_size:]
return np.array([ohlcv]) # Add batch dimension
def process_predictions(self, predictions: np.ndarray):
"""Convert prediction probabilities to trading signals"""
signals = []
for pred in predictions:
if self.output_size == 1:
signal = "BUY" if pred[0] > 0.5 else "SELL"
confidence = np.abs(pred[0] - 0.5) * 2 # Convert to 0-1 scale
elif self.output_size == 3:
action_idx = np.argmax(pred)
signal = ["BUY", "HOLD", "SELL"][action_idx]
confidence = pred[action_idx]
else:
signal = "HOLD"
confidence = 0.0
signals.append({
'action': signal,
'confidence': confidence,
'timestamp': datetime.now().isoformat()
})
return signals
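The 3-class branch of process_predictions, replicated standalone with a dummy softmax batch:

import numpy as np

preds = np.array([[0.7, 0.2, 0.1],    # argmax 0 -> BUY,  confidence 0.7
                  [0.1, 0.8, 0.1],    # argmax 1 -> HOLD, confidence 0.8
                  [0.2, 0.1, 0.7]])   # argmax 2 -> SELL, confidence 0.7
for pred in preds:
    idx = int(np.argmax(pred))
    print(["BUY", "HOLD", "SELL"][idx], float(pred[idx]))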

View File

@@ -0,0 +1,182 @@
"""
Realtime Analyzer for Neural Network Trading System
This module implements real-time analysis of market data using trained neural network models.
"""
import logging
import time
import numpy as np
from threading import Thread
from queue import Queue
from datetime import datetime
logger = logging.getLogger(__name__)
class RealtimeAnalyzer:
"""
Handles real-time analysis of market data using trained neural network models.
Features:
- Connects to real-time data sources (websockets)
- Processes incoming data through the neural network
- Generates trading signals
- Manages risk and position sizing
- Logs all trading decisions
"""
def __init__(self, data_interface, model, symbol="BTC/USDT", timeframes=None):
"""
Initialize the realtime analyzer.
Args:
data_interface (DataInterface): Preconfigured data interface
model: Trained neural network model
symbol (str): Trading pair symbol
timeframes (list): List of timeframes to monitor
"""
self.data_interface = data_interface
self.model = model
self.symbol = symbol
self.timeframes = timeframes or ['1h']
self.running = False
self.data_queue = Queue() # Unbounded: filled roughly once per second but drained only once per prediction_interval
self.prediction_interval = 60 # Seconds between predictions
logger.info(f"RealtimeAnalyzer initialized for {symbol}")
def start(self):
"""Start the realtime analysis process."""
if self.running:
logger.warning("Realtime analyzer already running")
return
self.running = True
# Start data collection thread
self.data_thread = Thread(target=self._collect_data, daemon=True)
self.data_thread.start()
# Start analysis thread
self.analysis_thread = Thread(target=self._analyze_data, daemon=True)
self.analysis_thread.start()
logger.info("Realtime analysis started")
def stop(self):
"""Stop the realtime analysis process."""
self.running = False
if hasattr(self, 'data_thread'):
self.data_thread.join(timeout=1)
if hasattr(self, 'analysis_thread'):
self.analysis_thread.join(timeout=1)
logger.info("Realtime analysis stopped")
def _collect_data(self):
"""Thread function for collecting real-time data."""
logger.info("Starting data collection thread")
# In a real implementation, this would connect to websockets/API
# For now, we'll simulate data collection from the data interface
while self.running:
try:
# Get latest data for each timeframe
for timeframe in self.timeframes:
# Get recent data (simulating real-time updates)
X, timestamp = self.data_interface.prepare_realtime_input(
timeframe=timeframe,
n_candles=30,
# Assumes the interface exposes window_size (e.g. MultiDataInterface;
# the base DataInterface in this commit does not set that attribute)
window_size=self.data_interface.window_size
)
if X is not None:
self.data_queue.put({
'timeframe': timeframe,
'data': X,
'timestamp': timestamp
})
# Throttle data collection
time.sleep(1)
except Exception as e:
logger.error(f"Error in data collection: {str(e)}")
time.sleep(5) # Wait before retrying
def _analyze_data(self):
"""Thread function for analyzing data and generating signals."""
logger.info("Starting analysis thread")
last_prediction_time = 0
while self.running:
try:
current_time = time.time()
# Only make predictions at the specified interval
if current_time - last_prediction_time < self.prediction_interval:
time.sleep(0.1)
continue
# Get latest data from queue
if not self.data_queue.empty():
data_item = self.data_queue.get()
# Make prediction
prediction = self.model.predict(data_item['data'])
# Process prediction
self._process_prediction(
prediction=prediction,
timeframe=data_item['timeframe'],
timestamp=data_item['timestamp']
)
last_prediction_time = current_time
time.sleep(0.1)
except Exception as e:
logger.error(f"Error in analysis: {str(e)}")
time.sleep(1) # Wait before retrying
def _process_prediction(self, prediction, timeframe, timestamp):
"""
Process model prediction and generate trading signals.
Args:
prediction: Model prediction output
timeframe (str): Timeframe the prediction is for
timestamp: Timestamp of the prediction
"""
# Convert prediction to trading signal
signal = self._prediction_to_signal(prediction)
# Log the signal
logger.info(
f"Signal generated - Timeframe: {timeframe}, "
f"Timestamp: {timestamp}, "
f"Signal: {signal}"
)
# In a real implementation, we would execute trades here
# For now, we'll just log the signals
def _prediction_to_signal(self, prediction):
"""
Convert model prediction to trading signal.
Args:
prediction: Model prediction output
Returns:
str: Trading signal (BUY, SELL, HOLD)
"""
# Simple threshold-based signal generation
if len(prediction.shape) == 1:
# Binary classification
return "BUY" if prediction[0] > 0.5 else "SELL"
else:
# Multi-class classification (3 outputs); class order matches the
# training labels in MultiDataInterface (0=Buy, 1=Hold, 2=Sell)
class_idx = np.argmax(prediction)
return ["BUY", "HOLD", "SELL"][class_idx]