train works

This commit is contained in:
Dobromir Popov
2025-03-31 03:20:12 +03:00
parent 8981ad0691
commit 1610d5bd49
10 changed files with 2554 additions and 406 deletions

View File

@ -209,11 +209,20 @@ class DataInterface:
price_changes = (next_close - curr_close) / curr_close
# Define thresholds for price movement classification
threshold = 0.001 # 0.1% threshold
threshold = 0.0005 # 0.05% threshold - smaller to encourage more signals
y = np.zeros(len(price_changes), dtype=int)
y[price_changes > threshold] = 2 # Up
y[price_changes < -threshold] = 0 # Down
y[(price_changes >= -threshold) & (price_changes <= threshold)] = 1 # Neutral
# Log the target distribution to understand our data better
sell_count = np.sum(y == 0)
hold_count = np.sum(y == 1)
buy_count = np.sum(y == 2)
total_count = len(y)
logger.info(f"Target distribution for {self.symbol} {self.timeframes[0]}: SELL: {sell_count} ({sell_count/total_count:.2%}), " +
f"HOLD: {hold_count} ({hold_count/total_count:.2%}), BUY: {buy_count} ({buy_count/total_count:.2%})")
logger.info(f"Created features - X shape: {X.shape}, y shape: {y.shape}")
return X, y, timestamps[window_size:]
@ -295,73 +304,107 @@ class DataInterface:
def calculate_pnl(self, predictions, actual_prices, position_size=1.0):
    """
    Calculate one-candle PnL, win rate and per-trade details for a series of predictions.

    Each prediction at index ``i`` is evaluated against the move from
    ``prices[i]`` to ``prices[i + 1]``. HOLD (1) and unrecognised actions open
    no trade.

    Accepted inputs:
    - Action predictions (0=SELL, 1=HOLD, 2=BUY), 1D or a single-column 2D array
    - Probability predictions (2D array of [sell_prob, hold_prob, buy_prob]),
      converted to actions with argmax
    - Prices as a 1D array, a single-column 2D array, or 2D rows with at least
      four columns, in which case column index 3 is used as the close price

    Args:
        predictions: Array of predicted actions or probabilities
        actual_prices: Array of actual prices (1D, or 2D as described above)
        position_size: Multiplier applied to each trade's fractional price change

    Returns:
        tuple: (total_pnl, win_rate, trades) where
            total_pnl is the sum of per-trade PnL (float),
            win_rate is winning trades / total trades (0.0 when there are no trades),
            trades is a list of dicts with keys 'type', 'entry', 'exit', 'pnl',
            'win', 'duration' and, when timestamp data is available, 'timestamp'.
        On invalid input ``(0.0, 0.0, [])`` is returned and an error is logged.
    """
    # Convert inputs to numpy arrays if they aren't already
    try:
        predictions = np.array(predictions)
        actual_prices = np.array(actual_prices)
    except (ValueError, TypeError) as e:
        logger.error(f"Error converting inputs: {str(e)}")
        return 0.0, 0.0, []

    # Validate input shapes (scalars and >2D arrays cannot be interpreted)
    if predictions.ndim not in (1, 2) or actual_prices.ndim not in (1, 2):
        logger.error("Invalid input dimensions")
        return 0.0, 0.0, []

    # Convert OHLC data to close prices if needed
    if actual_prices.ndim == 2 and actual_prices.shape[1] >= 4:
        prices = actual_prices[:, 3]  # Use close prices
    else:
        prices = actual_prices

    # Handle case where prices is 2D with single column
    if prices.ndim == 2 and prices.shape[1] == 1:
        prices = prices.flatten()

    # Any remaining 2D layout (2 or 3 columns) has no defined price column
    if prices.ndim != 1:
        logger.error("Invalid input dimensions")
        return 0.0, 0.0, []

    # Convert probabilities to actions if needed; a single-column 2D array is
    # flattened so that each action is a scalar inside the loop below
    if predictions.ndim == 2 and predictions.shape[1] > 1:
        actions = np.argmax(predictions, axis=1)
    else:
        actions = predictions.flatten()

    # Ensure we have enough prices
    if len(prices) < 2:
        logger.error("Not enough price data")
        return 0.0, 0.0, []

    # Trim to matching length: every action needs a current and a next price
    min_length = min(len(actions), len(prices) - 1)
    actions = actions[:min_length]
    prices = prices[:min_length + 1]

    pnl = 0.0
    wins = 0
    trades = []
    # Candle index of each recorded trade, kept in step with `trades` so that
    # timestamps can be looked up by candle rather than by trade position
    candle_indices = []

    for i, (action, current_price, next_price) in enumerate(
            zip(actions, prices[:-1], prices[1:])):
        # Skip HOLD actions
        if action == 1:
            continue

        # A zero entry price would turn the relative change into inf/nan
        if current_price == 0:
            continue

        price_change = (next_price - current_price) / current_price

        if action == 2:  # BUY
            trade_pnl = price_change * position_size
            trade_type = 'BUY'
            is_win = bool(price_change > 0)
        elif action == 0:  # SELL
            trade_pnl = -price_change * position_size
            trade_type = 'SELL'
            is_win = bool(price_change < 0)
        else:
            continue  # Invalid action

        pnl += trade_pnl
        wins += int(is_win)

        # Track trade details
        trades.append({
            'type': trade_type,
            'entry': current_price,
            'exit': next_price,
            'pnl': trade_pnl,
            'win': is_win,
            'duration': 1  # In number of candles
        })
        candle_indices.append(i)

    win_rate = wins / len(trades) if trades else 0.0

    # Add timestamps to trades if available. The lookup uses the candle index
    # of each trade: HOLD candles are skipped above, so the position of a trade
    # in `trades` does not match its row in the dataframe.
    dataframes = getattr(self, 'dataframes', None)
    timeframes = getattr(self, 'timeframes', None)
    if trades and dataframes and timeframes and timeframes[0] in dataframes:
        df = dataframes[timeframes[0]]
        if df is not None and 'timestamp' in df.columns:
            timestamp_col = df['timestamp']
            n_rows = len(timestamp_col)
            for candle_idx, trade in zip(candle_indices, trades):
                if candle_idx < n_rows:
                    trade['timestamp'] = timestamp_col.iloc[candle_idx]

    return float(pnl), win_rate, trades
def get_future_prices(self, prices, n_candles=3):
"""