FIRST WORKING CASE

Author: Dobromir Popov, 2025-03-10 12:27:47 +02:00
Parent: 14d70f938e
Commit: 66a2c41338
9 changed files with 25729 additions and 50 deletions

.gitignore (2 additions)

@@ -33,3 +33,5 @@ crypto/brian/models/best/*
 crypto/brian/models/last/*
 crypto/brian/live_chart.html
 crypto/gogo2/models/*
+crypto/gogo2/trading_bot.log
+*.log


@@ -21,6 +21,7 @@ import torch.cuda.amp as amp  # Add this import at the top
 from sklearn.preprocessing import MinMaxScaler
 import copy
 import argparse
+import traceback
 
 # Configure logging
 logging.basicConfig(
@@ -428,30 +429,26 @@ class TradingEnvironment:
         return False
 
     def step(self, action):
-        """Take an action in the environment"""
-        # Store previous balance for reward calculation
-        prev_balance = self.balance
+        """Take an action in the environment and return the next state, reward, and done flag"""
+        # Store current price before taking action
+        self.current_price = self.data[self.current_step]['close']
 
-        # Update current price
-        if self.current_step < len(self.data) - 1:
-            self.current_step += 1
-            self.current_price = self.data[self.current_step]['close']
-        else:
-            # End of data
-            return self.get_state(), 0, True
-
-        # Check for stop loss or take profit
-        self._check_sl_tp()
-
-        # Calculate reward based on action
+        # Process action (0: HOLD, 1: BUY/LONG, 2: SELL/SHORT, 3: CLOSE)
         reward = self.calculate_reward(action)
 
-        # Check if we've reached the end of the data
+        # Check for stop loss / take profit hits
+        self.check_sl_tp()
+
+        # Move to next step
+        self.current_step += 1
         done = self.current_step >= len(self.data) - 1
 
-        return self.get_state(), reward, done
+        # Get new state
+        next_state = self.get_state()
 
-    def _check_sl_tp(self):
+        return next_state, reward, done
+
+    def check_sl_tp(self):
         """Check if stop loss or take profit has been hit"""
         if self.position == 'flat':
            return
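For context, the reshaped step() above follows the usual gym-style (state, reward, done) contract. A minimal sketch of a driver loop is shown below; everything other than step() and get_state() (the agent's action-selection call and the replay-buffer push) is an assumption for illustration, not code from this commit.

# Hypothetical driver loop; agent.select_action() and agent.memory.push()
# are assumed names, not confirmed by this diff.
state = env.get_state()
done = False
while not done:
    action = agent.select_action(state)       # 0: HOLD, 1: BUY/LONG, 2: SELL/SHORT, 3: CLOSE
    next_state, reward, done = env.step(action)
    agent.memory.push(state, action, reward, next_state, done)
    agent.learn()
    state = next_state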
@@ -686,9 +683,6 @@ class TradingEnvironment:
         if hasattr(self, 'predicted_prices') and len(self.predicted_prices) > 0:
             # Normalize predictions relative to current price
             pred_norm = np.array(self.predicted_prices[:3]) / latest_price - 1.0
-            # Pad if needed
-            if len(pred_norm) < 3:
-                pred_norm = np.pad(pred_norm, (0, 3 - len(pred_norm)), 'constant')
             state_components.append(pred_norm)
         else:
             # Add zeros if no predictions
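For reference, pred_norm is just the fractional distance of each predicted price from the latest price. A tiny worked example with illustrative numbers (not taken from the repository):

import numpy as np

latest_price = 100.0                               # illustrative value
predicted_prices = np.array([101.0, 99.0, 100.5])  # illustrative values
pred_norm = predicted_prices[:3] / latest_price - 1.0
print(pred_norm)  # [ 0.01 -0.01  0.005]  i.e. +1%, -1%, +0.5%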
@@ -1065,6 +1059,97 @@ class TradingEnvironment:
         return analysis
 
+    def initialize_price_predictor(self, device="cpu"):
+        """Initialize the price prediction model"""
+        self.price_predictor = PricePredictionModel(input_size=30, hidden_size=128, output_size=5)
+        self.price_predictor.to(device)
+        self.price_predictor_optimizer = optim.Adam(self.price_predictor.parameters(), lr=1e-3)
+        self.predicted_prices = np.array([])
+
+    def train_price_predictor(self):
+        """Train the price prediction model on recent data"""
+        if len(self.features['price']) < 35:
+            return 0.0
+
+        # Get price history
+        price_history = self.features['price']
+
+        # Train the model
+        loss = self.price_predictor.train_on_new_data(
+            price_history,
+            self.price_predictor_optimizer,
+            epochs=5
+        )
+
+        return loss
+
+    def update_price_predictions(self):
+        """Update price predictions"""
+        if len(self.features['price']) < 30:
+            self.predicted_prices = np.array([])
+            return
+
+        # Get price history
+        price_history = self.features['price']
+
+        # Get predictions
+        self.predicted_prices = self.price_predictor.predict_next_candles(price_history, num_candles=5)
+
+    def identify_optimal_trades(self):
+        """Identify optimal entry and exit points based on local extrema"""
+        if len(self.features['price']) < 20:
+            return
+
+        # Find local bottoms and tops
+        bottoms, tops = find_local_extrema(self.features['price'], window=5)
+
+        # Store optimal trade points
+        self.optimal_bottoms = bottoms  # Buy points
+        self.optimal_tops = tops  # Sell points
+
+        # Create optimal trade signals
+        self.optimal_signals = np.zeros(len(self.features['price']))
+        for i in bottoms:
+            if 0 <= i < len(self.optimal_signals):  # Ensure index is valid
+                self.optimal_signals[i] = 1  # Buy signal
+        for i in tops:
+            if 0 <= i < len(self.optimal_signals):  # Ensure index is valid
+                self.optimal_signals[i] = -1  # Sell signal
+
+        logger.info(f"Identified {len(bottoms)} optimal buy points and {len(tops)} optimal sell points")
+
+    def calculate_position_size(self):
+        """Calculate position size based on current balance and risk parameters"""
+        # Use a fixed percentage of balance for each trade
+        risk_percent = 5.0  # Risk 5% of balance per trade
+
+        # Calculate position size with leverage
+        position_size = self.balance * (risk_percent / 100) * MAX_LEVERAGE
+
+        # Apply a safety factor to avoid liquidation
+        safety_factor = 0.8
+        position_size *= safety_factor
+
+        # Ensure minimum position size
+        min_position = 10.0  # Minimum position size in USD
+        position_size = max(position_size, min(min_position, self.balance * 0.5))
+
+        # Ensure position size doesn't exceed balance * leverage
+        max_position = self.balance * MAX_LEVERAGE
+        position_size = min(position_size, max_position)
+
+        return position_size
+
+    def calculate_fees(self, position_size):
+        """Calculate trading fees for a given position size"""
+        # Typical fee rate for crypto exchanges (0.1%)
+        fee_rate = 0.001
+
+        # Calculate fee
+        fee = position_size * fee_rate
+
+        return fee
+
 # Ensure GPU usage if available
 def get_device():
     """Get the best available device (CUDA GPU or CPU)"""
@@ -1177,33 +1262,28 @@ class Agent:
         return random.randrange(self.action_size)
 
     def learn(self):
-        """Learn from experience replay with mixed precision"""
+        """Learn from a batch of experiences"""
         if len(self.memory) < BATCH_SIZE:
             return None
 
         try:
-            # Sample batch from memory
+            # Sample a batch of experiences
             experiences = self.memory.sample(BATCH_SIZE)
 
-            # Check if any experience has None values
-            for exp in experiences:
-                if exp.state is None or exp.next_state is None:
-                    return None
-
-            # Convert to tensors
-            states = torch.FloatTensor([exp.state for exp in experiences]).to(self.device)
-            actions = torch.LongTensor([exp.action for exp in experiences]).unsqueeze(1).to(self.device)
-            rewards = torch.FloatTensor([exp.reward for exp in experiences]).to(self.device)
-            next_states = torch.FloatTensor([exp.next_state for exp in experiences]).to(self.device)
-            dones = torch.FloatTensor([exp.done for exp in experiences]).to(self.device)
+            # Convert experiences to tensors
+            states = torch.FloatTensor([e.state for e in experiences]).to(self.device)
+            actions = torch.LongTensor([e.action for e in experiences]).to(self.device)
+            rewards = torch.FloatTensor([e.reward for e in experiences]).to(self.device)
+            next_states = torch.FloatTensor([e.next_state for e in experiences]).to(self.device)
+            dones = torch.FloatTensor([e.done for e in experiences]).to(self.device)
 
             # Use mixed precision for forward/backward passes
             if self.device.type == "cuda":
                 with amp.autocast():
                     # Compute Q values
-                    current_q_values = self.policy_net(states).gather(1, actions)
+                    current_q_values = self.policy_net(states).gather(1, actions.unsqueeze(1))
 
-                    # Compute next state values using target network
+                    # Compute next Q values with target network
                     with torch.no_grad():
                         next_q_values = self.target_net(next_states).max(1)[0]
                         target_q_values = rewards + (GAMMA * next_q_values * (1 - dones))
@@ -1214,21 +1294,25 @@ class Agent:
                     # Compute loss
                     loss = F.smooth_l1_loss(current_q_values, target_q_values)
 
-                # Optimize with gradient scaling
+                # Backward pass with mixed precision
                 self.optimizer.zero_grad()
                 self.scaler.scale(loss).backward()
+
+                # Gradient clipping to prevent exploding gradients
                 self.scaler.unscale_(self.optimizer)
-                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
+                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), max_norm=1.0)
                 self.scaler.step(self.optimizer)
                 self.scaler.update()
             else:
-                # Standard precision training
+                # Standard precision for CPU
                 # Compute Q values
-                current_q_values = self.policy_net(states).gather(1, actions)
+                current_q_values = self.policy_net(states).gather(1, actions.unsqueeze(1))
 
-                # Compute next state values using target network
+                # Compute next Q values with target network
                 with torch.no_grad():
                     next_q_values = self.target_net(next_states).max(1)[0]
                     target_q_values = rewards + (GAMMA * next_q_values * (1 - dones))
 
                 # Reshape target values to match current_q_values
                 target_q_values = target_q_values.unsqueeze(1)
@@ -1236,18 +1320,27 @@ class Agent:
                 # Compute loss
                 loss = F.smooth_l1_loss(current_q_values, target_q_values)
 
-                # Optimize the model
+                # Backward pass
                 self.optimizer.zero_grad()
                 loss.backward()
-                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
+
+                # Gradient clipping to prevent exploding gradients
+                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), max_norm=1.0)
                 self.optimizer.step()
 
+            # Update steps done
+            self.steps_done += 1
+
+            # Update target network
+            if self.steps_done % TARGET_UPDATE == 0:
+                self.target_net.load_state_dict(self.policy_net.state_dict())
+
             return loss.item()
+
         except Exception as e:
             logger.error(f"Error during learning: {e}")
-            import traceback
-            logger.error(traceback.format_exc())
+            logger.error(f"Traceback: {traceback.format_exc()}")
             return None
 
     def update_target_network(self):
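learn() assumes an Experience record with state/action/reward/next_state/done fields and a replay memory exposing sample() and __len__; neither is shown in this diff. A minimal sketch of what they might look like follows; these are assumed definitions for illustration, not the repository's actual ones.

import random
from collections import deque, namedtuple

# Assumed structures; the real ones live elsewhere in the file.
Experience = namedtuple('Experience', ['state', 'action', 'reward', 'next_state', 'done'])

class ReplayMemory:
    def __init__(self, capacity=10000):
        self.memory = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        self.memory.append(Experience(state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Uniform random sampling, as the learn() code implies
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)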

File diff suppressed because it is too large.

Binary file not shown.

Image file added (60 KiB).


@@ -0,0 +1 @@
+episode_rewards,episode_lengths,balances,win_rates,episode_pnls,cumulative_pnl,drawdowns,prediction_accuracy