FIRST WORKING CASE

parent 14d70f938e
commit 66a2c41338

.gitignore (vendored): 2 lines changed
@@ -33,3 +33,5 @@ crypto/brian/models/best/*
 crypto/brian/models/last/*
 crypto/brian/live_chart.html
 crypto/gogo2/models/*
+crypto/gogo2/trading_bot.log
+*.log
@@ -21,6 +21,7 @@ import torch.cuda.amp as amp  # Add this import at the top
 from sklearn.preprocessing import MinMaxScaler
 import copy
 import argparse
+import traceback

 # Configure logging
 logging.basicConfig(
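The logging.basicConfig(...) call is cut off at the hunk boundary. A minimal sketch of a typical configuration writing to both a file and the console follows; the filename and format string are assumptions, not taken from this diff:

# Assumed example only: the actual arguments are truncated in this hunk.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("trading_bot.log"),
        logging.StreamHandler(),
    ],
)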
@@ -428,30 +429,26 @@ class TradingEnvironment:
        return False

    def step(self, action):
        """Take an action in the environment"""
        # Store previous balance for reward calculation
        prev_balance = self.balance
        """Take an action in the environment and return the next state, reward, and done flag"""
        # Store current price before taking action
        self.current_price = self.data[self.current_step]['close']

        # Update current price
        if self.current_step < len(self.data) - 1:
            self.current_step += 1
            self.current_price = self.data[self.current_step]['close']
        else:
            # End of data
            return self.get_state(), 0, True

        # Check for stop loss or take profit
        self._check_sl_tp()

        # Calculate reward based on action
        # Process action (0: HOLD, 1: BUY/LONG, 2: SELL/SHORT, 3: CLOSE)
        reward = self.calculate_reward(action)

        # Check if we've reached the end of the data
        # Check for stop loss / take profit hits
        self.check_sl_tp()

        # Move to next step
        self.current_step += 1
        done = self.current_step >= len(self.data) - 1

        return self.get_state(), reward, done
        # Get new state
        next_state = self.get_state()

    def _check_sl_tp(self):
        return next_state, reward, done

    def check_sl_tp(self):
        """Check if stop loss or take profit has been hit"""
        if self.position == 'flat':
            return
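The hunk above interleaves the old and new bodies of step(). Read together, the rewritten flow is roughly the following; this is a sketch reconstructed from the lines shown, not the verbatim new method:

def step(self, action):
    """Take an action in the environment and return the next state, reward, and done flag"""
    # Price at the current candle, before acting
    self.current_price = self.data[self.current_step]['close']

    # Process action (0: HOLD, 1: BUY/LONG, 2: SELL/SHORT, 3: CLOSE) and score it
    reward = self.calculate_reward(action)

    # Close the position if a stop loss / take profit level was crossed
    self.check_sl_tp()

    # Advance to the next candle and signal termination at the end of the data
    self.current_step += 1
    done = self.current_step >= len(self.data) - 1

    return self.get_state(), reward, done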
@@ -686,9 +683,6 @@ class TradingEnvironment:
        if hasattr(self, 'predicted_prices') and len(self.predicted_prices) > 0:
            # Normalize predictions relative to current price
            pred_norm = np.array(self.predicted_prices[:3]) / latest_price - 1.0
            # Pad if needed
            if len(pred_norm) < 3:
                pred_norm = np.pad(pred_norm, (0, 3 - len(pred_norm)), 'constant')
            state_components.append(pred_norm)
        else:
            # Add zeros if no predictions
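For reference, the normalization above turns the next three predicted prices into relative returns and zero-pads when fewer than three predictions are available. A small standalone check, with made-up numbers:

import numpy as np

predicted_prices = np.array([101.0, 102.5])   # only two predictions available
latest_price = 100.0

pred_norm = predicted_prices[:3] / latest_price - 1.0   # -> [0.01, 0.025]
if len(pred_norm) < 3:
    # pad with zeros so the state component keeps a fixed width of 3
    pred_norm = np.pad(pred_norm, (0, 3 - len(pred_norm)), 'constant')

print(pred_norm)   # [0.01  0.025 0.   ]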
@@ -1065,6 +1059,97 @@ class TradingEnvironment:

        return analysis

    def initialize_price_predictor(self, device="cpu"):
        """Initialize the price prediction model"""
        self.price_predictor = PricePredictionModel(input_size=30, hidden_size=128, output_size=5)
        self.price_predictor.to(device)
        self.price_predictor_optimizer = optim.Adam(self.price_predictor.parameters(), lr=1e-3)
        self.predicted_prices = np.array([])

    def train_price_predictor(self):
        """Train the price prediction model on recent data"""
        if len(self.features['price']) < 35:
            return 0.0

        # Get price history
        price_history = self.features['price']

        # Train the model
        loss = self.price_predictor.train_on_new_data(
            price_history,
            self.price_predictor_optimizer,
            epochs=5
        )

        return loss

    def update_price_predictions(self):
        """Update price predictions"""
        if len(self.features['price']) < 30:
            self.predicted_prices = np.array([])
            return

        # Get price history
        price_history = self.features['price']

        # Get predictions
        self.predicted_prices = self.price_predictor.predict_next_candles(price_history, num_candles=5)
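These three helpers are presumably wired into the training loop roughly as follows. This is only a sketch; the episode loop and the env/agent/num_episodes names are assumptions, not shown in this diff:

# Hypothetical call order inside the training loop
env.initialize_price_predictor(device=agent.device)   # once, before training starts

for episode in range(num_episodes):
    ...
    predictor_loss = env.train_price_predictor()   # fit on the latest price history
    env.update_price_predictions()                 # refresh env.predicted_prices (5 candles ahead)
    env.identify_optimal_trades()                  # mark local bottoms/tops as buy/sell targets
    ...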
    def identify_optimal_trades(self):
        """Identify optimal entry and exit points based on local extrema"""
        if len(self.features['price']) < 20:
            return

        # Find local bottoms and tops
        bottoms, tops = find_local_extrema(self.features['price'], window=5)

        # Store optimal trade points
        self.optimal_bottoms = bottoms  # Buy points
        self.optimal_tops = tops  # Sell points

        # Create optimal trade signals
        self.optimal_signals = np.zeros(len(self.features['price']))
        for i in bottoms:
            if 0 <= i < len(self.optimal_signals):  # Ensure index is valid
                self.optimal_signals[i] = 1  # Buy signal
        for i in tops:
            if 0 <= i < len(self.optimal_signals):  # Ensure index is valid
                self.optimal_signals[i] = -1  # Sell signal

        logger.info(f"Identified {len(bottoms)} optimal buy points and {len(tops)} optimal sell points")
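find_local_extrema is referenced here but its body lies outside this hunk. A plausible windowed implementation consistent with how it is called is sketched below; this is an assumption, not the committed code:

def find_local_extrema(prices, window=5):
    """Return indices of local minima (bottoms) and local maxima (tops).

    A point counts as a bottom/top if it is the smallest/largest value within
    +/- `window` candles around it. Hypothetical sketch only.
    """
    bottoms, tops = [], []
    for i in range(window, len(prices) - window):
        segment = prices[i - window:i + window + 1]
        if prices[i] == min(segment):
            bottoms.append(i)
        elif prices[i] == max(segment):
            tops.append(i)
    return bottoms, tops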
    def calculate_position_size(self):
        """Calculate position size based on current balance and risk parameters"""
        # Use a fixed percentage of balance for each trade
        risk_percent = 5.0  # Risk 5% of balance per trade

        # Calculate position size with leverage
        position_size = self.balance * (risk_percent / 100) * MAX_LEVERAGE

        # Apply a safety factor to avoid liquidation
        safety_factor = 0.8
        position_size *= safety_factor

        # Ensure minimum position size
        min_position = 10.0  # Minimum position size in USD
        position_size = max(position_size, min(min_position, self.balance * 0.5))

        # Ensure position size doesn't exceed balance * leverage
        max_position = self.balance * MAX_LEVERAGE
        position_size = min(position_size, max_position)

        return position_size
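As a worked example of the sizing arithmetic, assuming a 100 USD balance and MAX_LEVERAGE = 100 (an assumed value, the constant is not shown in this hunk):

# Worked example with assumed MAX_LEVERAGE = 100
balance = 100.0
position_size = balance * (5.0 / 100) * 100                     # 500.0 USD notional
position_size *= 0.8                                            # safety factor -> 400.0 USD
position_size = max(position_size, min(10.0, balance * 0.5))    # 10 USD floor, still 400.0
position_size = min(position_size, balance * 100)               # 10,000 USD leverage cap, still 400.0
# -> 400.0 USD position size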
    def calculate_fees(self, position_size):
        """Calculate trading fees for a given position size"""
        # Typical fee rate for crypto exchanges (0.1%)
        fee_rate = 0.001

        # Calculate fee
        fee = position_size * fee_rate

        return fee
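For the 400 USD position from the example above, the fee works out to 400 * 0.001 = 0.40 USD per side, or roughly 0.80 USD for a round trip (entry plus exit).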
# Ensure GPU usage if available
def get_device():
    """Get the best available device (CUDA GPU or CPU)"""
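The body of get_device() is cut off at the hunk boundary; the standard PyTorch idiom it most likely wraps (an assumption, torch is imported at module level) is:

# Assumed implementation; only the signature and docstring appear in this hunk.
def get_device():
    """Get the best available device (CUDA GPU or CPU)"""
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")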
@@ -1177,33 +1262,28 @@ class Agent:
        return random.randrange(self.action_size)

    def learn(self):
        """Learn from experience replay with mixed precision"""
        """Learn from a batch of experiences"""
        if len(self.memory) < BATCH_SIZE:
            return None

        try:
            # Sample batch from memory
            # Sample a batch of experiences
            experiences = self.memory.sample(BATCH_SIZE)

            # Check if any experience has None values
            for exp in experiences:
                if exp.state is None or exp.next_state is None:
                    return None

            # Convert to tensors
            states = torch.FloatTensor([exp.state for exp in experiences]).to(self.device)
            actions = torch.LongTensor([exp.action for exp in experiences]).unsqueeze(1).to(self.device)
            rewards = torch.FloatTensor([exp.reward for exp in experiences]).to(self.device)
            next_states = torch.FloatTensor([exp.next_state for exp in experiences]).to(self.device)
            dones = torch.FloatTensor([exp.done for exp in experiences]).to(self.device)
            # Convert experiences to tensors
            states = torch.FloatTensor([e.state for e in experiences]).to(self.device)
            actions = torch.LongTensor([e.action for e in experiences]).to(self.device)
            rewards = torch.FloatTensor([e.reward for e in experiences]).to(self.device)
            next_states = torch.FloatTensor([e.next_state for e in experiences]).to(self.device)
            dones = torch.FloatTensor([e.done for e in experiences]).to(self.device)

            # Use mixed precision for forward/backward passes
            if self.device.type == "cuda":
                with amp.autocast():
                    # Compute Q values
                    current_q_values = self.policy_net(states).gather(1, actions)
                    current_q_values = self.policy_net(states).gather(1, actions.unsqueeze(1))

                    # Compute next state values using target network
                    # Compute next Q values with target network
                    with torch.no_grad():
                        next_q_values = self.target_net(next_states).max(1)[0]
                        target_q_values = rewards + (GAMMA * next_q_values * (1 - dones))
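The TD target computed above is the standard DQN form: for a sampled transition (s, a, r, s', done), target = r + GAMMA * max_a' Q_target(s', a') * (1 - done), and the loss below is the smooth L1 (Huber) distance between Q_policy(s, a) and that target.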
@@ -1214,21 +1294,25 @@
                    # Compute loss
                    loss = F.smooth_l1_loss(current_q_values, target_q_values)

                # Optimize with gradient scaling
                # Backward pass with mixed precision
                self.optimizer.zero_grad()
                self.scaler.scale(loss).backward()

                # Gradient clipping to prevent exploding gradients
                self.scaler.unscale_(self.optimizer)
                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), max_norm=1.0)

                self.scaler.step(self.optimizer)
                self.scaler.update()
            else:
                # Standard precision training
                # Standard precision for CPU
                # Compute Q values
                current_q_values = self.policy_net(states).gather(1, actions)
                current_q_values = self.policy_net(states).gather(1, actions.unsqueeze(1))

                # Compute next state values using target network
                # Compute next Q values with target network
                with torch.no_grad():
                    next_q_values = self.target_net(next_states).max(1)[0]
                    target_q_values = rewards + (GAMMA * next_q_values * (1 - dones))

                # Reshape target values to match current_q_values
                target_q_values = target_q_values.unsqueeze(1)
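The CUDA branch above follows the usual torch.cuda.amp pattern: autocast for the forward pass, GradScaler around backward and step, unscale before clipping. A condensed, self-contained sketch of that pattern with generic stand-ins for the agent's attributes (requires a CUDA device to run):

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.cuda.amp as amp

# Generic stand-ins for policy_net / optimizer / a sampled batch
net = nn.Linear(8, 4).cuda()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
scaler = amp.GradScaler()

states = torch.randn(32, 8, device="cuda")
targets = torch.randn(32, 4, device="cuda")

with amp.autocast():                      # forward pass in mixed precision
    loss = F.smooth_l1_loss(net(states), targets)

optimizer.zero_grad()
scaler.scale(loss).backward()             # scaled backward pass
scaler.unscale_(optimizer)                # unscale before clipping
torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=1.0)
scaler.step(optimizer)                    # skips the step if gradients overflowed
scaler.update()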
@@ -1236,18 +1320,27 @@
                # Compute loss
                loss = F.smooth_l1_loss(current_q_values, target_q_values)

                # Optimize the model
                # Backward pass
                self.optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)

                # Gradient clipping to prevent exploding gradients
                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), max_norm=1.0)

                self.optimizer.step()

            # Update steps done
            self.steps_done += 1

            # Update target network
            if self.steps_done % TARGET_UPDATE == 0:
                self.target_net.load_state_dict(self.policy_net.state_dict())

            return loss.item()

        except Exception as e:
            logger.error(f"Error during learning: {e}")
            import traceback
            logger.error(traceback.format_exc())
            logger.error(f"Traceback: {traceback.format_exc()}")
            return None

    def update_target_network(self):
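update_target_network() is cut off at the end of the hunk; given the hard update already performed inside learn() every TARGET_UPDATE steps, its body is presumably the same one-line copy (an assumption):

    def update_target_network(self):
        """Copy the policy network weights into the target network (assumed body)."""
        self.target_net.load_state_dict(self.policy_net.state_dict())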
Binary files not shown.
File diff suppressed because it is too large.

crypto/gogo2/training_results.png (new binary file, 60 KiB): not shown.
crypto/gogo2/training_stats.csv (new file, 1 line)
@@ -0,0 +1 @@
+episode_rewards,episode_lengths,balances,win_rates,episode_pnls,cumulative_pnl,drawdowns,prediction_accuracy
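The new CSV carries only this header row in the commit; per-episode statistics are presumably appended during training, for example with csv.DictWriter keyed on the same column names. A sketch only, not the committed writer code:

import csv

FIELDS = ["episode_rewards", "episode_lengths", "balances", "win_rates",
          "episode_pnls", "cumulative_pnl", "drawdowns", "prediction_accuracy"]

def append_training_stats(row, path="crypto/gogo2/training_stats.csv"):
    """Append one episode's statistics as a CSV row (hypothetical helper)."""
    with open(path, "a", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=FIELDS)
        writer.writerow(row)

# Example usage with made-up numbers
append_training_stats({
    "episode_rewards": 12.3, "episode_lengths": 500, "balances": 104.2,
    "win_rates": 0.55, "episode_pnls": 4.2, "cumulative_pnl": 4.2,
    "drawdowns": 0.03, "prediction_accuracy": 0.61,
})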
|
Loading…
x
Reference in New Issue
Block a user