FIRST WORKING CASE
This commit is contained in:
parent: 14d70f938e
commit: 66a2c41338
.gitignore (vendored): 2 additions
@@ -33,3 +33,5 @@ crypto/brian/models/best/*
 crypto/brian/models/last/*
 crypto/brian/live_chart.html
 crypto/gogo2/models/*
+crypto/gogo2/trading_bot.log
+*.log

@@ -21,6 +21,7 @@ import torch.cuda.amp as amp  # Add this import at the top
 from sklearn.preprocessing import MinMaxScaler
 import copy
 import argparse
+import traceback
 
 # Configure logging
 logging.basicConfig(

@@ -428,30 +429,26 @@ class TradingEnvironment:
         return False
 
     def step(self, action):
-        """Take an action in the environment"""
-        # Store previous balance for reward calculation
-        prev_balance = self.balance
-
-        # Update current price
-        if self.current_step < len(self.data) - 1:
-            self.current_step += 1
-            self.current_price = self.data[self.current_step]['close']
-        else:
-            # End of data
-            return self.get_state(), 0, True
-
-        # Check for stop loss or take profit
-        self._check_sl_tp()
-
-        # Calculate reward based on action
-        reward = self.calculate_reward(action)
-
-        # Check if we've reached the end of the data
-        done = self.current_step >= len(self.data) - 1
-
-        return self.get_state(), reward, done
-
-    def _check_sl_tp(self):
+        """Take an action in the environment and return the next state, reward, and done flag"""
+        # Store current price before taking action
+        self.current_price = self.data[self.current_step]['close']
+
+        # Process action (0: HOLD, 1: BUY/LONG, 2: SELL/SHORT, 3: CLOSE)
+        reward = self.calculate_reward(action)
+
+        # Check for stop loss / take profit hits
+        self.check_sl_tp()
+
+        # Move to next step
+        self.current_step += 1
+        done = self.current_step >= len(self.data) - 1
+
+        # Get new state
+        next_state = self.get_state()
+
+        return next_state, reward, done
+
+    def check_sl_tp(self):
         """Check if stop loss or take profit has been hit"""
         if self.position == 'flat':
             return

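
For orientation, a minimal sketch of how the revised step() signature might be consumed in a rollout loop; the env and agent names and the replay-buffer call are assumptions for illustration, not taken from this commit.

    # Illustrative rollout against the revised environment API (env, agent,
    # and agent.memory.push are hypothetical names, not from this diff).
    state = env.get_state()
    done = False
    while not done:
        action = agent.select_action(state)           # 0: HOLD, 1: BUY/LONG, 2: SELL/SHORT, 3: CLOSE
        next_state, reward, done = env.step(action)   # step() now returns (next_state, reward, done)
        agent.memory.push(state, action, reward, next_state, done)
        state = next_state
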
@@ -686,9 +683,6 @@ class TradingEnvironment:
         if hasattr(self, 'predicted_prices') and len(self.predicted_prices) > 0:
             # Normalize predictions relative to current price
             pred_norm = np.array(self.predicted_prices[:3]) / latest_price - 1.0
-            # Pad if needed
-            if len(pred_norm) < 3:
-                pred_norm = np.pad(pred_norm, (0, 3 - len(pred_norm)), 'constant')
             state_components.append(pred_norm)
         else:
             # Add zeros if no predictions

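
The normalization in this hunk expresses each predicted price as a fractional offset from the latest price; a quick worked example with illustrative numbers:

    import numpy as np

    predicted_prices = np.array([101.0, 102.0, 99.0])  # hypothetical model outputs
    latest_price = 100.0
    pred_norm = predicted_prices[:3] / latest_price - 1.0
    # -> array([ 0.01,  0.02, -0.01]): +1%, +2%, -1% relative to the current price
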
@@ -1065,6 +1059,97 @@ class TradingEnvironment:
 
         return analysis
 
+    def initialize_price_predictor(self, device="cpu"):
+        """Initialize the price prediction model"""
+        self.price_predictor = PricePredictionModel(input_size=30, hidden_size=128, output_size=5)
+        self.price_predictor.to(device)
+        self.price_predictor_optimizer = optim.Adam(self.price_predictor.parameters(), lr=1e-3)
+        self.predicted_prices = np.array([])
+
+    def train_price_predictor(self):
+        """Train the price prediction model on recent data"""
+        if len(self.features['price']) < 35:
+            return 0.0
+
+        # Get price history
+        price_history = self.features['price']
+
+        # Train the model
+        loss = self.price_predictor.train_on_new_data(
+            price_history,
+            self.price_predictor_optimizer,
+            epochs=5
+        )
+
+        return loss
+
+    def update_price_predictions(self):
+        """Update price predictions"""
+        if len(self.features['price']) < 30:
+            self.predicted_prices = np.array([])
+            return
+
+        # Get price history
+        price_history = self.features['price']
+
+        # Get predictions
+        self.predicted_prices = self.price_predictor.predict_next_candles(price_history, num_candles=5)
+
+    def identify_optimal_trades(self):
+        """Identify optimal entry and exit points based on local extrema"""
+        if len(self.features['price']) < 20:
+            return
+
+        # Find local bottoms and tops
+        bottoms, tops = find_local_extrema(self.features['price'], window=5)
+
+        # Store optimal trade points
+        self.optimal_bottoms = bottoms  # Buy points
+        self.optimal_tops = tops  # Sell points
+
+        # Create optimal trade signals
+        self.optimal_signals = np.zeros(len(self.features['price']))
+        for i in bottoms:
+            if 0 <= i < len(self.optimal_signals):  # Ensure index is valid
+                self.optimal_signals[i] = 1  # Buy signal
+        for i in tops:
+            if 0 <= i < len(self.optimal_signals):  # Ensure index is valid
+                self.optimal_signals[i] = -1  # Sell signal
+
+        logger.info(f"Identified {len(bottoms)} optimal buy points and {len(tops)} optimal sell points")
+
+    def calculate_position_size(self):
+        """Calculate position size based on current balance and risk parameters"""
+        # Use a fixed percentage of balance for each trade
+        risk_percent = 5.0  # Risk 5% of balance per trade
+
+        # Calculate position size with leverage
+        position_size = self.balance * (risk_percent / 100) * MAX_LEVERAGE
+
+        # Apply a safety factor to avoid liquidation
+        safety_factor = 0.8
+        position_size *= safety_factor
+
+        # Ensure minimum position size
+        min_position = 10.0  # Minimum position size in USD
+        position_size = max(position_size, min(min_position, self.balance * 0.5))
+
+        # Ensure position size doesn't exceed balance * leverage
+        max_position = self.balance * MAX_LEVERAGE
+        position_size = min(position_size, max_position)
+
+        return position_size
+
+    def calculate_fees(self, position_size):
+        """Calculate trading fees for a given position size"""
+        # Typical fee rate for crypto exchanges (0.1%)
+        fee_rate = 0.001
+
+        # Calculate fee
+        fee = position_size * fee_rate
+
+        return fee
+
 # Ensure GPU usage if available
 def get_device():
     """Get the best available device (CUDA GPU or CPU)"""

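
A minimal sketch of how the helpers added in this hunk might be wired together; the env loop structure and the MAX_LEVERAGE value used in the arithmetic are assumptions for illustration only.

    # Illustrative wiring of the new TradingEnvironment helpers (env is a
    # hypothetical instance; MAX_LEVERAGE is assumed to be 50 for the numbers below).
    device = get_device()
    env.initialize_price_predictor(device=device)

    for episode in range(10):
        pred_loss = env.train_price_predictor()    # fine-tune on recent candles, returns the loss
        env.update_price_predictions()             # refresh env.predicted_prices (next 5 candles)
        env.identify_optimal_trades()              # mark local bottoms/tops as buy/sell signals

    # Position-sizing arithmetic, assuming balance = 100 USD and MAX_LEVERAGE = 50:
    #   100 * (5.0 / 100) * 50 * 0.8 = 200 USD notional, capped at balance * MAX_LEVERAGE.
    size = env.calculate_position_size()
    fee = env.calculate_fees(size)                 # 0.1% of notional, e.g. 200 * 0.001 = 0.2 USD
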
@@ -1177,33 +1262,28 @@ class Agent:
             return random.randrange(self.action_size)
 
     def learn(self):
-        """Learn from experience replay with mixed precision"""
+        """Learn from a batch of experiences"""
        if len(self.memory) < BATCH_SIZE:
             return None
 
         try:
-            # Sample batch from memory
+            # Sample a batch of experiences
             experiences = self.memory.sample(BATCH_SIZE)
 
-            # Check if any experience has None values
-            for exp in experiences:
-                if exp.state is None or exp.next_state is None:
-                    return None
-
-            # Convert to tensors
-            states = torch.FloatTensor([exp.state for exp in experiences]).to(self.device)
-            actions = torch.LongTensor([exp.action for exp in experiences]).unsqueeze(1).to(self.device)
-            rewards = torch.FloatTensor([exp.reward for exp in experiences]).to(self.device)
-            next_states = torch.FloatTensor([exp.next_state for exp in experiences]).to(self.device)
-            dones = torch.FloatTensor([exp.done for exp in experiences]).to(self.device)
+            # Convert experiences to tensors
+            states = torch.FloatTensor([e.state for e in experiences]).to(self.device)
+            actions = torch.LongTensor([e.action for e in experiences]).to(self.device)
+            rewards = torch.FloatTensor([e.reward for e in experiences]).to(self.device)
+            next_states = torch.FloatTensor([e.next_state for e in experiences]).to(self.device)
+            dones = torch.FloatTensor([e.done for e in experiences]).to(self.device)
 
             # Use mixed precision for forward/backward passes
             if self.device.type == "cuda":
                 with amp.autocast():
                     # Compute Q values
-                    current_q_values = self.policy_net(states).gather(1, actions)
+                    current_q_values = self.policy_net(states).gather(1, actions.unsqueeze(1))
 
-                    # Compute next state values using target network
+                    # Compute next Q values with target network
                     with torch.no_grad():
                         next_q_values = self.target_net(next_states).max(1)[0]
                         target_q_values = rewards + (GAMMA * next_q_values * (1 - dones))

@@ -1214,21 +1294,25 @@
                     # Compute loss
                     loss = F.smooth_l1_loss(current_q_values, target_q_values)
 
-                # Optimize with gradient scaling
+                # Backward pass with mixed precision
                 self.optimizer.zero_grad()
                 self.scaler.scale(loss).backward()
+
+                # Gradient clipping to prevent exploding gradients
                 self.scaler.unscale_(self.optimizer)
-                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
+                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), max_norm=1.0)
 
                 self.scaler.step(self.optimizer)
                 self.scaler.update()
             else:
-                # Standard precision training
+                # Standard precision for CPU
                 # Compute Q values
-                current_q_values = self.policy_net(states).gather(1, actions)
+                current_q_values = self.policy_net(states).gather(1, actions.unsqueeze(1))
 
-                # Compute next state values using target network
+                # Compute next Q values with target network
                 with torch.no_grad():
                     next_q_values = self.target_net(next_states).max(1)[0]
+                    target_q_values = rewards + (GAMMA * next_q_values * (1 - dones))
 
                 # Reshape target values to match current_q_values
                 target_q_values = target_q_values.unsqueeze(1)

@@ -1236,18 +1320,27 @@
                 # Compute loss
                 loss = F.smooth_l1_loss(current_q_values, target_q_values)
 
-                # Optimize the model
+                # Backward pass
                 self.optimizer.zero_grad()
                 loss.backward()
-                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
+
+                # Gradient clipping to prevent exploding gradients
+                torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), max_norm=1.0)
+
                 self.optimizer.step()
 
+            # Update steps done
+            self.steps_done += 1
+
+            # Update target network
+            if self.steps_done % TARGET_UPDATE == 0:
+                self.target_net.load_state_dict(self.policy_net.state_dict())
+
             return loss.item()
 
         except Exception as e:
             logger.error(f"Error during learning: {e}")
-            import traceback
-            logger.error(traceback.format_exc())
+            logger.error(f"Traceback: {traceback.format_exc()}")
 
             return None
 
     def update_target_network(self):

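
The unsqueeze calls introduced above align tensor shapes for gather and the Huber loss; a self-contained shape sketch with an illustrative batch size, action count, and a GAMMA of 0.99 (assumed, not taken from this diff):

    import torch
    import torch.nn.functional as F

    q_all = torch.randn(4, 3)                           # policy_net(states): [batch, action_size]
    actions = torch.tensor([0, 2, 1, 2])                # [batch]
    current_q = q_all.gather(1, actions.unsqueeze(1))   # [batch, 1]

    next_q = torch.randn(4, 3).max(1)[0]                # target_net(next_states).max(1)[0]: [batch]
    rewards = torch.zeros(4)
    dones = torch.tensor([0.0, 0.0, 1.0, 0.0])
    target_q = rewards + 0.99 * next_q * (1 - dones)    # Bellman target; GAMMA assumed 0.99
    loss = F.smooth_l1_loss(current_q, target_q.unsqueeze(1))  # shapes match: [batch, 1]
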
Binary files not shown.
File diff suppressed because it is too large.
crypto/gogo2/training_results.png (new binary file, 60 KiB): binary file not shown.
crypto/gogo2/training_stats.csv (new file): 1 addition
@@ -0,0 +1 @@
+episode_rewards,episode_lengths,balances,win_rates,episode_pnls,cumulative_pnl,drawdowns,prediction_accuracy