fix checkpoint model loading

This commit is contained in:
Dobromir Popov 2025-02-02 21:04:23 +02:00
parent 79c51c0d5d
commit 0ebf4e13bd
2 changed files with 60 additions and 51 deletions

File diff suppressed because one or more lines are too long

View File

@ -47,7 +47,7 @@ def save_candles_cache(filename, candles_dict):
print("Error saving cache file:", e) print("Error saving cache file:", e)
# ------------------------------------- # -------------------------------------
# Checkpoint Functions (same as before) # Checkpoint Functions
# ------------------------------------- # -------------------------------------
def maintain_checkpoint_directory(directory, max_files=10): def maintain_checkpoint_directory(directory, max_files=10):
files = os.listdir(directory) files = os.listdir(directory)
@ -100,6 +100,8 @@ def save_checkpoint(model, epoch, reward, last_dir=LAST_DIR, best_dir=BEST_DIR):
print(f"Saved checkpoint for epoch {epoch} with reward {reward:.4f}") print(f"Saved checkpoint for epoch {epoch} with reward {reward:.4f}")
def load_best_checkpoint(model, best_dir=BEST_DIR): def load_best_checkpoint(model, best_dir=BEST_DIR):
"""Attempt to load the best checkpoint. If the architecture is different,
catch the RuntimeError and skip loading."""
best_models = get_best_models(best_dir) best_models = get_best_models(best_dir)
if not best_models: if not best_models:
return None return None
@ -107,7 +109,13 @@ def load_best_checkpoint(model, best_dir=BEST_DIR):
path = os.path.join(best_dir, best_file) path = os.path.join(best_dir, best_file)
print(f"Loading best model from checkpoint: {best_file} with reward {best_reward:.4f}") print(f"Loading best model from checkpoint: {best_file} with reward {best_reward:.4f}")
checkpoint = torch.load(path) checkpoint = torch.load(path)
model.load_state_dict(checkpoint["model_state_dict"]) try:
model.load_state_dict(checkpoint["model_state_dict"])
except RuntimeError as e:
print("Warning: Failed to load best checkpoint due to:")
print(e)
print("This is likely due to a change in model architecture. Skipping checkpoint load.")
return None
return checkpoint return checkpoint
# ------------------------------------- # -------------------------------------
@ -283,7 +291,6 @@ class BacktestEnvironment:
base_ts = base_candle["timestamp"] base_ts = base_candle["timestamp"]
for tf in self.timeframes: for tf in self.timeframes:
candles_list = self.candles_dict[tf] candles_list = self.candles_dict[tf]
# Get the candle from this timeframe that is closest to (and <=) base_ts.
aligned_index, _ = get_aligned_candle_with_index(candles_list, base_ts) aligned_index, _ = get_aligned_candle_with_index(candles_list, base_ts)
features = get_features_for_tf(candles_list, aligned_index, period=10) features = get_features_for_tf(candles_list, aligned_index, period=10)
state_features.extend(features) state_features.extend(features)
@ -392,15 +399,15 @@ def train_on_historical_data(env, rl_agent, num_epochs=10, epsilon=0.1):
# ------------------------------------- # -------------------------------------
async def main_backtest(): async def main_backtest():
symbol = 'BTC/USDT' symbol = 'BTC/USDT'
# Define timeframes: we'll use 5 different ones. # Define timeframes: 5 different ones.
timeframes = ["1m", "5m", "15m", "1h", "1d"] timeframes = ["1m", "5m", "15m", "1h", "1d"]
now = int(time.time() * 1000) now = int(time.time() * 1000)
# Use the base timeframe period of 1500 candles. For 1m, that is 1500 minutes. # For base timeframe 1m, get 1500 candles (1500 minutes)
period_ms = 1500 * 60 * 1000 period_ms = 1500 * 60 * 1000
since = now - period_ms since = now - period_ms
end_time = now end_time = now
# Initialize exchange using MEXC (or your preferred exchange). # Initialize exchange using MEXC
mexc_api_key = os.environ.get('MEXC_API_KEY', 'YOUR_API_KEY') mexc_api_key = os.environ.get('MEXC_API_KEY', 'YOUR_API_KEY')
mexc_api_secret = os.environ.get('MEXC_API_SECRET', 'YOUR_SECRET_KEY') mexc_api_secret = os.environ.get('MEXC_API_SECRET', 'YOUR_SECRET_KEY')
exchange = ccxt.mexc({ exchange = ccxt.mexc({
@ -409,58 +416,60 @@ async def main_backtest():
'enableRateLimit': True, 'enableRateLimit': True,
}) })
candles_dict = {} try:
for tf in timeframes: candles_dict = {}
print(f"Fetching historical data for timeframe {tf}...") for tf in timeframes:
candles = await fetch_historical_data(exchange, symbol, tf, since, end_time, batch_size=500) print(f"Fetching historical data for timeframe {tf}...")
candles_dict[tf] = candles candles = await fetch_historical_data(exchange, symbol, tf, since, end_time, batch_size=500)
candles_dict[tf] = candles
# Optionally, save the multi-timeframe cache. # Optionally, save the multi-timeframe cache.
save_candles_cache(CACHE_FILE, candles_dict) save_candles_cache(CACHE_FILE, candles_dict)
# Create the backtest environment using multi-timeframe data. # Create the backtest environment using multi-timeframe data.
env = BacktestEnvironment(candles_dict, base_tf="1m", timeframes=timeframes) env = BacktestEnvironment(candles_dict, base_tf="1m", timeframes=timeframes)
# Neural Network dimensions: each timeframe produces 7 features. # Neural network dimensions: each timeframe produces 7 features.
input_dim = len(timeframes) * 7 # 7 features * 5 timeframes = 35. input_dim = len(timeframes) * 7 # 7 features x 5 timeframes = 35.
hidden_dim = 128 hidden_dim = 128
output_dim = 3 # Actions: SELL, HOLD, BUY. output_dim = 3 # Actions: SELL, HOLD, BUY.
model = TradingModel(input_dim, hidden_dim, output_dim) model = TradingModel(input_dim, hidden_dim, output_dim)
optimizer = optim.Adam(model.parameters(), lr=1e-4) optimizer = optim.Adam(model.parameters(), lr=1e-4)
replay_buffer = ReplayBuffer(capacity=10000) replay_buffer = ReplayBuffer(capacity=10000)
rl_agent = ContinuousRLAgent(model, optimizer, replay_buffer, batch_size=32, gamma=0.99) rl_agent = ContinuousRLAgent(model, optimizer, replay_buffer, batch_size=32, gamma=0.99)
# Load best checkpoint if available. # Load best checkpoint if available. (In case of architecture change, it will be skipped.)
load_best_checkpoint(model, BEST_DIR) load_best_checkpoint(model, BEST_DIR)
# Train the agent over the historical period. # Train the agent over the historical period.
num_epochs = 10 # Adjust as needed. num_epochs = 10 # Adjust as needed.
train_on_historical_data(env, rl_agent, num_epochs=num_epochs, epsilon=0.1) train_on_historical_data(env, rl_agent, num_epochs=num_epochs, epsilon=0.1)
# Run a final simulation (without exploration) to record trade history. # Run a final simulation (without exploration) to record trade history.
state = env.reset(clear_trade_history=True) state = env.reset(clear_trade_history=True)
done = False done = False
cumulative_reward = 0.0 cumulative_reward = 0.0
while not done: while not done:
action = rl_agent.act(state, epsilon=0.0) action = rl_agent.act(state, epsilon=0.0)
state, reward, next_state, done = env.step(action) state, reward, next_state, done = env.step(action)
cumulative_reward += reward cumulative_reward += reward
state = next_state state = next_state
print("Final simulation cumulative profit:", cumulative_reward) print("Final simulation cumulative profit:", cumulative_reward)
# Evaluate trade performance. # Evaluate trade performance.
trades = env.trade_history trades = env.trade_history
num_trades = len(trades) num_trades = len(trades)
num_wins = sum(1 for trade in trades if trade["pnl"] > 0) num_wins = sum(1 for trade in trades if trade["pnl"] > 0)
win_rate = (num_wins / num_trades * 100) if num_trades > 0 else 0.0 win_rate = (num_wins / num_trades * 100) if num_trades > 0 else 0.0
total_profit = sum(trade["pnl"] for trade in trades) total_profit = sum(trade["pnl"] for trade in trades)
print(f"Total trades: {num_trades}, Wins: {num_wins}, Win rate: {win_rate:.2f}%, Total Profit: {total_profit:.4f}") print(f"Total trades: {num_trades}, Wins: {num_wins}, Win rate: {win_rate:.2f}%, Total Profit: {total_profit:.4f}")
# Plot chart with buy/sell markers on the base timeframe ("1m"). # Plot chart with buy/sell markers on the base timeframe ("1m").
plot_trade_history(candles_dict["1m"], trades) plot_trade_history(candles_dict["1m"], trades)
finally:
await exchange.close() # Ensure that exchange resources are released even if errors occur.
await exchange.close()
if __name__ == "__main__": if __name__ == "__main__":
load_dotenv() load_dotenv()