fix checkpoint model loading

This commit is contained in:
Dobromir Popov 2025-02-02 21:04:23 +02:00
parent 79c51c0d5d
commit 0ebf4e13bd
2 changed files with 60 additions and 51 deletions

File diff suppressed because one or more lines are too long

View File

@ -47,7 +47,7 @@ def save_candles_cache(filename, candles_dict):
print("Error saving cache file:", e)
# -------------------------------------
# Checkpoint Functions (same as before)
# Checkpoint Functions
# -------------------------------------
def maintain_checkpoint_directory(directory, max_files=10):
files = os.listdir(directory)
@ -100,6 +100,8 @@ def save_checkpoint(model, epoch, reward, last_dir=LAST_DIR, best_dir=BEST_DIR):
print(f"Saved checkpoint for epoch {epoch} with reward {reward:.4f}")
def load_best_checkpoint(model, best_dir=BEST_DIR):
"""Attempt to load the best checkpoint. If the architecture is different,
catch the RuntimeError and skip loading."""
best_models = get_best_models(best_dir)
if not best_models:
return None
@ -107,7 +109,13 @@ def load_best_checkpoint(model, best_dir=BEST_DIR):
path = os.path.join(best_dir, best_file)
print(f"Loading best model from checkpoint: {best_file} with reward {best_reward:.4f}")
checkpoint = torch.load(path)
model.load_state_dict(checkpoint["model_state_dict"])
try:
model.load_state_dict(checkpoint["model_state_dict"])
except RuntimeError as e:
print("Warning: Failed to load best checkpoint due to:")
print(e)
print("This is likely due to a change in model architecture. Skipping checkpoint load.")
return None
return checkpoint
# -------------------------------------
@ -283,7 +291,6 @@ class BacktestEnvironment:
base_ts = base_candle["timestamp"]
for tf in self.timeframes:
candles_list = self.candles_dict[tf]
# Get the candle from this timeframe that is closest to (and <=) base_ts.
aligned_index, _ = get_aligned_candle_with_index(candles_list, base_ts)
features = get_features_for_tf(candles_list, aligned_index, period=10)
state_features.extend(features)
@ -392,15 +399,15 @@ def train_on_historical_data(env, rl_agent, num_epochs=10, epsilon=0.1):
# -------------------------------------
async def main_backtest():
symbol = 'BTC/USDT'
# Define timeframes: we'll use 5 different ones.
# Define timeframes: 5 different ones.
timeframes = ["1m", "5m", "15m", "1h", "1d"]
now = int(time.time() * 1000)
# Use the base timeframe period of 1500 candles. For 1m, that is 1500 minutes.
# For base timeframe 1m, get 1500 candles (1500 minutes)
period_ms = 1500 * 60 * 1000
since = now - period_ms
end_time = now
# Initialize exchange using MEXC (or your preferred exchange).
# Initialize exchange using MEXC
mexc_api_key = os.environ.get('MEXC_API_KEY', 'YOUR_API_KEY')
mexc_api_secret = os.environ.get('MEXC_API_SECRET', 'YOUR_SECRET_KEY')
exchange = ccxt.mexc({
@ -409,58 +416,60 @@ async def main_backtest():
'enableRateLimit': True,
})
candles_dict = {}
for tf in timeframes:
print(f"Fetching historical data for timeframe {tf}...")
candles = await fetch_historical_data(exchange, symbol, tf, since, end_time, batch_size=500)
candles_dict[tf] = candles
try:
candles_dict = {}
for tf in timeframes:
print(f"Fetching historical data for timeframe {tf}...")
candles = await fetch_historical_data(exchange, symbol, tf, since, end_time, batch_size=500)
candles_dict[tf] = candles
# Optionally, save the multi-timeframe cache.
save_candles_cache(CACHE_FILE, candles_dict)
# Optionally, save the multi-timeframe cache.
save_candles_cache(CACHE_FILE, candles_dict)
# Create the backtest environment using multi-timeframe data.
env = BacktestEnvironment(candles_dict, base_tf="1m", timeframes=timeframes)
# Neural Network dimensions: each timeframe produces 7 features.
input_dim = len(timeframes) * 7 # 7 features * 5 timeframes = 35.
hidden_dim = 128
output_dim = 3 # Actions: SELL, HOLD, BUY.
# Create the backtest environment using multi-timeframe data.
env = BacktestEnvironment(candles_dict, base_tf="1m", timeframes=timeframes)
# Neural network dimensions: each timeframe produces 7 features.
input_dim = len(timeframes) * 7 # 7 features x 5 timeframes = 35.
hidden_dim = 128
output_dim = 3 # Actions: SELL, HOLD, BUY.
model = TradingModel(input_dim, hidden_dim, output_dim)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
replay_buffer = ReplayBuffer(capacity=10000)
rl_agent = ContinuousRLAgent(model, optimizer, replay_buffer, batch_size=32, gamma=0.99)
model = TradingModel(input_dim, hidden_dim, output_dim)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
replay_buffer = ReplayBuffer(capacity=10000)
rl_agent = ContinuousRLAgent(model, optimizer, replay_buffer, batch_size=32, gamma=0.99)
# Load best checkpoint if available.
load_best_checkpoint(model, BEST_DIR)
# Load best checkpoint if available. (In case of architecture change, it will be skipped.)
load_best_checkpoint(model, BEST_DIR)
# Train the agent over the historical period.
num_epochs = 10 # Adjust as needed.
train_on_historical_data(env, rl_agent, num_epochs=num_epochs, epsilon=0.1)
# Train the agent over the historical period.
num_epochs = 10 # Adjust as needed.
train_on_historical_data(env, rl_agent, num_epochs=num_epochs, epsilon=0.1)
# Run a final simulation (without exploration) to record trade history.
state = env.reset(clear_trade_history=True)
done = False
cumulative_reward = 0.0
while not done:
action = rl_agent.act(state, epsilon=0.0)
state, reward, next_state, done = env.step(action)
cumulative_reward += reward
state = next_state
print("Final simulation cumulative profit:", cumulative_reward)
# Run a final simulation (without exploration) to record trade history.
state = env.reset(clear_trade_history=True)
done = False
cumulative_reward = 0.0
while not done:
action = rl_agent.act(state, epsilon=0.0)
state, reward, next_state, done = env.step(action)
cumulative_reward += reward
state = next_state
print("Final simulation cumulative profit:", cumulative_reward)
# Evaluate trade performance.
trades = env.trade_history
num_trades = len(trades)
num_wins = sum(1 for trade in trades if trade["pnl"] > 0)
win_rate = (num_wins / num_trades * 100) if num_trades > 0 else 0.0
total_profit = sum(trade["pnl"] for trade in trades)
print(f"Total trades: {num_trades}, Wins: {num_wins}, Win rate: {win_rate:.2f}%, Total Profit: {total_profit:.4f}")
# Evaluate trade performance.
trades = env.trade_history
num_trades = len(trades)
num_wins = sum(1 for trade in trades if trade["pnl"] > 0)
win_rate = (num_wins / num_trades * 100) if num_trades > 0 else 0.0
total_profit = sum(trade["pnl"] for trade in trades)
print(f"Total trades: {num_trades}, Wins: {num_wins}, Win rate: {win_rate:.2f}%, Total Profit: {total_profit:.4f}")
# Plot chart with buy/sell markers on the base timeframe ("1m").
plot_trade_history(candles_dict["1m"], trades)
await exchange.close()
# Plot chart with buy/sell markers on the base timeframe ("1m").
plot_trade_history(candles_dict["1m"], trades)
finally:
# Ensure that exchange resources are released even if errors occur.
await exchange.close()
if __name__ == "__main__":
load_dotenv()