indents
@@ -4554,52 +4554,52 @@ class TradingOrchestrator:
memory_size = len(getattr(model, "memory", []))
batch_size = getattr(model, "batch_size", 32)

if memory_size >= batch_size:
    self.training_logger.info(f"RL TRAINING STARTED for {model_name.upper()}:")
    self.training_logger.info(f" Experiences: {memory_size}")
    self.training_logger.info(f" Batch size: {batch_size}")
    self.training_logger.info(f" Action: {prediction['action']}")
    self.training_logger.info(f" Reward: {reward:.3f}")

    # Ensure model is in training mode
    if hasattr(model, "policy_net"):
        model.policy_net.train()

    training_start_time = time.time()
    training_loss = model.replay()
    training_duration_ms = (time.time() - training_start_time) * 1000

    if training_loss is not None and training_loss > 0:
        self.update_model_loss(model_name, training_loss)
        self._update_model_training_statistics(
            model_name, training_loss, training_duration_ms
        )
        self.training_logger.info(f"RL TRAINING COMPLETED for {model_name.upper()}:")
        self.training_logger.info(f" Loss: {training_loss:.4f}")
        self.training_logger.info(f" Training time: {training_duration_ms:.1f}ms")
        self.training_logger.info(f" Experiences used: {memory_size}")
        self.training_logger.info(f" Action: {prediction['action']}")
        self.training_logger.info(f" Reward: {reward:.3f}")
        self.training_logger.info(f" State shape: {state.shape}")
        return True
    elif training_loss == 0.0:
        logger.warning(
            f"RL training returned zero loss for {model_name} - possible gradient issue"
        )
        # Still update training statistics
        self._update_model_training_statistics(
            model_name, training_duration_ms=training_duration_ms
        )
        return False  # Training failed
    else:
        # Still update training statistics even if no loss returned
        self._update_model_training_statistics(
            model_name, training_duration_ms=training_duration_ms
        )
else:
    logger.debug(
        f"Not enough experiences for {model_name}: {memory_size}/{batch_size}"
    )
    return True  # Experience added successfully, training will happen later

return False
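
The hunk above assumes only a small contract from the RL model: a memory buffer, a batch_size, an optional policy_net, and a replay() that trains on one sampled batch and returns the loss as a float (or None when training is skipped). Below is a minimal sketch of an agent satisfying that contract; the DQNAgent name, network shape, and hyperparameters are illustrative assumptions, not this repository's actual model.

    import random
    from collections import deque

    import torch
    import torch.nn as nn


    class DQNAgent:
        """Illustrative agent exposing the attributes the orchestrator uses:
        memory, batch_size, policy_net, and replay() -> float | None."""

        def __init__(self, state_dim: int, n_actions: int, batch_size: int = 32):
            self.memory = deque(maxlen=10_000)  # experience replay buffer
            self.batch_size = batch_size
            self.policy_net = nn.Sequential(
                nn.Linear(state_dim, 64), nn.ReLU(), nn.Linear(64, n_actions)
            )
            self.optimizer = torch.optim.Adam(self.policy_net.parameters(), lr=1e-3)
            self.gamma = 0.99  # discount factor

        def remember(self, state, action, reward, next_state, done):
            self.memory.append((state, action, reward, next_state, done))

        def replay(self):
            # Same gate the orchestrator applies: skip until a full batch exists.
            if len(self.memory) < self.batch_size:
                return None
            batch = random.sample(list(self.memory), self.batch_size)
            states, actions, rewards, next_states, dones = zip(*batch)
            states = torch.as_tensor(states, dtype=torch.float32)
            next_states = torch.as_tensor(next_states, dtype=torch.float32)
            actions = torch.as_tensor(actions, dtype=torch.int64)
            rewards = torch.as_tensor(rewards, dtype=torch.float32)
            dones = torch.as_tensor(dones, dtype=torch.float32)

            # Q(s, a) for the actions actually taken.
            q_values = self.policy_net(states).gather(1, actions.unsqueeze(1)).squeeze(1)
            # One-step TD target; a separate target network is omitted for brevity.
            with torch.no_grad():
                next_q = self.policy_net(next_states).max(dim=1).values
            targets = rewards + self.gamma * next_q * (1.0 - dones)

            loss = nn.functional.mse_loss(q_values, targets)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            return loss.item()

Under this contract, the orchestrator's memory_size >= batch_size gate prevents sampling more experiences than exist, and a returned loss of exactly 0.0 is treated upstream as a possible gradient problem rather than a successful step.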