commit 14086a898e
parent 36f429a0e2
Author: Dobromir Popov
Date:   2025-07-30 11:42:04 +03:00


@@ -4554,52 +4554,52 @@ class TradingOrchestrator:
memory_size = len(getattr(model, "memory", []))
batch_size = getattr(model, "batch_size", 32)
if memory_size >= batch_size:
    self.training_logger.info(f"RL TRAINING STARTED for {model_name.upper()}:")
    self.training_logger.info(f" Experiences: {memory_size}")
    self.training_logger.info(f" Batch size: {batch_size}")
    self.training_logger.info(f" Action: {prediction['action']}")
    self.training_logger.info(f" Reward: {reward:.3f}")

    # Ensure model is in training mode
    if hasattr(model, "policy_net"):
        model.policy_net.train()

    training_start_time = time.time()
    training_loss = model.replay()
    training_duration_ms = (time.time() - training_start_time) * 1000

    if training_loss is not None and training_loss > 0:
        self.update_model_loss(model_name, training_loss)
        self._update_model_training_statistics(
            model_name, training_loss, training_duration_ms
        )
        self.training_logger.info(f"RL TRAINING COMPLETED for {model_name.upper()}:")
        self.training_logger.info(f" Loss: {training_loss:.4f}")
        self.training_logger.info(f" Training time: {training_duration_ms:.1f}ms")
        self.training_logger.info(f" Experiences used: {memory_size}")
        self.training_logger.info(f" Action: {prediction['action']}")
        self.training_logger.info(f" Reward: {reward:.3f}")
        self.training_logger.info(f" State shape: {state.shape}")
        return True
    elif training_loss == 0.0:
        logger.warning(
            f"RL training returned zero loss for {model_name} - possible gradient issue"
        )
        # Still update training statistics
        self._update_model_training_statistics(
            model_name, training_duration_ms=training_duration_ms
        )
        return False  # Training failed
    else:
        # Still update training statistics even if no loss returned
        self._update_model_training_statistics(
            model_name, training_duration_ms=training_duration_ms
        )
else:
    logger.debug(
        f"Not enough experiences for {model_name}: {memory_size}/{batch_size}"
    )
return True  # Experience added successfully, training will happen later

return False
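
The orchestrator code in this hunk only assumes a small interface on `model`: a `memory` buffer whose length can be taken, a `batch_size` attribute, an optional `policy_net` that can be switched into training mode, and a `replay()` method that runs one training step and returns the loss (with `None` or `0.0` treated as "no usable step"). Below is a minimal sketch of an agent satisfying that interface, assuming a PyTorch DQN-style setup; it is not part of this commit, and the class name, network shape, and the `remember()` helper are illustrative assumptions rather than code from this repository.

import random
from collections import deque

import torch
import torch.nn as nn


class SketchDQNAgent:
    """Hypothetical stand-in for the RL model trained by the orchestrator above."""

    def __init__(self, state_dim=8, n_actions=3, batch_size=32, gamma=0.99):
        self.memory = deque(maxlen=10_000)  # len(self.memory) is what the orchestrator checks
        self.batch_size = batch_size        # training gate: replay only once enough experiences exist
        self.gamma = gamma
        self.policy_net = nn.Sequential(    # put into train() mode by the orchestrator
            nn.Linear(state_dim, 64), nn.ReLU(), nn.Linear(64, n_actions)
        )
        self.optimizer = torch.optim.Adam(self.policy_net.parameters(), lr=1e-3)

    def remember(self, state, action, reward, next_state, done):
        """Store one experience tuple in the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def replay(self):
        """Run one gradient step over a random batch and return the loss as a float."""
        if len(self.memory) < self.batch_size:
            return None  # nothing to train on yet

        batch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = map(list, zip(*batch))
        states = torch.as_tensor(states, dtype=torch.float32)
        next_states = torch.as_tensor(next_states, dtype=torch.float32)
        actions = torch.as_tensor(actions, dtype=torch.int64).unsqueeze(1)
        rewards = torch.as_tensor(rewards, dtype=torch.float32)
        dones = torch.as_tensor(dones, dtype=torch.float32)

        # Standard one-step TD target with the same network for online and target values.
        q_values = self.policy_net(states).gather(1, actions).squeeze(1)
        with torch.no_grad():
            next_q = self.policy_net(next_states).max(1).values
        targets = rewards + self.gamma * next_q * (1.0 - dones)

        loss = nn.functional.smooth_l1_loss(q_values, targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return float(loss.item())


# Example: fill the buffer past batch_size, then one replay() step yields a loss.
agent = SketchDQNAgent(state_dim=4, n_actions=2, batch_size=8)
for _ in range(8):
    s = [0.0, 0.0, 0.0, 0.0]
    agent.remember(s, action=0, reward=1.0, next_state=s, done=False)
agent.policy_net.train()
print(agent.replay())  # a float, analogous to training_loss in the orchestrator

With an agent like this, the `memory_size >= batch_size` gate in the orchestrator simply delays the first `replay()` call until enough experiences have been collected, and the returned loss feeds `update_model_loss` and the per-model training statistics.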