fixes
This commit is contained in:
@ -1454,9 +1454,10 @@ class EnhancedRealtimeTrainingSystem:
|
||||
model.train()
|
||||
optimizer.zero_grad()
|
||||
|
||||
# Convert numpy arrays to PyTorch tensors
|
||||
features_tensor = torch.from_numpy(features).float()
|
||||
targets_tensor = torch.from_numpy(targets).long()
|
||||
# Convert numpy arrays to PyTorch tensors and move to device
|
||||
device = next(model.parameters()).device
|
||||
features_tensor = torch.from_numpy(features).float().to(device)
|
||||
targets_tensor = torch.from_numpy(targets).long().to(device)
|
||||
|
||||
# Ensure features_tensor has the correct shape for CNN (batch_size, channels, height, width)
|
||||
# Assuming features are flattened (batch_size, 15*20) and need to be reshaped to (batch_size, 1, 15, 20)
|
||||
@ -1471,7 +1472,21 @@ class EnhancedRealtimeTrainingSystem:
|
||||
# If the CNN expects (batch_size, channels, sequence_length)
|
||||
# features_tensor = features_tensor.view(features_tensor.shape[0], 1, 15 * 20) # Example for 1D CNN
|
||||
|
||||
# Let's assume the CNN expects 2D input (batch_size, flattened_features)
|
||||
# Ensure proper shape for CNN input
|
||||
if len(features_tensor.shape) == 2:
|
||||
# If it's (batch_size, features), keep as is for 1D CNN
|
||||
pass
|
||||
elif len(features_tensor.shape) == 1:
|
||||
# If it's (features), add batch dimension
|
||||
features_tensor = features_tensor.unsqueeze(0)
|
||||
else:
|
||||
# Reshape to (batch_size, features) if needed
|
||||
features_tensor = features_tensor.view(features_tensor.shape[0], -1)
|
||||
|
||||
# Limit input size to prevent shape mismatches
|
||||
if features_tensor.shape[1] > 1000: # Limit to 1000 features
|
||||
features_tensor = features_tensor[:, :1000]
|
||||
|
||||
outputs = model(features_tensor)
|
||||
|
||||
loss = criterion(outputs, targets_tensor)
|
||||
@ -1857,12 +1872,17 @@ class EnhancedRealtimeTrainingSystem:
|
||||
and self.orchestrator.rl_agent):
|
||||
|
||||
# Get Q-values from model
|
||||
q_values = self.orchestrator.rl_agent.act(current_state, return_q_values=True)
|
||||
if isinstance(q_values, tuple):
|
||||
action, q_vals = q_values
|
||||
q_values = q_vals.tolist() if hasattr(q_vals, 'tolist') else [0, 0, 0]
|
||||
action = self.orchestrator.rl_agent.act(current_state, explore=False)
|
||||
# Get Q-values separately if available
|
||||
if hasattr(self.orchestrator.rl_agent, 'policy_net'):
|
||||
with torch.no_grad():
|
||||
state_tensor = torch.FloatTensor(current_state).unsqueeze(0).to(self.orchestrator.rl_agent.device)
|
||||
q_values_tensor = self.orchestrator.rl_agent.policy_net(state_tensor)
|
||||
if isinstance(q_values_tensor, tuple):
|
||||
q_values = q_values_tensor[0].cpu().numpy()[0].tolist()
|
||||
else:
|
||||
q_values = q_values_tensor.cpu().numpy()[0].tolist()
|
||||
else:
|
||||
action = q_values
|
||||
q_values = [0.33, 0.33, 0.34] # Default uniform distribution
|
||||
|
||||
confidence = max(q_values) / sum(q_values) if sum(q_values) > 0 else 0.33
|
||||
|
Reference in New Issue
Block a user