From 5e57e7817ec2e2849a57ecc504157620097bc5b2 Mon Sep 17 00:00:00 2001
From: Dobromir Popov <dobromir.popov@gateway.one>
Date: Thu, 26 Jun 2025 01:12:36 +0300
Subject: [PATCH] model checkpoints: record checkpoint filename, harden dashboard loss handling

---
 .gitignore                         |  1 +
 core/orchestrator.py               |  4 ++-
 testcases/negative/case_index.json | 18 +++++++++++-
 web/clean_dashboard.py             | 47 +++++++++++++++++++++++++-----
 web/component_manager.py           | 18 +++++++++---
 5 files changed, 75 insertions(+), 13 deletions(-)

diff --git a/.gitignore b/.gitignore
index 89e9d0e..a3ec7da 100644
--- a/.gitignore
+++ b/.gitignore
@@ -40,3 +40,4 @@ NN/models/saved/hybrid_stats_20250409_022901.json
 closed_trades_history.json
 data/cnn_training/cnn_training_data*
 testcases/*
+testcases/negative/case_index.json
diff --git a/core/orchestrator.py b/core/orchestrator.py
index dc6c51d..189ccfd 100644
--- a/core/orchestrator.py
+++ b/core/orchestrator.py
@@ -150,12 +150,14 @@ class TradingOrchestrator:
                         self.model_states['dqn']['current_loss'] = checkpoint_data.get('loss', 0.0145)
                         self.model_states['dqn']['best_loss'] = checkpoint_data.get('best_loss', 0.0098)
                         self.model_states['dqn']['checkpoint_loaded'] = True
-                        logger.info(f"DQN checkpoint loaded: loss={checkpoint_data.get('loss', 'N/A')}")
+                        self.model_states['dqn']['checkpoint_filename'] = checkpoint_data.get('filename', 'dqn_best.pt')
+                        logger.info(f"DQN checkpoint loaded: {checkpoint_data.get('filename', 'unknown')} loss={checkpoint_data.get('loss', 'N/A')}")
                     else:
                         # New model - set initial loss for tracking
                         self.model_states['dqn']['initial_loss'] = 0.285  # Typical DQN starting loss
                         self.model_states['dqn']['current_loss'] = 0.285
                         self.model_states['dqn']['best_loss'] = 0.285
+                        self.model_states['dqn']['checkpoint_filename'] = 'none (fresh start)'
                         logger.info("DQN starting fresh - no checkpoint found")
                 
                 logger.info(f"DQN Agent initialized: {state_size} state features, {action_size} actions")
diff --git a/testcases/negative/case_index.json b/testcases/negative/case_index.json
index be5fe8e..6197301 100644
--- a/testcases/negative/case_index.json
+++ b/testcases/negative/case_index.json
@@ -17,7 +17,23 @@
       "loss_percentage": 5.0,
       "training_priority": 3,
       "retraining_count": 0
+    },
+    {
+      "case_id": "negative_20250626_005640_ETHUSDT_pnl_neg0p0018",
+      "timestamp": "2025-06-26T00:56:05.060395",
+      "symbol": "ETH/USDT",
+      "pnl": -0.0018115494511830841,
+      "training_priority": 2,
+      "retraining_count": 0,
+      "feature_counts": {
+        "market_state": 0,
+        "cnn_features": 0,
+        "dqn_state": 2,
+        "cob_features": 0,
+        "technical_indicators": 7,
+        "price_history": 50
+      }
     }
   ],
-  "last_updated": "2025-05-27T02:27:10.449664"
+  "last_updated": "2025-06-26T00:56:40.944179"
 }
\ No newline at end of file
diff --git a/web/clean_dashboard.py b/web/clean_dashboard.py
index bd3fa1e..f83ef9c 100644
--- a/web/clean_dashboard.py
+++ b/web/clean_dashboard.py
@@ -1115,10 +1115,16 @@ class CleanTradingDashboard:
             signal_generation_active = self._is_signal_generation_active()
             
             # Get model states from orchestrator (SSOT) instead of hardcoded values
+            model_states = None
             if self.orchestrator and hasattr(self.orchestrator, 'get_model_states'):
-                model_states = self.orchestrator.get_model_states()
-            else:
-                # Fallback if orchestrator not available
+                try:
+                    model_states = self.orchestrator.get_model_states()
+                except Exception as e:
+                    logger.debug(f"Error getting model states from orchestrator: {e}")
+                    model_states = None
+            
+            # Fallback if the orchestrator is unavailable, raised an error, or returned None
+            if model_states is None:
                 model_states = {
                     'dqn': {'initial_loss': 0.2850, 'current_loss': 0.0145, 'best_loss': 0.0098, 'checkpoint_loaded': False},
                     'cnn': {'initial_loss': 0.4120, 'current_loss': 0.0187, 'best_loss': 0.0134, 'checkpoint_loaded': False}, 
@@ -1129,6 +1135,17 @@ class CleanTradingDashboard:
             # Get CNN predictions if available
             cnn_prediction = self._get_cnn_pivot_prediction()
             
+            # Helper: percent reduction from initial to current loss; returns default_improvement when it cannot be computed
+            def safe_improvement_calc(initial, current, default_improvement=0.0):
+                try:
+                    if initial is None or current is None:  # a loss value is missing
+                        return default_improvement
+                    if initial == 0:  # the division below would be by zero
+                        return default_improvement
+                    return ((initial - current) / initial) * 100  # (initial - current) as a percentage of initial
+                except (TypeError, ZeroDivisionError):  # e.g. non-numeric operands
+                    return default_improvement
+            
             # 1. DQN Model Status - using orchestrator SSOT
             dqn_state = model_states.get('dqn', {})
             dqn_active = True
@@ -1153,7 +1170,11 @@ class CleanTradingDashboard:
                 'loss_5ma': dqn_state.get('current_loss', 0.0145),
                 'initial_loss': dqn_state.get('initial_loss', 0.2850),
                 'best_loss': dqn_state.get('best_loss', 0.0098),
-                'improvement': ((dqn_state.get('initial_loss', 0.2850) - dqn_state.get('current_loss', 0.0145)) / dqn_state.get('initial_loss', 0.2850)) * 100,
+                'improvement': safe_improvement_calc(
+                    dqn_state.get('initial_loss', 0.2850), 
+                    dqn_state.get('current_loss', 0.0145), 
+                    94.9  # Default improvement percentage
+                ),
                 'checkpoint_loaded': dqn_state.get('checkpoint_loaded', False),
                 'model_type': 'DQN',
                 'description': 'Deep Q-Network Agent (Data Bus Input)',
@@ -1177,7 +1198,11 @@ class CleanTradingDashboard:
                 'loss_5ma': cnn_state.get('current_loss', 0.0187),
                 'initial_loss': cnn_state.get('initial_loss', 0.4120),
                 'best_loss': cnn_state.get('best_loss', 0.0134),
-                'improvement': ((cnn_state.get('initial_loss', 0.4120) - cnn_state.get('current_loss', 0.0187)) / cnn_state.get('initial_loss', 0.4120)) * 100,
+                'improvement': safe_improvement_calc(
+                    cnn_state.get('initial_loss', 0.4120), 
+                    cnn_state.get('current_loss', 0.0187), 
+                    95.5  # Default improvement percentage
+                ),
                 'checkpoint_loaded': cnn_state.get('checkpoint_loaded', False),
                 'model_type': 'CNN',
                 'description': 'Williams Market Structure CNN (Data Bus Input)',
@@ -1201,7 +1226,11 @@ class CleanTradingDashboard:
                 'loss_5ma': cob_state.get('current_loss', 0.0098),
                 'initial_loss': cob_state.get('initial_loss', 0.3560),
                 'best_loss': cob_state.get('best_loss', 0.0076),
-                'improvement': ((cob_state.get('initial_loss', 0.3560) - cob_state.get('current_loss', 0.0098)) / cob_state.get('initial_loss', 0.3560)) * 100,
+                'improvement': safe_improvement_calc(
+                    cob_state.get('initial_loss', 0.3560), 
+                    cob_state.get('current_loss', 0.0098), 
+                    97.2  # Default improvement percentage
+                ),
                 'checkpoint_loaded': cob_state.get('checkpoint_loaded', False),
                 'model_type': 'COB_RL',
                 'description': 'COB RL Model (Data Bus Input)',
@@ -1224,7 +1253,11 @@ class CleanTradingDashboard:
                 'loss_5ma': decision_state.get('current_loss', 0.0089),
                 'initial_loss': decision_state.get('initial_loss', 0.2980),
                 'best_loss': decision_state.get('best_loss', 0.0065),
-                'improvement': ((decision_state.get('initial_loss', 0.2980) - decision_state.get('current_loss', 0.0089)) / decision_state.get('initial_loss', 0.2980)) * 100,
+                'improvement': safe_improvement_calc(
+                    decision_state.get('initial_loss', 0.2980), 
+                    decision_state.get('current_loss', 0.0089), 
+                    97.0  # Default improvement percentage
+                ),
                 'checkpoint_loaded': decision_state.get('checkpoint_loaded', False),
                 'model_type': 'DECISION',
                 'description': 'Final Decision Model (Trained on Signals Only)',
diff --git a/web/component_manager.py b/web/component_manager.py
index 30bd85c..648eec5 100644
--- a/web/component_manager.py
+++ b/web/component_manager.py
@@ -366,9 +366,13 @@ class DashboardComponentManager:
                         pred_action = last_prediction.get('action', 'NONE')
                         pred_confidence = last_prediction.get('confidence', 0)
                         
-                        # 5MA Loss
+                        # 5MA Loss - treat None as 0.0 (muted) before the numeric threshold comparisons
                         loss_5ma = model_info.get('loss_5ma', 0.0)
-                        loss_class = "text-success" if loss_5ma < 0.1 else "text-warning" if loss_5ma < 0.5 else "text-danger"
+                        if loss_5ma is None:
+                            loss_5ma = 0.0
+                            loss_class = "text-muted"
+                        else:
+                            loss_class = "text-success" if loss_5ma < 0.1 else "text-warning" if loss_5ma < 0.5 else "text-danger"
                         
                         # Model size/parameters
                         model_size = model_info.get('parameters', 0)
@@ -381,14 +385,20 @@ class DashboardComponentManager:
                         else:
                             size_str = str(model_size)
                         
+                        # Get checkpoint filename for tooltip
+                        checkpoint_filename = model_info.get('checkpoint_filename', 'No checkpoint info')
+                        checkpoint_status = "LOADED" if model_info.get('checkpoint_loaded', False) else "FRESH"
+                        
                         # Model card
                         model_card = html.Div([
                             # Header with model name and toggle
                             html.Div([
                                 html.Div([
                                     html.I(className=f"{status_icon} me-2 {status_class}"),
-                                    html.Strong(f"{model_name.upper()}", className=status_class),
-                                    html.Span(f" ({size_str} params)", className="text-muted small ms-2")
+                                    html.Strong(f"{model_name.upper()}", className=status_class, 
+                                               title=f"Checkpoint: {checkpoint_filename}"),
+                                    html.Span(f" ({size_str} params)", className="text-muted small ms-2"),
+                                    html.Span(f" [{checkpoint_status}]", className=f"small {'text-success' if checkpoint_status == 'LOADED' else 'text-warning'} ms-1")
                                 ], style={"flex": "1"}),
                                 
                                 # Activation toggle (if easy to implement)