data input audit, cleanup

Dobromir Popov
2025-08-08 17:16:05 +03:00
parent 2b0d2679c6
commit 78b96c10af
8 changed files with 335 additions and 182 deletions

core/api_rate_limiter.py (new file, 159 lines)

@@ -0,0 +1,159 @@
"""
Simple shared API rate limiter and HTTP client wrapper.
Provides a singleton via get_rate_limiter() used by data providers
to throttle external API calls (e.g., Binance) and centralize retries.
Windows-safe, ASCII-only logging.
"""
from __future__ import annotations
import threading
import time
import logging
from typing import Any, Dict, Optional, Tuple
try:
import requests
except Exception: # requests should be available; fail lazily in make_request
requests = None # type: ignore
logger = logging.getLogger(__name__)
class RateLimiter:
"""Basic per-key interval limiter with retry/backoff HTTP wrapper.
- Per-key min interval between calls
- Optional simple error backoff per key
- Thread-safe
"""
def __init__(self, config: Optional[Dict[str, Dict[str, Any]]] = None) -> None:
# Defaults tailored for typical public REST endpoints
self._config: Dict[str, Dict[str, Any]] = config or {
# Binance REST defaults
"binance_api": {
"min_interval": 0.25, # seconds between calls (~4 req/s)
"timeout": 10.0,
"max_retries": 2,
"backoff_base": 0.75, # seconds
"backoff_factor": 2.0,
}
}
self._lock = threading.Lock()
self._last_request_ts: Dict[str, float] = {}
self._error_backoff_until: Dict[str, float] = {}
def _now(self) -> float:
return time.monotonic()
def _get_key_config(self, key: str) -> Dict[str, Any]:
return self._config.get(key, {
"min_interval": 0.5,
"timeout": 10.0,
"max_retries": 1,
"backoff_base": 0.5,
"backoff_factor": 2.0,
})
def can_make_request(self, key: str) -> Tuple[bool, float]:
"""Return (can_request, wait_time_seconds)."""
cfg = self._get_key_config(key)
now = self._now()
with self._lock:
min_interval = float(cfg.get("min_interval", 0.5))
last = self._last_request_ts.get(key, 0.0)
ready_at = last + min_interval
# Respect error backoff if set
backoff_until = self._error_backoff_until.get(key, 0.0)
ready_at = max(ready_at, backoff_until)
if now >= ready_at:
return True, 0.0
return False, max(0.0, ready_at - now)
def _note_request(self, key: str) -> None:
with self._lock:
self._last_request_ts[key] = self._now()
def _note_error(self, key: str, attempt_index: int) -> None:
cfg = self._get_key_config(key)
base = float(cfg.get("backoff_base", 0.5))
factor = float(cfg.get("backoff_factor", 2.0))
delay = base * (factor ** max(0, attempt_index))
with self._lock:
self._error_backoff_until[key] = max(self._error_backoff_until.get(key, 0.0), self._now() + delay)
def make_request(
self,
key: str,
url: str,
method: str = "GET",
params: Optional[Dict[str, Any]] = None,
data: Optional[Any] = None,
headers: Optional[Dict[str, str]] = None,
) -> Optional["requests.Response"]:
"""Perform an HTTP request with per-key rate limiting and retries.
Returns Response or None on failure.
"""
cfg = self._get_key_config(key)
timeout = float(cfg.get("timeout", 10.0))
max_retries = int(cfg.get("max_retries", 1))
if requests is None:
logger.error("requests library not available")
return None
method_upper = (method or "GET").upper()
for attempt in range(max_retries + 1):
can, wait = self.can_make_request(key)
if not can:
time.sleep(min(wait, 5.0)) # cap local wait
self._note_request(key)
try:
resp = requests.request(
method=method_upper,
url=url,
params=params,
data=data,
headers=headers,
timeout=timeout,
)
# If the server imposes a rate limit or a transient error occurs, back off and retry
if resp is None or resp.status_code >= 500 or resp.status_code in (408, 429):
logger.warning(
"HTTP retry %s for key %s status %s", attempt, key, getattr(resp, "status_code", "n/a")
)
self._note_error(key, attempt)
if attempt < max_retries:
continue
return resp
return resp
except Exception as ex:
logger.warning("HTTP error on %s %s: %s", method_upper, url, ex)
self._note_error(key, attempt)
if attempt < max_retries:
continue
return None
_singleton: Optional[RateLimiter] = None
def get_rate_limiter() -> RateLimiter:
global _singleton
if _singleton is None:
_singleton = RateLimiter()
return _singleton
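
For orientation, a minimal caller-side sketch of how a data provider might use this module (the klines URL and parameters below are illustrative assumptions; only get_rate_limiter() and make_request() come from the file above):

# Hypothetical caller-side sketch, not part of this commit.
from core.api_rate_limiter import get_rate_limiter

def fetch_recent_klines(symbol: str = "ETHUSDT", interval: str = "1s", limit: int = 120):
    """Fetch recent klines through the shared limiter; returns parsed JSON or None."""
    limiter = get_rate_limiter()  # process-wide singleton shared by all providers
    resp = limiter.make_request(
        key="binance_api",  # matches the default per-key config above (~4 req/s)
        url="https://api.binance.com/api/v3/klines",  # assumed endpoint, for illustration
        params={"symbol": symbol, "interval": interval, "limit": limit},
    )
    if resp is None or resp.status_code != 200:
        return None  # make_request has already retried and logged; caller degrades gracefully
    return resp.json()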

View File

@@ -2363,6 +2363,12 @@ class TradingOrchestrator:
cnn_pred,
inference_duration_ms=inference_duration_ms,
)
# Save audit image of inputs used for this inference
try:
from utils.audit_plotter import save_inference_audit_image
save_inference_audit_image(base_data, model_name=model_name, symbol=symbol, out_root="audit_inputs")
except Exception as _audit_ex:
logger.debug(f"Audit image save skipped: {str(_audit_ex)}")
await self._store_inference_data_async(
model_name, model_input, cnn_pred, current_time, symbol
)
@@ -2387,6 +2393,12 @@
prediction,
inference_duration_ms=inference_duration_ms,
)
# Save audit image of inputs used for this inference
try:
from utils.audit_plotter import save_inference_audit_image
save_inference_audit_image(base_data, model_name=model_name, symbol=symbol, out_root="audit_inputs")
except Exception as _audit_ex:
logger.debug(f"Audit image save skipped: {str(_audit_ex)}")
# Store input data for RL
await self._store_inference_data_async(
model_name, model_input, prediction, current_time, symbol
@@ -2412,6 +2424,12 @@
prediction,
inference_duration_ms=inference_duration_ms,
)
# Save audit image of inputs used for this inference
try:
from utils.audit_plotter import save_inference_audit_image
save_inference_audit_image(base_data, model_name=model_name, symbol=symbol, out_root="audit_inputs")
except Exception as _audit_ex:
logger.debug(f"Audit image save skipped: {str(_audit_ex)}")
# Store input data for generic model
await self._store_inference_data_async(
model_name, model_input, prediction, current_time, symbol
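
All three hunks add the same guarded call; purely as a sketch (the helper name _save_audit_snapshot is hypothetical and not part of this commit), the repeated pattern is equivalent to:

# Hypothetical helper mirroring the guarded pattern added above; not in this commit.
def _save_audit_snapshot(self, base_data, model_name: str, symbol: str) -> None:
    """Best-effort audit image of model inputs; plotting failures must never affect inference."""
    try:
        from utils.audit_plotter import save_inference_audit_image
        save_inference_audit_image(base_data, model_name=model_name, symbol=symbol, out_root="audit_inputs")
    except Exception as _audit_ex:
        logger.debug(f"Audit image save skipped: {str(_audit_ex)}")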

View File

@@ -1,18 +0,0 @@
# Debug Files
This folder contains debug scripts and utilities for troubleshooting various components of the trading system.
## Contents
- `debug_callback_simple.py` - Simple callback debugging
- `debug_dashboard.py` - Dashboard debugging utilities
- `debug_dashboard_500.py` - Dashboard 500 error debugging
- `debug_dashboard_issue.py` - Dashboard issue debugging
- `debug_mexc_auth.py` - MEXC authentication debugging
- `debug_orchestrator_methods.py` - Orchestrator method debugging
- `debug_simple_callback.py` - Simple callback testing
- `debug_trading_activity.py` - Trading activity debugging
## Usage
These files are used for debugging specific issues and should not be run in production. They contain diagnostic code and temporary fixes for troubleshooting purposes.

View File

@@ -1,22 +0,0 @@
from last session
Recent Closed Trades
Trading Performance
Win Rate: 64.3% (9W/5L/0B)
Avg Win: $5.79
Avg Loss: $1.86
Total Fees: $0.00
Time Side Size Entry Exit Hold (s) P&L Fees
14:40:24 SHORT $14.00 $3656.53 $3672.06 203 $-2.99 $0.008
14:44:23 SHORT $14.64 $3656.53 $3669.76 289 $-2.67 $0.009
14:50:29 SHORT $8.96 $3656.53 $3670.09 271 $-1.67 $0.005
14:55:06 SHORT $7.17 $3656.53 $3669.79 705 $-1.31 $0.004
15:12:58 SHORT $7.49 $3676.92 $3675.01 1125 $0.19 $0.004
15:37:20 SHORT $5.97 $3676.92 $3665.79 213 $0.90 $0.004
15:41:04 SHORT $18.12 $3676.92 $3652.71 192 $5.94 $0.011
15:44:42 SHORT $18.16 $3676.92 $3645.10 1040 $7.83 $0.011
16:02:26 SHORT $14.00 $3676.92 $3634.75 207 $8.01 $0.008
16:06:04 SHORT $14.00 $3676.92 $3636.67 70 $7.65 $0.008
16:07:43 SHORT $14.00 $3676.92 $3636.57 12 $7.67 $0.008
16:08:16 SHORT $14.00 $3676.92 $3644.75 280 $6.11 $0.008
16:13:16 SHORT $18.08 $3676.92 $3645.44 10 $7.72 $0.011
16:13:37 SHORT $17.88 $3647.54 $3650.26 90 $-0.69 $0.011

View File

@@ -1,38 +0,0 @@
# Kill stale Python dashboard processes
# Enhanced version with better error handling and logging
Write-Host "Checking for stale Python dashboard processes..."
try {
# Get all Python processes
$pythonProcesses = Get-Process python -ErrorAction SilentlyContinue
if ($pythonProcesses) {
# Filter for dashboard processes
$dashboardProcesses = $pythonProcesses | Where-Object {
$_.ProcessName -eq 'python' -and
$_.MainWindowTitle -like '*dashboard*'
}
if ($dashboardProcesses) {
Write-Host "Found $($dashboardProcesses.Count) dashboard process(es) to kill:"
foreach ($process in $dashboardProcesses) {
Write-Host " - PID: $($process.Id), Title: $($process.MainWindowTitle)"
}
# Kill the processes
$dashboardProcesses | Stop-Process -Force -ErrorAction SilentlyContinue
Write-Host "Successfully killed $($dashboardProcesses.Count) dashboard process(es)"
} else {
Write-Host "No dashboard processes found to kill"
}
} else {
Write-Host "No Python processes found"
}
} catch {
Write-Host "Error checking for processes: $($_.Exception.Message)"
}
# Wait a moment for processes to fully terminate
Start-Sleep -Seconds 1
Write-Host "Process cleanup completed"

View File

@@ -1,90 +0,0 @@
# Overnight Training Restart Script (PowerShell)
# Keeps main.py running continuously, restarting it if it crashes.
# Usage: .\restart_main_overnight.ps1
Write-Host "=" * 60
Write-Host "OVERNIGHT TRAINING RESTART SCRIPT (PowerShell)"
Write-Host "=" * 60
Write-Host "Press Ctrl+C to stop the restart loop"
Write-Host "Main script: main.py"
Write-Host "Restart delay on crash: 10 seconds"
Write-Host "=" * 60
$restartCount = 0
$startTime = Get-Date
# Create logs directory if it doesn't exist
if (!(Test-Path "logs")) {
New-Item -ItemType Directory -Path "logs"
}
# Setup log file
$timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
$logFile = "logs\restart_main_ps_$timestamp.log"
function Write-Log {
param($Message)
$timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
$logMessage = "$timestamp - $Message"
Write-Host $logMessage
Add-Content -Path $logFile -Value $logMessage
}
Write-Log "Restart script started, logging to: $logFile"
# Kill any existing Python processes
try {
Get-Process python* -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
Start-Sleep -Seconds 2
Write-Log "Killed existing Python processes"
} catch {
Write-Log "Could not kill existing processes: $_"
}
try {
while ($true) {
$restartCount++
$runStartTime = Get-Date
Write-Log "[RESTART #$restartCount] Starting main.py at $(Get-Date -Format 'HH:mm:ss')"
# Start main.py
try {
$process = Start-Process -FilePath "python" -ArgumentList "main.py" -PassThru -Wait
$exitCode = $process.ExitCode
$runEndTime = Get-Date
$runDuration = ($runEndTime - $runStartTime).TotalSeconds
Write-Log "[EXIT] main.py exited with code $exitCode"
Write-Log "[DURATION] Process ran for $([math]::Round($runDuration, 1)) seconds"
# Check for fast exits
if ($runDuration -lt 30) {
Write-Log "[FAST EXIT] Process exited quickly, waiting 30 seconds..."
Start-Sleep -Seconds 30
} else {
Write-Log "[DELAY] Waiting 10 seconds before restart..."
Start-Sleep -Seconds 10
}
# Log stats every 10 restarts
if ($restartCount % 10 -eq 0) {
$totalDuration = (Get-Date) - $startTime
Write-Log "[STATS] Session: $restartCount restarts in $([math]::Round($totalDuration.TotalHours, 1)) hours"
}
} catch {
Write-Log "[ERROR] Error starting main.py: $_"
Start-Sleep -Seconds 10
}
}
} catch {
Write-Log "[INTERRUPT] Restart loop interrupted: $_"
} finally {
$totalDuration = (Get-Date) - $startTime
Write-Log "=" * 60
Write-Log "OVERNIGHT TRAINING SESSION COMPLETE"
Write-Log "Total restarts: $restartCount"
Write-Log "Total session time: $([math]::Round($totalDuration.TotalHours, 1)) hours"
Write-Log "=" * 60
}

View File

@@ -1,14 +0,0 @@
# PowerShell script to start live trading demo and TensorBoard
Write-Host "Starting Trading Bot Live Demo..." -ForegroundColor Green
# Create a new PowerShell window for TensorBoard
Start-Process powershell -ArgumentList "-Command python run_tensorboard.py" -WindowStyle Normal
# Wait a moment for TensorBoard to start
Write-Host "Starting TensorBoard... Please wait" -ForegroundColor Yellow
Start-Sleep -Seconds 5
# Start the live trading demo in the current window
Write-Host "Starting Live Trading Demo with mock data..." -ForegroundColor Green
python run_live_demo.py --symbol ETH/USDT --timeframe 1m --model models/trading_agent_best_pnl.pt --mock

utils/audit_plotter.py (new file, 158 lines)

@@ -0,0 +1,158 @@
"""
Audit Plotter
Create PNG snapshots of model input data at inference time:
- Subplot 1: 1s candlesticks for recent window
- Subplot 2: COB bucket volumes and imbalance near current price
Windows-safe, ASCII-only logging messages.
"""
from __future__ import annotations
import os
import math
import logging
from datetime import datetime
from typing import List, Tuple
try:
import matplotlib
# Use a non-interactive backend suitable for headless servers
matplotlib.use("Agg")
import matplotlib.pyplot as plt
except Exception:
matplotlib = None # type: ignore
plt = None # type: ignore
logger = logging.getLogger(__name__)
def _extract_recent_ohlcv(base_data, max_bars: int = 120) -> Tuple[List[datetime], List[float], List[float], List[float], List[float]]:
"""Return recent 1s OHLCV arrays (time, open, high, low, close). Falls back to 1m if needed."""
series = base_data.ohlcv_1s if getattr(base_data, "ohlcv_1s", None) else []
if not series or len(series) < 5:
series = base_data.ohlcv_1m if getattr(base_data, "ohlcv_1m", None) else []
series = series[-max_bars:] if series else []
times = [b.timestamp for b in series]
opens = [float(b.open) for b in series]
highs = [float(b.high) for b in series]
lows = [float(b.low) for b in series]
closes = [float(b.close) for b in series]
return times, opens, highs, lows, closes
def _extract_cob(base_data, max_buckets: int = 40):
"""Return sorted price buckets and metrics from COBData."""
cob = getattr(base_data, "cob_data", None)
if cob is None or not getattr(cob, "price_buckets", None):
return [], [], [], []
# Sort by price and clip
prices = sorted(list(cob.price_buckets.keys()))[:max_buckets]
bid_vol = []
ask_vol = []
imb = []
for p in prices:
bucket = cob.price_buckets.get(p, {})
b = float(bucket.get("bid_volume", 0.0))
a = float(bucket.get("ask_volume", 0.0))
bid_vol.append(b)
ask_vol.append(a)
denom = (b + a) if (b + a) > 0 else 1.0
imb.append((b - a) / denom)
return prices, bid_vol, ask_vol, imb
def save_inference_audit_image(base_data, model_name: str, symbol: str, out_root: str = "audit_inputs") -> str:
"""Save a PNG snapshot of input data. Returns path if saved, else empty string."""
if matplotlib is None or plt is None:
logger.warning("matplotlib not available; skipping audit image")
return ""
try:
# Ensure output directory structure
day_dir = datetime.utcnow().strftime("%Y%m%d")
out_dir = os.path.join(out_root, day_dir)
os.makedirs(out_dir, exist_ok=True)
# File name: {ts}_{symbol}_{model}.png (ASCII-only)
ts_str = datetime.utcnow().strftime("%H%M%S_%f")
safe_symbol = symbol.replace("/", "-")
fname = f"{ts_str}_{safe_symbol}_{model_name}.png"
out_path = os.path.join(out_dir, fname)
# Extract data
times, o, h, l, c = _extract_recent_ohlcv(base_data)
prices, bid_v, ask_v, imb = _extract_cob(base_data)
current_price = float(getattr(getattr(base_data, "cob_data", None), "current_price", 0.0))
# Prepare figure
fig = plt.figure(figsize=(12, 7), dpi=110)
gs = fig.add_gridspec(2, 1, height_ratios=[3, 2])
# Candlestick subplot
ax1 = fig.add_subplot(gs[0, 0])
if times:
x = list(range(len(times)))
# Plot high-low wicks
ax1.vlines(x, l, h, color="#444444", linewidth=1)
# Plot body as rectangle via bar with bottom=min(open, close) and height=abs(diff)
bodies = [c[i] - o[i] for i in range(len(o))]
bottoms = [min(o[i], c[i]) for i in range(len(o))]
colors = ["#00aa55" if bodies[i] >= 0 else "#cc3333" for i in range(len(bodies))]
heights = [abs(bodies[i]) if abs(bodies[i]) > 1e-9 else 1e-9 for i in range(len(bodies))]
ax1.bar(x, heights, bottom=bottoms, color=colors, width=0.6, align="center", edgecolor="#222222", linewidth=0.5)
# Labels
ax1.set_title(f"{safe_symbol} Candles (recent)")
ax1.set_ylabel("Price")
ax1.grid(True, linestyle=":", linewidth=0.6, alpha=0.6)
else:
ax1.text(0.5, 0.5, "No OHLCV data", ha="center", va="center")
# COB subplot
ax2 = fig.add_subplot(gs[1, 0])
if prices:
# Normalize x as offsets around current price if available
if current_price > 0:
xvals = [p - current_price for p in prices]
ax2.axvline(0.0, color="#666666", linestyle="--", linewidth=1.0)
ax2.set_xlabel("Price offset")
else:
xvals = prices
ax2.set_xlabel("Price")
# Plot bid/ask volumes
ax2.plot(xvals, bid_v, label="bid_vol", color="#2c7fb8")
ax2.plot(xvals, ask_v, label="ask_vol", color="#d95f0e")
# Secondary axis for imbalance
ax2b = ax2.twinx()
ax2b.plot(xvals, imb, label="imbalance", color="#6a3d9a", linewidth=1.2)
ax2b.set_ylabel("Imbalance")
ax2.set_ylabel("Volume")
ax2.grid(True, linestyle=":", linewidth=0.6, alpha=0.6)
# Build combined legend
lines, labels = ax2.get_legend_handles_labels()
lines2, labels2 = ax2b.get_legend_handles_labels()
ax2.legend(lines + lines2, labels + labels2, loc="upper right")
ax2.set_title("COB Buckets (recent)")
else:
ax2.text(0.5, 0.5, "No COB data", ha="center", va="center")
fig.tight_layout()
fig.savefig(out_path, bbox_inches="tight")
plt.close(fig)
logger.info(f"Saved audit image: {out_path}")
return out_path
except Exception as ex:
logger.error(f"Failed to save audit image: {ex}")
try:
plt.close("all")
except Exception:
pass
return ""
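
For reference, a small usage sketch with a stubbed input object (SimpleNamespace stands in for whatever BaseDataInput-like structure the orchestrator passes; only the attributes the plotter actually reads are populated, and all values are invented):

# Hypothetical usage sketch, not part of this commit.
from types import SimpleNamespace
from datetime import datetime, timedelta
from utils.audit_plotter import save_inference_audit_image

# 120 fake 1s bars with timestamp/open/high/low/close, as _extract_recent_ohlcv expects.
bars = [
    SimpleNamespace(
        timestamp=datetime.utcnow() - timedelta(seconds=120 - i),
        open=100.0 + 0.1 * i, high=100.2 + 0.1 * i,
        low=99.8 + 0.1 * i, close=100.1 + 0.1 * i,
    )
    for i in range(120)
]
# Fake COB buckets keyed by price, as _extract_cob expects.
cob = SimpleNamespace(
    current_price=112.0,
    price_buckets={112.0 + k: {"bid_volume": 5.0, "ask_volume": 4.0} for k in range(-20, 20)},
)
base_data = SimpleNamespace(ohlcv_1s=bars, ohlcv_1m=[], cob_data=cob)

path = save_inference_audit_image(base_data, model_name="cnn", symbol="ETH/USDT")
print("audit image:", path or "not saved (matplotlib missing or error)")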