gogo2/debug/trade_audit.py

#!/usr/bin/env python3
"""
Trade Audit Tool

This tool analyzes trade data to identify potential issues with:
- Duplicate entry prices
- Rapid consecutive trades
- P&L calculation accuracy
- Position tracking problems

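Usage:
    python debug/trade_audit.py [--trades-file path/to/trades.json]
    python debug/trade_audit.py [--manual-trades path/to/trades.txt]

    With neither option, the tool looks for a dashboard cache file instead.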
"""

import argparse
import json
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import os
import sys
from pathlib import Path

# Put the parent of this file's directory at the front of sys.path.
# NOTE(review): presumably so project modules can be imported when the script
# is run directly; no project import is visible in this file - confirm.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

def parse_trade_time(time_str):
    """Parse a trade time string into a ``datetime``.

    Two layouts are tried in order: ``HH:MM:SS`` (``strptime`` fills in the
    default date 1900-01-01) and ``YYYY-MM-DD HH:MM:SS``. Leading and
    trailing whitespace is ignored while matching.

    Args:
        time_str: Raw time value taken from a trade record.

    Returns:
        A ``datetime`` when one of the layouts matches; otherwise the input
        value unchanged. Non-string input (``None``, numbers, an existing
        ``datetime``) is always returned unchanged.
    """
    # strptime raises TypeError (not ValueError) for non-str input, which the
    # fallback below would not catch; honour the "return as is" contract.
    if not isinstance(time_str, str):
        return time_str

    candidate = time_str.strip()
    for layout in ("%H:%M:%S", "%Y-%m-%d %H:%M:%S"):
        try:
            return datetime.strptime(candidate, layout)
        except ValueError:
            continue

    # Return as is if no layout matched
    return time_str

def load_trades_from_file(file_path):
    """Load trades from a JSON file.

    Args:
        file_path: Path of the JSON file to read (decoded as UTF-8).

    Returns:
        The decoded JSON content, or an empty list when the file is missing,
        cannot be read, or does not contain valid JSON. A message describing
        the problem is printed in each failure case.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: File {file_path} not found")
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Undecodable bytes are reported the same way as malformed JSON.
        print(f"Error: File {file_path} is not valid JSON")
    except OSError as exc:
        # e.g. the path is a directory or the file is not readable.
        print(f"Error: Could not read {file_path}: {exc}")
    return []

def load_trades_from_dashboard_cache():
    """Load trades from the first dashboard cache file that exists.

    The candidate paths are relative to the current working directory and
    are checked in a fixed order; only the first existing file is read.

    Returns:
        Whatever ``load_trades_from_file`` returns for that file, or an empty
        list when none of the candidate files exists.
    """
    candidates = (
        "cache/dashboard_trades.json",
        "cache/closed_trades.json",
        "data/trades_history.json",
    )

    found = next((candidate for candidate in candidates if os.path.exists(candidate)), None)
    if found is None:
        print("No trade cache files found")
        return []

    print(f"Loading trades from cache: {found}")
    return load_trades_from_file(found)

# Side labels recognised when computing the expected P&L.
_LONG_SIDES = frozenset({'LONG', 'BUY'})
_SHORT_SIDES = frozenset({'SHORT', 'SELL'})


def _first_present(record, keys, default=None):
    """Return the first non-None value stored under any of *keys*."""
    for key in keys:
        value = record.get(key)
        if value is not None:
            return value
    return default


def parse_trade_data(trades_data):
    """Parse trade records into a pandas DataFrame for analysis.

    Each record is a dict; alternative field names are accepted
    (``entry_time``/``time``, ``side``/``action``, ``size``/``quantity``,
    ``entry_price``/``entry``, ``exit_price``/``exit``,
    ``hold_time_seconds``/``hold``). Missing or ``None`` numeric fields
    default to 0. Entries that are not dicts are skipped.

    Args:
        trades_data: Iterable of trade dicts (e.g. decoded from JSON).

    Returns:
        DataFrame with columns ``time``, ``side``, ``size``, ``entry_price``,
        ``exit_price``, ``hold_time``, ``pnl``, ``fees``, ``expected_pnl`` and
        ``pnl_difference``; an empty DataFrame when there are no usable
        records. ``side`` is upper-cased. ``expected_pnl`` and
        ``pnl_difference`` are NaN when the side is neither LONG/BUY nor
        SHORT/SELL, because no direction can be assumed.

    Raises:
        ValueError: If a numeric field holds a value ``float`` cannot parse.
    """
    parsed_trades = []

    for trade in trades_data:
        if not isinstance(trade, dict):
            # e.g. the JSON top level was an object, so iteration yields keys.
            print(f"Skipping non-dict trade record: {trade!r}")
            continue

        # Time field might be named entry_time or time. Only strings are
        # parsed; other values (None, numbers) are kept unchanged.
        raw_time = _first_present(trade, ('entry_time', 'time'))
        trade_time = parse_trade_time(raw_time) if isinstance(raw_time, str) else raw_time

        # Side might be named side or action; normalise so 'long' == 'LONG'.
        side = str(_first_present(trade, ('side', 'action'), 'UNKNOWN')).strip().upper()

        size = float(_first_present(trade, ('size', 'quantity'), 0))
        entry_price = float(_first_present(trade, ('entry_price', 'entry'), 0))
        exit_price = float(_first_present(trade, ('exit_price', 'exit'), 0))
        hold_time = float(_first_present(trade, ('hold_time_seconds', 'hold'), 0))
        pnl = float(_first_present(trade, ('pnl',), 0))
        fees = float(_first_present(trade, ('fees',), 0))

        # Calculate expected P&L for verification
        if side in _LONG_SIDES:
            expected_pnl = (exit_price - entry_price) * size
        elif side in _SHORT_SIDES:
            expected_pnl = (entry_price - exit_price) * size
        else:
            # Unknown direction: do not guess, so no false discrepancy arises.
            expected_pnl = float('nan')

        parsed_trades.append({
            'time': trade_time,
            'side': side,
            'size': size,
            'entry_price': entry_price,
            'exit_price': exit_price,
            'hold_time': hold_time,
            'pnl': pnl,
            'fees': fees,
            'expected_pnl': expected_pnl,
            'pnl_difference': pnl - expected_pnl,
        })

    if not parsed_trades:
        return pd.DataFrame()
    return pd.DataFrame(parsed_trades)

def _format_trade_time(value):
    """Format a timestamp as HH:MM:SS, or 'unknown' when it is missing."""
    if pd.isna(value):
        return "unknown"
    return value.strftime('%H:%M:%S')


def _save_histogram(values, title, xlabel, path):
    """Plot a 20-bin histogram of *values* and save it to *path*."""
    fig = plt.figure(figsize=(10, 6))
    try:
        plt.hist(values, bins=20, alpha=0.7)
        plt.title(title)
        plt.xlabel(xlabel)
        plt.ylabel('Number of Trades')
        plt.grid(True, alpha=0.3)
        plt.savefig(path)
    finally:
        # Close the figure so repeated audits do not accumulate open figures.
        plt.close(fig)


def analyze_trades(df, *, rapid_threshold_seconds=30, output_dir='debug'):
    """Print an audit report for a trades DataFrame and save two histograms.

    The report covers duplicate entry prices, rapid consecutive trades, P&L
    discrepancies (reported vs. expected, tolerance 0.01), side distribution,
    hold-time distribution and a win/loss summary. The caller's DataFrame is
    not modified.

    Args:
        df: DataFrame with the columns ``time``, ``side``, ``size``,
            ``entry_price``, ``hold_time``, ``pnl``, ``fees``,
            ``expected_pnl`` and ``pnl_difference``.
        rapid_threshold_seconds: A trade is reported as "rapid" when it
            starts less than this many seconds after the previous one.
        output_dir: Directory the histogram PNG files are written to; it is
            created when missing.

    Returns:
        None. Results are printed and plots are written to ``output_dir``.
    """
    if df.empty:
        print("No trades to analyze")
        return

    # Work on a time-sorted copy. Values that are not valid timestamps become
    # NaT so the .dt accessor below works even when some times failed to parse.
    df = df.copy()
    df['time'] = pd.to_datetime(df['time'], errors='coerce')
    df = df.sort_values('time')
    total = len(df)

    print(f"\n{'='*50}")
    print("TRADE AUDIT RESULTS")
    print(f"{'='*50}")
    print(f"Total trades analyzed: {total}")

    # Check for duplicate entry prices
    entry_price_counts = df['entry_price'].value_counts()
    duplicate_entries = entry_price_counts[entry_price_counts > 1]

    print(f"\n{'='*20} DUPLICATE ENTRY PRICES {'='*20}")
    if duplicate_entries.empty:
        print("No duplicate entry prices found")
    else:
        print(f"Found {len(duplicate_entries)} prices with multiple entries:")
        for price, count in duplicate_entries.items():
            print(f"  ${price:.2f}: {count} trades")

        # Analyze the duplicate entry trades in more detail. The frame is
        # already sorted by time, so each gap is relative to the earlier trade.
        for price in duplicate_entries.index:
            same_price = df[df['entry_price'] == price]
            gaps = same_price['time'].diff().dt.total_seconds()

            print(f"\nDetailed analysis for entry price ${price:.2f}:")
            print("  Time gaps between consecutive trades:")
            for trade_time, gap in zip(same_price['time'], gaps):
                # The first trade, and trades without a time, have no gap.
                if pd.notna(gap):
                    print(f"    {_format_trade_time(trade_time)}: {gap:.0f} seconds after previous trade")

    # Check for rapid consecutive trades
    df['time_since_last'] = df['time'].diff().dt.total_seconds()
    rapid_trades = df[df['time_since_last'] < rapid_threshold_seconds]

    print(f"\n{'='*20} RAPID CONSECUTIVE TRADES {'='*20}")
    if rapid_trades.empty:
        print("No rapid consecutive trades found")
    else:
        print(f"Found {len(rapid_trades)} trades executed within {rapid_threshold_seconds} seconds of previous trade:")
        for _, row in rapid_trades.iterrows():
            when = _format_trade_time(row['time'])
            print(f"  {when} - {row['side']} ${row['size']:.2f} @ ${row['entry_price']:.2f} - {row['time_since_last']:.0f}s after previous")

    # Check for P&L calculation accuracy (NaN differences are never flagged)
    pnl_diff = df[df['pnl_difference'].abs() > 0.01]

    print(f"\n{'='*20} P&L CALCULATION ISSUES {'='*20}")
    if pnl_diff.empty:
        print("No P&L calculation issues found")
    else:
        print(f"Found {len(pnl_diff)} trades with P&L calculation discrepancies:")
        for _, row in pnl_diff.iterrows():
            when = _format_trade_time(row['time'])
            print(f"  {when} - {row['side']} - Reported: ${row['pnl']:.2f}, Expected: ${row['expected_pnl']:.2f}, Diff: ${row['pnl_difference']:.2f}")

    # Check for side distribution
    print(f"\n{'='*20} TRADE SIDE DISTRIBUTION {'='*20}")
    for side, count in df['side'].value_counts().items():
        print(f"  {side}: {count} trades ({count/total*100:.1f}%)")

    # Check for hold time distribution
    print(f"\n{'='*20} HOLD TIME DISTRIBUTION {'='*20}")
    print(f"  Min hold time: {df['hold_time'].min():.0f} seconds")
    print(f"  Max hold time: {df['hold_time'].max():.0f} seconds")
    print(f"  Avg hold time: {df['hold_time'].mean():.0f} seconds")
    print(f"  Median hold time: {df['hold_time'].median():.0f} seconds")

    # Hold time buckets. include_lowest=True keeps hold_time == 0 in the
    # '0-30s' bucket; by default pd.cut excludes the left edge of the first bin.
    hold_buckets = [0, 30, 60, 120, 300, 600, 1800, 3600, float('inf')]
    hold_labels = ['0-30s', '30-60s', '1-2m', '2-5m', '5-10m', '10-30m', '30-60m', '60m+']

    df['hold_bucket'] = pd.cut(
        df['hold_time'], bins=hold_buckets, labels=hold_labels, include_lowest=True
    )
    hold_dist = df['hold_bucket'].value_counts().sort_index()

    for bucket, count in hold_dist.items():
        print(f"  {bucket}: {count} trades ({count/total*100:.1f}%)")

    # Generate summary statistics
    print(f"\n{'='*20} TRADE PERFORMANCE SUMMARY {'='*20}")
    winning_trades = df[df['pnl'] > 0]
    losing_trades = df[df['pnl'] < 0]

    # mean() of an empty selection is NaN; report 0.00 instead of "nan".
    avg_win = winning_trades['pnl'].mean() if not winning_trades.empty else 0.0
    avg_loss = abs(losing_trades['pnl'].mean()) if not losing_trades.empty else 0.0
    total_pnl = df['pnl'].sum()
    total_fees = df['fees'].sum()

    print(f"  Win rate: {len(winning_trades)/total*100:.1f}% ({len(winning_trades)}W/{len(losing_trades)}L)")
    print(f"  Avg win: ${avg_win:.2f}")
    print(f"  Avg loss: ${avg_loss:.2f}")
    print(f"  Total P&L: ${total_pnl:.2f}")
    print(f"  Total fees: ${total_fees:.2f}")
    print(f"  Net P&L: ${(total_pnl - total_fees):.2f}")

    # Save the distribution plots; make sure the target directory exists so
    # the function also works when it is not called through main().
    os.makedirs(output_dir, exist_ok=True)
    entry_plot = f"{output_dir}/entry_price_distribution.png"
    pnl_plot = f"{output_dir}/pnl_distribution.png"

    _save_histogram(df['entry_price'], 'Entry Price Distribution', 'Entry Price ($)', entry_plot)
    _save_histogram(df['pnl'], 'P&L Distribution', 'P&L ($)', pnl_plot)

    print(f"\n{'='*20} AUDIT COMPLETE {'='*20}")
    print(f"Plots saved to {entry_plot} and {pnl_plot}")

def analyze_manual_trades(trades_data):
    """Parse manually provided trade text into a DataFrame.

    Each trade line is split on ``$`` and read as::

        Time Side $Size $Entry $Exit Hold $P&L [$Fees]

    Blank lines and the known header/summary lines are skipped, regardless of
    leading or trailing whitespace. Lines that cannot be parsed are reported
    on stdout and skipped.

    Args:
        trades_data: The full text containing one trade per line.

    Returns:
        DataFrame with columns ``time``, ``side``, ``size``, ``entry_price``,
        ``exit_price``, ``hold_time``, ``pnl``, ``fees``, ``expected_pnl`` and
        ``pnl_difference``; an empty DataFrame when no line could be parsed.
        ``side`` is upper-cased. ``expected_pnl`` and ``pnl_difference`` are
        NaN when the side is neither LONG/BUY nor SHORT/SELL.
    """
    skip_prefixes = (
        'from last session',
        'Recent Closed Trades',
        'Trading Performance',
        'Win Rate:',  # summary line
    )
    long_sides = ('LONG', 'BUY')
    short_sides = ('SHORT', 'SELL')

    parsed_trades = []

    for raw_line in trades_data.strip().split('\n'):
        # Strip first so indented headers and whitespace-only lines are
        # skipped instead of being reported as parse errors.
        line = raw_line.strip()
        if not line or line.startswith(skip_prefixes):
            continue

        try:
            parts = line.split('$')

            time_side = parts[0].split()
            time_text = time_side[0]
            side = time_side[1].upper()

            size = float(parts[1].split()[0])
            entry_price = float(parts[2].split()[0])

            # The segment after the third '$' holds the exit price followed
            # by the hold time.
            exit_fields = parts[3].split()
            exit_price = float(exit_fields[0])
            hold = int(exit_fields[1])

            pnl = float(parts[4].split()[0])

            # Fees are optional
            fees = float(parts[5].strip()) if len(parts) > 5 else 0.0
        except (IndexError, ValueError) as e:
            print(f"Error parsing trade line: {line}")
            print(f"Error details: {e}")
            continue

        # Calculate expected P&L
        if side in long_sides:
            expected_pnl = (exit_price - entry_price) * size
        elif side in short_sides:
            expected_pnl = (entry_price - exit_price) * size
        else:
            # Unknown direction: do not guess, so no false discrepancy arises.
            expected_pnl = float('nan')

        parsed_trades.append({
            'time': parse_trade_time(time_text),
            'side': side,
            'size': size,
            'entry_price': entry_price,
            'exit_price': exit_price,
            'hold_time': hold,
            'pnl': pnl,
            'fees': fees,
            'expected_pnl': expected_pnl,
            'pnl_difference': pnl - expected_pnl,
        })

    if not parsed_trades:
        return pd.DataFrame()
    return pd.DataFrame(parsed_trades)

def main():
    """Command-line entry point for the trade audit.

    Trades come from ``--trades-file`` (JSON), else from ``--manual-trades``
    (text), else from a dashboard cache file. The resulting DataFrame is
    passed to ``analyze_trades`` unless it is empty.
    """
    cli = argparse.ArgumentParser(description='Trade Audit Tool')
    cli.add_argument('--trades-file', type=str, help='Path to trades JSON file')
    cli.add_argument('--manual-trades', type=str, help='Path to text file with manually entered trades')
    options = cli.parse_args()

    # The plots are written into ./debug, so make sure it exists up front.
    os.makedirs('debug', exist_ok=True)

    if options.trades_file:
        df = parse_trade_data(load_trades_from_file(options.trades_file))
    elif options.manual_trades:
        try:
            with open(options.manual_trades, 'r') as handle:
                raw_text = handle.read()
            df = analyze_manual_trades(raw_text)
        except Exception as e:
            print(f"Error reading manual trades file: {e}")
            df = pd.DataFrame()
    else:
        # No explicit source given: fall back to the dashboard cache.
        cached_trades = load_trades_from_dashboard_cache()
        if not cached_trades:
            print("No trade data provided. Use --trades-file or --manual-trades")
            return
        df = parse_trade_data(cached_trades)

    if df.empty:
        print("No valid trade data to analyze")
    else:
        analyze_trades(df)

# Run the audit only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()