#!/usr/bin/env python3
"""
Test Cache Fix

Creates a corrupted Parquet file to test the fix mechanism
"""

import os
import sys
from pathlib import Path

import pandas as pd

from utils.cache_manager import get_cache_manager

# Directory this script fills with fixture files. NOTE: main() deletes every
# ``*.parquet`` file found directly inside it before the fixtures are created.
CACHE_DIR = Path("data/cache")


def create_test_data():
    """Create test cache files including a corrupted one.

    Three fixtures are written into ``data/cache``:

    * ``ETHUSDT_1m.parquet`` - a valid 100-row OHLCV frame,
    * ``BTCUSDT_1m.parquet`` - plain text bytes, not a Parquet file,
    * ``SOLUSDT_1m.parquet`` - a zero-byte file.
    """
    print("Creating test cache files...")

    # Ensure cache directory exists
    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    # Create a valid Parquet file
    rows = 100
    valid_data = pd.DataFrame({
        'timestamp': pd.date_range('2025-01-01', periods=rows, freq='1min'),
        'open': [100.0 + i for i in range(rows)],
        'high': [101.0 + i for i in range(rows)],
        'low': [99.0 + i for i in range(rows)],
        'close': [100.5 + i for i in range(rows)],
        'volume': [1000 + i*10 for i in range(rows)],
    })

    valid_file = CACHE_DIR / "ETHUSDT_1m.parquet"
    valid_data.to_parquet(valid_file, index=False)
    print(f"Created valid file: {valid_file}")

    # Create a corrupted Parquet file by writing invalid data
    corrupted_file = CACHE_DIR / "BTCUSDT_1m.parquet"
    corrupted_file.write_bytes(b"This is not a valid Parquet file - corrupted data")
    print(f"Created corrupted file: {corrupted_file}")

    # Create an empty file. write_bytes truncates, so the fixture really is
    # empty even when a file of that name was already present.
    empty_file = CACHE_DIR / "SOLUSDT_1m.parquet"
    empty_file.write_bytes(b"")
    print(f"Created empty file: {empty_file}")


def _check_cache_manager():
    """Scan, clean up and re-scan the cache; return True if nothing corrupted remains.

    Relies on the object returned by ``get_cache_manager()`` exposing
    ``get_cache_summary()`` and ``cleanup_corrupted_files(dry_run=...)``, and
    on the summary carrying the keys read below.
    """
    print("\n=== Testing Cache Manager ===")

    cache_manager = get_cache_manager()

    # Scan health
    print("1. Scanning cache health...")
    health_summary = cache_manager.get_cache_summary()

    print(f"Total files: {health_summary['total_files']}")
    print(f"Valid files: {health_summary['valid_files']}")
    print(f"Corrupted files: {health_summary['corrupted_files']}")
    print(f"Health percentage: {health_summary['health_percentage']:.1f}%")

    # Show corrupted files
    for cache_dir, report in health_summary['directories'].items():
        if report['corrupted_files'] > 0:
            print(f"\nCorrupted files in {cache_dir}:")
            for corrupted in report['corrupted_files_list']:
                print(f" - {corrupted['file']}: {corrupted['error']}")

    # Test cleanup
    print("\n2. Testing cleanup...")
    deleted_files = cache_manager.cleanup_corrupted_files(dry_run=False)

    deleted_count = 0
    for files in deleted_files.values():
        for file_info in files:
            if "DELETED:" in file_info:
                deleted_count += 1
            # Every reported entry is echoed so that entries which were not
            # deleted stay visible in the output.
            print(f" {file_info}")

    print(f"Deleted {deleted_count} corrupted files")

    # Verify cleanup
    print("\n3. Verifying cleanup...")
    health_summary_after = cache_manager.get_cache_summary()
    remaining = health_summary_after['corrupted_files']
    print(f"Corrupted files after cleanup: {remaining}")

    # Truthiness covers both a zero count and an empty collection.
    return not remaining


def test_cache_manager():
    """Test the cache manager's ability to detect and fix issues.

    Prints a report and returns None; the pass/fail result is only consumed
    by main(), which calls the underlying helper directly.
    """
    _check_cache_manager()


def _check_data_provider():
    """Plant a corrupted cache file and exercise the data provider.

    Returns:
        True when ``DataProvider`` could be imported and constructed and
        ``_load_from_cache`` completed without raising; False otherwise.
        Failures are printed, never propagated.
    """
    print("\n=== Testing Data Provider Integration ===")

    # Create another corrupted file. The directory is created on demand so
    # this check also works when create_test_data() has not run first.
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    corrupted_file = CACHE_DIR / "ETHUSDT_5m.parquet"
    corrupted_file.write_bytes(
        b"PAR1\x00\x00corrupted thrift data that will cause deserialization error"
    )
    print(f"Created corrupted file with thrift error: {corrupted_file}")

    # Try to import and use data provider. The broad handlers are deliberate:
    # this is the reporting boundary of a diagnostic script.
    try:
        from core.data_provider import DataProvider

        # Create data provider (should auto-fix corrupted cache)
        data_provider = DataProvider()
        print("Data provider created successfully - auto-fix worked!")
    except Exception as e:
        print(f"Data provider test failed: {e}")
        return False

    # Try to load data (should handle corruption gracefully)
    try:
        data = data_provider._load_from_cache("ETH/USDT", "5m")
        if data is None:
            print("Cache loading returned None (expected for corrupted file)")
        else:
            print(f"Loaded {len(data)} rows from cache")
    except Exception as e:
        print(f"Cache loading failed: {e}")
        return False

    return True


def test_data_provider_integration():
    """Test that the data provider can handle corrupted cache gracefully.

    Prints a report and returns None; errors are reported, not raised.
    """
    _check_data_provider()


def main():
    """Run all tests.

    Returns:
        0 when both checks passed, 1 otherwise, suitable for ``sys.exit``.
    """
    print("=== Cache Fix Test Suite ===")

    # Clean up any existing test files.
    # NOTE: this removes every *.parquet file in the cache directory, not
    # only the fixtures this script creates.
    if CACHE_DIR.exists():
        for file in CACHE_DIR.glob("*.parquet"):
            file.unlink()

    # Run tests
    create_test_data()
    cache_ok = _check_cache_manager()
    provider_ok = _check_data_provider()

    print("\n=== Test Complete ===")
    if cache_ok and provider_ok:
        print("The cache fix system is working correctly!")
        return 0

    # Only claim success when the checks actually succeeded.
    print("The cache fix system reported problems - see the output above.")
    return 1


if __name__ == "__main__":
    sys.exit(main())