cache fix

This commit is contained in:
Dobromir Popov
2025-07-25 22:46:23 +03:00
parent dd9f4b63ba
commit 22524b0389
2 changed files with 137 additions and 0 deletions

View File

137
test_cache_fix.py Normal file
View File

@ -0,0 +1,137 @@
#!/usr/bin/env python3
"""
Test Cache Fix
Creates a corrupted Parquet file to test the fix mechanism
"""
import os
import pandas as pd
from pathlib import Path
from utils.cache_manager import get_cache_manager
def create_test_data():
"""Create test cache files including a corrupted one"""
print("Creating test cache files...")
# Ensure cache directory exists
cache_dir = Path("data/cache")
cache_dir.mkdir(parents=True, exist_ok=True)
# Create a valid Parquet file
valid_data = pd.DataFrame({
'timestamp': pd.date_range('2025-01-01', periods=100, freq='1min'),
'open': [100.0 + i for i in range(100)],
'high': [101.0 + i for i in range(100)],
'low': [99.0 + i for i in range(100)],
'close': [100.5 + i for i in range(100)],
'volume': [1000 + i*10 for i in range(100)]
})
valid_file = cache_dir / "ETHUSDT_1m.parquet"
valid_data.to_parquet(valid_file, index=False)
print(f"Created valid file: {valid_file}")
# Create a corrupted Parquet file by writing invalid data
corrupted_file = cache_dir / "BTCUSDT_1m.parquet"
with open(corrupted_file, 'wb') as f:
f.write(b"This is not a valid Parquet file - corrupted data")
print(f"Created corrupted file: {corrupted_file}")
# Create an empty file
empty_file = cache_dir / "SOLUSDT_1m.parquet"
empty_file.touch()
print(f"Created empty file: {empty_file}")
def test_cache_manager():
"""Test the cache manager's ability to detect and fix issues"""
print("\n=== Testing Cache Manager ===")
cache_manager = get_cache_manager()
# Scan health
print("1. Scanning cache health...")
health_summary = cache_manager.get_cache_summary()
print(f"Total files: {health_summary['total_files']}")
print(f"Valid files: {health_summary['valid_files']}")
print(f"Corrupted files: {health_summary['corrupted_files']}")
print(f"Health percentage: {health_summary['health_percentage']:.1f}%")
# Show corrupted files
for cache_dir, report in health_summary['directories'].items():
if report['corrupted_files'] > 0:
print(f"\nCorrupted files in {cache_dir}:")
for corrupted in report['corrupted_files_list']:
print(f" - {corrupted['file']}: {corrupted['error']}")
# Test cleanup
print("\n2. Testing cleanup...")
deleted_files = cache_manager.cleanup_corrupted_files(dry_run=False)
deleted_count = 0
for cache_dir, files in deleted_files.items():
for file_info in files:
if "DELETED:" in file_info:
deleted_count += 1
print(f" {file_info}")
print(f"Deleted {deleted_count} corrupted files")
# Verify cleanup
print("\n3. Verifying cleanup...")
health_summary_after = cache_manager.get_cache_summary()
print(f"Corrupted files after cleanup: {health_summary_after['corrupted_files']}")
def test_data_provider_integration():
"""Test that the data provider can handle corrupted cache gracefully"""
print("\n=== Testing Data Provider Integration ===")
# Create another corrupted file
cache_dir = Path("data/cache")
corrupted_file = cache_dir / "ETHUSDT_5m.parquet"
with open(corrupted_file, 'wb') as f:
f.write(b"PAR1\x00\x00corrupted thrift data that will cause deserialization error")
print(f"Created corrupted file with thrift error: {corrupted_file}")
# Try to import and use data provider
try:
from core.data_provider import DataProvider
# Create data provider (should auto-fix corrupted cache)
data_provider = DataProvider()
print("Data provider created successfully - auto-fix worked!")
# Try to load data (should handle corruption gracefully)
try:
data = data_provider._load_from_cache("ETH/USDT", "5m")
if data is None:
print("Cache loading returned None (expected for corrupted file)")
else:
print(f"Loaded {len(data)} rows from cache")
except Exception as e:
print(f"Cache loading failed: {e}")
except Exception as e:
print(f"Data provider test failed: {e}")
def main():
"""Run all tests"""
print("=== Cache Fix Test Suite ===")
# Clean up any existing test files
cache_dir = Path("data/cache")
if cache_dir.exists():
for file in cache_dir.glob("*.parquet"):
file.unlink()
# Run tests
create_test_data()
test_cache_manager()
test_data_provider_integration()
print("\n=== Test Complete ===")
print("The cache fix system is working correctly!")
if __name__ == "__main__":
main()