""" Integration tests for complete data pipeline from exchanges to storage. """ import pytest import asyncio import time from datetime import datetime, timezone from unittest.mock import Mock, AsyncMock, patch from typing import List, Dict, Any from ..connectors.binance_connector import BinanceConnector from ..processing.data_processor import DataProcessor from ..aggregation.aggregation_engine import AggregationEngine from ..storage.timescale_manager import TimescaleManager from ..caching.redis_manager import RedisManager from ..models.core import OrderBookSnapshot, TradeEvent, PriceLevel from ..utils.logging import get_logger logger = get_logger(__name__) class TestDataPipelineIntegration: """Test complete data pipeline integration""" @pytest.fixture async def mock_components(self): """Setup mock components for testing""" # Mock exchange connector connector = Mock(spec=BinanceConnector) connector.exchange_name = "binance" connector.connect = AsyncMock(return_value=True) connector.disconnect = AsyncMock() connector.subscribe_orderbook = AsyncMock() connector.subscribe_trades = AsyncMock() # Mock data processor processor = Mock(spec=DataProcessor) processor.process_orderbook = Mock() processor.process_trade = Mock() processor.validate_data = Mock(return_value=True) # Mock aggregation engine aggregator = Mock(spec=AggregationEngine) aggregator.aggregate_orderbook = Mock() aggregator.create_heatmap = Mock() # Mock storage manager storage = Mock(spec=TimescaleManager) storage.store_orderbook = AsyncMock(return_value=True) storage.store_trade = AsyncMock(return_value=True) storage.is_connected = Mock(return_value=True) # Mock cache manager cache = Mock(spec=RedisManager) cache.set = AsyncMock(return_value=True) cache.get = AsyncMock(return_value=None) cache.is_connected = Mock(return_value=True) return { 'connector': connector, 'processor': processor, 'aggregator': aggregator, 'storage': storage, 'cache': cache } @pytest.fixture def sample_orderbook(self): """Create sample order book data""" return OrderBookSnapshot( symbol="BTCUSDT", exchange="binance", timestamp=datetime.now(timezone.utc), bids=[ PriceLevel(price=50000.0, size=1.5), PriceLevel(price=49990.0, size=2.0), PriceLevel(price=49980.0, size=1.0) ], asks=[ PriceLevel(price=50010.0, size=1.2), PriceLevel(price=50020.0, size=1.8), PriceLevel(price=50030.0, size=0.8) ] ) @pytest.fixture def sample_trade(self): """Create sample trade data""" return TradeEvent( symbol="BTCUSDT", exchange="binance", timestamp=datetime.now(timezone.utc), price=50005.0, size=0.5, side="buy", trade_id="12345" ) @pytest.mark.asyncio async def test_complete_orderbook_pipeline(self, mock_components, sample_orderbook): """Test complete order book processing pipeline""" components = mock_components # Setup processor to return processed data components['processor'].process_orderbook.return_value = sample_orderbook # Simulate pipeline flow # 1. Receive data from exchange raw_data = {"symbol": "BTCUSDT", "bids": [], "asks": []} # 2. Process data processed_data = components['processor'].process_orderbook(raw_data, "binance") # 3. Validate data is_valid = components['processor'].validate_data(processed_data) assert is_valid # 4. Aggregate data components['aggregator'].aggregate_orderbook(processed_data) # 5. Store in database await components['storage'].store_orderbook(processed_data) # 6. Cache latest data await components['cache'].set(f"orderbook:BTCUSDT:binance", processed_data) # Verify all components were called components['processor'].process_orderbook.assert_called_once() components['processor'].validate_data.assert_called_once() components['aggregator'].aggregate_orderbook.assert_called_once() components['storage'].store_orderbook.assert_called_once() components['cache'].set.assert_called_once() @pytest.mark.asyncio async def test_complete_trade_pipeline(self, mock_components, sample_trade): """Test complete trade processing pipeline""" components = mock_components # Setup processor to return processed data components['processor'].process_trade.return_value = sample_trade # Simulate pipeline flow raw_data = {"symbol": "BTCUSDT", "price": 50005.0, "quantity": 0.5} # Process through pipeline processed_data = components['processor'].process_trade(raw_data, "binance") is_valid = components['processor'].validate_data(processed_data) assert is_valid await components['storage'].store_trade(processed_data) await components['cache'].set(f"trade:BTCUSDT:binance:latest", processed_data) # Verify calls components['processor'].process_trade.assert_called_once() components['storage'].store_trade.assert_called_once() components['cache'].set.assert_called_once() @pytest.mark.asyncio async def test_multi_exchange_pipeline(self, mock_components): """Test pipeline with multiple exchanges""" components = mock_components exchanges = ["binance", "coinbase", "kraken"] # Simulate data from multiple exchanges for exchange in exchanges: # Create exchange-specific data orderbook = OrderBookSnapshot( symbol="BTCUSDT", exchange=exchange, timestamp=datetime.now(timezone.utc), bids=[PriceLevel(price=50000.0, size=1.0)], asks=[PriceLevel(price=50010.0, size=1.0)] ) components['processor'].process_orderbook.return_value = orderbook components['processor'].validate_data.return_value = True # Process through pipeline processed_data = components['processor'].process_orderbook({}, exchange) is_valid = components['processor'].validate_data(processed_data) assert is_valid await components['storage'].store_orderbook(processed_data) await components['cache'].set(f"orderbook:BTCUSDT:{exchange}", processed_data) # Verify multiple calls assert components['processor'].process_orderbook.call_count == len(exchanges) assert components['storage'].store_orderbook.call_count == len(exchanges) assert components['cache'].set.call_count == len(exchanges) @pytest.mark.asyncio async def test_pipeline_error_handling(self, mock_components, sample_orderbook): """Test pipeline error handling and recovery""" components = mock_components # Setup storage to fail initially components['storage'].store_orderbook.side_effect = [ Exception("Database connection failed"), True # Success on retry ] components['processor'].process_orderbook.return_value = sample_orderbook components['processor'].validate_data.return_value = True # First attempt should fail with pytest.raises(Exception): await components['storage'].store_orderbook(sample_orderbook) # Second attempt should succeed result = await components['storage'].store_orderbook(sample_orderbook) assert result is True # Verify retry logic assert components['storage'].store_orderbook.call_count == 2 @pytest.mark.asyncio async def test_pipeline_performance(self, mock_components): """Test pipeline performance with high throughput""" components = mock_components # Setup fast responses components['processor'].process_orderbook.return_value = Mock() components['processor'].validate_data.return_value = True components['storage'].store_orderbook.return_value = True components['cache'].set.return_value = True # Process multiple items quickly start_time = time.time() tasks = [] for i in range(100): # Simulate processing 100 order books task = asyncio.create_task(self._process_single_orderbook(components, i)) tasks.append(task) await asyncio.gather(*tasks) end_time = time.time() processing_time = end_time - start_time throughput = 100 / processing_time # Should process at least 50 items per second assert throughput > 50, f"Throughput too low: {throughput:.2f} items/sec" # Verify all items were processed assert components['processor'].process_orderbook.call_count == 100 assert components['storage'].store_orderbook.call_count == 100 async def _process_single_orderbook(self, components, index): """Helper method to process a single order book""" raw_data = {"symbol": "BTCUSDT", "index": index} processed_data = components['processor'].process_orderbook(raw_data, "binance") is_valid = components['processor'].validate_data(processed_data) if is_valid: await components['storage'].store_orderbook(processed_data) await components['cache'].set(f"orderbook:BTCUSDT:binance:{index}", processed_data) @pytest.mark.asyncio async def test_data_consistency_across_pipeline(self, mock_components, sample_orderbook): """Test data consistency throughout the pipeline""" components = mock_components # Track data transformations original_data = {"symbol": "BTCUSDT", "timestamp": "2024-01-01T00:00:00Z"} # Setup processor to modify data modified_orderbook = sample_orderbook modified_orderbook.symbol = "BTCUSDT" # Ensure consistency components['processor'].process_orderbook.return_value = modified_orderbook components['processor'].validate_data.return_value = True # Process data processed_data = components['processor'].process_orderbook(original_data, "binance") # Verify data consistency assert processed_data.symbol == "BTCUSDT" assert processed_data.exchange == "binance" assert len(processed_data.bids) > 0 assert len(processed_data.asks) > 0 # Verify all price levels are valid for bid in processed_data.bids: assert bid.price > 0 assert bid.size > 0 for ask in processed_data.asks: assert ask.price > 0 assert ask.size > 0 # Verify bid/ask ordering bid_prices = [bid.price for bid in processed_data.bids] ask_prices = [ask.price for ask in processed_data.asks] assert bid_prices == sorted(bid_prices, reverse=True) # Bids descending assert ask_prices == sorted(ask_prices) # Asks ascending # Verify spread is positive if bid_prices and ask_prices: spread = min(ask_prices) - max(bid_prices) assert spread >= 0, f"Negative spread detected: {spread}" @pytest.mark.asyncio async def test_pipeline_memory_usage(self, mock_components): """Test pipeline memory usage under load""" import psutil import gc components = mock_components process = psutil.Process() # Get initial memory usage initial_memory = process.memory_info().rss / 1024 / 1024 # MB # Process large amount of data for i in range(1000): orderbook = OrderBookSnapshot( symbol="BTCUSDT", exchange="binance", timestamp=datetime.now(timezone.utc), bids=[PriceLevel(price=50000.0 + i, size=1.0)], asks=[PriceLevel(price=50010.0 + i, size=1.0)] ) components['processor'].process_orderbook.return_value = orderbook components['processor'].validate_data.return_value = True # Process data processed_data = components['processor'].process_orderbook({}, "binance") await components['storage'].store_orderbook(processed_data) # Force garbage collection every 100 items if i % 100 == 0: gc.collect() # Get final memory usage final_memory = process.memory_info().rss / 1024 / 1024 # MB memory_increase = final_memory - initial_memory # Memory increase should be reasonable (less than 100MB for 1000 items) assert memory_increase < 100, f"Memory usage increased by {memory_increase:.2f}MB" logger.info(f"Memory usage: {initial_memory:.2f}MB -> {final_memory:.2f}MB (+{memory_increase:.2f}MB)") class TestPipelineResilience: """Test pipeline resilience and fault tolerance""" @pytest.mark.asyncio async def test_database_reconnection(self): """Test database reconnection handling""" storage = Mock(spec=TimescaleManager) # Simulate connection failure then recovery storage.is_connected.side_effect = [False, False, True] storage.connect.return_value = True storage.store_orderbook.return_value = True # Should attempt reconnection for attempt in range(3): if not storage.is_connected(): storage.connect() else: break assert storage.connect.call_count == 1 assert storage.is_connected.call_count == 3 @pytest.mark.asyncio async def test_cache_fallback(self): """Test cache fallback when Redis is unavailable""" cache = Mock(spec=RedisManager) # Simulate cache failure cache.is_connected.return_value = False cache.set.side_effect = Exception("Redis connection failed") # Should handle cache failure gracefully try: await cache.set("test_key", "test_value") except Exception: # Should continue processing even if cache fails pass assert not cache.is_connected() @pytest.mark.asyncio async def test_exchange_failover(self): """Test exchange failover when one exchange fails""" exchanges = ["binance", "coinbase", "kraken"] failed_exchange = "binance" # Simulate one exchange failing for exchange in exchanges: if exchange == failed_exchange: # This exchange fails assert exchange == failed_exchange else: # Other exchanges continue working assert exchange != failed_exchange # Should continue with remaining exchanges working_exchanges = [ex for ex in exchanges if ex != failed_exchange] assert len(working_exchanges) == 2 assert "coinbase" in working_exchanges assert "kraken" in working_exchanges @pytest.mark.integration class TestRealDataPipeline: """Integration tests with real components (requires running services)""" @pytest.mark.skipif(not pytest.config.getoption("--integration"), reason="Integration tests require --integration flag") @pytest.mark.asyncio async def test_real_database_integration(self): """Test with real TimescaleDB instance""" # This test requires a running TimescaleDB instance # Skip if not available try: from ..storage.timescale_manager import TimescaleManager storage = TimescaleManager() await storage.connect() # Test basic operations assert storage.is_connected() # Create test data orderbook = OrderBookSnapshot( symbol="BTCUSDT", exchange="test", timestamp=datetime.now(timezone.utc), bids=[PriceLevel(price=50000.0, size=1.0)], asks=[PriceLevel(price=50010.0, size=1.0)] ) # Store and verify result = await storage.store_orderbook(orderbook) assert result is True await storage.disconnect() except Exception as e: pytest.skip(f"Real database not available: {e}") @pytest.mark.skipif(not pytest.config.getoption("--integration"), reason="Integration tests require --integration flag") @pytest.mark.asyncio async def test_real_cache_integration(self): """Test with real Redis instance""" try: from ..caching.redis_manager import RedisManager cache = RedisManager() await cache.connect() assert cache.is_connected() # Test basic operations await cache.set("test_key", {"test": "data"}) result = await cache.get("test_key") assert result is not None await cache.disconnect() except Exception as e: pytest.skip(f"Real cache not available: {e}") def pytest_configure(config): """Configure pytest with custom markers""" config.addinivalue_line("markers", "integration: mark test as integration test") def pytest_addoption(parser): """Add custom command line options""" parser.addoption( "--integration", action="store_true", default=False, help="run integration tests with real services" )