""" Data serialization for Redis caching. """ import json import pickle import gzip from typing import Any, Union, Dict, List from datetime import datetime from ..models.core import ( OrderBookSnapshot, TradeEvent, HeatmapData, PriceBuckets, OrderBookMetrics, ImbalanceMetrics, ConsolidatedOrderBook ) from ..utils.logging import get_logger from ..utils.exceptions import ProcessingError logger = get_logger(__name__) class DataSerializer: """ Handles serialization and deserialization of data for Redis storage. Supports multiple serialization formats: - JSON for simple data - Pickle for complex objects - Compressed formats for large data """ def __init__(self, use_compression: bool = True): """ Initialize data serializer. Args: use_compression: Whether to use gzip compression """ self.use_compression = use_compression self.serialization_stats = { 'serialized': 0, 'deserialized': 0, 'compression_ratio': 0.0, 'errors': 0 } logger.info(f"Data serializer initialized (compression: {use_compression})") def serialize(self, data: Any, format_type: str = 'auto') -> bytes: """ Serialize data for Redis storage. Args: data: Data to serialize format_type: Serialization format ('json', 'pickle', 'auto') Returns: bytes: Serialized data """ try: # Determine format if format_type == 'auto': format_type = self._determine_format(data) # Serialize based on format if format_type == 'json': serialized = self._serialize_json(data) elif format_type == 'pickle': serialized = self._serialize_pickle(data) else: raise ValueError(f"Unsupported format: {format_type}") # Apply compression if enabled if self.use_compression: original_size = len(serialized) serialized = gzip.compress(serialized) compressed_size = len(serialized) # Update compression ratio if original_size > 0: ratio = compressed_size / original_size self.serialization_stats['compression_ratio'] = ( (self.serialization_stats['compression_ratio'] * self.serialization_stats['serialized'] + ratio) / (self.serialization_stats['serialized'] + 1) ) self.serialization_stats['serialized'] += 1 return serialized except Exception as e: self.serialization_stats['errors'] += 1 logger.error(f"Serialization error: {e}") raise ProcessingError(f"Serialization failed: {e}", "SERIALIZE_ERROR") def deserialize(self, data: bytes, format_type: str = 'auto') -> Any: """ Deserialize data from Redis storage. Args: data: Serialized data format_type: Expected format ('json', 'pickle', 'auto') Returns: Any: Deserialized data """ try: # Decompress if needed if self.use_compression: try: data = gzip.decompress(data) except gzip.BadGzipFile: # Data might not be compressed pass # Determine format if auto if format_type == 'auto': format_type = self._detect_format(data) # Deserialize based on format if format_type == 'json': result = self._deserialize_json(data) elif format_type == 'pickle': result = self._deserialize_pickle(data) else: raise ValueError(f"Unsupported format: {format_type}") self.serialization_stats['deserialized'] += 1 return result except Exception as e: self.serialization_stats['errors'] += 1 logger.error(f"Deserialization error: {e}") raise ProcessingError(f"Deserialization failed: {e}", "DESERIALIZE_ERROR") def _determine_format(self, data: Any) -> str: """Determine best serialization format for data""" # Use JSON for simple data types if isinstance(data, (dict, list, str, int, float, bool)) or data is None: return 'json' # Use pickle for complex objects return 'pickle' def _detect_format(self, data: bytes) -> str: """Detect serialization format from data""" try: # Try JSON first json.loads(data.decode('utf-8')) return 'json' except (json.JSONDecodeError, UnicodeDecodeError): # Assume pickle return 'pickle' def _serialize_json(self, data: Any) -> bytes: """Serialize data as JSON""" # Convert complex objects to dictionaries if hasattr(data, '__dict__'): data = self._object_to_dict(data) elif isinstance(data, list): data = [self._object_to_dict(item) if hasattr(item, '__dict__') else item for item in data] json_str = json.dumps(data, default=self._json_serializer, ensure_ascii=False) return json_str.encode('utf-8') def _deserialize_json(self, data: bytes) -> Any: """Deserialize JSON data""" json_str = data.decode('utf-8') return json.loads(json_str, object_hook=self._json_deserializer) def _serialize_pickle(self, data: Any) -> bytes: """Serialize data as pickle""" return pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL) def _deserialize_pickle(self, data: bytes) -> Any: """Deserialize pickle data""" return pickle.loads(data) def _object_to_dict(self, obj: Any) -> Dict: """Convert object to dictionary for JSON serialization""" if isinstance(obj, (OrderBookSnapshot, TradeEvent, HeatmapData, PriceBuckets, OrderBookMetrics, ImbalanceMetrics, ConsolidatedOrderBook)): result = { '__type__': obj.__class__.__name__, '__data__': {} } # Convert object attributes for key, value in obj.__dict__.items(): if isinstance(value, datetime): result['__data__'][key] = { '__datetime__': value.isoformat() } elif isinstance(value, list): result['__data__'][key] = [ self._object_to_dict(item) if hasattr(item, '__dict__') else item for item in value ] elif hasattr(value, '__dict__'): result['__data__'][key] = self._object_to_dict(value) else: result['__data__'][key] = value return result else: return obj.__dict__ if hasattr(obj, '__dict__') else obj def _json_serializer(self, obj: Any) -> Any: """Custom JSON serializer for special types""" if isinstance(obj, datetime): return {'__datetime__': obj.isoformat()} elif hasattr(obj, '__dict__'): return self._object_to_dict(obj) else: return str(obj) def _json_deserializer(self, obj: Dict) -> Any: """Custom JSON deserializer for special types""" if '__datetime__' in obj: return datetime.fromisoformat(obj['__datetime__']) elif '__type__' in obj and '__data__' in obj: return self._reconstruct_object(obj['__type__'], obj['__data__']) else: return obj def _reconstruct_object(self, type_name: str, data: Dict) -> Any: """Reconstruct object from serialized data""" # Import required classes from ..models.core import ( OrderBookSnapshot, TradeEvent, HeatmapData, PriceBuckets, OrderBookMetrics, ImbalanceMetrics, ConsolidatedOrderBook, PriceLevel, HeatmapPoint ) # Map type names to classes type_map = { 'OrderBookSnapshot': OrderBookSnapshot, 'TradeEvent': TradeEvent, 'HeatmapData': HeatmapData, 'PriceBuckets': PriceBuckets, 'OrderBookMetrics': OrderBookMetrics, 'ImbalanceMetrics': ImbalanceMetrics, 'ConsolidatedOrderBook': ConsolidatedOrderBook, 'PriceLevel': PriceLevel, 'HeatmapPoint': HeatmapPoint } if type_name in type_map: cls = type_map[type_name] # Recursively deserialize nested objects processed_data = {} for key, value in data.items(): if isinstance(value, dict) and '__datetime__' in value: processed_data[key] = datetime.fromisoformat(value['__datetime__']) elif isinstance(value, dict) and '__type__' in value: processed_data[key] = self._reconstruct_object( value['__type__'], value['__data__'] ) elif isinstance(value, list): processed_data[key] = [ self._reconstruct_object(item['__type__'], item['__data__']) if isinstance(item, dict) and '__type__' in item else item for item in value ] else: processed_data[key] = value try: return cls(**processed_data) except Exception as e: logger.warning(f"Failed to reconstruct {type_name}: {e}") return processed_data else: logger.warning(f"Unknown type for reconstruction: {type_name}") return data def serialize_heatmap(self, heatmap: HeatmapData) -> bytes: """Specialized serialization for heatmap data""" try: # Create optimized representation heatmap_dict = { 'symbol': heatmap.symbol, 'timestamp': heatmap.timestamp.isoformat(), 'bucket_size': heatmap.bucket_size, 'points': [ { 'p': point.price, # price 'v': point.volume, # volume 'i': point.intensity, # intensity 's': point.side # side } for point in heatmap.data ] } return self.serialize(heatmap_dict, 'json') except Exception as e: logger.error(f"Heatmap serialization error: {e}") # Fallback to standard serialization return self.serialize(heatmap, 'pickle') def deserialize_heatmap(self, data: bytes) -> HeatmapData: """Specialized deserialization for heatmap data""" try: # Try optimized format first heatmap_dict = self.deserialize(data, 'json') if isinstance(heatmap_dict, dict) and 'points' in heatmap_dict: from ..models.core import HeatmapData, HeatmapPoint # Reconstruct heatmap points points = [] for point_data in heatmap_dict['points']: point = HeatmapPoint( price=point_data['p'], volume=point_data['v'], intensity=point_data['i'], side=point_data['s'] ) points.append(point) # Create heatmap heatmap = HeatmapData( symbol=heatmap_dict['symbol'], timestamp=datetime.fromisoformat(heatmap_dict['timestamp']), bucket_size=heatmap_dict['bucket_size'] ) heatmap.data = points return heatmap else: # Fallback to standard deserialization return self.deserialize(data, 'pickle') except Exception as e: logger.error(f"Heatmap deserialization error: {e}") # Final fallback return self.deserialize(data, 'pickle') def get_stats(self) -> Dict[str, Any]: """Get serialization statistics""" return self.serialization_stats.copy() def reset_stats(self) -> None: """Reset serialization statistics""" self.serialization_stats = { 'serialized': 0, 'deserialized': 0, 'compression_ratio': 0.0, 'errors': 0 } logger.info("Serialization statistics reset")