Files
gogo2/COBY/caching/data_serializer.py
Dobromir Popov ff75af566c caching
2025-08-04 17:55:00 +03:00

355 lines
13 KiB
Python

"""
Data serialization for Redis caching.
"""
import json
import pickle
import gzip
from typing import Any, Union, Dict, List
from datetime import datetime
from ..models.core import (
OrderBookSnapshot, TradeEvent, HeatmapData, PriceBuckets,
OrderBookMetrics, ImbalanceMetrics, ConsolidatedOrderBook
)
from ..utils.logging import get_logger
from ..utils.exceptions import ProcessingError
logger = get_logger(__name__)
class DataSerializer:
"""
Handles serialization and deserialization of data for Redis storage.
Supports multiple serialization formats:
- JSON for simple data
- Pickle for complex objects
- Compressed formats for large data
"""
def __init__(self, use_compression: bool = True):
"""
Initialize data serializer.
Args:
use_compression: Whether to use gzip compression
"""
self.use_compression = use_compression
self.serialization_stats = {
'serialized': 0,
'deserialized': 0,
'compression_ratio': 0.0,
'errors': 0
}
logger.info(f"Data serializer initialized (compression: {use_compression})")
def serialize(self, data: Any, format_type: str = 'auto') -> bytes:
"""
Serialize data for Redis storage.
Args:
data: Data to serialize
format_type: Serialization format ('json', 'pickle', 'auto')
Returns:
bytes: Serialized data
"""
try:
# Determine format
if format_type == 'auto':
format_type = self._determine_format(data)
# Serialize based on format
if format_type == 'json':
serialized = self._serialize_json(data)
elif format_type == 'pickle':
serialized = self._serialize_pickle(data)
else:
raise ValueError(f"Unsupported format: {format_type}")
# Apply compression if enabled
if self.use_compression:
original_size = len(serialized)
serialized = gzip.compress(serialized)
compressed_size = len(serialized)
# Update compression ratio
if original_size > 0:
ratio = compressed_size / original_size
self.serialization_stats['compression_ratio'] = (
(self.serialization_stats['compression_ratio'] *
self.serialization_stats['serialized'] + ratio) /
(self.serialization_stats['serialized'] + 1)
)
self.serialization_stats['serialized'] += 1
return serialized
except Exception as e:
self.serialization_stats['errors'] += 1
logger.error(f"Serialization error: {e}")
raise ProcessingError(f"Serialization failed: {e}", "SERIALIZE_ERROR")
def deserialize(self, data: bytes, format_type: str = 'auto') -> Any:
"""
Deserialize data from Redis storage.
Args:
data: Serialized data
format_type: Expected format ('json', 'pickle', 'auto')
Returns:
Any: Deserialized data
"""
try:
# Decompress if needed
if self.use_compression:
try:
data = gzip.decompress(data)
except gzip.BadGzipFile:
# Data might not be compressed
pass
# Determine format if auto
if format_type == 'auto':
format_type = self._detect_format(data)
# Deserialize based on format
if format_type == 'json':
result = self._deserialize_json(data)
elif format_type == 'pickle':
result = self._deserialize_pickle(data)
else:
raise ValueError(f"Unsupported format: {format_type}")
self.serialization_stats['deserialized'] += 1
return result
except Exception as e:
self.serialization_stats['errors'] += 1
logger.error(f"Deserialization error: {e}")
raise ProcessingError(f"Deserialization failed: {e}", "DESERIALIZE_ERROR")
def _determine_format(self, data: Any) -> str:
"""Determine best serialization format for data"""
# Use JSON for simple data types
if isinstance(data, (dict, list, str, int, float, bool)) or data is None:
return 'json'
# Use pickle for complex objects
return 'pickle'
def _detect_format(self, data: bytes) -> str:
"""Detect serialization format from data"""
try:
# Try JSON first
json.loads(data.decode('utf-8'))
return 'json'
except (json.JSONDecodeError, UnicodeDecodeError):
# Assume pickle
return 'pickle'
def _serialize_json(self, data: Any) -> bytes:
"""Serialize data as JSON"""
# Convert complex objects to dictionaries
if hasattr(data, '__dict__'):
data = self._object_to_dict(data)
elif isinstance(data, list):
data = [self._object_to_dict(item) if hasattr(item, '__dict__') else item
for item in data]
json_str = json.dumps(data, default=self._json_serializer, ensure_ascii=False)
return json_str.encode('utf-8')
def _deserialize_json(self, data: bytes) -> Any:
"""Deserialize JSON data"""
json_str = data.decode('utf-8')
return json.loads(json_str, object_hook=self._json_deserializer)
def _serialize_pickle(self, data: Any) -> bytes:
"""Serialize data as pickle"""
return pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)
def _deserialize_pickle(self, data: bytes) -> Any:
"""Deserialize pickle data"""
return pickle.loads(data)
def _object_to_dict(self, obj: Any) -> Dict:
"""Convert object to dictionary for JSON serialization"""
if isinstance(obj, (OrderBookSnapshot, TradeEvent, HeatmapData,
PriceBuckets, OrderBookMetrics, ImbalanceMetrics,
ConsolidatedOrderBook)):
result = {
'__type__': obj.__class__.__name__,
'__data__': {}
}
# Convert object attributes
for key, value in obj.__dict__.items():
if isinstance(value, datetime):
result['__data__'][key] = {
'__datetime__': value.isoformat()
}
elif isinstance(value, list):
result['__data__'][key] = [
self._object_to_dict(item) if hasattr(item, '__dict__') else item
for item in value
]
elif hasattr(value, '__dict__'):
result['__data__'][key] = self._object_to_dict(value)
else:
result['__data__'][key] = value
return result
else:
return obj.__dict__ if hasattr(obj, '__dict__') else obj
def _json_serializer(self, obj: Any) -> Any:
"""Custom JSON serializer for special types"""
if isinstance(obj, datetime):
return {'__datetime__': obj.isoformat()}
elif hasattr(obj, '__dict__'):
return self._object_to_dict(obj)
else:
return str(obj)
def _json_deserializer(self, obj: Dict) -> Any:
"""Custom JSON deserializer for special types"""
if '__datetime__' in obj:
return datetime.fromisoformat(obj['__datetime__'])
elif '__type__' in obj and '__data__' in obj:
return self._reconstruct_object(obj['__type__'], obj['__data__'])
else:
return obj
def _reconstruct_object(self, type_name: str, data: Dict) -> Any:
"""Reconstruct object from serialized data"""
# Import required classes
from ..models.core import (
OrderBookSnapshot, TradeEvent, HeatmapData, PriceBuckets,
OrderBookMetrics, ImbalanceMetrics, ConsolidatedOrderBook,
PriceLevel, HeatmapPoint
)
# Map type names to classes
type_map = {
'OrderBookSnapshot': OrderBookSnapshot,
'TradeEvent': TradeEvent,
'HeatmapData': HeatmapData,
'PriceBuckets': PriceBuckets,
'OrderBookMetrics': OrderBookMetrics,
'ImbalanceMetrics': ImbalanceMetrics,
'ConsolidatedOrderBook': ConsolidatedOrderBook,
'PriceLevel': PriceLevel,
'HeatmapPoint': HeatmapPoint
}
if type_name in type_map:
cls = type_map[type_name]
# Recursively deserialize nested objects
processed_data = {}
for key, value in data.items():
if isinstance(value, dict) and '__datetime__' in value:
processed_data[key] = datetime.fromisoformat(value['__datetime__'])
elif isinstance(value, dict) and '__type__' in value:
processed_data[key] = self._reconstruct_object(
value['__type__'], value['__data__']
)
elif isinstance(value, list):
processed_data[key] = [
self._reconstruct_object(item['__type__'], item['__data__'])
if isinstance(item, dict) and '__type__' in item
else item
for item in value
]
else:
processed_data[key] = value
try:
return cls(**processed_data)
except Exception as e:
logger.warning(f"Failed to reconstruct {type_name}: {e}")
return processed_data
else:
logger.warning(f"Unknown type for reconstruction: {type_name}")
return data
def serialize_heatmap(self, heatmap: HeatmapData) -> bytes:
"""Specialized serialization for heatmap data"""
try:
# Create optimized representation
heatmap_dict = {
'symbol': heatmap.symbol,
'timestamp': heatmap.timestamp.isoformat(),
'bucket_size': heatmap.bucket_size,
'points': [
{
'p': point.price, # price
'v': point.volume, # volume
'i': point.intensity, # intensity
's': point.side # side
}
for point in heatmap.data
]
}
return self.serialize(heatmap_dict, 'json')
except Exception as e:
logger.error(f"Heatmap serialization error: {e}")
# Fallback to standard serialization
return self.serialize(heatmap, 'pickle')
def deserialize_heatmap(self, data: bytes) -> HeatmapData:
"""Specialized deserialization for heatmap data"""
try:
# Try optimized format first
heatmap_dict = self.deserialize(data, 'json')
if isinstance(heatmap_dict, dict) and 'points' in heatmap_dict:
from ..models.core import HeatmapData, HeatmapPoint
# Reconstruct heatmap points
points = []
for point_data in heatmap_dict['points']:
point = HeatmapPoint(
price=point_data['p'],
volume=point_data['v'],
intensity=point_data['i'],
side=point_data['s']
)
points.append(point)
# Create heatmap
heatmap = HeatmapData(
symbol=heatmap_dict['symbol'],
timestamp=datetime.fromisoformat(heatmap_dict['timestamp']),
bucket_size=heatmap_dict['bucket_size']
)
heatmap.data = points
return heatmap
else:
# Fallback to standard deserialization
return self.deserialize(data, 'pickle')
except Exception as e:
logger.error(f"Heatmap deserialization error: {e}")
# Final fallback
return self.deserialize(data, 'pickle')
def get_stats(self) -> Dict[str, Any]:
"""Get serialization statistics"""
return self.serialization_stats.copy()
def reset_stats(self) -> None:
"""Reset serialization statistics"""
self.serialization_stats = {
'serialized': 0,
'deserialized': 0,
'compression_ratio': 0.0,
'errors': 0
}
logger.info("Serialization statistics reset")