COBY : specs + task 1

This commit is contained in:
Dobromir Popov
2025-08-04 15:50:54 +03:00
parent e223bc90e9
commit de9fa4a421
28 changed files with 4165 additions and 1 deletions

22
COBY/utils/__init__.py Normal file
View File

@@ -0,0 +1,22 @@
"""
Utility functions and helpers for the multi-exchange data aggregation system.
"""
from .logging import setup_logging, get_logger
from .validation import validate_symbol, validate_price, validate_volume
from .timing import get_current_timestamp, format_timestamp
from .exceptions import COBYException, ConnectionError, ValidationError, ProcessingError
__all__ = [
'setup_logging',
'get_logger',
'validate_symbol',
'validate_price',
'validate_volume',
'get_current_timestamp',
'format_timestamp',
'COBYException',
'ConnectionError',
'ValidationError',
'ProcessingError'
]

57
COBY/utils/exceptions.py Normal file
View File

@@ -0,0 +1,57 @@
"""
Custom exceptions for the COBY system.
"""
class COBYException(Exception):
"""Base exception for COBY system"""
def __init__(self, message: str, error_code: str = None, details: dict = None):
super().__init__(message)
self.message = message
self.error_code = error_code
self.details = details or {}
def to_dict(self) -> dict:
"""Convert exception to dictionary"""
return {
'error': self.__class__.__name__,
'message': self.message,
'error_code': self.error_code,
'details': self.details
}
class ConnectionError(COBYException):
"""Exception raised for connection-related errors"""
pass
class ValidationError(COBYException):
"""Exception raised for data validation errors"""
pass
class ProcessingError(COBYException):
"""Exception raised for data processing errors"""
pass
class StorageError(COBYException):
"""Exception raised for storage-related errors"""
pass
class ConfigurationError(COBYException):
"""Exception raised for configuration errors"""
pass
class ReplayError(COBYException):
"""Exception raised for replay-related errors"""
pass
class AggregationError(COBYException):
"""Exception raised for aggregation errors"""
pass

149
COBY/utils/logging.py Normal file
View File

@@ -0,0 +1,149 @@
"""
Logging utilities for the COBY system.
"""
import logging
import logging.handlers
import sys
import uuid
from pathlib import Path
from typing import Optional
from contextvars import ContextVar
# Context variable for correlation ID
correlation_id: ContextVar[Optional[str]] = ContextVar('correlation_id', default=None)
class CorrelationFilter(logging.Filter):
"""Add correlation ID to log records"""
def filter(self, record):
record.correlation_id = correlation_id.get() or 'N/A'
return True
class COBYFormatter(logging.Formatter):
"""Custom formatter with correlation ID support"""
def __init__(self, include_correlation_id: bool = True):
self.include_correlation_id = include_correlation_id
if include_correlation_id:
fmt = '%(asctime)s - %(name)s - %(levelname)s - [%(correlation_id)s] - %(message)s'
else:
fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
super().__init__(fmt, datefmt='%Y-%m-%d %H:%M:%S')
def setup_logging(
level: str = 'INFO',
log_file: Optional[str] = None,
max_file_size: int = 100, # MB
backup_count: int = 5,
enable_correlation_id: bool = True,
console_output: bool = True
) -> None:
"""
Set up logging configuration for the COBY system.
Args:
level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
log_file: Path to log file (None = no file logging)
max_file_size: Maximum log file size in MB
backup_count: Number of backup files to keep
enable_correlation_id: Whether to include correlation IDs in logs
console_output: Whether to output logs to console
"""
# Convert string level to logging constant
numeric_level = getattr(logging, level.upper(), logging.INFO)
# Create root logger
root_logger = logging.getLogger()
root_logger.setLevel(numeric_level)
# Clear existing handlers
root_logger.handlers.clear()
# Create formatter
formatter = COBYFormatter(include_correlation_id=enable_correlation_id)
# Add correlation filter if enabled
correlation_filter = CorrelationFilter() if enable_correlation_id else None
# Console handler
if console_output:
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(numeric_level)
console_handler.setFormatter(formatter)
if correlation_filter:
console_handler.addFilter(correlation_filter)
root_logger.addHandler(console_handler)
# File handler
if log_file:
# Create log directory if it doesn't exist
log_path = Path(log_file)
log_path.parent.mkdir(parents=True, exist_ok=True)
# Rotating file handler
file_handler = logging.handlers.RotatingFileHandler(
log_file,
maxBytes=max_file_size * 1024 * 1024, # Convert MB to bytes
backupCount=backup_count
)
file_handler.setLevel(numeric_level)
file_handler.setFormatter(formatter)
if correlation_filter:
file_handler.addFilter(correlation_filter)
root_logger.addHandler(file_handler)
# Set specific logger levels
logging.getLogger('websockets').setLevel(logging.WARNING)
logging.getLogger('urllib3').setLevel(logging.WARNING)
logging.getLogger('requests').setLevel(logging.WARNING)
def get_logger(name: str) -> logging.Logger:
"""
Get a logger instance with the specified name.
Args:
name: Logger name (typically __name__)
Returns:
logging.Logger: Logger instance
"""
return logging.getLogger(name)
def set_correlation_id(corr_id: Optional[str] = None) -> str:
"""
Set correlation ID for current context.
Args:
corr_id: Correlation ID (generates UUID if None)
Returns:
str: The correlation ID that was set
"""
if corr_id is None:
corr_id = str(uuid.uuid4())[:8] # Short UUID
correlation_id.set(corr_id)
return corr_id
def get_correlation_id() -> Optional[str]:
"""
Get current correlation ID.
Returns:
str: Current correlation ID or None
"""
return correlation_id.get()
def clear_correlation_id() -> None:
"""Clear correlation ID from current context."""
correlation_id.set(None)

206
COBY/utils/timing.py Normal file
View File

@@ -0,0 +1,206 @@
"""
Timing utilities for the COBY system.
"""
import time
from datetime import datetime, timezone
from typing import Optional
def get_current_timestamp() -> datetime:
"""
Get current UTC timestamp.
Returns:
datetime: Current UTC timestamp
"""
return datetime.now(timezone.utc)
def format_timestamp(timestamp: datetime, format_str: str = "%Y-%m-%d %H:%M:%S.%f") -> str:
"""
Format timestamp to string.
Args:
timestamp: Timestamp to format
format_str: Format string
Returns:
str: Formatted timestamp string
"""
return timestamp.strftime(format_str)
def parse_timestamp(timestamp_str: str, format_str: str = "%Y-%m-%d %H:%M:%S.%f") -> datetime:
"""
Parse timestamp string to datetime.
Args:
timestamp_str: Timestamp string to parse
format_str: Format string
Returns:
datetime: Parsed timestamp
"""
dt = datetime.strptime(timestamp_str, format_str)
# Ensure timezone awareness
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return dt
def timestamp_to_unix(timestamp: datetime) -> float:
"""
Convert datetime to Unix timestamp.
Args:
timestamp: Datetime to convert
Returns:
float: Unix timestamp
"""
return timestamp.timestamp()
def unix_to_timestamp(unix_time: float) -> datetime:
"""
Convert Unix timestamp to datetime.
Args:
unix_time: Unix timestamp
Returns:
datetime: Converted datetime (UTC)
"""
return datetime.fromtimestamp(unix_time, tz=timezone.utc)
def calculate_time_diff(start: datetime, end: datetime) -> float:
"""
Calculate time difference in seconds.
Args:
start: Start timestamp
end: End timestamp
Returns:
float: Time difference in seconds
"""
return (end - start).total_seconds()
def is_timestamp_recent(timestamp: datetime, max_age_seconds: int = 60) -> bool:
"""
Check if timestamp is recent (within max_age_seconds).
Args:
timestamp: Timestamp to check
max_age_seconds: Maximum age in seconds
Returns:
bool: True if recent, False otherwise
"""
now = get_current_timestamp()
age = calculate_time_diff(timestamp, now)
return age <= max_age_seconds
def sleep_until(target_time: datetime) -> None:
"""
Sleep until target time.
Args:
target_time: Target timestamp to sleep until
"""
now = get_current_timestamp()
sleep_seconds = calculate_time_diff(now, target_time)
if sleep_seconds > 0:
time.sleep(sleep_seconds)
def get_milliseconds() -> int:
"""
Get current timestamp in milliseconds.
Returns:
int: Current timestamp in milliseconds
"""
return int(time.time() * 1000)
def milliseconds_to_timestamp(ms: int) -> datetime:
"""
Convert milliseconds to datetime.
Args:
ms: Milliseconds timestamp
Returns:
datetime: Converted datetime (UTC)
"""
return datetime.fromtimestamp(ms / 1000.0, tz=timezone.utc)
def round_timestamp(timestamp: datetime, seconds: int) -> datetime:
"""
Round timestamp to nearest interval.
Args:
timestamp: Timestamp to round
seconds: Interval in seconds
Returns:
datetime: Rounded timestamp
"""
unix_time = timestamp_to_unix(timestamp)
rounded_unix = round(unix_time / seconds) * seconds
return unix_to_timestamp(rounded_unix)
class Timer:
"""Simple timer for measuring execution time"""
def __init__(self):
self.start_time: Optional[float] = None
self.end_time: Optional[float] = None
def start(self) -> None:
"""Start the timer"""
self.start_time = time.perf_counter()
self.end_time = None
def stop(self) -> float:
"""
Stop the timer and return elapsed time.
Returns:
float: Elapsed time in seconds
"""
if self.start_time is None:
raise ValueError("Timer not started")
self.end_time = time.perf_counter()
return self.elapsed()
def elapsed(self) -> float:
"""
Get elapsed time.
Returns:
float: Elapsed time in seconds
"""
if self.start_time is None:
return 0.0
end = self.end_time or time.perf_counter()
return end - self.start_time
def __enter__(self):
"""Context manager entry"""
self.start()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit"""
self.stop()

217
COBY/utils/validation.py Normal file
View File

@@ -0,0 +1,217 @@
"""
Data validation utilities for the COBY system.
"""
import re
from typing import List, Optional
from decimal import Decimal, InvalidOperation
def validate_symbol(symbol: str) -> bool:
"""
Validate trading symbol format.
Args:
symbol: Trading symbol to validate
Returns:
bool: True if valid, False otherwise
"""
if not symbol or not isinstance(symbol, str):
return False
# Basic symbol format validation (e.g., BTCUSDT, ETH-USD)
pattern = r'^[A-Z0-9]{2,10}[-/]?[A-Z0-9]{2,10}$'
return bool(re.match(pattern, symbol.upper()))
def validate_price(price: float) -> bool:
"""
Validate price value.
Args:
price: Price to validate
Returns:
bool: True if valid, False otherwise
"""
if not isinstance(price, (int, float, Decimal)):
return False
try:
price_decimal = Decimal(str(price))
return price_decimal > 0 and price_decimal < Decimal('1e10') # Reasonable upper bound
except (InvalidOperation, ValueError):
return False
def validate_volume(volume: float) -> bool:
"""
Validate volume value.
Args:
volume: Volume to validate
Returns:
bool: True if valid, False otherwise
"""
if not isinstance(volume, (int, float, Decimal)):
return False
try:
volume_decimal = Decimal(str(volume))
return volume_decimal >= 0 and volume_decimal < Decimal('1e15') # Reasonable upper bound
except (InvalidOperation, ValueError):
return False
def validate_exchange_name(exchange: str) -> bool:
"""
Validate exchange name.
Args:
exchange: Exchange name to validate
Returns:
bool: True if valid, False otherwise
"""
if not exchange or not isinstance(exchange, str):
return False
# Exchange name should be alphanumeric with possible underscores/hyphens
pattern = r'^[a-zA-Z0-9_-]{2,20}$'
return bool(re.match(pattern, exchange))
def validate_timestamp_range(start_time, end_time) -> List[str]:
"""
Validate timestamp range.
Args:
start_time: Start timestamp
end_time: End timestamp
Returns:
List[str]: List of validation errors (empty if valid)
"""
errors = []
if start_time is None:
errors.append("Start time cannot be None")
if end_time is None:
errors.append("End time cannot be None")
if start_time and end_time and start_time >= end_time:
errors.append("Start time must be before end time")
return errors
def validate_bucket_size(bucket_size: float) -> bool:
"""
Validate price bucket size.
Args:
bucket_size: Bucket size to validate
Returns:
bool: True if valid, False otherwise
"""
if not isinstance(bucket_size, (int, float, Decimal)):
return False
try:
size_decimal = Decimal(str(bucket_size))
return size_decimal > 0 and size_decimal <= Decimal('1000') # Reasonable upper bound
except (InvalidOperation, ValueError):
return False
def validate_speed_multiplier(speed: float) -> bool:
"""
Validate replay speed multiplier.
Args:
speed: Speed multiplier to validate
Returns:
bool: True if valid, False otherwise
"""
if not isinstance(speed, (int, float)):
return False
return 0.01 <= speed <= 100.0 # 1% to 100x speed
def sanitize_symbol(symbol: str) -> str:
"""
Sanitize and normalize symbol format.
Args:
symbol: Symbol to sanitize
Returns:
str: Sanitized symbol
"""
if not symbol:
return ""
# Remove whitespace and convert to uppercase
sanitized = symbol.strip().upper()
# Remove invalid characters
sanitized = re.sub(r'[^A-Z0-9/-]', '', sanitized)
return sanitized
def validate_percentage(value: float, min_val: float = 0.0, max_val: float = 100.0) -> bool:
"""
Validate percentage value.
Args:
value: Percentage value to validate
min_val: Minimum allowed value
max_val: Maximum allowed value
Returns:
bool: True if valid, False otherwise
"""
if not isinstance(value, (int, float)):
return False
return min_val <= value <= max_val
def validate_connection_config(config: dict) -> List[str]:
"""
Validate connection configuration.
Args:
config: Configuration dictionary
Returns:
List[str]: List of validation errors (empty if valid)
"""
errors = []
# Required fields
required_fields = ['host', 'port']
for field in required_fields:
if field not in config:
errors.append(f"Missing required field: {field}")
# Validate host
if 'host' in config:
host = config['host']
if not isinstance(host, str) or not host.strip():
errors.append("Host must be a non-empty string")
# Validate port
if 'port' in config:
port = config['port']
if not isinstance(port, int) or not (1 <= port <= 65535):
errors.append("Port must be an integer between 1 and 65535")
return errors