timeseries storage

This commit is contained in:
Dobromir Popov 2025-03-18 23:03:30 +02:00
parent c1ad6cddd6
commit 4aefca2d6c

View File

@ -52,16 +52,38 @@ class TradeTickStorage:
def get_ticks_as_df(self) -> pd.DataFrame:
    """Return the stored ticks as a DataFrame with datetime timestamps.

    The ``timestamp`` column of ``self.ticks`` is converted with
    ``pd.to_datetime(..., unit='ms')``.

    Returns:
        A DataFrame built from ``self.ticks``. An empty DataFrame is
        returned when there are no ticks, when the ticks carry no
        ``timestamp`` column, or when the timestamp conversion fails.
    """
    if not self.ticks:
        logger.warning("No ticks available for DataFrame conversion")
        return pd.DataFrame()

    df = pd.DataFrame(self.ticks)
    if df.empty:
        return df

    logger.debug("Converting timestamps for %d ticks", len(df))

    # Validate the column *before* touching it, so a malformed tick list
    # yields an empty frame instead of a KeyError.
    if 'timestamp' not in df.columns:
        logger.error("Tick data missing timestamp column")
        return pd.DataFrame()

    # df is known to be non-empty here, so iloc[0] is safe.
    sample_ts = df['timestamp'].iloc[0]
    logger.debug(
        "Sample timestamp before conversion: %s, type: %s",
        sample_ts, type(sample_ts),
    )

    # Convert exactly once; any failure degrades to an empty frame.
    try:
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback.
        logger.exception("Error converting timestamps: %s", e)
        return pd.DataFrame()

    logger.debug("Timestamps converted to datetime successfully")
    logger.debug("Sample converted timestamp: %s", df['timestamp'].iloc[0])
    return df
def get_candles(self, interval_seconds: int = 1) -> pd.DataFrame:
"""Convert ticks to OHLCV candles at specified interval"""
if not self.ticks:
logger.warning("No ticks available for candle formation")
return pd.DataFrame()
# Ensure ticks are up to date
@ -70,28 +92,52 @@ class TradeTickStorage:
# Convert to DataFrame
df = self.get_ticks_as_df()
if df.empty:
logger.warning("Tick DataFrame is empty after conversion")
return pd.DataFrame()
# Use timestamp column for resampling
df = df.set_index('timestamp')
logger.info(f"Preparing to create candles from {len(df)} ticks")
# Create interval string for resampling
interval_str = f'{interval_seconds}S'
try:
# Use timestamp column for resampling
df = df.set_index('timestamp')
# Resample to create OHLCV candles
candles = df.resample(interval_str).agg({
'price': ['first', 'max', 'min', 'last'],
'volume': 'sum'
})
# Create interval string for resampling - use 's' instead of deprecated 'S'
interval_str = f'{interval_seconds}s'
# Flatten MultiIndex columns
candles.columns = ['open', 'high', 'low', 'close', 'volume']
# Resample to create OHLCV candles
logger.debug(f"Resampling with interval: {interval_str}")
candles = df.resample(interval_str).agg({
'price': ['first', 'max', 'min', 'last'],
'volume': 'sum'
})
# Reset index to get timestamp as column
candles = candles.reset_index()
# Check if resampling produced any data
if candles.empty:
logger.warning("Resampling produced empty dataframe - check timestamp distribution")
# Show timestamp ranges to diagnose potential resampling issues
if not df.empty:
min_time = df.index.min()
max_time = df.index.max()
logger.info(f"Tick timestamp range: {min_time} to {max_time}")
return pd.DataFrame()
logger.debug(f"Generated {len(candles)} candles from {len(self.ticks)} ticks")
return candles
# Flatten MultiIndex columns
candles.columns = ['open', 'high', 'low', 'close', 'volume']
# Reset index to get timestamp as column
candles = candles.reset_index()
# Ensure no NaN values
candles = candles.dropna()
logger.debug(f"Generated {len(candles)} candles from {len(self.ticks)} ticks")
return candles
except Exception as e:
logger.error(f"Error in candle formation: {str(e)}")
import traceback
logger.error(traceback.format_exc())
return pd.DataFrame()
class CandlestickData:
def __init__(self, max_length: int = 300):
@ -508,51 +554,59 @@ class RealTimeChart:
Input('interval-store', 'data')]
)
def update_chart(n, interval_data):
interval = interval_data.get('interval', 1)
try:
interval = interval_data.get('interval', 1)
logger.info(f"Updating chart for {self.symbol} with interval {interval}s")
fig = make_subplots(
rows=2, cols=1,
shared_xaxis=True,
vertical_spacing=0.03,
subplot_titles=(f'{self.symbol} Price ({interval}s)', 'Volume'),
row_heights=[0.7, 0.3]
)
# Get candlesticks from tick storage
df = self.tick_storage.get_candles(interval_seconds=interval)
if not df.empty:
# Add candlestick chart
fig.add_trace(
go.Candlestick(
x=df['timestamp'],
open=df['open'],
high=df['high'],
low=df['low'],
close=df['close'],
name='Price',
increasing_line_color='#33CC33', # Green
decreasing_line_color='#FF4136' # Red
),
row=1, col=1
fig = make_subplots(
rows=2, cols=1,
shared_xaxis=True,
vertical_spacing=0.03,
subplot_titles=(f'{self.symbol} Price ({interval}s)', 'Volume'),
row_heights=[0.7, 0.3]
)
# Add volume bars
colors = ['#33CC33' if close >= open else '#FF4136'
for close, open in zip(df['close'], df['open'])]
# Get candlesticks from tick storage
df = self.tick_storage.get_candles(interval_seconds=interval)
fig.add_trace(
go.Bar(
x=df['timestamp'],
y=df['volume'],
name='Volume',
marker_color=colors
),
row=2, col=1
)
# Debug information about the dataframe
logger.info(f"Candles dataframe empty: {df.empty}, tick count: {len(self.tick_storage.ticks)}")
# Add latest price line
if len(df) > 0:
if not df.empty and len(df) > 0:
logger.info(f"Candles dataframe shape: {df.shape}")
logger.info(f"Candles dataframe columns: {df.columns.tolist()}")
logger.info(f"Candles dataframe first row: {df.iloc[0].to_dict() if len(df) > 0 else 'No rows'}")
# Add candlestick chart
fig.add_trace(
go.Candlestick(
x=df['timestamp'],
open=df['open'],
high=df['high'],
low=df['low'],
close=df['close'],
name='Price',
increasing_line_color='#33CC33', # Green
decreasing_line_color='#FF4136' # Red
),
row=1, col=1
)
# Add volume bars
colors = ['#33CC33' if close >= open else '#FF4136'
for close, open in zip(df['close'], df['open'])]
fig.add_trace(
go.Bar(
x=df['timestamp'],
y=df['volume'],
name='Volume',
marker_color=colors
),
row=2, col=1
)
# Add latest price line and annotation
latest_price = df['close'].iloc[-1]
fig.add_shape(
type="line",
@ -564,7 +618,6 @@ class RealTimeChart:
row=1, col=1
)
# Add price label
fig.add_annotation(
x=df['timestamp'].max(),
y=latest_price,
@ -574,31 +627,67 @@ class RealTimeChart:
xshift=50,
row=1, col=1
)
else:
# If no data, add a text annotation to the chart
logger.warning(f"No data to display for {self.symbol} - tick count: {len(self.tick_storage.ticks)}")
if self.tick_storage.ticks:
logger.info(f"Sample tick: {self.tick_storage.ticks[0]}")
# Update layout with improved styling
fig.update_layout(
title_text=f"{self.symbol} Real-Time Data ({interval}s candles)",
title_x=0.5, # Center the title
xaxis_rangeslider_visible=False,
height=800,
template='plotly_dark',
paper_bgcolor='rgba(0,0,0,0)',
plot_bgcolor='rgba(0,0,0,0)',
font=dict(family="Arial, sans-serif", size=12, color="#2c3e50"),
showlegend=True,
legend=dict(
yanchor="top",
y=0.99,
xanchor="left",
x=0.01
# Add a message to the empty chart
fig.add_annotation(
x=0.5, y=0.5,
text=f"Waiting for {self.symbol} data...",
showarrow=False,
font=dict(size=20, color="white"),
xref="paper", yref="paper"
)
# Update layout with improved styling
fig.update_layout(
title_text=f"{self.symbol} Real-Time Data ({interval}s candles)",
title_x=0.5, # Center the title
xaxis_rangeslider_visible=False,
height=800,
template='plotly_dark',
paper_bgcolor='rgba(0,0,0,0)',
plot_bgcolor='rgba(0,0,0,0)',
font=dict(family="Arial, sans-serif", size=12, color="#2c3e50"),
showlegend=True,
legend=dict(
yanchor="top",
y=0.99,
xanchor="left",
x=0.01
)
)
)
# Update axes styling
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='rgba(128,128,128,0.2)')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='rgba(128,128,128,0.2)')
# Update axes styling
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='rgba(128,128,128,0.2)')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='rgba(128,128,128,0.2)')
return fig
return fig
except Exception as e:
logger.error(f"Error updating chart: {str(e)}")
import traceback
logger.error(traceback.format_exc())
# Create a minimal figure with error message
fig = go.Figure()
fig.add_annotation(
x=0.5, y=0.5,
text=f"Error updating chart: {str(e)}",
showarrow=False,
font=dict(size=14, color="red"),
xref="paper", yref="paper"
)
fig.update_layout(
height=800,
template='plotly_dark',
paper_bgcolor='rgba(0,0,0,0)',
plot_bgcolor='rgba(0,0,0,0)'
)
return fig
async def start_websocket(self):
ws = ExchangeWebSocket(self.symbol)
@ -610,6 +699,10 @@ class RealTimeChart:
continue
try:
logger.info(f"WebSocket connected for {self.symbol}, beginning data collection")
tick_count = 0
last_tick_count_log = time.time()
while True:
if not ws.running:
logger.warning("WebSocket not running, breaking loop")
@ -627,6 +720,7 @@ class RealTimeChart:
'high': data['high'],
'low': data['low']
}
logger.debug(f"Received kline data: {data}")
else:
# Use trade data
trade_data = {
@ -634,14 +728,26 @@ class RealTimeChart:
'price': data['price'],
'volume': data['volume']
}
logger.debug(f"Updating candlestick with data: {trade_data}")
# Store raw tick in the tick storage
self.tick_storage.add_tick(trade_data)
tick_count += 1
# Also update the old candlestick data for backward compatibility
self.candlestick_data.update_from_trade(trade_data)
# Log tick counts periodically
current_time = time.time()
if current_time - last_tick_count_log >= 10: # Log every 10 seconds
logger.info(f"{self.symbol}: Collected {tick_count} ticks in last {current_time - last_tick_count_log:.1f}s, total: {len(self.tick_storage.ticks)}")
last_tick_count_log = current_time
tick_count = 0
# Check if ticks are being converted to candles
if len(self.tick_storage.ticks) > 0:
sample_df = self.tick_storage.get_candles(interval_seconds=1)
logger.info(f"{self.symbol}: Sample candle count: {len(sample_df)}")
await asyncio.sleep(0.01)
except Exception as e:
logger.error(f"Error in WebSocket loop: {str(e)}")
@ -652,8 +758,23 @@ class RealTimeChart:
await asyncio.sleep(5)
def run(self, host='localhost', port=8050):
    """Start the Dash server, retrying once on ``port + 100`` if the port is busy.

    Args:
        host: Interface passed to ``self.app.run``.
        port: Preferred port. If starting on it fails with an
            "address already in use" error, one more attempt is made on
            ``port + 100``.

    Raises:
        Exception: Any error from the first ``self.app.run`` call that is
            not an address-in-use error is re-raised unchanged. A failure
            on the alternative port is logged but not raised.
    """
    # Local import keeps this method self-contained.
    import errno

    try:
        logger.info(f"Starting Dash server for {self.symbol} on {host}:{port}")
        self.app.run(debug=False, host=host, port=port)
    except Exception as e:
        logger.error(f"Error running Dash server on port {port}: {str(e)}")

        # Prefer the errno check; the message text varies by platform and
        # locale. The substring match is kept for errors that wrap the
        # original OSError without preserving errno.
        port_busy = (
            (isinstance(e, OSError) and e.errno == errno.EADDRINUSE)
            or "Address already in use" in str(e)
        )
        if not port_busy:
            # Re-raise other exceptions
            raise

        alt_port = port + 100
        logger.warning(f"Port {port} is busy, trying alternative port {alt_port}")
        try:
            self.app.run(debug=False, host=host, port=alt_port)
        except Exception as alt_e:
            # Best effort: report the second failure without raising.
            logger.error(f"Error running Dash server on alternative port {alt_port}: {str(alt_e)}")
async def main():
symbols = ["ETH/USDT", "BTC/USDT"]
@ -672,10 +793,12 @@ async def main():
server_threads = []
for i, chart in enumerate(charts):
port = 8050 + i # Use different ports for each chart
thread = Thread(target=lambda c=chart, p=8050+i: c.run(port=p)) # Fix lambda capture
logger.info(f"Starting chart for {chart.symbol} on port {port}")
thread = Thread(target=lambda c=chart, p=port: c.run(port=p)) # Ensure correct port is passed
thread.daemon = True
thread.start()
server_threads.append(thread)
logger.info(f"Thread started for {chart.symbol} on port {port}")
try:
# Keep the main task running