wip
@@ -12,6 +12,15 @@ from datetime import datetime, timedelta
 import json
 import pickle
 from sklearn.preprocessing import MinMaxScaler
+import sys
+
+# Add project root to sys.path
+project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+if project_root not in sys.path:
+    sys.path.append(project_root)
+
+# Import BinanceHistoricalData from the root module
+from realtime import BinanceHistoricalData
 
 logger = logging.getLogger(__name__)
 
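The triple os.path.dirname call walks three directory levels up from this file so that the top-level realtime module becomes importable. A minimal sketch of how that resolves, assuming a hypothetical layout of repo/NN/utils/data_interface.py (the actual file path is not shown in this diff):

    import os

    # Hypothetical file location: /repo/NN/utils/data_interface.py
    path = os.path.abspath('/repo/NN/utils/data_interface.py')
    path = os.path.dirname(path)  # /repo/NN/utils
    path = os.path.dirname(path)  # /repo/NN
    path = os.path.dirname(path)  # /repo  <- directory expected to contain realtime.py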
@@ -39,6 +48,9 @@ class DataInterface:
         self.data_dir = data_dir
         self.scalers = {}  # Store scalers for each timeframe
 
+        # Initialize the historical data fetcher
+        self.historical_data = BinanceHistoricalData()
+
         # Create data directory if it doesn't exist
         os.makedirs(self.data_dir, exist_ok=True)
 
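Given the MinMaxScaler import added above and the per-timeframe self.scalers dict, each timeframe's OHLCV features are presumably normalized independently, with the fitted scaler cached for later inverse transforms. A minimal sketch of that pattern; the fit_timeframe_scaler helper and the column list are assumptions for illustration, not part of this commit:

    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler

    def fit_timeframe_scaler(scalers, timeframe, df):
        # Hypothetical helper: fit one scaler per timeframe and cache it in a
        # dict like self.scalers, kept for inverse_transform at prediction time.
        cols = ['open', 'high', 'low', 'close', 'volume']  # assumed OHLCV columns
        scaler = MinMaxScaler()
        scaled = scaler.fit_transform(df[cols].values)
        scalers[timeframe] = scaler
        return pd.DataFrame(scaled, columns=cols, index=df.index)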
@@ -59,138 +71,39 @@ class DataInterface:
         Returns:
             pd.DataFrame: DataFrame with OHLCV data
         """
-        cache_file = os.path.join(self.data_dir, f"{self.symbol.replace('/', '_')}_{timeframe}.csv")
-
-        # For 1s timeframe, always fetch fresh data
-        if timeframe == '1s':
-            use_cache = False
-
-        # Check if cached data exists and is recent
-        if use_cache and os.path.exists(cache_file):
-            try:
-                df = pd.read_csv(cache_file, parse_dates=['timestamp'])
-                # If we have enough data and it's recent, use it
-                if len(df) >= n_candles:
-                    logger.info(f"Using cached data for {self.symbol} {timeframe} ({len(df)} candles)")
-                    self.dataframes[timeframe] = df
-                    return df.tail(n_candles)
-            except Exception as e:
-                logger.error(f"Error reading cached data: {str(e)}")
-
-        # If we get here, we need to fetch data
-        try:
-            logger.info(f"Fetching historical data for {self.symbol} {timeframe}")
-
-            # For 1s timeframe, we need more data points
-            if timeframe == '1s':
-                n_candles = min(n_candles * 60, 10000)  # Up to 10k ticks
-
-            # Placeholder for real data fetching
-            self._fetch_data_from_exchange(timeframe, n_candles)
-
-            # Save to cache (except for 1s timeframe)
-            if self.dataframes[timeframe] is not None and timeframe != '1s':
-                self.dataframes[timeframe].to_csv(cache_file, index=False)
-                return self.dataframes[timeframe]
-            else:
-                # Create dummy data as fallback
-                logger.warning(f"Could not fetch data for {self.symbol} {timeframe}, using dummy data")
-                df = self._create_dummy_data(timeframe, n_candles)
-                self.dataframes[timeframe] = df
-                return df
-        except Exception as e:
-            logger.error(f"Error fetching data: {str(e)}")
-            return None
-
-    def _fetch_data_from_exchange(self, timeframe, n_candles):
-        """
-        Placeholder method for fetching data from an exchange.
-        In a real implementation, this would connect to an exchange API.
-        """
-        # This is a placeholder - in a real implementation this would make API calls
-        # to a cryptocurrency exchange to fetch OHLCV data
-
-        # For now, just generate dummy data
-        self.dataframes[timeframe] = self._create_dummy_data(timeframe, n_candles)
-
-    def _create_dummy_data(self, timeframe, n_candles):
-        """
-        Create dummy OHLCV data for testing purposes.
-
-        Args:
-            timeframe (str): Timeframe to create data for
-            n_candles (int): Number of candles to create
-
-        Returns:
-            pd.DataFrame: DataFrame with dummy OHLCV data
-        """
-        # Map timeframe to seconds
-        tf_seconds = {
-            '1s': 1,  # Added 1s timeframe
-            '1m': 60,
-            '5m': 300,
-            '15m': 900,
-            '30m': 1800,
-            '1h': 3600,
-            '4h': 14400,
-            '1d': 86400
-        }
-        seconds = tf_seconds.get(timeframe, 3600)  # Default to 1h
-
-        # Create timestamps
-        end_time = datetime.now()
-        timestamps = [end_time - timedelta(seconds=seconds * i) for i in range(n_candles)]
-        timestamps.reverse()  # Oldest first
-
-        # Generate random price data with realistic patterns
-        np.random.seed(42)  # For reproducibility
-
-        # Start price
-        price = 50000  # For BTC/USDT
-        prices = []
-        volumes = []
-
-        for i in range(n_candles):
-            # Random walk with drift and volatility based on timeframe
-            drift = 0.0001 * seconds  # Larger drift for larger timeframes
-            volatility = 0.01 * np.sqrt(seconds / 3600)  # Scale volatility by sqrt of time
-
-            # Daily/weekly patterns
-            if timeframe in ['1d', '4h']:
-                # Add some cyclical patterns
-                cycle = np.sin(i / 7 * np.pi) * 0.02  # Weekly cycle
-            else:
-                cycle = np.sin(i / 24 * np.pi) * 0.01  # Daily cycle
-
-            # Calculate price change with random walk + cycles (clamped to prevent overflow)
-            price_change = price * np.clip(drift + volatility * np.random.randn() + cycle, -0.1, 0.1)
-            price = np.clip(price + price_change, 1000, 100000)  # Keep price in reasonable range
-
-            # Generate OHLC from the price
-            open_price = price
-            high_price = price * (1 + abs(0.005 * np.random.randn()))
-            low_price = price * (1 - abs(0.005 * np.random.randn()))
-            close_price = price * (1 + 0.002 * np.random.randn())
-
-            # Ensure high >= open, close, low and low <= open, close
-            high_price = max(high_price, open_price, close_price)
-            low_price = min(low_price, open_price, close_price)
-
-            # Generate volume (higher for larger price movements) with safe calculation
-            volume = 10000 + 5000 * np.random.rand() + abs(price_change) / price * 10000
-
-            prices.append((open_price, high_price, low_price, close_price))
-            volumes.append(volume)
-
-            # Update price for next iteration
-            price = close_price
-
-        # Create DataFrame
-        df = pd.DataFrame(
-            [(t, o, h, l, c, v) for t, (o, h, l, c), v in zip(timestamps, prices, volumes)],
-            columns=['timestamp', 'open', 'high', 'low', 'close', 'volume']
-        )
-
-        return df
+        # Map timeframe string to seconds for BinanceHistoricalData
+        timeframe_to_seconds = {
+            '1s': 1,
+            '1m': 60,
+            '5m': 300,
+            '15m': 900,
+            '30m': 1800,
+            '1h': 3600,
+            '4h': 14400,
+            '1d': 86400
+        }
+        interval_seconds = timeframe_to_seconds.get(timeframe, 3600)  # Default to 1h if not found
+
+        try:
+            # Fetch data using BinanceHistoricalData
+            df = self.historical_data.get_historical_candles(
+                symbol=self.symbol,
+                interval_seconds=interval_seconds,
+                limit=n_candles
+            )
+
+            if not df.empty:
+                logger.info(f"Using data for {self.symbol} {timeframe} ({len(df)} candles)")
+                self.dataframes[timeframe] = df
+                return df
+            else:
+                logger.error(f"No data available for {self.symbol} {timeframe}")
+                return None
+
+        except Exception as e:
+            logger.error(f"Error fetching data for {self.symbol} {timeframe}: {str(e)}")
+            return None
 
     def prepare_nn_input(self, timeframes=None, n_candles=500, window_size=20):
         """
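The replacement body reduces the method to a string-to-seconds lookup plus a single fetch call, dropping the CSV cache and the dummy-data fallback entirely. A short usage sketch of the new path, assuming get_historical_candles returns an OHLCV DataFrame as the code above implies (the symbol format here is an assumption):

    from realtime import BinanceHistoricalData

    historical_data = BinanceHistoricalData()

    # '5m' maps to 300 seconds via timeframe_to_seconds
    df = historical_data.get_historical_candles(
        symbol='BTC/USDT',       # assumed symbol format
        interval_seconds=300,
        limit=500,
    )
    if df is not None and not df.empty:
        print(df.tail())  # most recent candles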
@@ -459,24 +372,27 @@ class DataInterface:
             n_candles (int): Number of future candles to look at
 
         Returns:
-            np.ndarray: Future prices array
+            np.ndarray: Future prices array (1D array)
         """
-        if len(prices) < n_candles + 1:
+        if prices is None or len(prices) < n_candles + 1:
             return None
 
-        # For each price point, get the maximum price in the next n_candles
-        future_prices = np.zeros(len(prices))
-
-        for i in range(len(prices) - n_candles):
+        # Convert to numpy array if it's not already
+        prices_np = np.array(prices).flatten() if not isinstance(prices, np.ndarray) else prices.flatten()
+
+        # For each price point, get the maximum price in the next n_candles
+        future_prices = np.zeros(len(prices_np))
+
+        for i in range(len(prices_np) - n_candles):
             # Get the next n candles
-            next_candles = prices[i+1:i+n_candles+1]
+            next_candles = prices_np[i+1:i+n_candles+1]
             # Use the maximum price as the future price
             future_prices[i] = np.max(next_candles)
 
         # For the last n_candles points, use the last available price
-        future_prices[-n_candles:] = prices[-1]
+        future_prices[-n_candles:] = prices_np[-1]
 
-        return future_prices
+        return future_prices.flatten()  # Ensure it's a 1D array
 
     def prepare_training_data(self, refresh=False, refresh_interval=300):
         """
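The rewritten get_future_prices is a forward-looking rolling maximum. For long arrays the same result can be computed without the Python loop; the sketch below is an equivalent vectorized alternative (not part of this commit), matching the loop's semantics:

    import numpy as np
    from numpy.lib.stride_tricks import sliding_window_view

    def future_max_prices(prices, n_candles):
        # Equivalent to the loop above: max of the next n_candles per point,
        # with the last n_candles entries filled with the final price.
        prices = np.asarray(prices, dtype=float).flatten()
        if prices.size < n_candles + 1:
            return None
        future = np.full(prices.size, prices[-1])
        # windows[i] covers prices[i+1 : i+1+n_candles]
        windows = sliding_window_view(prices[1:], n_candles)
        future[:prices.size - n_candles] = windows.max(axis=1)
        return future

    # e.g. future_max_prices([1., 3., 2., 5., 4.], 2) -> [3., 5., 5., 4., 4.]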