in the business - but WIP
This commit is contained in:
@ -149,7 +149,7 @@ class MEXCInterface(ExchangeInterface):
|
||||
return {}
|
||||
|
||||
def _send_private_request(self, method: str, endpoint: str, params: Optional[Dict[str, Any]] = None) -> Optional[Dict[str, Any]]:
|
||||
"""Send a private request to the exchange with proper signature"""
|
||||
"""Send a private request to the exchange with proper signature and MEXC error handling"""
|
||||
if params is None:
|
||||
params = {}
|
||||
|
||||
@ -191,8 +191,51 @@ class MEXCInterface(ExchangeInterface):
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
else:
|
||||
logger.error(f"API error: Status Code: {response.status_code}, Response: {response.text}")
|
||||
return None
|
||||
# Parse error response for specific error codes
|
||||
try:
|
||||
error_data = response.json()
|
||||
error_code = error_data.get('code')
|
||||
error_msg = error_data.get('msg', 'Unknown error')
|
||||
|
||||
# Handle specific MEXC error codes
|
||||
if error_code == 30005: # Oversold
|
||||
logger.warning(f"MEXC Oversold detected (Code 30005) for {endpoint}. This indicates risk control measures are active.")
|
||||
logger.warning(f"Possible causes: Market manipulation detection, abnormal trading patterns, or position limits.")
|
||||
logger.warning(f"Action: Waiting before retry and reducing position size if needed.")
|
||||
|
||||
# For oversold errors, we should not retry immediately
|
||||
# Return a special error structure that the trading executor can handle
|
||||
return {
|
||||
'error': 'oversold',
|
||||
'code': 30005,
|
||||
'message': error_msg,
|
||||
'retry_after': 60 # Suggest waiting 60 seconds
|
||||
}
|
||||
elif error_code == 30001: # Transaction direction not allowed
|
||||
logger.error(f"MEXC: Transaction direction not allowed for {endpoint}")
|
||||
return {
|
||||
'error': 'direction_not_allowed',
|
||||
'code': 30001,
|
||||
'message': error_msg
|
||||
}
|
||||
elif error_code == 30004: # Insufficient position
|
||||
logger.error(f"MEXC: Insufficient position for {endpoint}")
|
||||
return {
|
||||
'error': 'insufficient_position',
|
||||
'code': 30004,
|
||||
'message': error_msg
|
||||
}
|
||||
else:
|
||||
logger.error(f"MEXC API error: Code: {error_code}, Message: {error_msg}")
|
||||
return {
|
||||
'error': 'api_error',
|
||||
'code': error_code,
|
||||
'message': error_msg
|
||||
}
|
||||
except:
|
||||
# Fallback if response is not JSON
|
||||
logger.error(f"API error: Status Code: {response.status_code}, Response: {response.text}")
|
||||
return None
|
||||
|
||||
except requests.exceptions.HTTPError as http_err:
|
||||
logger.error(f"HTTP error for {endpoint}: Status Code: {response.status_code}, Response: {response.text}")
|
||||
@ -297,75 +340,92 @@ class MEXCInterface(ExchangeInterface):
|
||||
|
||||
def place_order(self, symbol: str, side: str, order_type: str, quantity: float, price: Optional[float] = None) -> Dict[str, Any]:
    """Place a new order on MEXC.

    The symbol is normalised with ``_format_spot_symbol`` and checked with
    ``is_symbol_supported``. The quantity is rounded to the symbol's
    precision and, when a price is known, raised to a $10 minimum order
    value. MARKET orders are converted into aggressively priced LIMIT
    orders (0.2% through the last traded price from ``get_ticker``).

    Args:
        symbol: Trading pair as used by the caller (formatted internally).
        side: Order side; upper-cased before sending ('BUY' gets the
            premium price on MARKET conversion, anything else the discount).
        order_type: Order type; 'MARKET' (any case) is rewritten to 'LIMIT'.
        quantity: Requested base-asset quantity.
        price: Limit price. May be None for MARKET orders, in which case
            the ticker's last price is used.

    Returns:
        Whatever ``_send_private_request`` returned when it is non-empty.
        This is either the exchange response or a structured error dict
        carrying an ``'error'`` key. Returns ``{}`` when the symbol is
        unsupported, no ticker price is available, the request returned
        nothing, or an exception was raised.
    """
    # Local import, matching the function-scope import style already used
    # in this method for ``traceback``.
    from decimal import Decimal

    def _plain(number: float) -> str:
        """Render a float without scientific notation (1e-05 -> '0.00001')."""
        return format(Decimal(str(number)), 'f')

    try:
        logger.info(f"MEXC: place_order called with symbol={symbol}, side={side}, order_type={order_type}, quantity={quantity}, price={price}")

        formatted_symbol = self._format_spot_symbol(symbol)
        logger.info(f"MEXC: Formatted symbol: {symbol} -> {formatted_symbol}")

        # Check if symbol is supported for API trading
        if not self.is_symbol_supported(symbol):
            supported_symbols = self.get_api_symbols()
            logger.error(f"Symbol {formatted_symbol} is not supported for API trading")
            logger.info(f"Supported symbols include: {supported_symbols[:10]}...")  # Show first 10
            return {}

        # Round quantity to MEXC precision requirements: 6 decimals for BTC
        # pairs, 5 decimals for ETH pairs and everything else. ETH is
        # checked first, so a symbol containing both uses 5.
        original_quantity = quantity
        precision = 6 if ('BTC' in formatted_symbol and 'ETH' not in formatted_symbol) else 5
        quantity = round(quantity, precision)
        # Ensure minimum order value (typically $10+ for MEXC).
        # NOTE(review): when price is None (MARKET order without a price)
        # this check is skipped because the price is only resolved below.
        if price and quantity * price < 10.0:
            quantity = round(10.0 / price, precision)  # Adjust to minimum $10 order

        if quantity != original_quantity:
            logger.info(f"MEXC: Adjusted quantity: {original_quantity} -> {quantity}")

        # MEXC doesn't support MARKET orders for many pairs - use LIMIT orders instead
        if order_type.upper() == 'MARKET':
            if price is None:
                ticker = self.get_ticker(symbol)
                if ticker and 'last' in ticker:
                    current_price = float(ticker['last'])
                    # Buy slightly above / sell slightly below the market
                    # so the limit order executes immediately.
                    if side.upper() == 'BUY':
                        price = current_price * 1.002  # 0.2% premium for immediate buy execution
                    else:
                        price = current_price * 0.998  # 0.2% discount for immediate sell execution
                else:
                    logger.error("Cannot get current price for market order conversion")
                    return {}

            # Convert to limit order with immediate execution pricing
            order_type = 'LIMIT'
            logger.info(f"MEXC: Converting MARKET to aggressive LIMIT order at ${price:.2f} for immediate execution")

        # Prepare order parameters. Numeric fields are sent as strings and
        # rendered in plain decimal form: str() of a small float yields
        # scientific notation ('1e-05'), which is not a valid decimal field.
        params = {
            'symbol': formatted_symbol,
            'side': side.upper(),
            'type': order_type.upper(),
            'quantity': _plain(quantity),
        }

        if price is not None:
            # Format price to remove unnecessary decimal places (e.g., 2900.0 -> 2900)
            params['price'] = str(int(price)) if price == int(price) else _plain(price)

        logger.info(f"MEXC: Placing {side.upper()} {order_type.upper()} order for {quantity} {formatted_symbol} at price {price}")
        logger.info(f"MEXC: Order parameters: {params}")

        # Use the standard private request method which handles timestamp and signature
        endpoint = "order"
        result = self._send_private_request("POST", endpoint, params)

        if isinstance(result, dict) and 'error' in result:
            # _send_private_request reports exchange-side failures as a
            # non-empty dict with an 'error' key. Hand it back to the
            # caller unchanged, but do not log it as a success.
            logger.error(f"MEXC: Order rejected by exchange: {result}")
            return result

        if result:
            logger.info(f"MEXC: Order placed successfully: {result}")
            return result

        logger.error(f"MEXC: Failed to place order - _send_private_request returned None/empty result")
        logger.error(f"MEXC: Failed order details - symbol: {formatted_symbol}, side: {side}, type: {order_type}, quantity: {quantity}, price: {price}")
        return {}

    except Exception as e:
        logger.error(f"MEXC: Exception in place_order: {e}")
        logger.error(f"MEXC: Exception details - symbol: {symbol}, side: {side}, type: {order_type}, quantity: {quantity}, price: {price}")
        import traceback
        logger.error(f"MEXC: Full traceback: {traceback.format_exc()}")
        return {}
|
||||
|
||||
def cancel_order(self, symbol: str, order_id: str) -> Dict[str, Any]:
|
||||
|
@ -757,20 +757,98 @@ class DQNAgent:
|
||||
# Sanitize and stack states and next_states
|
||||
sanitized_states = []
|
||||
sanitized_next_states = []
|
||||
sanitized_experiences = []
|
||||
|
||||
for i, e in enumerate(experiences):
|
||||
try:
|
||||
state = np.asarray(e[0], dtype=np.float32)
|
||||
next_state = np.asarray(e[3], dtype=np.float32)
|
||||
# Extract experience components
|
||||
state, action, reward, next_state, done = e
|
||||
|
||||
# Sanitize state - convert any dict/object to float arrays
|
||||
state = self._sanitize_state_data(state)
|
||||
next_state = self._sanitize_state_data(next_state)
|
||||
|
||||
# Sanitize action - ensure it's an integer
|
||||
if isinstance(action, dict):
|
||||
# If action is a dict, try to extract action value
|
||||
action = action.get('action', action.get('value', 0))
|
||||
action = int(action) if not isinstance(action, (int, np.integer)) else action
|
||||
|
||||
# Sanitize reward - ensure it's a float
|
||||
if isinstance(reward, dict):
|
||||
# If reward is a dict, try to extract reward value
|
||||
reward = reward.get('reward', reward.get('value', 0.0))
|
||||
reward = float(reward) if not isinstance(reward, (float, np.floating)) else reward
|
||||
|
||||
# Sanitize done - ensure it's a boolean/float
|
||||
if isinstance(done, dict):
|
||||
done = done.get('done', done.get('value', False))
|
||||
done = bool(done) if not isinstance(done, (bool, np.bool_)) else done
|
||||
|
||||
# Convert state to proper numpy array
|
||||
state = np.asarray(state, dtype=np.float32)
|
||||
next_state = np.asarray(next_state, dtype=np.float32)
|
||||
|
||||
# Add to sanitized lists
|
||||
sanitized_states.append(state)
|
||||
sanitized_next_states.append(next_state)
|
||||
sanitized_experiences.append((state, action, reward, next_state, done))
|
||||
|
||||
except Exception as ex:
|
||||
print(f"[DQNAgent] Bad experience at index {i}: {ex}")
|
||||
continue
|
||||
|
||||
if not sanitized_states or not sanitized_next_states:
|
||||
print("[DQNAgent] No valid states in replay batch.")
|
||||
return 0.0 # Return float instead of None for consistency
|
||||
states = torch.FloatTensor(np.stack(sanitized_states)).to(self.device)
|
||||
next_states = torch.FloatTensor(np.stack(sanitized_next_states)).to(self.device)
|
||||
|
||||
# Validate all states have the same dimensions before stacking
|
||||
expected_dim = getattr(self, 'state_size', getattr(self, 'state_dim', 403))
|
||||
if isinstance(expected_dim, tuple):
|
||||
expected_dim = np.prod(expected_dim)
|
||||
|
||||
# Filter out states with wrong dimensions and fix them
|
||||
valid_states = []
|
||||
valid_next_states = []
|
||||
valid_experiences = []
|
||||
|
||||
for i, (state, next_state, exp) in enumerate(zip(sanitized_states, sanitized_next_states, sanitized_experiences)):
|
||||
# Ensure states have correct dimensions
|
||||
if len(state) != expected_dim:
|
||||
logger.debug(f"Fixing state dimension: {len(state)} -> {expected_dim}")
|
||||
if len(state) < expected_dim:
|
||||
# Pad with zeros
|
||||
padded_state = np.zeros(expected_dim, dtype=np.float32)
|
||||
padded_state[:len(state)] = state
|
||||
state = padded_state
|
||||
else:
|
||||
# Truncate
|
||||
state = state[:expected_dim]
|
||||
|
||||
if len(next_state) != expected_dim:
|
||||
logger.debug(f"Fixing next_state dimension: {len(next_state)} -> {expected_dim}")
|
||||
if len(next_state) < expected_dim:
|
||||
# Pad with zeros
|
||||
padded_next_state = np.zeros(expected_dim, dtype=np.float32)
|
||||
padded_next_state[:len(next_state)] = next_state
|
||||
next_state = padded_next_state
|
||||
else:
|
||||
# Truncate
|
||||
next_state = next_state[:expected_dim]
|
||||
|
||||
valid_states.append(state)
|
||||
valid_next_states.append(next_state)
|
||||
valid_experiences.append(exp)
|
||||
|
||||
if not valid_states:
|
||||
print("[DQNAgent] No valid states after dimension fixing.")
|
||||
return 0.0
|
||||
|
||||
# Use validated experiences for training
|
||||
experiences = valid_experiences
|
||||
|
||||
states = torch.FloatTensor(np.stack(valid_states)).to(self.device)
|
||||
next_states = torch.FloatTensor(np.stack(valid_next_states)).to(self.device)
|
||||
|
||||
# Choose appropriate replay method
|
||||
if self.use_mixed_precision:
|
||||
@ -797,28 +875,42 @@ class DQNAgent:
|
||||
extrema_indices = np.random.choice(len(self.extrema_memory), size=min(self.batch_size, len(self.extrema_memory)), replace=False)
|
||||
extrema_batch = [self.extrema_memory[i] for i in extrema_indices]
|
||||
|
||||
# Extract tensors from extrema batch
|
||||
extrema_states = torch.FloatTensor(np.array([e[0] for e in extrema_batch])).to(self.device)
|
||||
extrema_actions = torch.LongTensor(np.array([e[1] for e in extrema_batch])).to(self.device)
|
||||
extrema_rewards = torch.FloatTensor(np.array([e[2] for e in extrema_batch])).to(self.device)
|
||||
extrema_next_states = torch.FloatTensor(np.array([e[3] for e in extrema_batch])).to(self.device)
|
||||
extrema_dones = torch.FloatTensor(np.array([e[4] for e in extrema_batch])).to(self.device)
|
||||
# Sanitize extrema batch
|
||||
sanitized_extrema = []
|
||||
for e in extrema_batch:
|
||||
try:
|
||||
state, action, reward, next_state, done = e
|
||||
state = self._sanitize_state_data(state)
|
||||
next_state = self._sanitize_state_data(next_state)
|
||||
state = np.asarray(state, dtype=np.float32)
|
||||
next_state = np.asarray(next_state, dtype=np.float32)
|
||||
sanitized_extrema.append((state, action, reward, next_state, done))
|
||||
except:
|
||||
continue
|
||||
|
||||
# Use a slightly reduced learning rate for extrema training
|
||||
old_lr = self.optimizer.param_groups[0]['lr']
|
||||
self.optimizer.param_groups[0]['lr'] = old_lr * 0.8
|
||||
|
||||
# Train on extrema memory
|
||||
if self.use_mixed_precision:
|
||||
extrema_loss = self._replay_mixed_precision(extrema_states, extrema_actions, extrema_rewards, extrema_next_states, extrema_dones)
|
||||
else:
|
||||
extrema_loss = self._replay_standard(extrema_batch)
|
||||
|
||||
# Reset learning rate
|
||||
self.optimizer.param_groups[0]['lr'] = old_lr
|
||||
|
||||
# Log extrema loss
|
||||
logger.info(f"Extra training on extrema points, loss: {extrema_loss:.4f}")
|
||||
if sanitized_extrema:
|
||||
# Extract tensors from extrema batch
|
||||
extrema_states = torch.FloatTensor(np.array([e[0] for e in sanitized_extrema])).to(self.device)
|
||||
extrema_actions = torch.LongTensor(np.array([e[1] for e in sanitized_extrema])).to(self.device)
|
||||
extrema_rewards = torch.FloatTensor(np.array([e[2] for e in sanitized_extrema])).to(self.device)
|
||||
extrema_next_states = torch.FloatTensor(np.array([e[3] for e in sanitized_extrema])).to(self.device)
|
||||
extrema_dones = torch.FloatTensor(np.array([e[4] for e in sanitized_extrema])).to(self.device)
|
||||
|
||||
# Use a slightly reduced learning rate for extrema training
|
||||
old_lr = self.optimizer.param_groups[0]['lr']
|
||||
self.optimizer.param_groups[0]['lr'] = old_lr * 0.8
|
||||
|
||||
# Train on extrema memory
|
||||
if self.use_mixed_precision:
|
||||
extrema_loss = self._replay_mixed_precision(extrema_states, extrema_actions, extrema_rewards, extrema_next_states, extrema_dones)
|
||||
else:
|
||||
extrema_loss = self._replay_standard(sanitized_extrema)
|
||||
|
||||
# Reset learning rate
|
||||
self.optimizer.param_groups[0]['lr'] = old_lr
|
||||
|
||||
# Log extrema loss
|
||||
logger.info(f"Extra training on extrema points, loss: {extrema_loss:.4f}")
|
||||
|
||||
# Randomly train on price movement examples (similar to extrema)
|
||||
if random.random() < 0.3 and len(self.price_movement_memory) >= self.batch_size:
|
||||
@ -826,28 +918,42 @@ class DQNAgent:
|
||||
price_indices = np.random.choice(len(self.price_movement_memory), size=min(self.batch_size, len(self.price_movement_memory)), replace=False)
|
||||
price_batch = [self.price_movement_memory[i] for i in price_indices]
|
||||
|
||||
# Extract tensors from price movement batch
|
||||
price_states = torch.FloatTensor(np.array([e[0] for e in price_batch])).to(self.device)
|
||||
price_actions = torch.LongTensor(np.array([e[1] for e in price_batch])).to(self.device)
|
||||
price_rewards = torch.FloatTensor(np.array([e[2] for e in price_batch])).to(self.device)
|
||||
price_next_states = torch.FloatTensor(np.array([e[3] for e in price_batch])).to(self.device)
|
||||
price_dones = torch.FloatTensor(np.array([e[4] for e in price_batch])).to(self.device)
|
||||
# Sanitize price movement batch
|
||||
sanitized_price = []
|
||||
for e in price_batch:
|
||||
try:
|
||||
state, action, reward, next_state, done = e
|
||||
state = self._sanitize_state_data(state)
|
||||
next_state = self._sanitize_state_data(next_state)
|
||||
state = np.asarray(state, dtype=np.float32)
|
||||
next_state = np.asarray(next_state, dtype=np.float32)
|
||||
sanitized_price.append((state, action, reward, next_state, done))
|
||||
except:
|
||||
continue
|
||||
|
||||
# Use a slightly reduced learning rate for price movement training
|
||||
old_lr = self.optimizer.param_groups[0]['lr']
|
||||
self.optimizer.param_groups[0]['lr'] = old_lr * 0.75
|
||||
|
||||
# Train on price movement memory
|
||||
if self.use_mixed_precision:
|
||||
price_loss = self._replay_mixed_precision(price_states, price_actions, price_rewards, price_next_states, price_dones)
|
||||
else:
|
||||
price_loss = self._replay_standard(price_batch)
|
||||
|
||||
# Reset learning rate
|
||||
self.optimizer.param_groups[0]['lr'] = old_lr
|
||||
|
||||
# Log price movement loss
|
||||
logger.info(f"Extra training on price movement examples, loss: {price_loss:.4f}")
|
||||
if sanitized_price:
|
||||
# Extract tensors from price movement batch
|
||||
price_states = torch.FloatTensor(np.array([e[0] for e in sanitized_price])).to(self.device)
|
||||
price_actions = torch.LongTensor(np.array([e[1] for e in sanitized_price])).to(self.device)
|
||||
price_rewards = torch.FloatTensor(np.array([e[2] for e in sanitized_price])).to(self.device)
|
||||
price_next_states = torch.FloatTensor(np.array([e[3] for e in sanitized_price])).to(self.device)
|
||||
price_dones = torch.FloatTensor(np.array([e[4] for e in sanitized_price])).to(self.device)
|
||||
|
||||
# Use a slightly reduced learning rate for price movement training
|
||||
old_lr = self.optimizer.param_groups[0]['lr']
|
||||
self.optimizer.param_groups[0]['lr'] = old_lr * 0.75
|
||||
|
||||
# Train on price movement memory
|
||||
if self.use_mixed_precision:
|
||||
price_loss = self._replay_mixed_precision(price_states, price_actions, price_rewards, price_next_states, price_dones)
|
||||
else:
|
||||
price_loss = self._replay_standard(sanitized_price)
|
||||
|
||||
# Reset learning rate
|
||||
self.optimizer.param_groups[0]['lr'] = old_lr
|
||||
|
||||
# Log price movement loss
|
||||
logger.info(f"Extra training on price movement examples, loss: {price_loss:.4f}")
|
||||
|
||||
return loss
|
||||
|
||||
@ -1452,4 +1558,106 @@ class DQNAgent:
|
||||
total_params = 0
|
||||
for param in self.policy_net.parameters():
|
||||
total_params += param.numel()
|
||||
return total_params
|
||||
return total_params
|
||||
|
||||
def _sanitize_state_data(self, state):
    """Coerce a state into a float32 numpy array.

    Handled inputs:
      * numeric ``np.ndarray``: cast with ``astype(np.float32)``.
      * object-dtype ``np.ndarray``: every element, at any rank, is
        converted with ``self._extract_numeric_value``.
      * list / tuple: elements (and elements of directly nested
        lists/tuples) are converted with ``self._extract_numeric_value``.
      * dict: values are converted in sorted-key order.
      * anything else: wrapped into a one-element array.

    Args:
        state: The raw state object to sanitize.

    Returns:
        A float32 ``np.ndarray``. If any step raises, a warning is logged
        and a zero vector is returned whose length comes from
        ``self.state_size``, then ``self.state_dim``, then 403 (tuples are
        flattened with ``np.prod``).
    """
    try:
        if isinstance(state, np.ndarray):
            if state.dtype != object:
                return state.astype(np.float32)

            if state.ndim == 0:
                # A 0-d object array has no elements to iterate; keep the
                # historical outcome (zero-vector fallback below).
                raise ValueError("0-d object array cannot be sanitized element-wise")

            # np.ndindex visits every index tuple of the array, so arrays
            # of rank 3+ are converted element by element too. Previously
            # only two axes were walked, so deeper sub-arrays were handed
            # to the scalar extractor as a whole.
            cleaned = np.zeros(state.shape, dtype=np.float32)
            for index in np.ndindex(state.shape):
                cleaned[index] = self._extract_numeric_value(state[index])
            return cleaned

        if isinstance(state, (list, tuple)):
            # One level of nesting is expanded into rows; anything else is
            # treated as a scalar-like item.
            converted = [
                [self._extract_numeric_value(sub_item) for sub_item in item]
                if isinstance(item, (list, tuple))
                else self._extract_numeric_value(item)
                for item in state
            ]
            return np.array(converted, dtype=np.float32)

        if isinstance(state, dict):
            # Sorted keys give a stable element order across calls.
            ordered = [self._extract_numeric_value(state[key]) for key in sorted(state)]
            return np.array(ordered, dtype=np.float32)

        # Single value: wrap it into a one-element array.
        return np.array([self._extract_numeric_value(state)], dtype=np.float32)

    except Exception as e:
        logger.warning(f"Error sanitizing state data: {e}. Using zero array with expected dimensions.")
        # Fall back to a zero vector of the expected state dimension.
        expected_size = getattr(self, 'state_size', getattr(self, 'state_dim', 403))
        if isinstance(expected_size, tuple):
            expected_size = np.prod(expected_size)
        return np.zeros(int(expected_size), dtype=np.float32)
|
||||
|
||||
def _extract_numeric_value(self, value):
    """Extract a single float from a loosely typed value.

    Conversion rules, checked in this order:
      * ``None`` -> 0.0
      * int / float / numpy number (bool included, since it subclasses
        int) -> ``float(value)``
      * dict -> the first present key among 'value', 'price', 'close',
        'last', 'amount', 'quantity' (converted recursively); otherwise
        the first directly numeric value; otherwise 0.0
      * str -> ``float(value)``, or 0.0 when it does not parse
      * object with a ``timestamp`` attribute -> ``float(value.timestamp())``
      * non-empty list / tuple -> its first element, converted recursively
      * anything else -> 0.0

    Args:
        value: The object to convert.

    Returns:
        The extracted float. Any ordinary exception raised while
        converting results in 0.0; interpreter-level exceptions such as
        ``KeyboardInterrupt`` are no longer swallowed.
    """
    try:
        if value is None:
            return 0.0

        # bool is a subclass of int, so True/False are handled here too.
        if isinstance(value, (int, float, np.number)):
            return float(value)

        if isinstance(value, dict):
            # Try common keys for numeric data first.
            for key in ('value', 'price', 'close', 'last', 'amount', 'quantity'):
                if key in value:
                    return self._extract_numeric_value(value[key])
            # No common key: take the first directly numeric value.
            for candidate in value.values():
                if isinstance(candidate, (int, float, np.number)):
                    return float(candidate)
            return 0.0

        if isinstance(value, str):
            try:
                return float(value)
            except ValueError:
                # Non-numeric text contributes nothing.
                return 0.0

        # Datetime-like objects expose timestamp().
        if hasattr(value, 'timestamp'):
            return float(value.timestamp())

        # Sequences: use the first element only.
        if isinstance(value, (list, tuple)) and len(value) > 0:
            return self._extract_numeric_value(value[0])

        return 0.0

    except Exception:
        # Best-effort conversion: unusable data becomes 0.0. Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        return 0.0
|
Reference in New Issue
Block a user