Reduce COB model to 400M parameters
This commit is contained in:
@ -29,14 +29,14 @@ class MassiveRLNetwork(nn.Module):
|
||||
future price movements with high confidence. Designed for 200ms inference cycles.
|
||||
"""
|
||||
|
||||
def __init__(self, input_size: int = 2000, hidden_size: int = 4096, num_layers: int = 12):
|
||||
def __init__(self, input_size: int = 2000, hidden_size: int = 2048, num_layers: int = 8):
|
||||
super(MassiveRLNetwork, self).__init__()
|
||||
|
||||
self.input_size = input_size
|
||||
self.hidden_size = hidden_size
|
||||
self.num_layers = num_layers
|
||||
|
||||
# Massive input processing layers
|
||||
# Optimized input processing layers for 400M params
|
||||
self.input_projection = nn.Sequential(
|
||||
nn.Linear(input_size, hidden_size),
|
||||
nn.LayerNorm(hidden_size),
|
||||
@ -44,25 +44,25 @@ class MassiveRLNetwork(nn.Module):
|
||||
nn.Dropout(0.1)
|
||||
)
|
||||
|
||||
# Massive transformer-style encoder layers
|
||||
# Efficient transformer-style encoder layers (400M target)
|
||||
self.encoder_layers = nn.ModuleList([
|
||||
nn.TransformerEncoderLayer(
|
||||
d_model=hidden_size,
|
||||
nhead=32, # Large number of attention heads
|
||||
dim_feedforward=hidden_size * 4, # 16K feedforward
|
||||
nhead=16, # Reduced attention heads for efficiency
|
||||
dim_feedforward=hidden_size * 3, # 6K feedforward (reduced from 16K)
|
||||
dropout=0.1,
|
||||
activation='gelu',
|
||||
batch_first=True
|
||||
) for _ in range(num_layers)
|
||||
])
|
||||
|
||||
# Market regime understanding layers
|
||||
# Market regime understanding layers (optimized for 400M)
|
||||
self.regime_encoder = nn.Sequential(
|
||||
nn.Linear(hidden_size, hidden_size * 2),
|
||||
nn.LayerNorm(hidden_size * 2),
|
||||
nn.Linear(hidden_size, hidden_size + 512), # Smaller expansion
|
||||
nn.LayerNorm(hidden_size + 512),
|
||||
nn.GELU(),
|
||||
nn.Dropout(0.1),
|
||||
nn.Linear(hidden_size * 2, hidden_size),
|
||||
nn.Linear(hidden_size + 512, hidden_size),
|
||||
nn.LayerNorm(hidden_size),
|
||||
nn.GELU()
|
||||
)
|
||||
|
Reference in New Issue
Block a user