reduce COB model to 400M params

commit fdb9e83cf9 (parent 2cbc202d45)
Author: Dobromir Popov
Date:   2025-06-25 13:11:00 +03:00

6 changed files with 195 additions and 29 deletions


@@ -29,14 +29,14 @@ class MassiveRLNetwork(nn.Module):
     future price movements with high confidence. Designed for 200ms inference cycles.
     """
-    def __init__(self, input_size: int = 2000, hidden_size: int = 4096, num_layers: int = 12):
+    def __init__(self, input_size: int = 2000, hidden_size: int = 2048, num_layers: int = 8):
         super(MassiveRLNetwork, self).__init__()
         self.input_size = input_size
         self.hidden_size = hidden_size
         self.num_layers = num_layers
 
-        # Massive input processing layers
+        # Optimized input processing layers for 400M params
         self.input_projection = nn.Sequential(
             nn.Linear(input_size, hidden_size),
             nn.LayerNorm(hidden_size),
@@ -44,25 +44,25 @@ class MassiveRLNetwork(nn.Module):
             nn.Dropout(0.1)
         )
 
-        # Massive transformer-style encoder layers
+        # Efficient transformer-style encoder layers (400M target)
         self.encoder_layers = nn.ModuleList([
             nn.TransformerEncoderLayer(
                 d_model=hidden_size,
-                nhead=32,  # Large number of attention heads
-                dim_feedforward=hidden_size * 4,  # 16K feedforward
+                nhead=16,  # Reduced attention heads for efficiency
+                dim_feedforward=hidden_size * 3,  # 6K feedforward (reduced from 16K)
                 dropout=0.1,
                 activation='gelu',
                 batch_first=True
             ) for _ in range(num_layers)
         ])
 
-        # Market regime understanding layers
+        # Market regime understanding layers (optimized for 400M)
         self.regime_encoder = nn.Sequential(
-            nn.Linear(hidden_size, hidden_size * 2),
-            nn.LayerNorm(hidden_size * 2),
+            nn.Linear(hidden_size, hidden_size + 512),  # Smaller expansion
+            nn.LayerNorm(hidden_size + 512),
             nn.GELU(),
             nn.Dropout(0.1),
-            nn.Linear(hidden_size * 2, hidden_size),
+            nn.Linear(hidden_size + 512, hidden_size),
             nn.LayerNorm(hidden_size),
             nn.GELU()
         )
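
As a sanity check on the new size, the following is a minimal sketch (not part of the commit) that counts parameters for only the layers visible in this diff, assuming the constructor arguments shown above. The prediction heads defined elsewhere in the file are assumed to account for the remainder of the ~400M target.

import torch.nn as nn

# Configuration from the reduced model (this commit)
input_size, hidden_size, num_layers = 2000, 2048, 8

input_projection = nn.Sequential(
    nn.Linear(input_size, hidden_size),
    nn.LayerNorm(hidden_size),
    nn.GELU(),
    nn.Dropout(0.1),
)

encoder_layers = nn.ModuleList([
    nn.TransformerEncoderLayer(
        d_model=hidden_size,
        nhead=16,
        dim_feedforward=hidden_size * 3,
        dropout=0.1,
        activation='gelu',
        batch_first=True,
    ) for _ in range(num_layers)
])

regime_encoder = nn.Sequential(
    nn.Linear(hidden_size, hidden_size + 512),
    nn.LayerNorm(hidden_size + 512),
    nn.GELU(),
    nn.Dropout(0.1),
    nn.Linear(hidden_size + 512, hidden_size),
    nn.LayerNorm(hidden_size),
    nn.GELU(),
)

total = sum(p.numel()
            for module in (input_projection, encoder_layers, regime_encoder)
            for p in module.parameters())
print(f"{total / 1e6:.1f}M parameters")
# Prints roughly 350M: each encoder layer is ~42M params
# (attention ~16.8M + feedforward ~25.2M), times 8 layers,
# plus ~4.1M input projection and ~10.5M regime encoder.

By the same arithmetic, the previous configuration (hidden_size=4096, num_layers=12, nhead=32, dim_feedforward=16384) works out to roughly 200M parameters per encoder layer, i.e. over 2.4B for the encoder stack alone, so the reduction is considerably larger than the headline "400M" suggests.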