We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent ddf46e4 commit 89f50bf — Copy full SHA for 89f50bf
docs/ARCHITECTURE.md
@@ -72,7 +72,7 @@ Key hyperparameters in `CRSMConfig`:
72
```python
73
@dataclass
74
class CRSMConfig:
75
- vocab_size: int = 32000 # Vocabulary size
+ vocab_size: int = 50257 # Vocabulary size
76
hidden_size: int = 2048 # Model hidden dimension (continuous state size)
77
intermediate_size: int = 8192 # FFN intermediate dimension
78
num_hidden_layers: int = 24 # Number of Mamba SSM layers
0 commit comments