gugarosa committed on
Commit
92557d0
1 Parent(s): 45f4b21

Improves type hinting on configuration arguments.

Browse files
Files changed (1) hide show
  1. configuration_mixformer_sequential.py +12 -12
configuration_mixformer_sequential.py CHANGED
@@ -21,24 +21,24 @@ class MixFormerSequentialConfig(PretrainedConfig):
21
 
22
  def __init__(
23
  self,
24
- vocab_size: Optional[int] = 50304,
25
- n_positions: Optional[int] = 2048,
26
- n_embd: Optional[int] = 1024,
27
- n_layer: Optional[int] = 20,
28
  n_inner: Optional[int] = None,
29
- n_head: Optional[int] = 16,
30
  n_head_kv: Optional[int] = None,
31
  rotary_dim: Optional[int] = 32,
32
  activation_function: Optional[str] = "gelu_new",
33
  flash_rotary: bool = False,
34
  fused_dense: bool = False,
35
- attn_pdrop: Optional[float] = 0.0,
36
- embd_pdrop: Optional[float] = 0.0,
37
- resid_pdrop: Optional[float] = 0.0,
38
- layer_norm_epsilon: Optional[float] = 1e-5,
39
- initializer_range: Optional[float] = 0.02,
40
- tie_word_embeddings: Optional[bool] = False,
41
- pad_vocab_size_multiple: Optional[int] = 64,
42
  **kwargs
43
  ) -> None:
44
  self.vocab_size = int(math.ceil(vocab_size / pad_vocab_size_multiple) * pad_vocab_size_multiple)
 
21
 
22
  def __init__(
23
  self,
24
+ vocab_size: int = 50304,
25
+ n_positions: int = 2048,
26
+ n_embd: int = 1024,
27
+ n_layer: int = 20,
28
  n_inner: Optional[int] = None,
29
+ n_head: int = 16,
30
  n_head_kv: Optional[int] = None,
31
  rotary_dim: Optional[int] = 32,
32
  activation_function: Optional[str] = "gelu_new",
33
  flash_rotary: bool = False,
34
  fused_dense: bool = False,
35
+ attn_pdrop: float = 0.0,
36
+ embd_pdrop: float = 0.0,
37
+ resid_pdrop: float = 0.0,
38
+ layer_norm_epsilon: float = 1e-5,
39
+ initializer_range: float = 0.02,
40
+ tie_word_embeddings: bool = False,
41
+ pad_vocab_size_multiple: int = 64,
42
  **kwargs
43
  ) -> None:
44
  self.vocab_size = int(math.ceil(vocab_size / pad_vocab_size_multiple) * pad_vocab_size_multiple)