visheratin committed on
Commit
4a20042
1 Parent(s): 6a81f25

Upload configuration_llava.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. configuration_llava.py +1 -56
configuration_llava.py CHANGED
@@ -2,62 +2,7 @@
2
 
3
  from transformers.configuration_utils import PretrainedConfig
4
  from open_clip import get_model_config
5
- import math
6
- from typing import Optional
7
-
8
- class PhiConfig(PretrainedConfig):
9
- """Phi configuration."""
10
-
11
- model_type = "phi-msft"
12
- attribute_map = {
13
- "max_position_embeddings": "n_positions",
14
- "hidden_size": "n_embd",
15
- "num_attention_heads": "n_head",
16
- "num_hidden_layers": "n_layer",
17
- }
18
-
19
- def __init__(
20
- self,
21
- vocab_size: int = 51200,
22
- n_positions: int = 2048,
23
- n_embd: int = 1024,
24
- n_layer: int = 20,
25
- n_inner: Optional[int] = None,
26
- n_head: int = 16,
27
- n_head_kv: Optional[int] = None,
28
- rotary_dim: Optional[int] = 32,
29
- activation_function: Optional[str] = "gelu_new",
30
- flash_attn: bool = False,
31
- flash_rotary: bool = False,
32
- fused_dense: bool = False,
33
- attn_pdrop: float = 0.0,
34
- embd_pdrop: float = 0.0,
35
- resid_pdrop: float = 0.0,
36
- layer_norm_epsilon: float = 1e-5,
37
- initializer_range: float = 0.02,
38
- tie_word_embeddings: bool = False,
39
- pad_vocab_size_multiple: int = 64,
40
- **kwargs
41
- ) -> None:
42
- self.vocab_size = int(math.ceil(vocab_size / pad_vocab_size_multiple) * pad_vocab_size_multiple)
43
- self.n_positions = n_positions
44
- self.n_embd = n_embd
45
- self.n_layer = n_layer
46
- self.n_inner = n_inner
47
- self.n_head = n_head
48
- self.n_head_kv = n_head_kv
49
- self.rotary_dim = min(rotary_dim, n_embd // n_head)
50
- self.activation_function = activation_function
51
- self.flash_attn = flash_attn
52
- self.flash_rotary = flash_rotary
53
- self.fused_dense = fused_dense
54
- self.attn_pdrop = attn_pdrop
55
- self.embd_pdrop = embd_pdrop
56
- self.resid_pdrop = resid_pdrop
57
- self.layer_norm_epsilon = layer_norm_epsilon
58
- self.initializer_range = initializer_range
59
-
60
- super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
61
 
62
 
63
  class LlavaConfig(PretrainedConfig):
 
2
 
3
  from transformers.configuration_utils import PretrainedConfig
4
  from open_clip import get_model_config
5
+ from configuration_phi import PhiConfig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
 
8
  class LlavaConfig(PretrainedConfig):