robinzixuan committed
Commit 64f43b6
1 Parent(s): e20d3f0
Update configuration_opt.py
Files changed: configuration_opt.py +3 -0
configuration_opt.py
CHANGED
@@ -20,6 +20,8 @@ from ...utils import logging
 
 logger = logging.get_logger(__name__)
 
+OPTConfig.register_for_auto_class()
+
 
 class OPTConfig(PretrainedConfig):
     r"""
@@ -51,6 +53,7 @@ class OPTConfig(PretrainedConfig):
             The maximum sequence length that this model might ever be used with. Typically set this to something large
             just in case (e.g., 512 or 1024 or 2048).
         do_layer_norm_before (`bool`, *optional*, defaults to `True`):
+
             Whether to perform layer normalization before the attention block.
         word_embed_proj_dim (`int`, *optional*):
            `word_embed_proj_dim` can be set to down-project word embeddings, *e.g.* `opt-350m`. Defaults to
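The added OPTConfig.register_for_auto_class() call tags the custom config class for AutoConfig, so that a checkpoint saved with this config records an auto_map entry pointing back at configuration_opt.py. A minimal usage sketch follows, assuming the updated file is shipped in a Hub repo next to its weights; the repo id "your-namespace/your-opt-repo" is a hypothetical placeholder, not a repo named in this commit.

from transformers import AutoConfig

# Hypothetical repo id -- replace with the repo that actually hosts this
# configuration_opt.py alongside its checkpoint.
repo_id = "your-namespace/your-opt-repo"

# trust_remote_code=True is required so AutoConfig may import the repo's
# configuration_opt.py and resolve the auto_map entry that
# register_for_auto_class() caused to be written into config.json.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
print(type(config).__name__)  # the custom OPTConfig class loaded from the repo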