Commit a709b51
Parent(s): 57dbe22

lora-instructions (#36)

- feat: add lora instructions for retrieval (7af97e787979d0496a49416b8972adb1817d13fa)
- fix: when sentences is one (c35a42b4b8cf2e60a7b34f7cb8da522d0d6f0f52)
- fix: sentences as a str (5f8e4b6771a158cdfb84b3c381051ad555655426)
- merge recent changes (8f83a352fa175fb5547dae6330c05e1c3a56395e)
- refactor: prompts (aeb99cb139baff8daa0845e9e9be0d9c39f21271)
Co-authored-by: Jack Min Ong <[email protected]>

Files changed:
- configuration_xlm_roberta.py +2 -2
- modeling_lora.py +15 -10
configuration_xlm_roberta.py CHANGED

@@ -31,7 +31,7 @@ class XLMRobertaFlashConfig(PretrainedConfig):
         use_reentrant: bool = False,
         classifier_dropout: Optional[float] = None,
         lora_adaptations: Optional[List[str]] = None,
-
+        task_instructions: Optional[Dict[str, str]] = None,
         lora_rank: int = 4,
         lora_dropout_p: float = 0.0,
         lora_alpha: int = 1,
@@ -109,7 +109,7 @@ class XLMRobertaFlashConfig(PretrainedConfig):
         self.classifier_dropout = classifier_dropout
         self.load_trained_adapters = load_trained_adapters
         self.lora_adaptations = lora_adaptations
-        self.
+        self.task_instructions = task_instructions
         self.lora_rank = lora_rank
         self.lora_dropout_p = lora_dropout_p
         self.lora_alpha = lora_alpha
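
Taken together with the validation added in modeling_lora.py below, the new `task_instructions` field is expected to be a dict whose keys mirror `lora_adaptations`. A minimal sketch of building such a config; the adaptation names and instruction strings here are illustrative assumptions, not values taken from this commit:

    from configuration_xlm_roberta import XLMRobertaFlashConfig

    # Hypothetical adaptation names and instruction texts, for illustration only.
    config = XLMRobertaFlashConfig(
        lora_adaptations=["retrieval.query", "retrieval.passage"],
        task_instructions={
            "retrieval.query": "Represent the query for retrieving evidence documents: ",
            "retrieval.passage": "Represent the document for retrieval: ",
        },
    )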
modeling_lora.py CHANGED

@@ -165,7 +165,6 @@ class LoRAParametrization(nn.Module):
     ):
         """
         Registering LoRA adapters to all embedding and linear layers.
-
         Additionally, we implement a custom forward function for LoRA parametrization.
         This function modifies the layer's forward pass to optionally use task-specific
         parameters. When a `task_id` is provided, it employs a LoRA parametrization
@@ -242,6 +241,7 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
     """
     A wrapper class around the Jina XLM-RoBERTa model that integrates LoRA (Low-Rank Adaptation) adapters.
     """
+
     def __init__(
         self, config: XLMRobertaFlashConfig, roberta: Optional[XLMRobertaModel] = None
     ):
@@ -259,15 +259,17 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
             raise ValueError(
                 f"`lora_adaptations` must be a list and contain at least one element"
             )
-        self.
+        self._task_instructions = config.task_instructions
         if (
-            not isinstance(self.
-            or len(self.
-            or not all(
+            not isinstance(self._task_instructions, dict)
+            or len(self._task_instructions) != len(self._lora_adaptations)
+            or not all(
+                [v in self._lora_adaptations for v in self._task_instructions.keys()]
+            )
         ):
             raise ValueError(
-                f"`
-                f"as `lora_adaptations` with all keys in `
+                f"`task_instructions` must be a dict and contain the same number of elements "
+                f"as `lora_adaptations` with all keys in `task_instructions` present in `lora_adaptations`."
             )
         self._adaptation_map = {
             name: idx for idx, name in enumerate(self._lora_adaptations)
@@ -323,11 +325,11 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
         use_safetensors: bool = None,
         **kwargs,
     ):
-        if config.load_trained_adapters:
+        if config.load_trained_adapters:  # checkpoint already contains LoRA adapters
            return super().from_pretrained(
                pretrained_model_name_or_path, *model_args, use_flash_attn=config.use_flash_attn, **kwargs
            )
-        else:
+        else:  # initializing new adapters
            roberta = XLMRobertaModel.from_pretrained(
                pretrained_model_name_or_path, *model_args, use_flash_attn=config.use_flash_attn, **kwargs
            )
@@ -370,7 +372,6 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
     ) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]:
         """
         Computes sentence embeddings.
-
         sentences(`str` or `List[str]`):
             Sentence or sentences to be encoded
         task_type(`str`, *optional*, defaults to `None`):
@@ -391,6 +392,10 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
             adapter_mask = torch.full(
                 (num_examples,), task_id, dtype=torch.int32, device=self.device
             )
+            if isinstance(sentences, str):
+                sentences = self._task_instructions[task_type] + sentences
+            else:
+                sentences = [self._task_instructions[task_type] + sentence for sentence in sentences]
         return self.roberta.encode(
             sentences, *args, adapter_mask=adapter_mask, **kwargs
         )
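
With the encode changes above, passing a `task_type` both selects the LoRA adapter (via the adapter mask) and prepends the matching entry from `config.task_instructions` to every input sentence. A minimal usage sketch, assuming a checkpoint built on this implementation with a configured "retrieval.query" task; the repository id and task name are placeholders, not part of this diff:

    from transformers import AutoModel

    # Placeholder checkpoint id; any model shipping this modeling code follows the same pattern.
    model = AutoModel.from_pretrained("org/xlm-roberta-lora-model", trust_remote_code=True)

    # encode() maps task_type to a task id for the adapter mask and, after this commit,
    # also prefixes self._task_instructions["retrieval.query"] to the input sentence(s).
    embeddings = model.encode(
        ["How do LoRA adapters work?"],
        task_type="retrieval.query",
    )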