ArthurCamara
committed on
Commit
•
b24c154
1
Parent(s):
64da127
Integrate with Sentence Transformers (+ third parties like LangChain/Haystack/LlamaIndex, etc.) (#1)
Browse files
- Integrate with Sentence Transformers (a0c6c801332cf3f2c70d76f80ad58442b1838ec3)
- 1_Pooling/config.json +10 -0
- README.md +29 -0
- config_sentence_transformers.json +10 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 4096,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": true,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
CHANGED
@@ -4967,6 +4967,8 @@ model-index:
|
|
4967 |
type: PairClassification
|
4968 |
tags:
|
4969 |
- mteb
|
|
|
|
|
4970 |
---
|
4971 |
## Zeta-Alpha-E5-Mistral
|
4972 |
|
@@ -4987,6 +4989,33 @@ The model was trained with the same instruction-tuning strategy as the original
|
|
4987 |
Instruct: <task description>\nQuery: <query>
|
4988 |
```
|
4989 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4990 |
### Transformers
|
4991 |
``` python
|
4992 |
import torch
|
|
|
4967 |
type: PairClassification
|
4968 |
tags:
|
4969 |
- mteb
|
4970 |
+
- transformers
|
4971 |
+
- sentence-transformers
|
4972 |
---
|
4973 |
## Zeta-Alpha-E5-Mistral
|
4974 |
|
|
|
4989 |
Instruct: <task description>\nQuery: <query>
|
4990 |
```
|
4991 |
|
4992 |
+
### Sentence Transformers
|
4993 |
+
```python
|
4994 |
+
|
4995 |
+
from sentence_transformers import SentenceTransformer
|
4996 |
+
|
4997 |
+
model = SentenceTransformer("zeta-alpha-ai/Zeta-Alpha-E5-Mistral")
|
4998 |
+
|
4999 |
+
def get_detailed_instruct(task_description: str, query: str) -> str:
|
5000 |
+
return f'Instruct: {task_description}\nQuery: {query}'
|
5001 |
+
|
5002 |
+
task = "Given a claim about climate change, retrieve documents that support or refute the claim"
|
5003 |
+
queries = [
|
5004 |
+
get_detailed_instruct(task, "In Alaska, brown bears are changing their feeding habits to eat elderberries that ripen earlier."),
|
5005 |
+
get_detailed_instruct(task, "Local and regional sea levels continue to exhibit typical natural variability—in some places rising and in others falling.")
|
5006 |
+
]
|
5007 |
+
|
5008 |
+
passages = [
|
5009 |
+
"The brown bear ( Ursus arctos ) is a large bear with the widest distribution of any living ursid . The species is distributed across much of northern Eurasia and North America . It is one of the two largest terrestrial carnivorans alive today , rivaled in body size only by its close cousin , the polar bear ( Ursus maritimus ) , which is much less variable in size and averages larger due to this . There are several recognized subspecies , many of which are quite well-known within their native ranges , found in the brown bear species . The brown bear 's principal range includes parts of Russia , Central Asia , China , Canada , the United States ( mostly Alaska ) , Scandinavia and the Carpathian region ( especially Romania ) , Anatolia , and Caucasus . The brown bear is recognized as a national and state animal in several European countries . While the brown bear 's range has shrunk and it has faced local extinctions , it remains listed as a least concern species by the International Union for Conservation of Nature ( IUCN ) with a total population of approximately 200,000 . As of 2012 , this and the American black bear are the only bear species not classified as threatened by the IUCN . However , the Californian , North African ( Atlas bear ) , and Mexican subspecies were hunted to extinction in the nineteenth and early twentieth centuries , and many of the southern Asian subspecies are highly endangered . One of the smaller-bodied subspecies , the Himalayan brown bear , is critically endangered , occupying only 2 % of its former range and threatened by uncontrolled poaching for its parts . The Marsican brown bear , one of several currently isolated populations of the main Eurasian brown bear race , in central Italy is believed to have a population of just 30 to 40 bears .",
|
5010 |
+
"ean sea level ( MSL ) ( abbreviated simply sea level ) is an average level of the surface of one or more of Earth 's oceans from which heights such as elevations may be measured . MSL is a type of vertical datuma standardised geodetic reference pointthat is used , for example , as a chart datum in cartography and marine navigation , or , in aviation , as the standard sea level at which atmospheric pressure is measured in order to calibrate altitude and , consequently , aircraft flight levels . A common and relatively straightforward mean sea-level standard is the midpoint between a mean low and mean high tide at a particular location . Sea levels can be affected by many factors and are known to have varied greatly over geological time scales . The careful measurement of variations in MSL can offer insights into ongoing climate change , and sea level rise has been widely quoted as evidence of ongoing global warming . The term above sea level generally refers to above mean sea level ( AMSL ) ."
|
5011 |
+
]
|
5012 |
+
|
5013 |
+
embeddings = model.encode(queries + passages)
|
5014 |
+
scores = model.similarity(embeddings[:2], embeddings[2:]) * 100
|
5015 |
+
print(scores.tolist())
|
5016 |
+
# [[66.12603759765625, 43.760101318359375], [47.67058563232422, 63.7889518737793]]
|
5017 |
+
```
|
5018 |
+
|
5019 |
### Transformers
|
5020 |
``` python
|
5021 |
import torch
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.0.1",
|
4 |
+
"transformers": "4.43.4",
|
5 |
+
"pytorch": "2.4.0"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 4096,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|