Update README.md
Browse files
README.md
CHANGED
@@ -2,4 +2,55 @@
|
|
2 |
license: apache-2.0
|
3 |
---
|
4 |
|
5 |
-
GGUF conversion of
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
license: apache-2.0
|
3 |
---
|
4 |
|
5 |
+
GGUF conversion of
|
6 |
+
|
7 |
+
# gte-Qwen2-1.5B-instruct GGUF
|
8 |
+
GGUF conversion of [Alibaba-NLP/gte-Qwen2-1.5B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct)
|
9 |
+
|
10 |
+
Available formats:
|
11 |
+
- Q2_K.gguf
|
12 |
+
- Q3_K.gguf
|
13 |
+
- Q4_K.gguf
|
14 |
+
- Q5_K.gguf
|
15 |
+
- Q6_K.gguf
|
16 |
+
- Q8_0.gguf
|
17 |
+
- F16.gguf
|
18 |
+
- BF16.gguf
|
19 |
+
|
20 |
+
## Usage
|
21 |
+
|
22 |
+
Requires: [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)
|
23 |
+
|
24 |
+
```python
|
25 |
+
from functools import partial
|
26 |
+
|
27 |
+
import numpy as np
|
28 |
+
from llama_cpp import Llama
|
29 |
+
|
30 |
+
max_length = 512
|
31 |
+
|
32 |
+
model = Llama.from_pretrained(
|
33 |
+
repo_id="mm/gte-Qwen2-1.5B-instruct-gguf",
|
34 |
+
filename="*Q4_K.gguf",  # Choose from the available formats
|
35 |
+
embedding=True,
|
36 |
+
n_ctx=max_length,
|
37 |
+
n_batch=max_length,
|
38 |
+
verbose=False,
|
39 |
+
)
|
40 |
+
model.tokenize = partial(model.tokenize, special=True)
|
41 |
+
|
42 |
+
|
43 |
+
def calc_emb(s: str):
|
44 |
+
if len(model.tokenize(s.encode())) > max_length - 1:
|
45 |
+
print(
|
46 |
+
"The input exceeds the maximum length; the output will be computed with truncation."
|
47 |
+
)
|
48 |
+
v = model.embed(s, normalize=True, truncate=True)
|
49 |
+
return np.asarray(v[-1])
|
50 |
+
|
51 |
+
|
52 |
+
s = "今日の天気は?"
|
53 |
+
t = "本日の天候は?"
|
54 |
+
|
55 |
+
print(f"cossim({s}, {t}) = {(calc_emb(s) * calc_emb(t)).sum()}")
|
56 |
+
```
|