mm committed on
Commit
0e064ec
1 Parent(s): df21559

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +52 -1
README.md CHANGED
@@ -2,4 +2,55 @@
2
  license: apache-2.0
3
  ---
4
 
5
- GGUF conversion of https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  license: apache-2.0
3
  ---
4
 
5
+ GGUF conversion of
6
+
7
+ # gte-Qwen2-1.5B-instruct GGUF
8
+ GGUF conversion of [Alibaba-NLP/gte-Qwen2-1.5B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct)
9
+
10
+ Available formats:
11
+ - Q2_K.gguf
12
+ - Q3_K.gguf
13
+ - Q4_K.gguf
14
+ - Q5_K.gguf
15
+ - Q6_K.gguf
16
+ - Q8_0.gguf
17
+ - F16.gguf
18
+ - BF16.gguf
19
+
20
+ ## Usage
21
+
22
+ Requires: [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)
23
+
24
+ ```python
25
+ from functools import partial
26
+
27
+ import numpy as np
28
+ from llama_cpp import Llama
29
+
30
+ max_length = 512
31
+
32
+ model = Llama.from_pretrained(
33
+ repo_id="mm/gte-Qwen2-1.5B-instruct-gguf",
34
+ filename="*Q4_K.gguf", # Choose from the available formats
35
+ embedding=True,
36
+ n_ctx=max_length,
37
+ n_batch=max_length,
38
+ verbose=False,
39
+ )
40
+ model.tokenize = partial(model.tokenize, special=True)
41
+
42
+
43
+ def calc_emb(s: str):
44
+ if len(model.tokenize(s.encode())) > max_length - 1:
45
+ print(
46
+ "The output will be truncated because the input exceeds the maximum length."
47
+ )
48
+ v = model.embed(s, normalize=True, truncate=True)
49
+ return np.asarray(v[-1])
50
+
51
+
52
+ s = "今日の天気は?"
53
+ t = "本日の天候は?"
54
+
55
+ print(f"cossim({s}, {t}) = {(calc_emb(s) * calc_emb(t)).sum()}")
56
+ ```