Spaces:
Build error
Build error
File size: 1,898 Bytes
c5e4524 bd9fae2 c5e4524 bb343a5 bd9fae2 c5e4524 18468cb c5e4524 18468cb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import torch
import json
import numpy as np
def create_dense_embeddings(query, model, instruction=None):
    """Return the dense embedding of *query* as a plain Python list.

    Two back ends are supported, selected by *instruction*:

    * ``instruction is None``: ``model.encode([query])`` is called and its
      result converted with ``.tolist()``.
    * otherwise: ``model.predict(instruction, query, api_name="/predict")``
      is called. Its return value is used as the path of a JSON file whose
      ``"data"`` entry holds the embedding.
      NOTE(review): this looks like a remote Instructor endpoint reached
      through an API client -- confirm against the caller.

    Args:
        query: Text to embed.
        model: Object exposing ``encode`` (local path) or ``predict``
            (API path).
        instruction: Optional instruction forwarded to ``model.predict``.

    Returns:
        The embedding converted to (nested) Python lists of floats.
    """
    # Identity check: ``== None`` would delegate to a user-defined __eq__.
    if instruction is None:
        return model.encode([query]).tolist()

    # Fetching embedding from API for Instructor
    json_output_embedding = model.predict(
        instruction,
        query,
        api_name="/predict",
    )
    # Context manager guarantees the handle is closed even if parsing fails.
    with open(json_output_embedding, "r", encoding="utf-8") as json_file:
        json_dict = json.load(json_file)

    dense_array = np.array(json_dict["data"], dtype=np.float64)
    return dense_array.tolist()
def create_sparse_embeddings(query, model, tokenizer):
    """Build a sparse ``{"indices", "values"}`` vector for *query*.

    The tokenized query is run through *model*; the first item of the
    returned ``logits`` is passed through ``log(1 + relu(x))`` and reduced
    with a maximum along its first axis. Only strictly positive entries are
    kept, ordered by descending weight.

    Args:
        query: Text to encode.
        model: Callable accepting the tokenizer output as keyword arguments
            and returning an object with a ``logits`` attribute.
        tokenizer: Callable invoked as ``tokenizer(query, return_tensors="pt")``
            whose result supports ``.to(device)``.

    Returns:
        A dict with ``"indices"`` (positions of the kept entries) and
        ``"values"`` (their weights), both plain Python lists.
    """
    # NOTE(review): only the inputs are moved to the chosen device; the model
    # is presumably already there -- confirm against the caller.
    target_device = "cpu"
    if torch.cuda.is_available():
        target_device = "cuda"
    encoded = tokenizer(query, return_tensors="pt").to(target_device)

    with torch.no_grad():
        raw_logits = model(**encoded).logits

    # assumes logits is (batch, tokens, vocab) -- TODO confirm; only the
    # first batch item is used.
    activations = torch.log1p(torch.relu(raw_logits[0]))
    # Maximum (not a sum) over the first axis of the activations.
    per_term_max = torch.max(activations, dim=0).values

    positive_mask = per_term_max > 0
    kept_ids = torch.where(positive_mask)[0]
    kept_weights = per_term_max[kept_ids]

    # Strongest terms first; apply the same permutation to the ids.
    sorted_weights, permutation = torch.sort(kept_weights, descending=True)
    sorted_ids = kept_ids[permutation]

    return {
        "indices": sorted_ids.cpu().numpy().tolist(),
        "values": sorted_weights.cpu().numpy().tolist(),
    }
def hybrid_score_norm(dense, sparse, alpha: float):
    """Weight a dense and a sparse vector for a convex hybrid combination.

    The dense part is scaled by ``alpha`` and the sparse part by
    ``1 - alpha``, i.e. ``alpha * dense + (1 - alpha) * sparse``.

    Args:
        dense: Nested sequence whose first element is the dense vector
            (only ``dense[0]`` is read).
        sparse: Dict with ``"indices"`` and ``"values"`` entries.
        alpha: Weight of the dense part, between 0 and 1 inclusive.

    Returns:
        A ``(hdense, hsparse)`` tuple: ``hdense`` is a one-row nested list of
        scaled dense values; ``hsparse`` is a new dict carrying the original
        indices and the scaled sparse values.

    Raises:
        ValueError: If ``alpha`` is below 0 or above 1.
    """
    if alpha < 0 or alpha > 1:
        raise ValueError("Alpha must be between 0 and 1")

    sparse_weight = 1 - alpha
    scaled_sparse = dict(
        indices=sparse["indices"],
        values=[value * sparse_weight for value in sparse["values"]],
    )
    scaled_dense = [[component * alpha for component in dense[0]]]
    return scaled_dense, scaled_sparse
|