Spaces:
Build error
Build error
import json

import numpy as np
import torch
def create_dense_embeddings(query, model, instruction=None):
    """Return the dense embedding of ``query`` as nested Python lists.

    The back-end is selected by ``instruction``:

    * ``instruction is None``: ``model.encode([query])`` is called and its
      result is converted with ``.tolist()``.
    * otherwise: ``model.predict(instruction, query, api_name="/predict")``
      is called. Its return value is used as the path of a JSON file whose
      ``"data"`` entry holds the embedding.

    Args:
        query: Text to embed.
        model: Object exposing ``encode`` (when no instruction is given) or
            ``predict`` (when an instruction is given).
        instruction: Optional instruction forwarded to ``model.predict``.
            ``None`` selects the ``encode`` path.

    Returns:
        The embedding converted to plain Python lists. On the ``predict``
        path the values are float64.

    Raises:
        OSError: If the file returned by ``model.predict`` cannot be opened.
        KeyError: If the JSON payload has no ``"data"`` entry.
    """
    # Identity check: ``== None`` can be hijacked by objects overriding __eq__.
    if instruction is None:
        return model.encode([query]).tolist()

    # Fetching embedding from API for Instructor.
    # NOTE(review): presumably ``model`` is a remote API client here whose
    # ``predict`` returns a local file path -- confirm against the caller.
    embedding_path = model.predict(
        instruction,
        query,
        api_name="/predict",
    )
    # Context manager guarantees the handle is closed even if parsing fails.
    with open(embedding_path, "r", encoding="utf-8") as json_file:
        payload = json.load(json_file)
    return np.array(payload["data"], dtype=np.float64).tolist()
def create_sparse_embeddings(query, model, tokenizer):
    """Build a sparse (index/weight) embedding of ``query``.

    The query is tokenized, passed through ``model``, and the logits of the
    first batch entry are transformed with ``log(1 + relu(x))``. The result
    is reduced with an element-wise **max** over dim 0 (presumably the
    input-token axis). Only strictly positive entries are kept, ordered by
    descending weight.

    Args:
        query: Text handed to ``tokenizer``.
        model: Callable accepting the tokenizer output as keyword arguments
            and returning an object with a ``logits`` attribute.
        tokenizer: Callable invoked as ``tokenizer(query, return_tensors="pt")``
            whose result supports ``.to(device)``.

    Returns:
        A dict with ``"indices"`` (positions of the positive entries) and
        ``"values"`` (their weights), both plain lists sorted by descending
        weight.
    """
    target = "cpu"
    if torch.cuda.is_available():
        target = "cuda"
    # NOTE(review): only the inputs are moved; this assumes ``model`` already
    # lives on the same device -- confirm where the model is loaded.
    encoded = tokenizer(query, return_tensors="pt").to(target)

    with torch.no_grad():
        logits = model(**encoded).logits

    # Saturating activation on the first sequence of the batch.
    activations = torch.log1p(torch.relu(logits[0]))
    # Max-pool (not sum) over dim 0.
    pooled = torch.max(activations, dim=0).values

    active = torch.where(pooled > 0)[0]
    weights = pooled[active]
    ranking = torch.sort(weights, descending=True).indices

    return {
        "indices": active[ranking].cpu().numpy().tolist(),
        "values": weights[ranking].cpu().numpy().tolist(),
    }
def hybrid_score_norm(dense, sparse, alpha: float):
    """Weight a dense and a sparse vector for a convex hybrid combination.

    The dense values are multiplied by ``alpha`` and the sparse values by
    ``1 - alpha``, i.e. ``alpha * dense + (1 - alpha) * sparse``.

    Args:
        dense: Nested sequence of floats; only the first row (``dense[0]``)
            is read and scaled.
        sparse: A dict with ``indices`` and ``values`` entries.
        alpha: Weight of the dense part, between 0 and 1 inclusive.

    Returns:
        A tuple ``(hdense, hsparse)``: ``hdense`` is a one-row nested list of
        the scaled dense values; ``hsparse`` is a new dict with the original
        ``indices`` and the scaled ``values``.

    Raises:
        ValueError: If ``alpha`` is below 0 or above 1.
    """
    out_of_range = alpha < 0 or alpha > 1
    if out_of_range:
        raise ValueError("Alpha must be between 0 and 1")

    # Sparse side: indices are passed through, values are down-weighted.
    hsparse = {"indices": sparse["indices"]}
    scaled_values = []
    for weight in sparse["values"]:
        scaled_values.append(weight * (1 - alpha))
    hsparse["values"] = scaled_values

    # Dense side: only the first row is scaled, wrapped again in a list.
    scaled_row = []
    for component in dense[0]:
        scaled_row.append(component * alpha)

    return [scaled_row], hsparse