# Spaces: Build error
# (Page-status text captured along with this file; it is not part of the program.)
import torch | |
def create_dense_embeddings(query, model, instruction=None):
    """Encode a single query into a dense embedding.

    Args:
        query: The query to embed; passed through to ``model.encode``.
        model: Any object exposing ``encode(batch)`` whose result has a
            ``.tolist()`` method.
        instruction: Optional instruction paired with the query. When given,
            the model receives ``[[instruction, query]]`` instead of
            ``[query]``.
            NOTE(review): presumably meant for instruction-tuned encoders
            that accept ``[instruction, text]`` pairs -- confirm against the
            model actually used.

    Returns:
        ``model.encode(...).tolist()`` for a batch of size one, i.e. whatever
        nested-list form the encoder's output converts to.
    """
    # Identity check: only an omitted/None instruction selects the plain
    # path; an empty string still counts as an instruction.
    if instruction is None:
        batch = [query]
    else:
        batch = [[instruction, query]]
    return model.encode(batch).tolist()
def create_sparse_embeddings(query, model, tokenizer):
    """Build a sparse (index/weight) embedding for a query from model logits.

    The logits of the first batch item are passed through
    ``log1p(relu(.))``, then reduced with a max over the first axis of that
    item's logits. Entries whose reduced weight is strictly positive are
    kept and returned in order of descending weight.

    Args:
        query: Text handed to ``tokenizer(query, return_tensors="pt")``.
        model: Callable invoked as ``model(**inputs)``; its result must expose
            a ``.logits`` tensor.
            NOTE(review): logits are assumed to be (batch, tokens, vocab) --
            confirm against the model in use.
        tokenizer: Callable returning an object with ``.to(device)`` that
            yields the keyword arguments for ``model``.

    Returns:
        A dict with ``"indices"`` (list of int positions along the last
        logits axis) and ``"values"`` (list of float weights), aligned and
        sorted by descending weight. Both lists are empty when no weight is
        positive.
    """
    # NOTE(review): only the inputs are moved here; the model is assumed to
    # already live on this same device -- confirm at the call site.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    inputs = tokenizer(query, return_tensors="pt").to(device)

    with torch.no_grad():
        logits = model(**inputs).logits

    # Only the first batch item is used.
    activations = torch.log1p(torch.relu(logits[0]))
    # Max (not sum) over axis 0 of the per-item activations.
    pooled = torch.max(activations, dim=0).values

    nz_tokens = torch.where(pooled > 0)[0]
    nz_weights = pooled[nz_tokens]

    # torch.sort returns (sorted_values, permutation); apply the same
    # permutation to the token indices so both lists stay aligned.
    nz_weights, order = torch.sort(nz_weights, descending=True)
    nz_tokens = nz_tokens[order]

    return {
        "indices": nz_tokens.cpu().tolist(),
        "values": nz_weights.cpu().tolist(),
    }
def hybrid_score_norm(dense, sparse, alpha: float):
    """Scale a dense/sparse vector pair by a convex combination weight.

    Computes ``alpha * dense`` and ``(1 - alpha) * sparse``.

    Args:
        dense: Flat iterable of numbers (the dense vector); each element is
            multiplied by ``alpha``.
        sparse: A dict with ``indices`` and ``values`` entries; only the
            values are rescaled.
        alpha: Weight between 0 and 1 inclusive. ``1`` keeps only the dense
            contribution, ``0`` only the sparse one.

    Returns:
        A tuple ``(scaled_dense, scaled_sparse)`` where ``scaled_dense`` is a
        new list and ``scaled_sparse`` is a new dict with the original
        ``indices`` object and a new list of rescaled ``values``.

    Raises:
        ValueError: If ``alpha`` lies outside ``[0, 1]``.
    """
    if alpha < 0 or alpha > 1:
        raise ValueError("Alpha must be between 0 and 1")

    sparse_weight = 1 - alpha
    scaled_sparse = {
        "indices": sparse["indices"],
        "values": [value * sparse_weight for value in sparse["values"]],
    }
    scaled_dense = [value * alpha for value in dense]
    return scaled_dense, scaled_sparse