Spaces:

zama-fhe
/

encrypted-anonymization

Running on CPU Upgrade

App Files Files Community

encrypted-anonymization / utils_demo.py

kcelia

chore: handling user query

d812385 unverified 8 months ago

raw

history blame

1.84 kB

	import torch
	import numpy as np


	# Length threshold, in characters, that `is_user_query_valid` compares
	# `len(user_query)` against.
	MAX_USER_QUERY_LEN = 35

	# Predefined example queries, keyed by display label.
	# `is_user_query_valid` matches on the exact values, so the strings are kept verbatim.
	DEFAULT_QUERIES = {
	    "Example Query 1": "Who visited microsoft.com on September 18?",
	    "Example Query 2": "Does Kate has drive ?",
	    "Example Query 3": "What phone number can be used to contact David Johnson?",
	}

	def get_batch_text_representation(texts, model, tokenizer, batch_size=1):
	    """
	    Get mean-pooled representations of given texts in batches.

	    Args:
	        texts: Sliceable sequence of input texts, consumed `batch_size` items at a time.
	        model: Callable invoked as `model(**inputs, output_hidden_states=False)`; its
	            result must expose `last_hidden_state`.
	        tokenizer: Callable invoked as
	            `tokenizer(batch, return_tensors="pt", padding=True, truncation=True)`;
	            its result must provide an `attention_mask` entry and be unpackable with `**`.
	        batch_size (int): Number of texts per forward pass. Must be at least 1.

	    Returns:
	        np.ndarray: One mean-pooled vector per text, stacked in input order
	        (an empty array when `texts` is empty).

	    Raises:
	        ValueError: If `batch_size` is smaller than 1.
	    """
	    # A negative step would make `range` empty and silently drop every text
	    if batch_size < 1:
	        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

	    mean_pooled_batch = []
	    for i in range(0, len(texts), batch_size):
	        batch_texts = texts[i:i + batch_size]
	        inputs = tokenizer(batch_texts, return_tensors="pt", padding=True, truncation=True)
	        # Inference only: no autograd graph is needed for the forward pass
	        with torch.no_grad():
	            outputs = model(**inputs, output_hidden_states=False)
	        last_hidden_states = outputs.last_hidden_state
	        # Expand the mask to the hidden-state shape so positions with mask 0 add nothing
	        input_mask_expanded = inputs['attention_mask'].unsqueeze(-1).expand(last_hidden_states.size()).float()
	        # Reduce over dim 1 (presumably the token axis of a (batch, tokens, hidden) layout)
	        sum_embeddings = torch.sum(last_hidden_states * input_mask_expanded, 1)
	        # Clamp the divisor: a row whose mask is all zeros would otherwise give 0/0 = NaN
	        sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
	        mean_pooled = sum_embeddings / sum_mask
	        mean_pooled_batch.extend(mean_pooled.cpu().detach().numpy())
	    return np.array(mean_pooled_batch)


	def is_user_query_valid(user_query: str) -> bool:
	    """
	    Flag a query that is neither a predefined example nor within the length limit.

	    NOTE(review): despite the name, a `True` result appears to mark a query that
	    should be rejected rather than accepted. Confirm against callers before
	    renaming the function or inverting its result.

	    Args:
	        user_query (str): The input text to be checked; `None` is tolerated.

	    Returns:
	        bool: True only when `user_query` is not one of the `DEFAULT_QUERIES` values
	        and is either `None` or longer than `MAX_USER_QUERY_LEN` characters.
	        False otherwise, including for the empty string.
	    """
	    # Exact match against one of the predefined example queries
	    is_known_example = user_query in DEFAULT_QUERIES.values()

	    # `None` has no length, so it never counts as fitting within the limit
	    if user_query is None:
	        fits_limit = False
	    else:
	        fits_limit = len(user_query) <= MAX_USER_QUERY_LEN

	    return not (is_known_example or fits_limit)