Nvidia-Embed-V1

Build error

App Files Files Community

Tonic committed on Jan 18

Commit

ace4204

•

1 Parent(s): 964b92e

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -59

app.py CHANGED Viewed

@@ -44,75 +44,72 @@ def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tenso
         sequence_lengths = attention_mask.sum(dim=1) - 1
         batch_size = last_hidden_states.shape[0]
         return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
 def clear_cuda_cache():
     """Ask PyTorch to release the unused memory held in its CUDA cache."""
     torch.cuda.empty_cache()
 def free_memory(*args):
     """Walk over ``args`` and delete the loop-local name bound to each object."""
     for obj in args:
         # ``del`` removes only this local name; references held by the caller are untouched.
         del obj
-class EmbeddingModel:
-    """Wraps the e5-mistral tokenizer and model and exposes embedding / similarity helpers."""
-    def __init__(self):
-        """Load the tokenizer and the float16 model onto ``device``."""
-        self.tokenizer = AutoTokenizer.from_pretrained('intfloat/e5-mistral-7b-instruct')
-        self.model = AutoModel.from_pretrained('intfloat/e5-mistral-7b-instruct', torch_dtype=torch.float16, device_map=device)
-    def _compute_cosine_similarity(self, emb1, emb2):
-        """Return the cosine similarity of two embeddings as a Python float."""
-        tensor1 = torch.tensor(emb1).to(device).half()
-        tensor2 = torch.tensor(emb2).to(device).half()
-        similarity = F.cosine_similarity(tensor1, tensor2).item()
-        free_memory(tensor1, tensor2)
-        return similarity
-    def compute_embeddings(self, selected_task, input_text):
-        """Embed ``input_text`` using the instruction registered for ``selected_task``.
-        Returns the L2-normalised embeddings as nested lists, or an error string
-        when ``selected_task`` is not a key of ``tasks``.
-        """
-        try:
-            task_description = tasks[selected_task]
-        except KeyError:
-            print(f"Selected task not found: {selected_task}")
-            return f"Error: Task '{selected_task}' not found. Please select a valid task."
-        max_length = 2042
-        processed_texts = [f'Instruct: {task_description}\nQuery: {input_text}']
-        # Tokenize to max_length - 1 so the EOS token appended below still fits.
-        batch_dict = self.tokenizer(processed_texts, max_length=max_length - 1, return_attention_mask=False, padding=False, truncation=True)
-        batch_dict['input_ids'] = [input_ids + [self.tokenizer.eos_token_id] for input_ids in batch_dict['input_ids']]
-        batch_dict = self.tokenizer.pad(batch_dict, padding=True, return_attention_mask=True, return_tensors='pt')
-        batch_dict = {k: v.to(device) for k, v in batch_dict.items()}
-        # Inference only: the result is detached right away, so skip autograd bookkeeping.
-        with torch.no_grad():
-            outputs = self.model(**batch_dict)
-        embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
-        embeddings = F.normalize(embeddings, p=2, dim=1)
-        embeddings_list = embeddings.detach().cpu().numpy().tolist()
-        return embeddings_list
-    def compute_similarity(self, selected_task, sentence1, sentence2, extra_sentence1, extra_sentence2):
-        """Compare ``sentence1`` with each of the other three sentences.
-        Returns a tuple of three cosine similarities, or an error string when
-        ``selected_task`` is not a key of ``tasks``.
-        """
-        try:
-            tasks[selected_task]
-        except KeyError:
-            print(f"Selected task not found: {selected_task}")
-            return f"Error: Task '{selected_task}' not found. Please select a valid task."
-        # Compute embeddings for each sentence; the task is the argument, not an instance attribute.
-        embeddings1 = self.compute_embeddings(selected_task, sentence1)
-        embeddings2 = self.compute_embeddings(selected_task, sentence2)
-        embeddings3 = self.compute_embeddings(selected_task, extra_sentence1)
-        embeddings4 = self.compute_embeddings(selected_task, extra_sentence2)
-        # Compute cosine similarity (the helper converts the lists to tensors itself)
-        similarity1 = self._compute_cosine_similarity(embeddings1, embeddings2)
-        similarity2 = self._compute_cosine_similarity(embeddings1, embeddings3)
-        similarity3 = self._compute_cosine_similarity(embeddings1, embeddings4)
-        # Free memory
-        free_memory(embeddings1, embeddings2, embeddings3, embeddings4)
-        return similarity1, similarity2, similarity3
 def app_interface():
-    embedding_model = EmbeddingModel()
     with gr.Blocks() as demo:
         gr.Markdown(title)
         gr.Markdown(description)
@@ -124,7 +121,7 @@ def app_interface():
             compute_button = gr.Button("Try🐣🛌🏻e5")
             output_display = gr.Textbox(label="🐣e5-mistral🛌🏻 Embeddings")
             compute_button.click(
-                fn=embedding_model.compute_embeddings,
                 inputs=[task_dropdown, input_text_box],
                 outputs=output_display
             )
@@ -137,8 +134,8 @@ def app_interface():
             similarity_button = gr.Button("Compute Similarity")
             similarity_output = gr.Label(label="🐣e5-mistral🛌🏻 Similarity Scores")
             similarity_button.click(
-                fn=embedding_model.compute_similarity,
-                inputs=[task_dropdown, sentence1_box, sentence2_box],
                 outputs=similarity_output
             )

         sequence_lengths = attention_mask.sum(dim=1) - 1
         batch_size = last_hidden_states.shape[0]
         return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
 def clear_cuda_cache():
     """Ask PyTorch to release the unused memory held in its CUDA cache."""
     torch.cuda.empty_cache()
 def free_memory(*args):
     """Walk over ``args`` and delete the loop-local name bound to each object."""
     for obj in args:
         # ``del`` removes only this local name; references held by the caller are untouched.
         del obj
+# @spaces.GPU
+def compute_embeddings(selected_task, input_text):
+    """Embed ``input_text`` using the instruction registered for ``selected_task``.
+    Returns the L2-normalised embeddings as nested lists, or an error string
+    when ``selected_task`` is not a key of ``tasks``.
+    """
+    try:
+        task_description = tasks[selected_task]
+    except KeyError:
+        print(f"Selected task not found: {selected_task}")
+        return f"Error: Task '{selected_task}' not found. Please select a valid task."
+    # This is a module-level function, so there is no ``self`` to read the model from.
+    tokenizer, model = _load_e5_components()
+    max_length = 2042
+    processed_texts = [f'Instruct: {task_description}\nQuery: {input_text}']
+    # Tokenize to max_length - 1 so the EOS token appended below still fits.
+    batch_dict = tokenizer(processed_texts, max_length=max_length - 1, return_attention_mask=False, padding=False, truncation=True)
+    batch_dict['input_ids'] = [input_ids + [tokenizer.eos_token_id] for input_ids in batch_dict['input_ids']]
+    batch_dict = tokenizer.pad(batch_dict, padding=True, return_attention_mask=True, return_tensors='pt')
+    batch_dict = {k: v.to(device) for k, v in batch_dict.items()}
+    # Inference only: the result is detached right away, so skip autograd bookkeeping.
+    with torch.no_grad():
+        outputs = model(**batch_dict)
+    embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
+    embeddings = F.normalize(embeddings, p=2, dim=1)
+    embeddings_list = embeddings.detach().cpu().numpy().tolist()
+    return embeddings_list
+_E5_CHECKPOINT = 'intfloat/e5-mistral-7b-instruct'
+_e5_components = None
+def _load_e5_components():
+    """Load the tokenizer and float16 model on first use and reuse them afterwards."""
+    global _e5_components
+    if _e5_components is None:
+        tokenizer = AutoTokenizer.from_pretrained(_E5_CHECKPOINT)
+        model = AutoModel.from_pretrained(_E5_CHECKPOINT, torch_dtype=torch.float16, device_map=device)
+        _e5_components = (tokenizer, model)
+    return _e5_components
+# @spaces.GPU
+def compute_similarity(selected_task, sentence1, sentence2, extra_sentence1, extra_sentence2):
+    """Compare ``sentence1`` with each of the other three sentences.
+    Returns a tuple of three cosine similarities, or an error string when
+    ``selected_task`` is not a key of ``tasks``.
+    """
+    try:
+        tasks[selected_task]
+    except KeyError:
+        print(f"Selected task not found: {selected_task}")
+        return f"Error: Task '{selected_task}' not found. Please select a valid task."
+    # Compute embeddings for each sentence; these are module-level functions, so no ``self``.
+    embeddings1 = compute_embeddings(selected_task, sentence1)
+    embeddings2 = compute_embeddings(selected_task, sentence2)
+    embeddings3 = compute_embeddings(selected_task, extra_sentence1)
+    embeddings4 = compute_embeddings(selected_task, extra_sentence2)
+    # Compute cosine similarity (the helper converts the lists to tensors itself)
+    similarity1 = _compute_cosine_similarity(embeddings1, embeddings2)
+    similarity2 = _compute_cosine_similarity(embeddings1, embeddings3)
+    similarity3 = _compute_cosine_similarity(embeddings1, embeddings4)
+    # Free memory
+    free_memory(embeddings1, embeddings2, embeddings3, embeddings4)
+    # NOTE(review): the click handler routes this tuple to a single gr.Label output; confirm it accepts a tuple.
+    return similarity1, similarity2, similarity3
+# @spaces.GPU
+def _compute_cosine_similarity(emb1, emb2):
+    """Return the cosine similarity of two embeddings as a Python float."""
+    # Move both inputs onto ``device`` in half precision, first operand first.
+    first, second = [torch.tensor(emb).to(device).half() for emb in (emb1, emb2)]
+    score = F.cosine_similarity(first, second).item()
+    free_memory(first, second)
+    return score
 def app_interface():
     with gr.Blocks() as demo:
         gr.Markdown(title)
         gr.Markdown(description)
             compute_button = gr.Button("Try🐣🛌🏻e5")
             output_display = gr.Textbox(label="🐣e5-mistral🛌🏻 Embeddings")
             compute_button.click(
+                fn=compute_embeddings,
                 inputs=[task_dropdown, input_text_box],
                 outputs=output_display
             )
             similarity_button = gr.Button("Compute Similarity")
             similarity_output = gr.Label(label="🐣e5-mistral🛌🏻 Similarity Scores")
             similarity_button.click(
+                fn=compute_similarity,
+                inputs=[task_dropdown, sentence1_box, sentence2_box, extra_sentence1_box, extra_sentence2_box],
                 outputs=similarity_output
             )