Amirizaniani committed on
Commit
608a413
1 Parent(s): 07f2e3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py CHANGED
@@ -7,6 +7,7 @@ from transformers import pipeline
7
  from langchain.llms.huggingface_pipeline import HuggingFacePipeline
8
 
9
  load_dotenv()
 
10
  def generate_prompts(user_input):
11
  prompt_template = PromptTemplate(
12
  input_variables=["Question"],
@@ -39,6 +40,55 @@ def answer_question(prompt):
39
  generated_answer = hub_chain.run(input_data)
40
  return generated_answer
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  text_list = []
43
 
44
  def updateChoices(prompt):
 
7
  from langchain.llms.huggingface_pipeline import HuggingFacePipeline
8
 
9
  load_dotenv()
10
+
11
  def generate_prompts(user_input):
12
  prompt_template = PromptTemplate(
13
  input_variables=["Question"],
 
40
  generated_answer = hub_chain.run(input_data)
41
  return generated_answer
42
 
43
+
44
def calculate_similarity(word, other_sentences, model, threshold=0.1, upper_limit=0.80):
    """Find the sentences whose embedding is moderately similar to ``word``.

    ``word`` and every entry of ``other_sentences`` are embedded with
    ``model.encode``; the cosine similarity between the word and each sentence
    is then compared against an open interval.

    Args:
        word: Single token to compare.
        other_sentences: Sequence of sentences to compare ``word`` against.
        model: Object exposing ``encode(texts, ...)`` that returns one
            embedding row per input text.
        threshold: Exclusive lower bound on the cosine similarity.
        upper_limit: Exclusive upper bound on the cosine similarity.

    Returns:
        A list of ``(sentence_index, similarity)`` tuples, in input order, for
        every sentence with ``threshold < similarity < upper_limit``.  An empty
        list is returned when ``other_sentences`` is empty.
    """
    if not other_sentences:
        # Nothing to compare against; also avoids handing cdist an empty batch.
        return []

    # Request NumPy output: SciPy operates on host arrays, whereas tensors
    # placed on an accelerator cannot be converted implicitly.
    word_embedding = model.encode([word], convert_to_numpy=True)
    sentence_embeddings = model.encode(other_sentences, convert_to_numpy=True)

    # cdist yields cosine *distances*; similarity is 1 - distance.
    distances = scipy.spatial.distance.cdist(word_embedding, sentence_embeddings, "cosine")[0]
    scored = ((index, 1 - distance) for index, distance in enumerate(distances))
    return [(index, similarity) for index, similarity in scored if threshold < similarity < upper_limit]
49
+
50
+
51
def highlight_words(sentence, other_sentences, model, exclude_words):
    """Colour the words of ``sentence`` that relate to ``other_sentences``.

    Every alphanumeric token of ``sentence`` whose lower-cased form is not in
    ``exclude_words`` is scored against all of ``other_sentences`` with
    ``calculate_similarity``.  Each (word, sentence) match receives the next
    colour of a fixed palette, cycling when the palette is exhausted.  The
    word is wrapped in an HTML ``<span>`` in the returned text, and the token
    at the same position in the matched sentence is wrapped in a span of the
    same colour.

    Args:
        sentence: Text whose words are to be highlighted.
        other_sentences: Mutable list of comparison sentences.  Every matched
            entry is replaced in place by its space-joined, highlighted tokens.
        model: Embedding model forwarded to ``calculate_similarity``.
        exclude_words: Container of lower-case words that are never highlighted.

    Returns:
        The tokens of ``sentence`` joined by single spaces, with matched words
        wrapped in coloured ``<span>`` elements.
    """
    words = word_tokenize(sentence)
    html_color_codes = ["red", "green", "blue", "purple", "cyan", "fuchsia", "lime", "maroon", "olive", "navy", "teal", "gray", "DodgerBlue", "Tomato"]

    # Collect (word index, sentence index) for every match.  All scoring is
    # done before any markup is inserted, so embeddings only see plain text.
    all_matched_pairs = []
    for i, word in enumerate(words):
        if word.lower() in exclude_words or not word.isalnum():
            continue
        all_matched_pairs.extend(
            (i, match_index)
            for match_index, _similarity in calculate_similarity(word, other_sentences, model)
        )

    highlighted_words = list(words)
    colored_word_indices = set()
    # Sentence index -> token list.  Each matched sentence is tokenized exactly
    # once, from its plain text, so spans inserted earlier are never
    # re-tokenized (which would split the markup into separate tokens).
    other_tokens = {}

    for color_index, (word_index, sentence_index) in enumerate(all_matched_pairs):
        color_code = html_color_codes[color_index % len(html_color_codes)]

        # Wrap the source word only once, with the colour of its first match,
        # instead of nesting one span per matched sentence.
        if word_index not in colored_word_indices:
            colored_word_indices.add(word_index)
            highlighted_words[word_index] = f"<span style='color: {color_code};'>{words[word_index]}</span>"

        tokens = other_tokens.get(sentence_index)
        if tokens is None:
            tokens = word_tokenize(other_sentences[sentence_index])
            other_tokens[sentence_index] = tokens

        # NOTE(review): the token highlighted in the other sentence is chosen
        # purely by position (same index as the source word), as in the
        # original code -- confirm this is the intended pairing.
        if word_index < len(tokens):
            tokens[word_index] = f"<span style='color: {color_code};'>{tokens[word_index]}</span>"

    # Write the highlighted sentences back into the caller's list.
    for sentence_index, tokens in other_tokens.items():
        other_sentences[sentence_index] = ' '.join(tokens)

    return ' '.join(highlighted_words)
77
+
78
+
79
# Embedding model, created once when the module is imported.
model = SentenceTransformer("all-mpnet-base-v2")

# Four paraphrases of the same bakery description.
sentences = [
    "In a quaint little town nestled in the heart of the mountains, a small bakery famous for its artisanal breads and pastries had a line of customers stretching out the door, eagerly waiting to savor the freshly baked goods that were known far and wide for their delightful flavors.",
    "Within a picturesque mountain village, there stood a renowned bakery, celebrated for its handcrafted bread and sweet treats, attracting a long queue of patrons each morning, all keen to enjoy the baked delicacies that had gained widespread acclaim for their exceptional taste.",
    "A charming bakery, located in a small mountainous hamlet, renowned for producing exquisite handmade pastries and bread, was bustling with a crowd of eager customers lined up outside, each anticipating the chance to indulge in the famous baked items celebrated for their extraordinary deliciousness.",
    "In a cozy, mountain-encircled village, a beloved bakery was the center of attraction, known for its traditional baking methods and delightful pastries, drawing a consistent stream of people waiting outside, all desiring to experience the renowned flavors that made the bakery's products distinctively mouth-watering.",
]
88
+
89
+
90
+
91
+
92
  text_list = []
93
 
94
  def updateChoices(prompt):