Amirizaniani committed on
Commit
ceda1ed
1 Parent(s): baf4b99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -21
app.py CHANGED
@@ -3,6 +3,11 @@ from langchain.chains import LLMChain
3
  from langchain_community.llms import CTransformers
4
  from langchain_core.prompts import PromptTemplate
5
  from sentence_transformers import SentenceTransformer
 
 
 
 
 
6
 
7
 
8
  def generate_prompts(user_input):
@@ -12,45 +17,49 @@ def generate_prompts(user_input):
12
  )
13
  config = {'max_new_tokens': 2048, 'temperature': 0.7, 'context_length': 4096}
14
  llm = CTransformers(model="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
15
- config=config)
 
16
  hub_chain = LLMChain(prompt = prompt_template, llm = llm)
17
 
18
  input_data = {"Question": user_input}
19
 
20
  # Here you would integrate your prompt template with your model
21
  # For demonstration, this is just a placeholder
22
- generated_prompts = hub_chain.run(input_data)
23
  questions_list = generated_prompts.split('\n')
24
 
25
 
26
  formatted_questions = "\n".join(f"Question: {question}" for i, question in enumerate(questions_list) if question.strip())
27
  questions_list = formatted_questions.split("Question:")[1:]
 
28
 
29
  return questions_list
30
 
31
- def answer_question(prompt):
32
- prompt_template = PromptTemplate.from_template(
33
  input_variables=["Question"],
34
- template=f"give one answer for {prompt} and do not consider the number behind it."
35
  )
36
- config = {'max_new_tokens': 2048, 'temperature': 0.7, 'context_length': 4096}
37
- llm = CTransformers(model="TheBloke/Llama-2-7B-Chat-GGML",
38
  config=config,
39
  threads=os.cpu_count())
40
  hub_chain = LLMChain(prompt = prompt_template, llm = llm)
 
41
  input_data = {"Question": prompt}
42
- generated_answer = hub_chain.run(input_data)
43
- return generated_answer
44
 
45
- def calculate_similarity(word, other_words, model, threshold=0.5):
46
- embeddings_word = model.encode([word])
47
- embeddings_other_words = model.encode(other_words)
48
- for i, embedding in enumerate(embeddings_other_words):
49
- similarity = 1 - scipy.spatial.distance.cosine(embeddings_word[0], embedding)
50
- if similarity > threshold and similarity < 0.85:
51
- return i, similarity
52
- return None, None
53
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  def highlight_words_within_cluster(sentences, model, exclude_words):
56
  # Create a dictionary to map words to color codes
@@ -102,9 +111,6 @@ def highlight_words_within_cluster(sentences, model, exclude_words):
102
  return highlighted_sentences
103
 
104
  # Rest of the code, including the cluster_sentences function, remains the same
105
-
106
- exclude_words = {"a", "the", "for", "from", "of", "in","over", "as", "on", "is", "am", "have", "an","has", "had", "and", "by", "it", "its", "those", "these", "was", "were", "their", "them", "I", "you", "also", "your", "me", "after"}
107
-
108
  def cluster_sentences(sentences, model, num_clusters=3):
109
  embeddings = model.encode(sentences)
110
  kmeans = KMeans(n_clusters=num_clusters)
@@ -215,6 +221,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
215
  prompt_input = gr.Textbox(label="Enter your email address", placeholder="Enter Your Email Address")
216
  with gr.Row():
217
  generate_button = gr.Button("Submit", variant="primary")
218
-
219
  # Launch the Gradio app
220
  demo.launch()
 
3
  from langchain_community.llms import CTransformers
4
  from langchain_core.prompts import PromptTemplate
5
  from sentence_transformers import SentenceTransformer
6
+ from sklearn.cluster import KMeans
7
+ from nltk.tokenize import word_tokenize
8
+ import numpy as np
9
+ import scipy.spatial
10
+ from scipy.spatial.distance import cosine
11
 
12
 
13
  def generate_prompts(user_input):
 
17
  )
18
  config = {'max_new_tokens': 2048, 'temperature': 0.7, 'context_length': 4096}
19
  llm = CTransformers(model="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
20
+ config=config,
21
+ threads=os.cpu_count())
22
  hub_chain = LLMChain(prompt = prompt_template, llm = llm)
23
 
24
  input_data = {"Question": user_input}
25
 
26
  # Here you would integrate your prompt template with your model
27
  # For demonstration, this is just a placeholder
28
+ generated_prompts = hub_chain.run(input_data) # Modify this part based on how you run the model
29
  questions_list = generated_prompts.split('\n')
30
 
31
 
32
  formatted_questions = "\n".join(f"Question: {question}" for i, question in enumerate(questions_list) if question.strip())
33
  questions_list = formatted_questions.split("Question:")[1:]
34
+
35
 
36
  return questions_list
37
 
38
def answer_question(prompt, model_name):
    """Generate a short answer to ``prompt`` with the model ``model_name``.

    Args:
        prompt: Question text; it fills the template's ``Question`` variable.
        model_name: Value passed as ``model`` to ``CTransformers``
            (for example "TheBloke/Llama-2-7B-Chat-GGML").

    Returns:
        The value returned by ``LLMChain.run`` for the filled-in prompt.
    """
    # Local import so that os.cpu_count() resolves even if the module's
    # import section does not bring ``os`` into scope.
    import os

    # The question goes in through the declared "Question" variable instead of
    # being baked into the template with an f-string. With the f-string form,
    # any "{" or "}" in the question was parsed as a template placeholder and
    # the declared input variable was never referenced by the template.
    prompt_template = PromptTemplate(
        input_variables=["Question"],
        template=(
            "Give a short answer to this question '{Question}' "
            "and do not consider the number behind it."
        ),
    )

    config = {'max_new_tokens': 512, 'temperature': 0.7, 'context_length': 512}
    # NOTE(review): ``threads`` is passed as a constructor keyword exactly as
    # before; confirm whether it should live inside ``config`` instead.
    llm = CTransformers(
        model=model_name,
        config=config,
        threads=os.cpu_count(),
    )
    hub_chain = LLMChain(prompt=prompt_template, llm=llm)

    return hub_chain.run({"Question": prompt})
55
def calculate_similarity(word, other_words, model, threshold=0.5,
                         max_similarity=0.85):
    """Find the first entry of ``other_words`` moderately similar to ``word``.

    Similarity is ``1 - cosine distance`` between the embeddings returned by
    ``model.encode``. The first candidate whose similarity lies strictly
    between ``threshold`` and ``max_similarity`` wins.

    Args:
        word: Item to compare against the candidates.
        other_words: Sequence of candidate items passed to ``model.encode``.
        model: Object exposing ``encode(items)`` that returns one embedding
            per item.
        threshold: Exclusive lower bound on the similarity.
        max_similarity: Exclusive upper bound on the similarity. Defaults to
            0.85, the value that was previously hard-coded.

    Returns:
        ``(index, similarity)`` for the first matching candidate, or
        ``(None, None)`` when nothing matches or ``other_words`` is empty.
    """
    # Nothing to compare against: skip both encoder calls entirely.
    if len(other_words) == 0:
        return None, None

    word_embedding = model.encode([word])[0]
    candidate_embeddings = model.encode(other_words)

    for index, candidate in enumerate(candidate_embeddings):
        similarity = 1 - scipy.spatial.distance.cosine(word_embedding, candidate)
        if threshold < similarity < max_similarity:
            return index, similarity

    return None, None
63
 
64
  def highlight_words_within_cluster(sentences, model, exclude_words):
65
  # Create a dictionary to map words to color codes
 
111
  return highlighted_sentences
112
 
113
  # Rest of the code, including the cluster_sentences function, remains the same
 
 
 
114
  def cluster_sentences(sentences, model, num_clusters=3):
115
  embeddings = model.encode(sentences)
116
  kmeans = KMeans(n_clusters=num_clusters)
 
221
  prompt_input = gr.Textbox(label="Enter your email address", placeholder="Enter Your Email Address")
222
  with gr.Row():
223
  generate_button = gr.Button("Submit", variant="primary")
224
+
225
  # Launch the Gradio app
226
  demo.launch()