ctankso_americas_corpdir_net committed on
Commit
d62eaf7
1 Parent(s): fcd1351

fix: simplified features for stability

Browse files
InnovationHub/llm/__pycache__/chain.cpython-310.pyc CHANGED
Binary files a/InnovationHub/llm/__pycache__/chain.cpython-310.pyc and b/InnovationHub/llm/__pycache__/chain.cpython-310.pyc differ
 
InnovationHub/llm/__pycache__/vector_store.cpython-310.pyc CHANGED
Binary files a/InnovationHub/llm/__pycache__/vector_store.cpython-310.pyc and b/InnovationHub/llm/__pycache__/vector_store.cpython-310.pyc differ
 
InnovationHub/llm/chain.py CHANGED
@@ -15,35 +15,42 @@ db_paths = {
15
  "S-Class": "data/s-class-manual",
16
  "EQS": "data/eqs-manual"
17
  }
18
- cookie_path = 'data/cookies.json'
19
  embeddings = HuggingFaceEmbeddings()
20
- bot = None
21
 
22
- def init_chain():
23
- template = """
24
- {history}
25
- Human: {human_input}
26
- Assistant:"""
27
 
28
- prompt = PromptTemplate(
29
- input_variables=["history", "human_input"],
30
- template=template
31
- )
32
 
33
- chatgpt_chain = LLMChain(
34
- llm=OpenAI(temperature=0),
35
- prompt=prompt,
36
- verbose=True,
37
- memory=ConversationalBufferWindowMemory(k=2),
38
- )
39
- human_input = """I want you to act as a voice assistant for a Mercedes-Benz vehicle. I will provide you with excerpts from a vehicle manual. You must use the excerpts to answer the user's question as best as you can. If you are unsure about the answer, you will truthfully say "not sure"."""
40
- bot_response = chatgpt_chain.predict(human_input=human_input)
41
- print(bot_response)
42
- return chatgpt_chain
 
 
 
 
 
 
 
 
43
 
44
 
45
  def get_prompt(question, vehicle, embeddings, k=4):
46
- prompt = f"""I need information from my {vehicle} manual. I will provide an excerpt from the manual. Use the excerpt and nothing else to answer the question. You must refer to the excerpt as "{vehicle} Manual" in your response. Here is the excerpt:"""
 
 
 
 
 
47
  index = FAISS.load_local(folder_path=db_paths[vehicle], embeddings=embeddings)
48
  similar_docs = index.similarity_search(query=question, k=k)
49
  context = []
@@ -55,24 +62,20 @@ def get_prompt(question, vehicle, embeddings, k=4):
55
  return user_input
56
 
57
 
58
- async def ask_question(question, vehicle, embeddings, backend='bing', k=2, create_bot=False):
59
- global bot
60
- if bot is None or create_bot:
61
- bot = Chatbot(cookiePath=cookie_path)
62
- if backend == 'bing':
63
- prompt = get_prompt(question=question, vehicle=vehicle, embeddings=embeddings, k=k)
64
- response = (await bot.ask(prompt=prompt))["item"]["messages"][1]["adaptiveCards"][0]["body"][0]["text"]
65
- elif backend == 'gpt3':
66
- prompt = get_prompt(question=question, vehicle=vehicle, embeddings=embeddings, k=k)
67
- response = chatgpt_chain.predict(human_input=prompt)
68
- else:
69
- raise ValueError(f"Invalid backend specified: {backend}")
70
  return response
71
 
72
 
73
- async def chatbot(question, vehicle, create_bot=False, k=2):
74
- response = await ask_question(question=question, vehicle=vehicle, embeddings=embeddings, backend='bing', k=k, create_bot=create_bot)
75
- return response
 
76
 
77
 
78
  def start_ui():
@@ -82,17 +85,16 @@ def start_ui():
82
  inputs=[
83
  "text",
84
  gradio.inputs.Dropdown(vehicle_options, label="Select Vehicle Model"),
85
- gradio.inputs.Checkbox(label="Create bot"),
86
  gradio.inputs.Slider(minimum=1, maximum=10, step=1, label="k")
87
  ],
88
  outputs="text",
89
  title="Owner's Manual",
90
  description="Ask your vehicle manual and get a response.",
91
  examples=[
92
- ["What are the different features of the dashboard console?", "S-Class", True, 2],
93
- ["What is the maximum towing capacity?", "S-Class", False, 3],
94
- ["How do I set the clock?", "EQS", True, 2],
95
- ["What is the fuel economy rating?", "EQS", False, 3]
96
  ]
97
  )
98
 
 
15
  "S-Class": "data/s-class-manual",
16
  "EQS": "data/eqs-manual"
17
  }
18
+
19
  embeddings = HuggingFaceEmbeddings()
 
20
 
 
 
 
 
 
21
 
22
+ template = """
23
+ {history}
24
+ Human: {human_input}
25
+ Assistant:"""
26
 
27
+ prompt = PromptTemplate(
28
+ input_variables=["history", "human_input"],
29
+ template=template
30
+ )
31
+
32
+ chatgpt_chain = LLMChain(
33
+ llm=OpenAI(temperature=0),
34
+ prompt=prompt,
35
+ verbose=True,
36
+ memory=ConversationalBufferWindowMemory(k=2),
37
+ )
38
+ human_input = """I want you to act as a voice assistant for a Mercedes-Benz vehicle.
39
+ I will provide you with excerpts from a vehicle manual.
40
+ You must use the excerpts to answer the user's question as best as you can.
41
+ If you are unsure about the answer, you will truthfully say "not sure".
42
+ Let's think step by step.
43
+ """
44
+ bot_response = chatgpt_chain.predict(human_input=human_input)
45
 
46
 
47
  def get_prompt(question, vehicle, embeddings, k=4):
48
+ prompt = f"""
49
+ I need information from my {vehicle} manual.
50
+ I will provide an excerpt from the manual. Use the excerpt and nothing else to answer the question.
51
+ You must refer to the excerpt as "{vehicle} Manual" in your response. Here is the excerpt:
52
+ """
53
+
54
  index = FAISS.load_local(folder_path=db_paths[vehicle], embeddings=embeddings)
55
  similar_docs = index.similarity_search(query=question, k=k)
56
  context = []
 
62
  return user_input
63
 
64
 
65
def ask_question(question, vehicle, embeddings, chatgpt_chain, k=2):
    """Answer a question about a vehicle manual with the supplied LLM chain.

    Args:
        question: The user's question text.
        vehicle: Vehicle model name, forwarded to ``get_prompt``.
        embeddings: Embedding model, forwarded to ``get_prompt``.
        chatgpt_chain: Object exposing ``predict(human_input=...)``.
        k: Number of similar manual excerpts to retrieve for the prompt.

    Returns:
        The value returned by ``chatgpt_chain.predict`` for the built prompt.
    """
    # get_prompt loads the FAISS index for the vehicle itself, so loading it
    # here as well would only repeat the disk read and discard the result.
    prompt = get_prompt(question=question, vehicle=vehicle,
                        embeddings=embeddings, k=k)
    return chatgpt_chain.predict(human_input=prompt)
73
 
74
 
75
async def chatbot(question, vehicle, k=2):
    """Answer ``question`` for ``vehicle`` using the module-level chain.

    Args:
        question: The user's question text.
        vehicle: Vehicle model name, forwarded to ``ask_question``.
        k: Number of manual excerpts to retrieve.

    Returns:
        The response produced by ``ask_question``.
    """
    # Forward the caller's k instead of a hard-coded 2, which ignored the
    # argument. int() is a defensive coercion in case the value arrives as a
    # float (e.g. from a UI slider) -- TODO confirm what the caller delivers.
    # NOTE: ask_question is a plain synchronous call, so this coroutine does
    # not yield while the answer is being produced.
    return ask_question(question=question, vehicle=vehicle,
                        embeddings=embeddings, chatgpt_chain=chatgpt_chain,
                        k=int(k))
79
 
80
 
81
  def start_ui():
 
85
  inputs=[
86
  "text",
87
  gradio.inputs.Dropdown(vehicle_options, label="Select Vehicle Model"),
 
88
  gradio.inputs.Slider(minimum=1, maximum=10, step=1, label="k")
89
  ],
90
  outputs="text",
91
  title="Owner's Manual",
92
  description="Ask your vehicle manual and get a response.",
93
  examples=[
94
+ ["What are the different features of the dashboard console?", "S-Class", 2],
95
+ ["What is flacon?", "S-Class", 3],
96
+ ["What is hyperscreen?", "EQS", 2],
97
+ ["Where can I find my vin?", "EQS", 3]
98
  ]
99
  )
100
 
InnovationHub/llm/vector_store.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  import os
2
  import pprint
3
  import codecs
@@ -29,6 +34,24 @@ def get_content(input_file):
29
  return raw_text
30
 
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  def create_docs(input_file):
33
  # Create a text splitter object with a separator character
34
  text_splitter = RecursiveCharacterTextSplitter(
@@ -45,8 +68,8 @@ def create_docs(input_file):
45
  return docs
46
 
47
 
48
- def get_similar_docs(query, index):
49
- similar_docs = index.similarity_search(query=query)
50
  result = [(d.summary, d.metadata) for d in similar_docs]
51
  return result
52
 
@@ -62,6 +85,82 @@ def convert_to_html(similar_docs):
62
  return html
63
 
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  def start_ui(index):
66
  def query_index(query):
67
  similar_docs = get_similar_docs(query=query, index=index)
 
1
+ import plotly.graph_objs as go
2
+ from sklearn.cluster import KMeans
3
+ from sklearn.decomposition import PCA
4
+ import plotly.express as px
5
+ import numpy as np
6
  import os
7
  import pprint
8
  import codecs
 
34
  return raw_text
35
 
36
 
37
def split_text(input_file, chunk_size=1000, chunk_overlap=0):
    """Split the content of ``input_file`` into chunks with source metadata.

    Args:
        input_file: Path of the file whose content is read via ``get_content``.
        chunk_size: Chunk size handed to the text splitter.
        chunk_overlap: Chunk overlap handed to the text splitter.

    Returns:
        A ``(texts, metadatas, docs)`` tuple: the chunk strings, one
        ``{"source": "<file stem>[<chunk number>]"}`` dict per chunk, and the
        documents the splitter builds from both.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )

    # File name without directory and extension; used to tag every chunk.
    stem, _extension = os.path.splitext(os.path.basename(input_file))
    content = get_content(input_file=input_file)

    texts = splitter.split_text(text=content)
    metadatas = []
    for position, _chunk in enumerate(texts):
        metadatas.append({"source": f"{stem}[{position}]"})
    docs = splitter.create_documents(texts=texts, metadatas=metadatas)

    return texts, metadatas, docs
53
+
54
+
55
  def create_docs(input_file):
56
  # Create a text splitter object with a separator character
57
  text_splitter = RecursiveCharacterTextSplitter(
 
68
  return docs
69
 
70
 
71
def get_similar_docs(query, index, k=5):
    """Return ``(summary, metadata)`` pairs for the documents closest to ``query``.

    Args:
        query: Text to search for.
        index: Object exposing ``similarity_search(query=..., k=...)``.
        k: Number of documents to request from the index.

    Returns:
        A list with one ``(doc.summary, doc.metadata)`` tuple per returned
        document, in the order the index returned them.
    """
    matches = index.similarity_search(query=query, k=k)
    pairs = []
    for doc in matches:
        pairs.append((doc.summary, doc.metadata))
    return pairs
75
 
 
85
  return html
86
 
87
 
88
def create_similarity_plot(embeddings, labels, query_index, n_clusters=3):
    """Show a 3D scatter plot of embeddings, colored by k-means cluster.

    The embeddings are reduced to three dimensions with PCA for plotting,
    while the clustering runs on the original vectors.

    Args:
        embeddings: Sequence of embedding vectors.
        labels: Hover text for the points. Embeddings beyond ``len(labels)``
            are dropped before plotting.
        query_index: Position, within the kept embeddings, of the point drawn
            as the black "Query" diamond. Negative indices count from the end.
        n_clusters: Requested number of clusters; capped at the number of
            kept points because k-means cannot fit more clusters than samples.

    Raises:
        ValueError: If fewer than three points remain, since the reduction to
            three components needs at least three samples.
        IndexError: If ``query_index`` does not address a kept point.
    """
    # Only include embeddings that have corresponding labels.
    points = list(embeddings)[:len(labels)]
    n_points = len(points)

    if n_points < 3:
        raise ValueError(
            f"Need at least 3 labeled embeddings for a 3D plot, got {n_points}")
    if not -n_points <= query_index < n_points:
        raise IndexError(
            f"query_index {query_index} is out of range for {n_points} points")

    # Reduce the dimensionality of the embeddings using PCA.
    pca = PCA(n_components=3)
    pca_embeddings = pca.fit_transform(points)

    # Cluster the embeddings using k-means.
    kmeans = KMeans(n_clusters=min(n_clusters, n_points))
    kmeans.fit(points)

    # Create a trace for the query point.
    query_trace = go.Scatter3d(
        x=[pca_embeddings[query_index, 0]],
        y=[pca_embeddings[query_index, 1]],
        z=[pca_embeddings[query_index, 2]],
        mode='markers',
        marker=dict(
            color='black',
            symbol='diamond',
            size=10
        ),
        name='Query'
    )

    # Create a trace for the other points.
    points_trace = go.Scatter3d(
        x=pca_embeddings[:, 0],
        y=pca_embeddings[:, 1],
        z=pca_embeddings[:, 2],
        mode='markers',
        marker=dict(
            color=kmeans.labels_,
            colorscale=px.colors.qualitative.Alphabet,
            size=5
        ),
        text=labels,
        name='Points'
    )

    # Create the figure.
    fig = go.Figure(data=[query_trace, points_trace])

    # Add a title and legend.
    fig.update_layout(
        title="3D Similarity Plot",
        legend_title_text="Cluster"
    )

    # Show the plot.
    fig.show()
141
+
142
+
143
def plot_similarities(query, index, embeddings=HuggingFaceEmbeddings(), k=5):
    """Plot ``query`` together with its ``k`` most similar documents.

    Args:
        query: Text to search for and to highlight in the plot.
        index: Vector index forwarded to ``get_similar_docs``.
        embeddings: Object exposing ``embed_query`` and ``embed_documents``.
            NOTE: the default instance is created once, when this ``def``
            statement is executed, and shared by every call that omits it.
        k: Number of similar documents to retrieve.

    Returns:
        None. The plot is displayed by ``create_similarity_plot``.
    """
    query_embedding = embeddings.embed_query(text=query)

    similar_docs = get_similar_docs(query=query, index=index, k=k)
    texts = [summary for summary, _metadata in similar_docs]

    # Append the query's own vector and label after the retrieved documents.
    # Without this the "query" marker would sit on the last retrieved
    # document, because the query itself would not be among the points.
    points = list(embeddings.embed_documents(texts=texts))
    points.append(query_embedding)
    labels = texts + [query]

    # The query is the last point by construction.
    query_index = len(points) - 1

    create_similarity_plot(
        embeddings=points,
        labels=labels,
        query_index=query_index,
        n_clusters=3
    )
162
+
163
+
164
  def start_ui(index):
165
  def query_index(query):
166
  similar_docs = get_similar_docs(query=query, index=index)