Spaces:

chasetank
/

owner-manual

Runtime error

App Files Files Community

ctankso_americas_corpdir_net committed on Mar 3, 2023

Commit

d62eaf7

•

1 Parent(s): fcd1351

fix: simplified features for stability

Browse files

Files changed (4) hide show

InnovationHub/llm/__pycache__/chain.cpython-310.pyc +0 -0
InnovationHub/llm/__pycache__/vector_store.cpython-310.pyc +0 -0
InnovationHub/llm/chain.py +44 -42
InnovationHub/llm/vector_store.py +101 -2

InnovationHub/llm/__pycache__/chain.cpython-310.pyc CHANGED Viewed

Binary files a/InnovationHub/llm/__pycache__/chain.cpython-310.pyc and b/InnovationHub/llm/__pycache__/chain.cpython-310.pyc differ

InnovationHub/llm/__pycache__/vector_store.cpython-310.pyc CHANGED Viewed

Binary files a/InnovationHub/llm/__pycache__/vector_store.cpython-310.pyc and b/InnovationHub/llm/__pycache__/vector_store.cpython-310.pyc differ

InnovationHub/llm/chain.py CHANGED Viewed

@@ -15,35 +15,42 @@ db_paths = {
     "S-Class": "data/s-class-manual",
     "EQS": "data/eqs-manual"
 }
-cookie_path = 'data/cookies.json'
 embeddings = HuggingFaceEmbeddings()
-bot = None
-def init_chain():
-    template = """
-    {history}
-    Human: {human_input}
-    Assistant:"""
-    prompt = PromptTemplate(
-        input_variables=["history", "human_input"],
-        template=template
-    )
-    chatgpt_chain = LLMChain(
-        llm=OpenAI(temperature=0),
-        prompt=prompt,
-        verbose=True,
-        memory=ConversationalBufferWindowMemory(k=2),
-    )
-    human_input = """I want you to act as a voice assistant for a Mercedes-Benz vehicle. I will provide you with excerpts from a vehicle manual. You must use the excerpts to answer the user's question as best as you can. If you are unsure about the answer, you will truthfully say "not sure"."""
-    bot_response = chatgpt_chain.predict(human_input=human_input)
-    print(bot_response)
-    return chatgpt_chain
 def get_prompt(question, vehicle, embeddings, k=4):
-    prompt = f"""I need information from my {vehicle} manual. I will provide an excerpt from the manual. Use the excerpt and nothing else to answer the question. You must refer to the excerpt as "{vehicle} Manual" in your response. Here is the excerpt:"""
     index = FAISS.load_local(folder_path=db_paths[vehicle], embeddings=embeddings)
     similar_docs = index.similarity_search(query=question, k=k)
     context = []
@@ -55,24 +62,20 @@ def get_prompt(question, vehicle, embeddings, k=4):
     return user_input
-async def ask_question(question, vehicle, embeddings, backend='bing', k=2, create_bot=False):
-    global bot
-    if bot is None or create_bot:
-        bot = Chatbot(cookiePath=cookie_path)
-    if backend == 'bing':
-        prompt = get_prompt(question=question, vehicle=vehicle, embeddings=embeddings, k=k)
-        response = (await bot.ask(prompt=prompt))["item"]["messages"][1]["adaptiveCards"][0]["body"][0]["text"]
-    elif backend == 'gpt3':
-        prompt = get_prompt(question=question, vehicle=vehicle, embeddings=embeddings, k=k)
-        response = chatgpt_chain.predict(human_input=prompt)
-    else:
-        raise ValueError(f"Invalid backend specified: {backend}")
     return response
-async def chatbot(question, vehicle, create_bot=False, k=2):
-    response = await ask_question(question=question, vehicle=vehicle, embeddings=embeddings, backend='bing', k=k, create_bot=create_bot)
-    return response
 def start_ui():
@@ -82,17 +85,16 @@ def start_ui():
         inputs=[
             "text",
             gradio.inputs.Dropdown(vehicle_options, label="Select Vehicle Model"),
-            gradio.inputs.Checkbox(label="Create bot"),
             gradio.inputs.Slider(minimum=1, maximum=10, step=1, label="k")
         ],
         outputs="text",
         title="Owner's Manual",
         description="Ask your vehicle manual and get a response.",
         examples=[
-            ["What are the different features of the dashboard console?", "S-Class", True, 2],
-            ["What is the maximum towing capacity?", "S-Class", False, 3],
-            ["How do I set the clock?", "EQS", True, 2],
-            ["What is the fuel economy rating?", "EQS", False, 3]
         ]
     )

     "S-Class": "data/s-class-manual",
     "EQS": "data/eqs-manual"
 }
 embeddings = HuggingFaceEmbeddings()
+# Prompt layout: the conversation history, then the newest human turn,
+# then the cue for the assistant's reply.
+template = """
+{history}
+Human: {human_input}
+Assistant:"""
+prompt = PromptTemplate(
+    input_variables=["history", "human_input"],
+    template=template
+)
+# Module-level chain: OpenAI LLM at temperature 0, the prompt above, verbose
+# output enabled, and a windowed conversation memory configured with k=2.
+chatgpt_chain = LLMChain(
+    llm=OpenAI(temperature=0),
+    prompt=prompt,
+    verbose=True,
+    memory=ConversationalBufferWindowMemory(k=2),
+)
+# Role instruction sent as the first message. The leading spaces on the
+# continuation lines are inside the string literal, so they are part of the
+# text that is sent.
+human_input = """I want you to act as a voice assistant for a Mercedes-Benz vehicle.
+    I will provide you with excerpts from a vehicle manual.
+    You must use the excerpts to answer the user's question as best as you can.
+    If you are unsure about the answer, you will truthfully say "not sure".
+    Let's think step by step.
+"""
+# Executed when the module is loaded: pushes the role instruction through the
+# chain. The reply is kept in bot_response; no later use of it is visible here.
+bot_response = chatgpt_chain.predict(human_input=human_input)
 def get_prompt(question, vehicle, embeddings, k=4):
+    prompt = f"""
+    I need information from my {vehicle} manual.
+    I will provide an excerpt from the manual. Use the excerpt and nothing else to answer the question.
+    You must refer to the excerpt as "{vehicle} Manual" in your response. Here is the excerpt:
+    """
     index = FAISS.load_local(folder_path=db_paths[vehicle], embeddings=embeddings)
     similar_docs = index.similarity_search(query=question, k=k)
     context = []
     return user_input
+def ask_question(question, vehicle, embeddings, chatgpt_chain, k=2):
+    """Answer *question* about *vehicle* using manual excerpts and the LLM chain.
+
+    Args:
+        question: The user's question.
+        vehicle: Vehicle model name; passed to get_prompt() to pick the manual.
+        embeddings: Embedding model forwarded to get_prompt().
+        chatgpt_chain: Chain object whose predict(human_input=...) produces the answer.
+        k: Number of similar excerpts get_prompt() is asked to retrieve.
+
+    Returns:
+        The value returned by chatgpt_chain.predict().
+    """
+    # get_prompt() loads the FAISS index for the vehicle itself, so it is not
+    # loaded here as well; the previous extra load was never used.
+    prompt = get_prompt(question=question, vehicle=vehicle,
+                        embeddings=embeddings, k=k)
+    response = chatgpt_chain.predict(human_input=prompt)
     return response
+async def chatbot(question, vehicle, k=2):
+    """Answer *question* for *vehicle* with the module-level embeddings and chain.
+
+    Args:
+        question: The user's question.
+        vehicle: Vehicle model name.
+        k: Number of manual excerpts to retrieve for the prompt.
+
+    Returns:
+        The response produced by ask_question().
+    """
+    # Forward the caller's k; it was previously hard-coded to 2, so the
+    # parameter had no effect. int() guards against a UI control delivering
+    # the value as a float (assumption: the retriever needs an integer count).
+    response = ask_question(question=question, vehicle=vehicle,
+                            embeddings=embeddings, chatgpt_chain=chatgpt_chain,
+                            k=int(k))
+    return response
 def start_ui():
         inputs=[
             "text",
             gradio.inputs.Dropdown(vehicle_options, label="Select Vehicle Model"),
             gradio.inputs.Slider(minimum=1, maximum=10, step=1, label="k")
         ],
         outputs="text",
         title="Owner's Manual",
         description="Ask your vehicle manual and get a response.",
         examples=[
+            ["What are the different features of the dashboard console?", "S-Class", 2],
+            ["What is flacon?", "S-Class", 3],
+            ["What is hyperscreen?", "EQS", 2],
+            ["Where can I find my vin?", "EQS", 3]
         ]
     )

InnovationHub/llm/vector_store.py CHANGED Viewed

@@ -1,3 +1,8 @@
 import os
 import pprint
 import codecs
@@ -29,6 +34,24 @@ def get_content(input_file):
     return raw_text
 def create_docs(input_file):
     # Create a text splitter object with a separator character
     text_splitter = RecursiveCharacterTextSplitter(
@@ -45,8 +68,8 @@ def create_docs(input_file):
     return docs
-def get_similar_docs(query, index):
-    similar_docs = index.similarity_search(query=query)
     result = [(d.summary, d.metadata) for d in similar_docs]
     return result
@@ -62,6 +85,82 @@ def convert_to_html(similar_docs):
     return html
 def start_ui(index):
     def query_index(query):
         similar_docs = get_similar_docs(query=query, index=index)

+import plotly.graph_objs as go
+from sklearn.cluster import KMeans
+from sklearn.decomposition import PCA
+import plotly.express as px
+import numpy as np
 import os
 import pprint
 import codecs
     return raw_text
+def split_text(input_file, chunk_size=1000, chunk_overlap=0):
+    """Split the content of *input_file* into chunks and build documents from them.
+
+    Args:
+        input_file: Path of the file whose content is read with get_content().
+        chunk_size: Chunk size handed to the text splitter.
+        chunk_overlap: Chunk overlap handed to the text splitter.
+
+    Returns:
+        A tuple (texts, metadatas, docs): the text chunks, one metadata dict per
+        chunk with a "source" entry of the form "<file stem>[<chunk number>]",
+        and the documents the splitter creates from both.
+    """
+    splitter = RecursiveCharacterTextSplitter(
+        chunk_size=chunk_size, chunk_overlap=chunk_overlap, length_function=len)
+    # File name without directory or extension; used to label every chunk.
+    basename, _ = os.path.splitext(os.path.basename(input_file))
+    texts = splitter.split_text(text=get_content(input_file=input_file))
+    metadatas = [{"source": f"{basename}[{i}]"} for i, _ in enumerate(texts)]
+    docs = splitter.create_documents(texts=texts, metadatas=metadatas)
+    return texts, metadatas, docs
 def create_docs(input_file):
     # Create a text splitter object with a separator character
     text_splitter = RecursiveCharacterTextSplitter(
     return docs
+def get_similar_docs(query, index, k=5):
+    """Return (summary, metadata) pairs for the *k* entries of *index* most similar to *query*."""
+    matches = index.similarity_search(query=query, k=k)
+    return [(doc.summary, doc.metadata) for doc in matches]
     return html
+def create_similarity_plot(embeddings, labels, query_index, n_clusters=3):
+    """Show a 3D scatter plot of the embeddings with the query point highlighted.
+
+    Embeddings beyond the number of labels are dropped. The remaining vectors
+    are projected onto three PCA components for display, and clustered with
+    k-means on the unprojected vectors to colour the points.
+
+    Args:
+        embeddings: Sequence of embedding vectors.
+        labels: Hover text for the points; its length caps how many embeddings are used.
+        query_index: Row of the projected embeddings drawn as the query marker.
+        n_clusters: Number of k-means clusters used for colouring.
+    """
+    # Keep only the leading embeddings that have a matching label.
+    labelled = [vector for position, vector in enumerate(embeddings)
+                if position < len(labels)]
+    # Three principal components give the x/y/z plotting coordinates.
+    projected = PCA(n_components=3).fit_transform(labelled)
+    # Clustering runs on the full vectors; its labels only drive the colours.
+    clusterer = KMeans(n_clusters=n_clusters)
+    clusterer.fit(labelled)
+    # Single large black diamond marking the query.
+    query_marker = {"color": "black", "symbol": "diamond", "size": 10}
+    query_trace = go.Scatter3d(
+        x=[projected[query_index, 0]],
+        y=[projected[query_index, 1]],
+        z=[projected[query_index, 2]],
+        mode="markers",
+        marker=query_marker,
+        name="Query",
+    )
+    # Every point, coloured by its cluster assignment.
+    points_marker = {
+        "color": clusterer.labels_,
+        "colorscale": px.colors.qualitative.Alphabet,
+        "size": 5,
+    }
+    points_trace = go.Scatter3d(
+        x=projected[:, 0],
+        y=projected[:, 1],
+        z=projected[:, 2],
+        mode="markers",
+        marker=points_marker,
+        text=labels,
+        name="Points",
+    )
+    fig = go.Figure(data=[query_trace, points_trace])
+    fig.update_layout(title="3D Similarity Plot", legend_title_text="Cluster")
+    fig.show()
+def plot_similarities(query, index, embeddings=HuggingFaceEmbeddings(), k=5):
+    """Plot *query* together with the *k* most similar documents from *index*.
+
+    Args:
+        query: Query text.
+        index: Object searched through get_similar_docs().
+        embeddings: Embedding model providing embed_query() and embed_documents().
+            NOTE: the default instance is created once, when this function is
+            defined, and shared by every call that omits the argument.
+        k: Number of similar documents to retrieve.
+    """
+    query_embedding = embeddings.embed_query(text=query)
+    similar_docs = get_similar_docs(query=query, index=index, k=k)
+    # get_similar_docs() yields (summary, metadata) pairs; plot the summaries.
+    texts = [doc[0] for doc in similar_docs]
+    # The query vector goes last so that query_index really identifies it.
+    # It used to be computed and then discarded, which made the "Query"
+    # marker land on the last retrieved document instead.
+    embeddings_array = list(embeddings.embed_documents(texts=texts))
+    embeddings_array.append(query_embedding)
+    # One label per vector; otherwise the plot would drop the unlabelled query.
+    labels = texts + [query]
+    query_index = len(embeddings_array) - 1
+    create_similarity_plot(
+        embeddings=embeddings_array,
+        labels=labels,
+        query_index=query_index,
+        n_clusters=3
+    )
 def start_ui(index):
     def query_index(query):
         similar_docs = get_similar_docs(query=query, index=index)