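# Gradio app for auditing LLMs: one local GGUF/GGML model generates candidate
# question prompts, another answers the prompts the user selects, and the
# answers are clustered with sentence embeddings so semantically similar words
# can be highlighted in the rendered HTML.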
import gradio as gr
from dotenv import load_dotenv
from langchain import PromptTemplate, LLMChain, HuggingFaceHub
from langchain.llms import CTransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
import nltk
from nltk.tokenize import word_tokenize
import numpy as np
import scipy.spatial

# word_tokenize needs the punkt tokenizer data; fetch it once if it is missing.
nltk.download("punkt", quiet=True)

load_dotenv()
def generate_prompts(user_input):
    prompt_template = PromptTemplate(
        input_variables=["Question"],
        # Use a real template variable so the chain substitutes the user input at run time.
        template="Just list 10 question prompts for {Question} and don't put a number before each of the prompts."
    )
    config = {'max_new_tokens': 64, 'temperature': 0.7, 'context_length': 64}
    llm = CTransformers(model="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
                        config=config)
    hub_chain = LLMChain(prompt=prompt_template, llm=llm)
    input_data = {"Question": user_input}
    generated_prompts = hub_chain.run(input_data)
    # Split the raw completion into individual prompts, one per "Question:" entry.
    questions_list = generated_prompts.split('\n')
    formatted_questions = "\n".join(f"Question: {question}" for question in questions_list if question.strip())
    questions_list = formatted_questions.split("Question:")[1:]
    return questions_list
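# Hypothetical usage sketch (input and output wording are illustrative; the
# actual text depends entirely on the model):
#   generate_prompts("renewable energy")
#   might return [" What are the benefits of solar power?", " How do wind turbines work?", ...]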
def answer_question(prompt, model_name_input="TheBloke/Llama-2-7B-Chat-GGML"):
    prompt_template = PromptTemplate(
        input_variables=["Question"],
        template="Give one answer for {Question} and do not consider the number behind it."
    )
    config = {'max_new_tokens': 64, 'temperature': 0.7, 'context_length': 64}
    # The Gradio dropdown passes a GGML/GGUF repo id, so the selected model is used here.
    llm = CTransformers(model=model_name_input,
                        config=config)
    hub_chain = LLMChain(prompt=prompt_template, llm=llm)
    input_data = {"Question": prompt}
    generated_answer = hub_chain.run(input_data)
    return generated_answer
def calculate_similarity(word, other_words, model, threshold=0.5):
    embeddings_word = model.encode([word])
    embeddings_other_words = model.encode(other_words)
    for i, embedding in enumerate(embeddings_other_words):
        similarity = 1 - scipy.spatial.distance.cosine(embeddings_word[0], embedding)
        # Report the first word whose similarity is above the threshold but below 0.85
        # (related, but not an exact or near-exact duplicate).
        if similarity > threshold and similarity < 0.85:
            return i, similarity
    return None, None
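# highlight_words_within_cluster compares every content word of a sentence
# against the content words of the other sentences in the same cluster and
# wraps the matching words in colored HTML <span> tags, reusing the same color
# for repeated words.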
def highlight_words_within_cluster(sentences, model, exclude_words):
    # Create a dictionary to map words to color codes
    word_to_color = {}
    color_codes = [
        "\033[41m",   # Background Red
        "\033[42m",   # Background Green
        "\033[43m",   # Background Yellow
        "\033[44m",   # Background Blue
        "\033[45m",   # Background Purple
        "\033[46m",   # Background Cyan
        "\033[100m",  # Background Dark Gray
        "\033[101m",  # Background Light Red
        "\033[102m",  # Background Light Green
        "\033[103m",  # Background Light Yellow
        "\033[104m",  # Background Light Blue
        "\033[105m",  # Background Light Purple
        "\033[106m",  # Background Light Cyan
        "\033[47m"    # Background Gray
    ]
    html_color_codes = ["red", "green", "blue", "purple", "cyan", "fuchsia", "lime", "maroon", "olive", "navy", "teal", "gray"]
    color_index = 0
    highlighted_sentences = []
    for sentence in sentences:
        words = word_tokenize(sentence)
        other_sentences = [s for s in sentences if s != sentence]
        all_other_words = [word for s in other_sentences for word in word_tokenize(s) if word.lower() not in exclude_words and word.isalnum()]
        highlighted_words = []
        for word in words:
            if word.lower() not in exclude_words and word.isalnum():
                match_index, similarity = calculate_similarity(word, all_other_words, model)
                if match_index is not None:
                    # Assign color to the word if not already assigned
                    if word not in word_to_color:
                        word_to_color[word] = html_color_codes[color_index % len(html_color_codes)]
                        color_index += 1
                    # Highlight the word
                    # highlighted_word = f"{word_to_color[word]}{word}\033[0m"
                    highlighted_word = "<span style='color: " + word_to_color[word] + "'>" + word + "</span>"
                else:
                    highlighted_word = word
                highlighted_words.append(highlighted_word)
            else:
                highlighted_words.append(word)
        highlighted_sentences.append(' '.join(highlighted_words))
    return highlighted_sentences
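# Hypothetical example: for the bakery sentences defined below, related words
# such as "bread" and "pastries" may be wrapped in colored <span> tags when
# their embedding similarity falls inside the (0.5, 0.85) band used by
# calculate_similarity.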
exclude_words = {"a", "the", "for", "from", "of", "in","over", "as", "on", "is", "am", "have", "an","has", "had", "and", "by", "it", "its", "those", "these", "was", "were", "their", "them", "I", "you", "also", "your", "me", "after"} | |
def cluster_sentences(sentences, model, num_clusters=3):
    embeddings = model.encode(sentences)
    kmeans = KMeans(n_clusters=num_clusters)
    kmeans.fit(embeddings)
    return kmeans.labels_

model = SentenceTransformer('all-mpnet-base-v2')

exclude_words = {"a", "the", "for", "from", "of", "in", "over", "as", "on", "is", "am", "have", "an", "has", "had", "and", "by", "it", "its", "those", "these", "above", "to"}

sentences = ["In a quaint little town nestled in the heart of the mountains, a small bakery famous for its artisanal breads and pastries had a line of customers stretching out the door, eagerly waiting to savor the freshly baked goods that were known far and wide for their delightful flavors.",
             "Within a picturesque mountain village, there stood a renowned bakery, celebrated for its handcrafted bread and sweet treats, attracting a long queue of patrons each morning, all keen to enjoy the baked delicacies that had gained widespread acclaim for their exceptional taste.",
             "A charming bakery, located in a small mountainous hamlet, renowned for producing exquisite handmade pastries and bread, was bustling with a crowd of eager customers lined up outside, each anticipating the chance to indulge in the famous baked items celebrated for their extraordinary deliciousness.",
             "In a cozy, mountain-encircled village, a beloved bakery was the center of attraction, known for its traditional baking methods and delightful pastries, drawing a consistent stream of people waiting outside, all desiring to experience the renowned flavors that made the bakery's products distinctively mouth-watering."]
# Step 1: Cluster the sentences
num_clusters = 1
sentence_clusters = cluster_sentences(sentences, model, num_clusters)

# Step 2: Highlight similar words within each cluster
clustered_sentences = [[] for _ in range(num_clusters)]
for sentence, cluster_id in zip(sentences, sentence_clusters):
    clustered_sentences[cluster_id].append(sentence)

highlighted_clustered_sentences = []
for cluster in clustered_sentences:
    highlighted_clustered_sentences.extend(highlight_words_within_cluster(cluster, model, exclude_words))
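# Note: the clustering/highlighting above runs once at import time on the
# hard-coded example sentences; setTextVisibility below repeats the same steps
# on answers generated for the prompts the user selects.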
text_list = []

def updateChoices(prompt):
    newChoices = generate_prompts(prompt)
    return gr.CheckboxGroup(choices=newChoices)
def setTextVisibility(cbg, model_name_input):
    sentences = []
    result = []
    model = SentenceTransformer('all-mpnet-base-v2')
    exclude_words = {"a", "the", "for", "from", "of", "in", "over", "as", "on", "is", "am", "have", "an", "has", "had", "and", "by", "it", "its", "those", "these", "above", "to"}
sentences_org = ["In a quaint little town nestled in the heart of the mountains, a small bakery famous for its artisanal breads and pastries had a line of customers stretching out the door, eagerly waiting to savor the freshly baked goods that were known far and wide for their delightful flavors.", | |
"Within a picturesque mountain village, there stood a renowned bakery, celebrated for its handcrafted bread and sweet treats, attracting a long queue of patrons each morning, all keen to enjoy the baked delicacies that had gained widespread acclaim for their exceptional taste.", | |
"A charming bakery, located in a small mountainous hamlet, renowned for producing exquisite handmade pastries and bread, was bustling with a crowd of eager customers lined up outside, each anticipating the chance to indulge in the famous baked items celebrated for their extraordinary deliciousness.", | |
"In a cozy, mountain-encircled village, a beloved bakery was the center of attraction, known for its traditional baking methods and delightful pastries, drawing a consistent stream of people waiting outside, all desiring to experience the renowned flavors that made the bakery's products distinctively mouth-watering."] | |
    for text in cbg:
        sentences.append(answer_question(text, model_name_input))

    # Step 1: Cluster the sentences
    num_clusters = 1
    sentence_clusters = cluster_sentences(sentences, model, num_clusters)

    # Step 2: Highlight similar words within each cluster
    clustered_sentences = [[] for _ in range(num_clusters)]
    for sentence, cluster_id in zip(sentences, sentence_clusters):
        clustered_sentences[cluster_id].append(sentence)

    highlighted_clustered_sentences = []
    for cluster in clustered_sentences:
        highlighted_clustered_sentences.extend(highlight_words_within_cluster(cluster, model, exclude_words))

    for idx, sentence in enumerate(highlighted_clustered_sentences):
        result.append("<p><strong>" + cbg[idx] + "</strong></p><p>" + sentence + "</p><br/>")
    # Join into a single HTML string, since the output is a single gr.HTML component.
    return "".join(result)
# update_show = [gr.Textbox(visible=True, label=text, value=answer_question(text, model_name_input)) for text in cbg]
# update_hide = [gr.Textbox(visible=False, label="") for _ in range(10-len(cbg))]
# return update_show + update_hide
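# ---- Gradio UI ----
# "Live Mode": pick a model, generate candidate prompts, select some, and render
# the clustered, highlighted answers as HTML. "Batch Mode": collect a question,
# relevancy/diversity fields, and an email address via a submit form.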
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.HTML("""
    <div style="text-align: center; max-width: 1240px; margin: 0 auto;">
        <h1 style="font-weight: 200; font-size: 20px; margin-bottom:8px; margin-top:0px;">
            Auditing LLMs
        </h1>
        <hr style="margin-bottom:5px; margin-top:5px;">
    </div>
    """)

    with gr.Tab("Live Mode"):
        with gr.Row():
            model_name_input = gr.Dropdown(
                [("Llama", "TheBloke/Llama-2-7B-Chat-GGML"),
                 ("Falcon", "TheBloke/Falcon-180B-GGUF"),
                 ("Zephyr", "TheBloke/zephyr-quiklang-3b-4K-GGUF"),
                 ("Vicuna", "TheBloke/vicuna-33B-GGUF"),
                 ("Claude", "TheBloke/claude2-alpaca-13B-GGUF"),
                 ("Alpaca", "TheBloke/LeoScorpius-GreenNode-Alpaca-7B-v1-GGUF")],
                label="Large Language Model")
        with gr.Row():
            prompt_input = gr.Textbox(label="Enter your question", placeholder="Enter Your Question")
        with gr.Row():
            generate_button = gr.Button("Generate", variant="primary", min_width=300)
        with gr.Column():
            cbg = gr.CheckboxGroup(choices=[], label="List of the prompts", interactive=True)
        generate_button.click(updateChoices, inputs=[prompt_input], outputs=[cbg])

        with gr.Row() as exec:
            btnExec = gr.Button("Execute", variant="primary", min_width=200)
        with gr.Column() as texts:
            for i in range(10):
                text = gr.Textbox(label="_", visible=False)
                text_list.append(text)
        with gr.Column():
            html_result = gr.HTML("""<div style="color: red"></div>""")

        # btnExec.click(setTextVisibility, inputs=[cbg, model_name_input], outputs=text_list)
        btnExec.click(setTextVisibility, inputs=[cbg, model_name_input], outputs=html_result)

        gr.HTML("""
        <div style="text-align: center; font-size: 24px; font-weight: bold;">Similarity Score: 76%</div>
        """)
        clear = gr.ClearButton(link="http://127.0.0.1:7865")

    with gr.Tab("Batch Mode"):
        with gr.Row():
            model_name_input = gr.Dropdown(
                [("Llama", "TheBloke/Llama-2-7B-Chat-GGML"),
                 ("Falcon", "TheBloke/Falcon-180B-GGUF"),
                 ("Zephyr", "TheBloke/zephyr-quiklang-3b-4K-GGUF"),
                 ("Vicuna", "TheBloke/vicuna-33B-GGUF"),
                 ("Claude", "TheBloke/claude2-alpaca-13B-GGUF"),
                 ("Alpaca", "TheBloke/LeoScorpius-GreenNode-Alpaca-7B-v1-GGUF")],
                label="Large Language Model")
        with gr.Row():
            prompt_input = gr.Textbox(label="Enter your question", placeholder="Enter Your Question")
        with gr.Row():
            relevancy_input = gr.Textbox(label="Relevancy", placeholder="Relevancy")
            diversity_input = gr.Textbox(label="Diversity", placeholder="Diversity")
        with gr.Row():
            email_input = gr.Textbox(label="Enter your email address", placeholder="Enter Your Email Address")
        with gr.Row():
            submit_button = gr.Button("Submit", variant="primary")

# Launch the Gradio app
demo.launch()