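# Gradio app for auditing LLMs: one local GGUF/GGML model generates candidate
# question prompts, another answers the prompts the user selects, and the
# answers are clustered with sentence embeddings so semantically similar words
# can be highlighted in the rendered HTML.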
import gradio as gr
from dotenv import load_dotenv
from langchain import PromptTemplate, LLMChain, HuggingFaceHub
from langchain.llms import CTransformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
import nltk
from nltk.tokenize import word_tokenize
import numpy as np
import scipy.spatial

# word_tokenize needs the punkt tokenizer data; fetch it once if it is missing.
nltk.download("punkt", quiet=True)

load_dotenv()
def generate_prompts(user_input):
    prompt_template = PromptTemplate(
        input_variables=["Question"],
        # Use a real template variable so the chain substitutes the user input at run time.
        template="Just list 10 question prompts for {Question} and don't put a number before each of the prompts."
    )
    config = {'max_new_tokens': 64, 'temperature': 0.7, 'context_length': 64}
    llm = CTransformers(model="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
                        config=config)
    hub_chain = LLMChain(prompt=prompt_template, llm=llm)
    input_data = {"Question": user_input}
    generated_prompts = hub_chain.run(input_data)
    # Split the raw completion into individual prompts, one per "Question:" entry.
    questions_list = generated_prompts.split('\n')
    formatted_questions = "\n".join(f"Question: {question}" for question in questions_list if question.strip())
    questions_list = formatted_questions.split("Question:")[1:]
    return questions_list
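# Hypothetical usage sketch (input and output wording are illustrative; the
# actual text depends entirely on the model):
#   generate_prompts("renewable energy")
#   might return [" What are the benefits of solar power?", " How do wind turbines work?", ...]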
def answer_question(prompt, model_name_input="TheBloke/Llama-2-7B-Chat-GGML"):
    prompt_template = PromptTemplate(
        input_variables=["Question"],
        template="Give one answer for {Question} and do not consider the number behind it."
    )
    config = {'max_new_tokens': 64, 'temperature': 0.7, 'context_length': 64}
    # The Gradio dropdown passes a GGML/GGUF repo id, so the selected model is used here.
    llm = CTransformers(model=model_name_input,
                        config=config)
    hub_chain = LLMChain(prompt=prompt_template, llm=llm)
    input_data = {"Question": prompt}
    generated_answer = hub_chain.run(input_data)
    return generated_answer
def calculate_similarity(word, other_words, model, threshold=0.5):
    embeddings_word = model.encode([word])
    embeddings_other_words = model.encode(other_words)
    for i, embedding in enumerate(embeddings_other_words):
        similarity = 1 - scipy.spatial.distance.cosine(embeddings_word[0], embedding)
        # Report the first word whose similarity is above the threshold but below 0.85
        # (related, but not an exact or near-exact duplicate).
        if similarity > threshold and similarity < 0.85:
            return i, similarity
    return None, None
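# highlight_words_within_cluster compares every content word of a sentence
# against the content words of the other sentences in the same cluster and
# wraps the matching words in colored HTML <span> tags, reusing the same color
# for repeated words.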
def highlight_words_within_cluster(sentences, model, exclude_words):
    # Create a dictionary to map words to color codes
    word_to_color = {}
    color_codes = [
        "\033[41m",   # Background Red
        "\033[42m",   # Background Green
        "\033[43m",   # Background Yellow
        "\033[44m",   # Background Blue
        "\033[45m",   # Background Purple
        "\033[46m",   # Background Cyan
        "\033[100m",  # Background Dark Gray
        "\033[101m",  # Background Light Red
        "\033[102m",  # Background Light Green
        "\033[103m",  # Background Light Yellow
        "\033[104m",  # Background Light Blue
        "\033[105m",  # Background Light Purple
        "\033[106m",  # Background Light Cyan
        "\033[47m"    # Background Gray
    ]
    html_color_codes = ["red", "green", "blue", "purple", "cyan", "fuchsia", "lime", "maroon", "olive", "navy", "teal", "gray"]
    color_index = 0
    highlighted_sentences = []
    for sentence in sentences:
        words = word_tokenize(sentence)
        other_sentences = [s for s in sentences if s != sentence]
        all_other_words = [word for s in other_sentences for word in word_tokenize(s) if word.lower() not in exclude_words and word.isalnum()]
        highlighted_words = []
        for word in words:
            if word.lower() not in exclude_words and word.isalnum():
                match_index, similarity = calculate_similarity(word, all_other_words, model)
                if match_index is not None:
                    # Assign color to the word if not already assigned
                    if word not in word_to_color:
                        word_to_color[word] = html_color_codes[color_index % len(html_color_codes)]
                        color_index += 1
                    # Highlight the word
                    # highlighted_word = f"{word_to_color[word]}{word}\033[0m"
                    highlighted_word = "<span style='color: " + word_to_color[word] + "'>" + word + "</span>"
                else:
                    highlighted_word = word
                highlighted_words.append(highlighted_word)
            else:
                highlighted_words.append(word)
        highlighted_sentences.append(' '.join(highlighted_words))
    return highlighted_sentences
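# Hypothetical example: for the bakery sentences defined below, related words
# such as "bread" and "pastries" may be wrapped in colored <span> tags when
# their embedding similarity falls inside the (0.5, 0.85) band used by
# calculate_similarity.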
exclude_words = {"a", "the", "for", "from", "of", "in","over", "as", "on", "is", "am", "have", "an","has", "had", "and", "by", "it", "its", "those", "these", "was", "were", "their", "them", "I", "you", "also", "your", "me", "after"} | |
def cluster_sentences(sentences, model, num_clusters=3):
    embeddings = model.encode(sentences)
    kmeans = KMeans(n_clusters=num_clusters)
    kmeans.fit(embeddings)
    return kmeans.labels_

model = SentenceTransformer('all-mpnet-base-v2')

exclude_words = {"a", "the", "for", "from", "of", "in", "over", "as", "on", "is", "am", "have", "an", "has", "had", "and", "by", "it", "its", "those", "these", "above", "to"}

sentences = ["In a quaint little town nestled in the heart of the mountains, a small bakery famous for its artisanal breads and pastries had a line of customers stretching out the door, eagerly waiting to savor the freshly baked goods that were known far and wide for their delightful flavors.",
             "Within a picturesque mountain village, there stood a renowned bakery, celebrated for its handcrafted bread and sweet treats, attracting a long queue of patrons each morning, all keen to enjoy the baked delicacies that had gained widespread acclaim for their exceptional taste.",
             "A charming bakery, located in a small mountainous hamlet, renowned for producing exquisite handmade pastries and bread, was bustling with a crowd of eager customers lined up outside, each anticipating the chance to indulge in the famous baked items celebrated for their extraordinary deliciousness.",
             "In a cozy, mountain-encircled village, a beloved bakery was the center of attraction, known for its traditional baking methods and delightful pastries, drawing a consistent stream of people waiting outside, all desiring to experience the renowned flavors that made the bakery's products distinctively mouth-watering."]
# Step 1: Cluster the sentences
num_clusters = 1
sentence_clusters = cluster_sentences(sentences, model, num_clusters)

# Step 2: Highlight similar words within each cluster
clustered_sentences = [[] for _ in range(num_clusters)]
for sentence, cluster_id in zip(sentences, sentence_clusters):
    clustered_sentences[cluster_id].append(sentence)

highlighted_clustered_sentences = []
for cluster in clustered_sentences:
    highlighted_clustered_sentences.extend(highlight_words_within_cluster(cluster, model, exclude_words))
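# Note: the clustering/highlighting above runs once at import time on the
# hard-coded example sentences; setTextVisibility below repeats the same steps
# on answers generated for the prompts the user selects.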
text_list = []

def updateChoices(prompt):
    newChoices = generate_prompts(prompt)
    return gr.CheckboxGroup(choices=newChoices)
def setTextVisibility(cbg, model_name_input):
    sentences = []
    result = []
    model = SentenceTransformer('all-mpnet-base-v2')
    exclude_words = {"a", "the", "for", "from", "of", "in", "over", "as", "on", "is", "am", "have", "an", "has", "had", "and", "by", "it", "its", "those", "these", "above", "to"}
sentences_org = ["In a quaint little town nestled in the heart of the mountains, a small bakery famous for its artisanal breads and pastries had a line of customers stretching out the door, eagerly waiting to savor the freshly baked goods that were known far and wide for their delightful flavors.", | |
"Within a picturesque mountain village, there stood a renowned bakery, celebrated for its handcrafted bread and sweet treats, attracting a long queue of patrons each morning, all keen to enjoy the baked delicacies that had gained widespread acclaim for their exceptional taste.", | |
"A charming bakery, located in a small mountainous hamlet, renowned for producing exquisite handmade pastries and bread, was bustling with a crowd of eager customers lined up outside, each anticipating the chance to indulge in the famous baked items celebrated for their extraordinary deliciousness.", | |
"In a cozy, mountain-encircled village, a beloved bakery was the center of attraction, known for its traditional baking methods and delightful pastries, drawing a consistent stream of people waiting outside, all desiring to experience the renowned flavors that made the bakery's products distinctively mouth-watering."] | |
    for text in cbg:
        sentences.append(answer_question(text, model_name_input))

    # Step 1: Cluster the sentences
    num_clusters = 1
    sentence_clusters = cluster_sentences(sentences, model, num_clusters)

    # Step 2: Highlight similar words within each cluster
    clustered_sentences = [[] for _ in range(num_clusters)]
    for sentence, cluster_id in zip(sentences, sentence_clusters):
        clustered_sentences[cluster_id].append(sentence)

    highlighted_clustered_sentences = []
    for cluster in clustered_sentences:
        highlighted_clustered_sentences.extend(highlight_words_within_cluster(cluster, model, exclude_words))

    for idx, sentence in enumerate(highlighted_clustered_sentences):
        result.append("<p><strong>" + cbg[idx] + "</strong></p><p>" + sentence + "</p><br/>")
    # Join into a single HTML string, since the output is a single gr.HTML component.
    return "".join(result)
# update_show = [gr.Textbox(visible=True, label=text, value=answer_question(text, model_name_input)) for text in cbg]
# update_hide = [gr.Textbox(visible=False, label="") for _ in range(10-len(cbg))]
# return update_show + update_hide
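# ---- Gradio UI ----
# "Live Mode": pick a model, generate candidate prompts, select some, and render
# the clustered, highlighted answers as HTML. "Batch Mode": collect a question,
# relevancy/diversity fields, and an email address via a submit form.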
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.HTML("""
    <div style="text-align: center; max-width: 1240px; margin: 0 auto;">
        <h1 style="font-weight: 200; font-size: 20px; margin-bottom:8px; margin-top:0px;">
            Auditing LLMs
        </h1>
        <hr style="margin-bottom:5px; margin-top:5px;">
    </div>
    """)

    with gr.Tab("Live Mode"):
        with gr.Row():
            model_name_input = gr.Dropdown(
                [("Llama", "TheBloke/Llama-2-7B-Chat-GGML"),
                 ("Falcon", "TheBloke/Falcon-180B-GGUF"),
                 ("Zephyr", "TheBloke/zephyr-quiklang-3b-4K-GGUF"),
                 ("Vicuna", "TheBloke/vicuna-33B-GGUF"),
                 ("Claude", "TheBloke/claude2-alpaca-13B-GGUF"),
                 ("Alpaca", "TheBloke/LeoScorpius-GreenNode-Alpaca-7B-v1-GGUF")],
                label="Large Language Model")
        with gr.Row():
            prompt_input = gr.Textbox(label="Enter your question", placeholder="Enter Your Question")
        with gr.Row():
            generate_button = gr.Button("Generate", variant="primary", min_width=300)
        with gr.Column():
            cbg = gr.CheckboxGroup(choices=[], label="List of the prompts", interactive=True)
        generate_button.click(updateChoices, inputs=[prompt_input], outputs=[cbg])

        with gr.Row() as exec:
            btnExec = gr.Button("Execute", variant="primary", min_width=200)
        with gr.Column() as texts:
            for i in range(10):
                text = gr.Textbox(label="_", visible=False)
                text_list.append(text)
        with gr.Column():
            html_result = gr.HTML("""<div style="color: red"></div>""")

        # btnExec.click(setTextVisibility, inputs=[cbg, model_name_input], outputs=text_list)
        btnExec.click(setTextVisibility, inputs=[cbg, model_name_input], outputs=html_result)

        gr.HTML("""
        <div style="text-align: center; font-size: 24px; font-weight: bold;">Similarity Score: 76%</div>
        """)
        clear = gr.ClearButton(link="http://127.0.0.1:7865")

    with gr.Tab("Batch Mode"):
        with gr.Row():
            model_name_input = gr.Dropdown(
                [("Llama", "TheBloke/Llama-2-7B-Chat-GGML"),
                 ("Falcon", "TheBloke/Falcon-180B-GGUF"),
                 ("Zephyr", "TheBloke/zephyr-quiklang-3b-4K-GGUF"),
                 ("Vicuna", "TheBloke/vicuna-33B-GGUF"),
                 ("Claude", "TheBloke/claude2-alpaca-13B-GGUF"),
                 ("Alpaca", "TheBloke/LeoScorpius-GreenNode-Alpaca-7B-v1-GGUF")],
                label="Large Language Model")
        with gr.Row():
            prompt_input = gr.Textbox(label="Enter your question", placeholder="Enter Your Question")
        with gr.Row():
            relevancy_input = gr.Textbox(label="Relevancy", placeholder="Relevancy")
            diversity_input = gr.Textbox(label="Diversity", placeholder="Diversity")
        with gr.Row():
            email_input = gr.Textbox(label="Enter your email address", placeholder="Enter Your Email Address")
        with gr.Row():
            submit_button = gr.Button("Submit", variant="primary")

# Launch the Gradio app
demo.launch()