"""Gradio app for comparing LLM answers to paraphrased prompts.

The app expands a user question into several prompts with a local
CTransformers model, answers the selected prompts with a model chosen in the
UI, and looks for words that are semantically close across the answers.
"""
import os

import gradio as gr
import numpy as np
import scipy.spatial
from dotenv import load_dotenv
from langchain.chains import LLMChain
from langchain_community.llms import CTransformers
from langchain_core.prompts import PromptTemplate
from nltk.tokenize import word_tokenize
from scipy.spatial.distance import cosine
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans

load_dotenv()

# Generation settings used when answering a single prompt.
_ANSWER_LLM_CONFIG = {'max_new_tokens': 512, 'temperature': 0.7, 'context_length': 512}

# (label, model id) pairs offered in the model dropdowns of both tabs.
MODEL_CHOICES = [
    ("Llama", "TheBloke/Llama-2-7B-Chat-GGML"),
    ("Falcon", "TheBloke/Falcon-180B-GGUF"),
    ("Zephyr", "TheBloke/zephyr-quiklang-3b-4K-GGUF"),
    ("Vicuna", "TheBloke/vicuna-33B-GGUF"),
    ("Claude", "TheBloke/claude2-alpaca-13B-GGUF"),
    ("Alpaca", "TheBloke/LeoScorpius-GreenNode-Alpaca-7B-v1-GGUF"),
]


def _load_llm(model_name, config):
    """Instantiate a CTransformers LLM, passing os.cpu_count() as ``threads``."""
    return CTransformers(model=model_name, config=config, threads=os.cpu_count())


def generate_prompts(user_input):
    """Ask the Mistral instruct model for reworded variants of a question.

    Args:
        user_input: The question typed by the user.

    Returns:
        A list with one entry per non-blank line of the model output, each
        stripped of surrounding whitespace.
    """
    # The question is injected through the declared "Question" variable
    # rather than an f-string, so braces in the user's text are not parsed
    # as template fields.
    prompt_template = PromptTemplate(
        input_variables=["Question"],
        template=(
            "Your task is to formulate 5 unique queries for each given question. "
            "These queries must adhere to the criteria of relevance and diversity."
            "write the questions in seperate lines.{Question} "
        ),
    )
    config = {'max_new_tokens': 2048, 'temperature': 0.7, 'context_length': 4096}
    llm = _load_llm("TheBloke/Mistral-7B-Instruct-v0.1-GGUF", config)
    hub_chain = LLMChain(prompt=prompt_template, llm=llm)
    generated_prompts = hub_chain.run({"Question": user_input})
    return [line.strip() for line in generated_prompts.split('\n') if line.strip()]


def answer_question(prompt, model_name, llm=None):
    """Return the model's short answer to ``prompt``.

    Args:
        prompt: The question text to answer.
        model_name: Model id handed to CTransformers when ``llm`` is not given.
        llm: Optional already-loaded LLM to reuse; lets a caller answer many
            prompts without reloading the model for each one.

    Returns:
        The text produced by the chain.
    """
    prompt_template = PromptTemplate(
        input_variables=["Question"],
        template="Give a short answer to this question '{Question}' and do not consider the number behind it.",
    )
    if llm is None:
        llm = _load_llm(model_name, _ANSWER_LLM_CONFIG)
    hub_chain = LLMChain(prompt=prompt_template, llm=llm)
    return hub_chain.run({"Question": prompt})


def calculate_similarity(word, other_words, model, threshold=0.5,
                         max_similarity=0.85, other_embeddings=None):
    """Find the first comparison word whose similarity to ``word`` is in range.

    Args:
        word: The word to look up.
        other_words: Words to compare against.
        model: Object exposing ``encode(list_of_str)``.
        threshold: Exclusive lower bound on cosine similarity.
        max_similarity: Exclusive upper bound on cosine similarity.
        other_embeddings: Optional precomputed ``model.encode(other_words)``;
            supply it when calling repeatedly with the same ``other_words``.

    Returns:
        ``(index, similarity)`` of the first match, or ``(None, None)``.
    """
    if other_embeddings is None:
        other_embeddings = model.encode(other_words)
    word_embedding = model.encode([word])[0]
    for i, embedding in enumerate(other_embeddings):
        similarity = 1 - scipy.spatial.distance.cosine(word_embedding, embedding)
        if threshold < similarity < max_similarity:
            return i, similarity
    return None, None


def _is_content_word(word, exclude_words):
    """Return True for alphanumeric tokens that are not in ``exclude_words``."""
    return word.lower() not in exclude_words and word.isalnum()


def highlight_words_within_cluster(sentences, model, exclude_words):
    """Rebuild each sentence, marking words that resemble words in the others.

    Args:
        sentences: Sentences belonging to one cluster.
        model: Object exposing ``encode(list_of_str)``.
        exclude_words: Lower-cased words that are never compared.

    Returns:
        One space-joined token string per input sentence, in input order.
    """
    html_color_codes = ["red", "green", "blue", "purple", "cyan", "fuchsia",
                        "lime", "maroon", "olive", "navy", "teal", "gray"]
    word_to_color = {}
    color_index = 0

    # Tokenize every sentence once instead of once per comparison.
    tokens_per_sentence = [word_tokenize(s) for s in sentences]

    highlighted_sentences = []
    for sentence, words in zip(sentences, tokens_per_sentence):
        all_other_words = [
            token
            for other, tokens in zip(sentences, tokens_per_sentence)
            if other != sentence
            for token in tokens
            if _is_content_word(token, exclude_words)
        ]
        # Encode the comparison vocabulary once per sentence rather than once
        # per word.
        other_embeddings = model.encode(all_other_words) if all_other_words else []

        highlighted_words = []
        for word in words:
            if not _is_content_word(word, exclude_words):
                highlighted_words.append(word)
                continue
            match_index, _ = calculate_similarity(
                word, all_other_words, model, other_embeddings=other_embeddings
            )
            if match_index is None:
                highlighted_words.append(word)
                continue
            # Assign a color to the word if it does not have one yet.
            if word not in word_to_color:
                word_to_color[word] = html_color_codes[color_index % len(html_color_codes)]
                color_index += 1
            # NOTE(review): the wrapper strings around the word are empty, so
            # matched words are emitted unchanged and the assigned color is
            # never applied. Highlight markup looks to have been lost here;
            # confirm the intended tags.
            highlighted_words.append("" + word + "")
        highlighted_sentences.append(' '.join(highlighted_words))
    return highlighted_sentences


def cluster_sentences(sentences, model, num_clusters=3):
    """Cluster sentences with KMeans over their embeddings.

    Args:
        sentences: Sentences to cluster.
        model: Object exposing ``encode(list_of_str)``.
        num_clusters: Requested number of clusters; capped at the number of
            sentences because KMeans rejects more clusters than samples.

    Returns:
        An array with one cluster label per sentence (empty for no input).
    """
    if len(sentences) == 0:
        return np.empty(0, dtype=int)
    embeddings = model.encode(sentences)
    kmeans = KMeans(n_clusters=min(num_clusters, len(sentences)))
    kmeans.fit(embeddings)
    return kmeans.labels_


model = SentenceTransformer('all-mpnet-base-v2')
exclude_words = {"a", "the", "for", "from", "of", "in", "over", "as", "on", "is", "am", "have", "an", "has", "had", "and", "by", "it", "its", "those", "these", "above", "to"}
text_list = []


def updateChoices(prompt):
    """Replace the checkbox group's choices with prompts generated for ``prompt``."""
    newChoices = generate_prompts(prompt)
    return gr.CheckboxGroup(choices=newChoices)


def setTextVisibility(cbg, model_name_input):
    """Answer each selected prompt and return the combined text for display.

    Args:
        cbg: The prompts currently selected in the checkbox group.
        model_name_input: Model id chosen in the dropdown.

    Returns:
        A single string with every prompt followed by its processed answer,
        or an empty string when nothing is selected.
    """
    if not cbg:
        # Nothing to answer; clustering an empty list would raise.
        return ""

    # Load the answering model once and reuse it for every selected prompt.
    llm = _load_llm(model_name_input, _ANSWER_LLM_CONFIG)
    sentences = [answer_question(text, model_name_input, llm=llm) for text in cbg]

    # Step 1: Cluster the sentences
    num_clusters = 1
    sentence_clusters = cluster_sentences(sentences, model, num_clusters)

    # Step 2: Highlight similar words within each cluster. Each answer is kept
    # paired with its prompt so the pairing survives regrouping by cluster.
    clustered_pairs = [[] for _ in range(num_clusters)]
    for text, sentence, cluster_id in zip(cbg, sentences, sentence_clusters):
        clustered_pairs[cluster_id].append((text, sentence))

    fragments = []
    for pairs in clustered_pairs:
        if not pairs:
            continue
        prompts = [text for text, _ in pairs]
        answers = [sentence for _, sentence in pairs]
        highlighted = highlight_words_within_cluster(answers, model, exclude_words)
        for text, sentence in zip(prompts, highlighted):
            # NOTE(review): these separators are bare newlines and the prompt
            # and answer text are inserted into HTML unescaped. Markup appears
            # to have been stripped from the literals; confirm the intended
            # tags and whether the text should be escaped.
            fragments.append("\n\n" + text + "\n\n" + sentence + "\n\n\n")

    # The output component is a single gr.HTML, so hand it one string.
    return "".join(fragments)


# NOTE(review): the layout nesting below is reconstructed from a source whose
# indentation was lost; confirm it against the intended UI.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.HTML("\n\nAuditing LLMs\n\n\n")

    with gr.Tab("Live Mode"):
        with gr.Row():
            model_name_input = gr.Dropdown(MODEL_CHOICES, label="Large Language Model")
        with gr.Row():
            prompt_input = gr.Textbox(label="Enter your question", placeholder="Enter Your Question")
        with gr.Row():
            generate_button = gr.Button("Generate", variant="primary", min_width=300)
        with gr.Column():
            cbg = gr.CheckboxGroup(choices=[], label="List of the prompts", interactive=True)
        generate_button.click(updateChoices, inputs=[prompt_input], outputs=[cbg])

        with gr.Row() as exec_row:
            btnExec = gr.Button("Execute", variant="primary", min_width=200)

        # Hidden textboxes collected in text_list; the click handler below
        # writes to html_result instead.
        with gr.Column() as texts:
            for i in range(10):
                text = gr.Textbox(label="_", visible=False)
                text_list.append(text)

        with gr.Column():
            html_result = gr.HTML("\n")
        btnExec.click(setTextVisibility, inputs=[cbg, model_name_input], outputs=html_result)

        # Static text; no handler updates this value.
        gr.HTML("\nSimilarity Score: 76%\n")
        clear = gr.ClearButton(link="http://127.0.0.1:7865")

    # The Batch Mode widgets are created but no event handlers are attached.
    with gr.Tab("Batch Mode"):
        with gr.Row():
            model_name_input = gr.Dropdown(MODEL_CHOICES, label="Large Language Model")
        with gr.Row():
            prompt_input = gr.Textbox(label="Enter your question", placeholder="Enter Your Question")
        with gr.Row():
            prompt_input = gr.Textbox(label="RELAVENCY", placeholder="Relavancy")
            prompt_input = gr.Textbox(label="Diversity", placeholder="Diversity")
        with gr.Row():
            prompt_input = gr.Textbox(label="Enter your email address", placeholder="Enter Your Email Address")
        with gr.Row():
            generate_button = gr.Button("Submit", variant="primary")

# Launch the Gradio app
demo.launch(share=True)