"""Gradio demo exposing WordLlama similarity, ranking, deduplication and clustering."""

import gradio as gr
from wordllama import WordLlama

# Load the default WordLlama model once at start-up; every handler shares it.
wl = WordLlama.load()


def _split_documents(raw):
    """Split a semicolon-separated string into a list of clean documents.

    Surrounding whitespace is stripped from each entry and blank entries
    (e.g. from a trailing ``;`` or an empty textbox) are dropped, so the model
    never receives empty strings as documents.
    """
    if not raw:
        return []
    return [part.strip() for part in raw.split(";") if part.strip()]


def calculate_similarity(text1, text2):
    """Return a display string with the WordLlama similarity of two texts."""
    score = wl.similarity(text1, text2)
    return f"Similarity Score: {score}"


def rank_documents(query, candidates):
    """Rank semicolon-separated ``candidates`` against ``query``.

    Returns whatever ``wl.rank`` produces, or an empty list when no non-blank
    candidate was supplied.
    """
    candidates_list = _split_documents(candidates)
    if not candidates_list:
        return []
    return wl.rank(query, candidates_list)


def deduplicate_docs(candidates, threshold):
    """Deduplicate semicolon-separated ``candidates`` using ``threshold``.

    Returns whatever ``wl.deduplicate`` produces, or an empty list when no
    non-blank candidate was supplied.
    """
    candidates_list = _split_documents(candidates)
    if not candidates_list:
        return []
    return wl.deduplicate(candidates_list, threshold=threshold)


def cluster_docs(docs, k, max_iterations, tolerance):
    """Cluster semicolon-separated ``docs`` into ``k`` groups.

    ``k`` and ``max_iterations`` are coerced to ``int`` and ``tolerance`` to
    ``float`` because ``gr.Number`` may deliver floats (or ``None`` when the
    field is cleared).

    Returns the result of ``wl.cluster`` unchanged.

    Raises:
        gr.Error: if a numeric field is empty/invalid, no documents were
            given, or ``k`` is outside ``1..len(docs)``.
    """
    docs_list = _split_documents(docs)
    if not docs_list:
        raise gr.Error("Please provide at least one document to cluster.")

    try:
        k = int(k)
        max_iterations = int(max_iterations)
        tolerance = float(tolerance)
    except (TypeError, ValueError):
        raise gr.Error(
            "Number of Clusters, Max Iterations and Tolerance must all be numbers."
        )

    if k < 1:
        raise gr.Error("Number of Clusters must be at least 1.")
    if k > len(docs_list):
        # Asking for more clusters than documents cannot yield a meaningful result.
        raise gr.Error(
            f"Number of Clusters ({k}) cannot exceed the number of "
            f"documents ({len(docs_list)})."
        )

    return wl.cluster(
        docs_list, k=k, max_iterations=max_iterations, tolerance=tolerance
    )


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# WordLlama App")

    # Similarity Interface
    with gr.Tab("Similarity"):
        gr.Markdown("### Calculate Similarity between two texts")
        text1 = gr.Textbox(label="Text 1")
        text2 = gr.Textbox(label="Text 2")
        similarity_output = gr.Textbox(label="Similarity Score")
        similarity_button = gr.Button("Calculate Similarity")
        similarity_button.click(
            calculate_similarity,
            inputs=[text1, text2],
            outputs=similarity_output,
        )

    # Ranking Interface
    with gr.Tab("Rank Documents"):
        gr.Markdown("### Rank documents based on a query")
        query = gr.Textbox(label="Query")
        rank_candidates = gr.Textbox(label="Candidates (separate by semicolons)")
        rank_output = gr.JSON(label="Ranked Documents")
        rank_button = gr.Button("Rank Documents")
        rank_button.click(
            rank_documents,
            inputs=[query, rank_candidates],
            outputs=rank_output,
        )

    # Deduplication Interface
    with gr.Tab("Fuzzy Deduplication"):
        gr.Markdown("### Deduplicate similar documents")
        dedup_candidates = gr.Textbox(label="Candidates (separate by semicolons)")
        threshold = gr.Slider(0.0, 1.0, value=0.8, label="Threshold")
        deduplicate_output = gr.JSON(label="Deduplicated Documents")
        deduplicate_button = gr.Button("Deduplicate")
        deduplicate_button.click(
            deduplicate_docs,
            inputs=[dedup_candidates, threshold],
            outputs=deduplicate_output,
        )

    # Clustering Interface
    with gr.Tab("Clustering"):
        gr.Markdown("### Cluster documents")
        docs = gr.Textbox(label="Documents (separate by semicolons)")
        # precision=0 makes the component hand integers to the callback.
        k = gr.Number(label="Number of Clusters", value=5, precision=0)
        max_iterations = gr.Number(label="Max Iterations", value=100, precision=0)
        tolerance = gr.Number(label="Tolerance", value=1e-4)
        cluster_output = gr.JSON(label="Clusters")
        cluster_button = gr.Button("Cluster Documents")
        cluster_button.click(
            cluster_docs,
            inputs=[docs, k, max_iterations, tolerance],
            outputs=cluster_output,
        )

# Launched at module level, as in the original script.
demo.launch()