wordllama / app.py
yasserrmd's picture
Update app.py
1f73e91 verified
raw
history blame contribute delete
No virus
3.01 kB
import gradio as gr
from wordllama import WordLlama
# Load the default WordLlama model once at import time (load() is called with
# no arguments); every handler function below shares this single instance.
wl = WordLlama.load()
# Function to calculate similarity
def calculate_similarity(text1, text2):
    """Return a display string with the similarity of two texts.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        The string ``"Similarity Score: <score>"`` where ``<score>`` is the
        value returned by ``wl.similarity(text1, text2)``.
    """
    return "Similarity Score: {}".format(wl.similarity(text1, text2))
# Function to rank documents
def rank_documents(query, candidates):
    """Rank semicolon-separated candidate documents against a query.

    Args:
        query: The query text to rank against.
        candidates: One string holding the candidate documents separated by
            semicolons. Whitespace around each entry is ignored and blank
            entries (e.g. from a trailing semicolon) are dropped.

    Returns:
        The result of ``wl.rank(query, candidates_list)`` for the cleaned
        candidate list, or an empty list when no non-blank candidate is given.
    """
    # Strip each entry so "a; b" yields "b" rather than " b", and drop blanks
    # so empty strings are never sent to the model.
    stripped = (part.strip() for part in (candidates or "").split(";"))
    candidates_list = [part for part in stripped if part]
    if not candidates_list:
        # Nothing to rank; skip the model call entirely.
        return []
    return wl.rank(query, candidates_list)
# Function to deduplicate documents
def deduplicate_docs(candidates, threshold):
    """Remove near-duplicate documents from a semicolon-separated list.

    Args:
        candidates: One string holding the documents separated by semicolons.
            Whitespace around each entry is ignored and blank entries (e.g.
            from a trailing semicolon) are dropped.
        threshold: Value forwarded to ``wl.deduplicate`` as ``threshold``.

    Returns:
        The result of ``wl.deduplicate`` for the cleaned document list, or an
        empty list when no non-blank document is given.
    """
    # Strip each entry and drop blanks so stray whitespace or empty strings
    # do not take part in the duplicate comparison.
    stripped = (part.strip() for part in (candidates or "").split(";"))
    candidates_list = [part for part in stripped if part]
    if not candidates_list:
        # Nothing to deduplicate; skip the model call entirely.
        return []
    return wl.deduplicate(candidates_list, threshold=threshold)
# Function to cluster documents
def cluster_docs(docs, k, max_iterations, tolerance):
    """Cluster semicolon-separated documents with the shared WordLlama model.

    Args:
        docs: One string holding the documents separated by semicolons.
            Whitespace around each entry is ignored and blank entries are
            dropped.
        k: Requested number of clusters. Coerced to ``int`` because numeric
            UI widgets may deliver the value as a float.
        max_iterations: Iteration limit forwarded to ``wl.cluster``; coerced
            to ``int`` for the same reason as ``k``.
        tolerance: Value forwarded to ``wl.cluster`` as ``tolerance``.

    Returns:
        Whatever ``wl.cluster`` returns for the cleaned document list.

    Raises:
        ValueError: If ``docs`` contains no non-blank document.
    """
    # Strip each entry and drop blanks so empty strings are never clustered.
    stripped = (part.strip() for part in (docs or "").split(";"))
    docs_list = [part for part in stripped if part]
    if not docs_list:
        raise ValueError("No documents provided; separate documents with semicolons.")
    return wl.cluster(
        docs_list,
        k=int(k),
        max_iterations=int(max_iterations),
        tolerance=tolerance,
    )
# Gradio interface
# Build the UI: one tab per WordLlama operation, each with its own inputs, an
# output component, and a button wired to the matching handler function.
with gr.Blocks(css="style.css") as demo:
    gr.Markdown("# WordLlama App")

    # Similarity Interface
    with gr.Tab("Similarity"):
        gr.Markdown("### Calculate Similarity between two texts")
        text1 = gr.Textbox(label="Text 1")
        text2 = gr.Textbox(label="Text 2")
        similarity_output = gr.Textbox(label="Similarity Score")
        similarity_button = gr.Button("Calculate Similarity")
        similarity_button.click(
            calculate_similarity,
            inputs=[text1, text2],
            outputs=similarity_output,
        )

    # Ranking Interface
    with gr.Tab("Rank Documents"):
        gr.Markdown("### Rank documents based on a query")
        query = gr.Textbox(label="Query")
        candidates = gr.Textbox(label="Candidates (separate by semicolons)")
        rank_output = gr.JSON(label="Ranked Documents")
        rank_button = gr.Button("Rank Documents")
        rank_button.click(
            rank_documents,
            inputs=[query, candidates],
            outputs=rank_output,
        )

    # Deduplication Interface
    with gr.Tab("Fuzzy Deduplication"):
        gr.Markdown("### Deduplicate similar documents")
        # Distinct name so this textbox does not rebind the ranking tab's
        # `candidates` component.
        dedup_candidates = gr.Textbox(label="Candidates (separate by semicolons)")
        threshold = gr.Slider(0.0, 1.0, value=0.8, label="Threshold")
        deduplicate_output = gr.JSON(label="Deduplicated Documents")
        deduplicate_button = gr.Button("Deduplicate")
        deduplicate_button.click(
            deduplicate_docs,
            inputs=[dedup_candidates, threshold],
            outputs=deduplicate_output,
        )

    # Clustering Interface
    with gr.Tab("Clustering"):
        gr.Markdown("### Cluster documents")
        docs = gr.Textbox(label="Documents (separate by semicolons)")
        # precision=0 makes these inputs deliver integers to the handler;
        # a cluster count and an iteration limit are whole numbers.
        k = gr.Number(label="Number of Clusters", value=5, precision=0)
        max_iterations = gr.Number(label="Max Iterations", value=100, precision=0)
        tolerance = gr.Number(label="Tolerance", value=1e-4)
        cluster_output = gr.JSON(label="Clusters")
        cluster_button = gr.Button("Cluster Documents")
        cluster_button.click(
            cluster_docs,
            inputs=[docs, k, max_iterations, tolerance],
            outputs=cluster_output,
        )

demo.launch()