File size: 2,998 Bytes
a0cb1f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d6b1f36
a0cb1f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import gradio as gr
from wordllama import WordLlama

# Load the default WordLlama model once at import time; the handler
# functions below all use this single module-level instance.
wl = WordLlama.load()

# Similarity handler for the "Similarity" tab
def calculate_similarity(text1, text2):
    """Return a display string with the WordLlama similarity of two texts.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        The string ``"Similarity Score: <score>"``, where ``<score>`` is
        whatever ``wl.similarity`` returns, formatted with its default
        string representation.
    """
    return f"Similarity Score: {wl.similarity(text1, text2)}"

# Function to rank documents
def rank_documents(query, candidates):
    """Rank semicolon-separated candidate documents against a query.

    Args:
        query: The query text to rank against.
        candidates: Candidate documents in one string, separated by ``;``.

    Returns:
        The result of ``wl.rank(query, docs)`` (presumably document/score
        pairs -- confirm against the WordLlama docs), or an empty list
        when no non-blank candidate was supplied.
    """
    # Trim each entry and drop blanks so inputs such as "a; b;" do not
    # produce " b" (leading space) or "" (trailing separator) documents.
    candidates_list = [
        doc.strip() for doc in candidates.split(";") if doc.strip()
    ]
    if not candidates_list:
        # Nothing to rank; avoid sending an empty/blank batch to the model.
        return []
    return wl.rank(query, candidates_list)

# Function to deduplicate documents
def deduplicate_docs(candidates, threshold):
    """Remove near-duplicate documents from a semicolon-separated list.

    Args:
        candidates: Candidate documents in one string, separated by ``;``.
        threshold: Similarity threshold forwarded to ``wl.deduplicate``.

    Returns:
        The result of ``wl.deduplicate(docs, threshold=threshold)``, or an
        empty list when no non-blank candidate was supplied.
    """
    # Trim each entry and drop blanks: otherwise "a; a" would compare "a"
    # with " a", and a trailing ";" would add an empty document.
    candidates_list = [
        doc.strip() for doc in candidates.split(";") if doc.strip()
    ]
    if not candidates_list:
        # Nothing to deduplicate.
        return []
    return wl.deduplicate(candidates_list, threshold=threshold)

# Function to cluster documents
def cluster_docs(docs, k, max_iterations, tolerance):
    """Cluster semicolon-separated documents with WordLlama.

    Args:
        docs: Documents in one string, separated by ``;``.
        k: Number of clusters. Coerced to ``int`` because a numeric UI
            field may deliver a float such as ``5.0``.
        max_iterations: Iteration cap, likewise coerced to ``int``.
        tolerance: Convergence tolerance forwarded to ``wl.cluster``.

    Returns:
        Whatever ``wl.cluster`` returns for the cleaned document list.

    Raises:
        gr.Error: If no non-blank document is given, a numeric field is
            empty, or ``k`` is smaller than 1.
    """
    # Trim each entry and drop blanks so stray spaces or a trailing ";"
    # do not become documents of their own.
    docs_list = [doc.strip() for doc in docs.split(";") if doc.strip()]
    if not docs_list:
        raise gr.Error("Please provide at least one document to cluster.")
    if k is None or max_iterations is None or tolerance is None:
        raise gr.Error(
            "Number of clusters, max iterations and tolerance are required."
        )

    # Counts must be integers for the clustering routine.
    k = int(k)
    max_iterations = int(max_iterations)
    if k < 1:
        raise gr.Error("Number of clusters must be at least 1.")

    return wl.cluster(
        docs_list,
        k=k,
        max_iterations=max_iterations,
        tolerance=tolerance,
    )

# Gradio interface: one tab per WordLlama operation, each wiring its input
# widgets to the matching handler function via a button click.
with gr.Blocks() as demo:
    gr.Markdown("# WordLlama App")

    # Similarity Interface
    with gr.Tab("Similarity"):
        gr.Markdown("### Calculate Similarity between two texts")
        text1 = gr.Textbox(label="Text 1")
        text2 = gr.Textbox(label="Text 2")
        similarity_output = gr.Textbox(label="Similarity Score")
        similarity_button = gr.Button("Calculate Similarity")
        similarity_button.click(
            calculate_similarity,
            inputs=[text1, text2],
            outputs=similarity_output,
        )

    # Ranking Interface
    with gr.Tab("Rank Documents"):
        gr.Markdown("### Rank documents based on a query")
        query = gr.Textbox(label="Query")
        rank_candidates = gr.Textbox(label="Candidates (separate by semicolons)")
        rank_output = gr.JSON(label="Ranked Documents")
        rank_button = gr.Button("Rank Documents")
        rank_button.click(
            rank_documents,
            inputs=[query, rank_candidates],
            outputs=rank_output,
        )

    # Deduplication Interface
    with gr.Tab("Fuzzy Deduplication"):
        gr.Markdown("### Deduplicate similar documents")
        # Distinct name so this textbox does not rebind the ranking tab's.
        dedup_candidates = gr.Textbox(label="Candidates (separate by semicolons)")
        threshold = gr.Slider(0.0, 1.0, value=0.8, label="Threshold")
        deduplicate_output = gr.JSON(label="Deduplicated Documents")
        deduplicate_button = gr.Button("Deduplicate")
        deduplicate_button.click(
            deduplicate_docs,
            inputs=[dedup_candidates, threshold],
            outputs=deduplicate_output,
        )

    # Clustering Interface
    with gr.Tab("Clustering"):
        gr.Markdown("### Cluster documents")
        docs = gr.Textbox(label="Documents (separate by semicolons)")
        # precision=0 makes these fields deliver integers rather than
        # floats, since both values are counts.
        k = gr.Number(label="Number of Clusters", value=5, precision=0)
        max_iterations = gr.Number(label="Max Iterations", value=100, precision=0)
        tolerance = gr.Number(label="Tolerance", value=1e-4)
        cluster_output = gr.JSON(label="Clusters")
        cluster_button = gr.Button("Cluster Documents")
        cluster_button.click(
            cluster_docs,
            inputs=[docs, k, max_iterations, tolerance],
            outputs=cluster_output,
        )

# Launch the Gradio app; this runs whenever the module is executed.
demo.launch()