yasserrmd committed on
Commit
a0cb1f5
1 Parent(s): c1fb0b9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -0
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from wordllama import WordLlama
3
+
4
+ # Load the default WordLlama model once, at module import; every handler below reuses this `wl` instance
5
+ wl = WordLlama.load()
6
+
7
+ # Function to calculate similarity
8
def calculate_similarity(text1, text2):
    """Build the message shown in the "Similarity Score" textbox.

    Both texts are handed to ``wl.similarity`` and the value it returns is
    embedded, with no extra rounding or formatting, in the result string.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        A string of the form ``"Similarity Score: <score>"``.
    """
    return "Similarity Score: {}".format(wl.similarity(text1, text2))
11
+
12
+ # Function to rank documents
13
def rank_documents(query, candidates):
    """Rank semicolon-separated candidate documents against a query.

    The raw textbox value is split on ``";"``; each piece is stripped of
    surrounding whitespace and empty pieces are dropped, so input such as
    ``"a; b;"`` yields ``["a", "b"]`` instead of ``["a", " b", ""]``.

    Args:
        query: Query text the candidates are ranked against.
        candidates: Candidate documents joined by semicolons. ``None`` or a
            string with no non-blank entries is treated as "no candidates".

    Returns:
        Whatever ``wl.rank(query, candidates_list)`` returns, or an empty
        list when there are no candidates (the model is not called then).
    """
    candidates_list = [
        part.strip() for part in (candidates or "").split(";") if part.strip()
    ]
    if not candidates_list:
        # Nothing to rank: avoid sending an empty/blank document to the model.
        return []
    return wl.rank(query, candidates_list)
17
+
18
+ # Function to deduplicate documents
19
def deduplicate_docs(candidates, threshold):
    """Remove near-duplicate entries from semicolon-separated documents.

    The raw textbox value is split on ``";"``; each piece is stripped of
    surrounding whitespace and empty pieces are dropped, so a trailing
    semicolon or padding around a separator no longer creates spurious or
    artificially distinct documents.

    Args:
        candidates: Documents joined by semicolons. ``None`` or a string with
            no non-blank entries is treated as "no documents".
        threshold: Value forwarded unchanged as ``threshold=`` to
            ``wl.deduplicate``.

    Returns:
        Whatever ``wl.deduplicate(candidates_list, threshold=threshold)``
        returns, or an empty list when there are no documents (the model is
        not called then).
    """
    candidates_list = [
        part.strip() for part in (candidates or "").split(";") if part.strip()
    ]
    if not candidates_list:
        # Nothing to deduplicate: skip the model call entirely.
        return []
    return wl.deduplicate(candidates_list, threshold=threshold)
23
+
24
+ # Function to cluster documents
25
def cluster_docs(docs, k, max_iterations, tolerance):
    """Cluster semicolon-separated documents with WordLlama.

    The raw textbox value is split on ``";"``; each piece is stripped of
    surrounding whitespace and empty pieces are dropped before clustering.

    Args:
        docs: Documents joined by semicolons.
        k: Number of clusters. Coerced with ``int()`` because a Gradio
            ``Number`` input may deliver a float such as ``5.0``.
        max_iterations: Iteration cap, coerced with ``int()`` for the same
            reason as ``k``.
        tolerance: Value forwarded unchanged as ``tolerance=`` to
            ``wl.cluster``.

    Returns:
        Whatever ``wl.cluster(...)`` returns for the cleaned document list.

    Raises:
        gr.Error: If no non-blank document was supplied, so the UI shows a
            readable message instead of a failure from inside the model.
    """
    docs_list = [part.strip() for part in (docs or "").split(";") if part.strip()]
    if not docs_list:
        raise gr.Error("Please enter at least one document (separate by semicolons).")
    return wl.cluster(
        docs_list,
        k=int(k),
        max_iterations=int(max_iterations),
        tolerance=tolerance,
    )
29
+
30
+ # Gradio interface
31
# One tab per WordLlama feature; each tab wires its inputs to the matching
# handler function defined above and shows the handler's return value.
with gr.Blocks() as demo:
    gr.Markdown("# WordLlama Gradio App")

    # Similarity Interface
    with gr.Tab("Similarity"):
        gr.Markdown("### Calculate Similarity between two texts")
        text1 = gr.Textbox(label="Text 1")
        text2 = gr.Textbox(label="Text 2")
        similarity_output = gr.Textbox(label="Similarity Score")
        similarity_button = gr.Button("Calculate Similarity")
        similarity_button.click(
            calculate_similarity,
            inputs=[text1, text2],
            outputs=similarity_output,
        )

    # Ranking Interface
    with gr.Tab("Rank Documents"):
        gr.Markdown("### Rank documents based on a query")
        query = gr.Textbox(label="Query")
        candidates = gr.Textbox(label="Candidates (separate by semicolons)")
        rank_output = gr.JSON(label="Ranked Documents")
        rank_button = gr.Button("Rank Documents")
        rank_button.click(
            rank_documents,
            inputs=[query, candidates],
            outputs=rank_output,
        )

    # Deduplication Interface
    with gr.Tab("Fuzzy Deduplication"):
        gr.Markdown("### Deduplicate similar documents")
        # Rebinding `candidates` is safe: the ranking click handler above has
        # already captured the earlier textbox component.
        candidates = gr.Textbox(label="Candidates (separate by semicolons)")
        threshold = gr.Slider(0.0, 1.0, value=0.8, label="Threshold")
        deduplicate_output = gr.JSON(label="Deduplicated Documents")
        deduplicate_button = gr.Button("Deduplicate")
        deduplicate_button.click(
            deduplicate_docs,
            inputs=[candidates, threshold],
            outputs=deduplicate_output,
        )

    # Clustering Interface
    with gr.Tab("Clustering"):
        gr.Markdown("### Cluster documents")
        docs = gr.Textbox(label="Documents (separate by semicolons)")
        # precision=0 makes these inputs deliver integers; both values are
        # counts, and without it a Number component passes floats (e.g. 5.0).
        k = gr.Number(label="Number of Clusters", value=5, precision=0)
        max_iterations = gr.Number(label="Max Iterations", value=100, precision=0)
        tolerance = gr.Number(label="Tolerance", value=1e-4)
        cluster_output = gr.JSON(label="Clusters")
        cluster_button = gr.Button("Cluster Documents")
        cluster_button.click(
            cluster_docs,
            inputs=[docs, k, max_iterations, tolerance],
            outputs=cluster_output,
        )

# Start the Gradio server for the app defined above.
demo.launch()