Spaces:

DexterSptizu
/

wordllam-text-similarity

Running

App Files Files Community

DexterSptizu committed on 2 days ago

Commit

83b72c1

•

1 Parent(s): 9732442

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -18

app.py CHANGED Viewed

@@ -1,37 +1,67 @@
 import gradio as gr
 from wordllama import WordLlama
 # Load the default WordLlama model
 wl = WordLlama.load()
-# Define the function that calculates similarity between two sentences
-def calculate_similarity(sentence1, sentence2):
-    similarity_score = wl.similarity(sentence1, sentence2)
-    return similarity_score
-# Define five example inputs
 examples = [
     ["I went to the car", "I went to the pawn shop"],
     ["The cat is on the roof", "A dog is in the yard"],
     ["She loves playing tennis", "She enjoys sports"],
     ["This is a bright day", "It's a sunny morning"],
-    ["I bought a new phone", "I got a new mobile"]
 ]
-# Define Gradio interface
 with gr.Blocks() as iface:
-    gr.Markdown("# Sentence Similarity with WordLlama")
-    gr.Markdown("Calculate the similarity between two sentences using the WordLlama model from Hugging Face.")
-    sentence1 = gr.Textbox(lines=2, placeholder="Enter first sentence...")
-    sentence2 = gr.Textbox(lines=2, placeholder="Enter second sentence...")
-    output = gr.Number()
-    # Button to trigger similarity calculation
-    button = gr.Button("Calculate Similarity")
-    button.click(calculate_similarity, inputs=[sentence1, sentence2], outputs=output)
-    # Examples section
-    gr.Examples(examples=examples, inputs=[sentence1, sentence2])
-iface.launch(share=True)

 import gradio as gr
 from wordllama import WordLlama
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+import numpy as np
 # Load the default WordLlama model
 wl = WordLlama.load()
+# Initialize TF-IDF vectorizer
+tfidf_vectorizer = TfidfVectorizer()
+def calculate_similarities(sentence1, sentence2):
+    """Score two sentences with WordLlama and with TF-IDF cosine similarity.
+
+    Args:
+        sentence1: First sentence to compare.
+        sentence2: Second sentence to compare.
+
+    Returns:
+        A ``(wordllama_score, tfidf_score)`` tuple of floats, in the same
+        order as the ``outputs`` list passed to ``button.click``.
+    """
+    # WordLlama similarity
+    wordllama_score = wl.similarity(sentence1, sentence2)
+    # TF-IDF similarity
+    try:
+        tfidf_matrix = tfidf_vectorizer.fit_transform([sentence1, sentence2])
+    except ValueError:
+        # scikit-learn raises ValueError ("empty vocabulary") when neither
+        # sentence yields a token, e.g. both boxes are empty; report no
+        # lexical overlap instead of surfacing an error in the UI.
+        tfidf_score = 0.0
+    else:
+        tfidf_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
+    # Return positionally: a dict keyed by label strings cannot be matched
+    # to the two Number output components by Gradio.
+    return float(wordllama_score), float(tfidf_score)
+# Extended examples with more diverse sentences
 examples = [
     ["I went to the car", "I went to the pawn shop"],
     ["The cat is on the roof", "A dog is in the yard"],
     ["She loves playing tennis", "She enjoys sports"],
     ["This is a bright day", "It's a sunny morning"],
+    ["I bought a new phone", "I got a new mobile"],
+    ["The restaurant serves delicious food", "This place has great cuisine"],
+    ["Python is a programming language", "Java is used for coding"],
+    ["The movie was entertaining", "I enjoyed watching the film"],
+    ["Climate change affects our planet", "Global warming is a serious issue"],
+    ["Students study in the library", "People read books in the library"]
 ]
+# Define Gradio interface with updated layout
 with gr.Blocks() as iface:
+    gr.Markdown("# Text Similarity Comparison")
+    gr.Markdown("Compare sentences using both WordLlama and TF-IDF similarity metrics")
+    with gr.Row():
+        sentence1 = gr.Textbox(lines=2, placeholder="Enter first sentence...", label="First Sentence")
+        sentence2 = gr.Textbox(lines=2, placeholder="Enter second sentence...", label="Second Sentence")
+    button = gr.Button("Calculate Similarities")
+    with gr.Row():
+        wordllama_output = gr.Number(label="WordLlama Similarity")
+        tfidf_output = gr.Number(label="TF-IDF Similarity")
+    button.click(
+        calculate_similarities,
+        inputs=[sentence1, sentence2],
+        outputs=[wordllama_output, tfidf_output]
+    )
+    gr.Markdown("### Example Sentence Pairs")
+    gr.Examples(
+        examples=examples,
+        inputs=[sentence1, sentence2]
+    )
+iface.launch(share=True)