|
import gradio as gr |
|
from wordllama import WordLlama |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
import numpy as np |
|
|
|
|
|
# Shared model handle: created once at import time and reused by every call
# to calculate_similarities.
wl = WordLlama.load()

# Shared TF-IDF vectorizer. calculate_similarities refits it on each sentence
# pair via fit_transform, so no vocabulary carries over between comparisons.
tfidf_vectorizer = TfidfVectorizer()
|
|
|
def calculate_similarities(sentence1, sentence2):
    """Score a pair of sentences with WordLlama and with TF-IDF cosine similarity.

    Args:
        sentence1: First sentence to compare.
        sentence2: Second sentence to compare.

    Returns:
        A ``(wordllama_score, tfidf_score)`` tuple of plain Python floats.
        Both scores are ``0.0`` when either sentence is missing or contains
        only whitespace; the TF-IDF score alone is ``0.0`` when the
        vectorizer cannot extract a single term from the pair.
    """
    # Nothing to compare: report zero similarity instead of letting the
    # vectorizer fail on an empty document pair.
    if not (sentence1 or "").strip() or not (sentence2 or "").strip():
        return 0.0, 0.0

    wordllama_score = wl.similarity(sentence1, sentence2)

    # The shared vectorizer is refit on just these two sentences, so the
    # vocabulary and IDF weights come from this pair alone.
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform([sentence1, sentence2])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when neither
        # sentence produces a token, e.g. input made only of punctuation or
        # single-character words under the default token pattern.
        tfidf_score = 0.0
    else:
        # Row 0 is sentence1 and row 1 is sentence2; the slices keep them 2-D.
        tfidf_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    return float(wordllama_score), float(tfidf_score)
|
|
|
|
|
# Sentence pairs offered as one-click inputs in the UI. Each entry is
# [first sentence, second sentence].
# NOTE(review): the group descriptions below are inferred from the sentences
# themselves; confirm them against the category list shown on the page.
examples = [
    # Paraphrases that share most of their words.
    ["The cat is sleeping on the couch", "The cat is resting on the sofa"],
    ["I love eating pizza", "I enjoy eating pizza"],
    ["The weather is sunny today", "It is a sunny day today"],

    # Related sentences with looser wording.
    ["She is reading a book", "She is holding a novel"],
    ["The car is red", "The automobile is crimson"],
    ["The children are playing in the park", "Kids are having fun at the playground"],

    # Same sentence frame with the key words replaced by near-synonyms.
    ["The food was great", "The meal was excellent"],
    ["The student is studying hard", "The pupil is working diligently"],
    ["This movie is fantastic", "This film is amazing"],

    # Identical except for a single word.
    ["The red car is parked", "The red car is moving"],
    ["The book is on the table", "The book is under the table"],
    ["She went to the store", "She went to the mall"],

    # Similar meaning expressed with few shared content words.
    ["The laptop is expensive", "The computer costs a lot"],
    ["The dog is barking", "The canine is making noise"],
    ["The house is large", "The home is spacious"],

    # Same words with the two participants swapped.
    ["The cat chased the mouse", "The mouse chased the cat"],
    ["John gave Mary a book", "Mary gave John a book"],
    ["The teacher helped the student", "The student helped the teacher"],

    # Noun and adjective both replaced by synonyms.
    ["The car is fast", "The vehicle is quick"],
    ["The building is tall", "The structure is high"],
    ["The food is delicious", "The cuisine is tasty"],
]
|
|
|
|
|
# Page layout, top to bottom: title and intro, two side-by-side sentence
# inputs, the trigger button, two numeric score outputs, explanatory notes,
# and the clickable example pairs.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown("# Text Similarity Comparison")
    gr.Markdown("""
    Compare sentences using both WordLlama and TF-IDF similarity metrics.
    Examples are categorized to demonstrate strengths of each method.
    """)

    # Input row: one column per sentence.
    with gr.Row():
        with gr.Column():
            sentence1 = gr.Textbox(
                label="First Sentence",
                info="Type or select from examples below",
                placeholder="Enter first sentence...",
                lines=2,
            )
        with gr.Column():
            sentence2 = gr.Textbox(
                label="Second Sentence",
                info="Type or select from examples below",
                placeholder="Enter second sentence...",
                lines=2,
            )

    button = gr.Button("Calculate Similarities", variant="primary")

    # Output row: both scores start at 0.0 until the button is pressed.
    with gr.Row():
        wordllama_output = gr.Number(
            value=0.0,
            label="WordLlama Similarity",
            info="Contextual similarity score (0-1)",
        )
        tfidf_output = gr.Number(
            value=0.0,
            label="TF-IDF Similarity",
            info="Term frequency-based similarity score (0-1)",
        )

    gr.Markdown("""
    ### Understanding the Scores
    - **WordLlama Similarity**: Better at understanding semantic meaning and context
    - **TF-IDF Similarity**: Better at exact word matching and frequency-based comparison

    ### Example Categories
    1. High Similarity: Both methods should show high scores
    2. Medium Similarity: Both methods should show moderate scores
    3. Semantic Similarity: WordLlama typically performs better
    4. Word Order Cases: Shows how each method handles word order
    5. Synonym Cases: Tests semantic understanding
    """)

    # The handler's two return values map onto the two Number outputs in order.
    button.click(
        fn=calculate_similarities,
        inputs=[sentence1, sentence2],
        outputs=[wordllama_output, tfidf_output],
    )

    # Selecting an example fills both textboxes.
    gr.Examples(
        examples=examples,
        inputs=[sentence1, sentence2],
        label="Click on any example to load it",
    )
|
|
|
|
|
# Start the server only when this file is run as a script, so that importing
# the module (tests, tooling) does not block on a running server or open a
# public share link as a side effect.
if __name__ == "__main__":
    iface.launch(share=True)