|
import gradio as gr |
|
from wordllama import WordLlama |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
import numpy as np |
|
|
|
|
|
# Load WordLlama once at import time (no arguments, so the library's default
# configuration is used); every similarity request reuses this instance.
wl = WordLlama.load()

# Single module-level TF-IDF vectorizer. It is created unfitted here and is
# fitted by whoever calls ``fit_transform`` on it.
tfidf_vectorizer: TfidfVectorizer = TfidfVectorizer()
|
|
|
def calculate_similarities(sentence1, sentence2):
    """Score two sentences with WordLlama and with TF-IDF cosine similarity.

    Args:
        sentence1: First sentence to compare.
        sentence2: Second sentence to compare.

    Returns:
        A ``(wordllama_score, tfidf_score)`` tuple of plain Python floats.
        Both scores are ``0.0`` when either input is missing or blank, and
        the TF-IDF score alone is ``0.0`` when the pair produces no tokens
        for the vectorizer to work with.
    """
    # An untouched textbox yields an empty string. There is nothing to
    # compare in that case, so report "no similarity" instead of letting the
    # vectorizer fail further down.
    if not (sentence1 and sentence1.strip() and sentence2 and sentence2.strip()):
        return 0.0, 0.0

    wordllama_score = wl.similarity(sentence1, sentence2)

    # The vectorizer is refit on just this pair, so the vocabulary and IDF
    # weights come only from the two sentences being compared.
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform([sentence1, sentence2])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when no token
        # survives tokenization, e.g. inputs made only of one-character
        # words or punctuation. Treat that as zero lexical overlap.
        tfidf_score = 0.0
    else:
        # Row 0 vs. row 1; cosine_similarity returns a 1x1 matrix here.
        tfidf_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    # Convert to built-in floats so the UI receives plain numbers rather
    # than library scalar types.
    return float(wordllama_score), float(tfidf_score)
|
|
|
|
|
# Ordinary sentence pairs with varying amounts of shared wording and meaning.
_general_pairs = [
    ["I went to the car", "I went to the pawn shop"],
    ["The cat is on the roof", "A dog is in the yard"],
    ["She loves playing tennis", "She enjoys sports"],
    ["This is a bright day", "It's a sunny morning"],
    ["I bought a new phone", "I got a new mobile"],
    ["The restaurant serves delicious food", "This place has great cuisine"],
    ["Python is a programming language", "Java is used for coding"],
    ["The movie was entertaining", "I enjoyed watching the film"],
    ["Climate change affects our planet", "Global warming is a serious issue"],
    ["Students study in the library", "People read books in the library"],
]

# Pairs built around a word that sounds or is spelled the same in both
# sentences but carries a different meaning in each.
_same_word_different_meaning_pairs = [
    ["The executive board met this morning", "I was so bored during the meeting"],
    ["Don't waste your time on this", "The dress fits perfectly at the waist"],
    ["The principal called a meeting", "It's a matter of principle"],
    ["The weather is beautiful today", "I don't know whether to go or stay"],
    ["I need a piece of the cake", "The world needs peace"],
    ["The bass was swimming in the lake", "Turn up the bass in the speaker"],
    ["The fair is in town this weekend", "That decision wasn't fair at all"],
    ["I need to address this letter", "What's your new address?"],
    ["The bank of the river is muddy", "I need to go to the bank for money"],
    ["Can you bear this weight?", "I saw a bear in the woods"],
]

# Full list of [first sentence, second sentence] pairs, general pairs first.
examples = _general_pairs + _same_word_different_meaning_pairs
|
|
|
|
|
# Keyword arguments that are identical for both sentence inputs and for both
# score outputs; factored out so each widget only spells out what differs.
_sentence_box_options = {
    "lines": 2,
    "info": "Type or select from examples below",
}
_score_box_options = {"value": 0.0}

# Build the page: heading, two side-by-side inputs, a trigger button, two
# numeric outputs, explanatory text, and a clickable list of example pairs.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown("# Advanced Text Similarity Comparison")
    gr.Markdown("""
    Compare sentences using both WordLlama and TF-IDF similarity metrics.
    This tool includes examples of similar words with different meanings to demonstrate semantic understanding.
    """)

    # Input row: one column per sentence.
    with gr.Row():
        with gr.Column():
            sentence1 = gr.Textbox(
                label="First Sentence",
                placeholder="Enter first sentence...",
                **_sentence_box_options,
            )
        with gr.Column():
            sentence2 = gr.Textbox(
                label="Second Sentence",
                placeholder="Enter second sentence...",
                **_sentence_box_options,
            )

    button = gr.Button("Calculate Similarities", variant="primary")

    # Output row: one number per metric, both starting at 0.0.
    with gr.Row():
        wordllama_output = gr.Number(
            label="WordLlama Similarity",
            info="Contextual similarity score (0-1)",
            **_score_box_options,
        )
        tfidf_output = gr.Number(
            label="TF-IDF Similarity",
            info="Term frequency-based similarity score (0-1)",
            **_score_box_options,
        )

    gr.Markdown("""
    ### Understanding the Scores
    - **WordLlama Similarity**: Measures semantic similarity considering context and meaning
    - **TF-IDF Similarity**: Measures similarity based on word frequency and importance
    """)

    gr.Markdown("### Example Sentence Pairs")
    gr.Markdown("""
    The examples include:
    - Regular sentence pairs
    - Sentences with similar words but different meanings (homophones)
    - Contextually related sentences
    """)

    # Both the button handler and the example loader work on the same two
    # input boxes; the handler's two return values fill the two outputs.
    sentence_inputs = [sentence1, sentence2]
    score_outputs = [wordllama_output, tfidf_output]

    button.click(
        calculate_similarities,
        inputs=sentence_inputs,
        outputs=score_outputs,
    )

    gr.Examples(
        examples=examples,
        inputs=sentence_inputs,
        label="Click on any example to load it",
    )

# Start the app as soon as the module runs. share=True additionally requests
# a public share link from Gradio.
iface.launch(share=True)