# Web file-viewer header captured with this source (not program code): DexterSptizu's picture · "Update app.py" · 6773f13 verified · raw · history blame · 4.4 kB
import gradio as gr
from wordllama import WordLlama
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
# TF-IDF vectorizer instance reused by every similarity request
tfidf_vectorizer = TfidfVectorizer()
# WordLlama model, loaded once at import time with the library's default settings
wl = WordLlama.load()
def calculate_similarities(sentence1, sentence2):
    """Score how similar two sentences are with WordLlama and with TF-IDF.

    Args:
        sentence1: First sentence to compare.
        sentence2: Second sentence to compare.

    Returns:
        A ``(wordllama_score, tfidf_score)`` tuple of Python floats. The
        TF-IDF score is ``0.0`` when the vectorizer cannot extract any token
        from the pair (for example, both inputs are empty).
    """
    # WordLlama similarity, computed by the module-level model.
    wordllama_score = wl.similarity(sentence1, sentence2)

    # TF-IDF similarity: the vectorizer is refit on just this pair, so the
    # vocabulary and weights reflect only the two sentences being compared.
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform([sentence1, sentence2])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when no token
        # survives tokenization, e.g. blank inputs. With no shared terms to
        # compare, report zero instead of surfacing an error in the UI.
        tfidf_score = 0.0
    else:
        tfidf_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    # Convert to plain Python floats before handing the values to the UI.
    return float(wordllama_score), float(tfidf_score)
# Sentence pairs offered as clickable examples in the UI.
# General-purpose pairs come first.
_GENERAL_PAIRS = [
    ["I went to the car", "I went to the pawn shop"],
    ["The cat is on the roof", "A dog is in the yard"],
    ["She loves playing tennis", "She enjoys sports"],
    ["This is a bright day", "It's a sunny morning"],
    ["I bought a new phone", "I got a new mobile"],
    ["The restaurant serves delicious food", "This place has great cuisine"],
    ["Python is a programming language", "Java is used for coding"],
    ["The movie was entertaining", "I enjoyed watching the film"],
    ["Climate change affects our planet", "Global warming is a serious issue"],
    ["Students study in the library", "People read books in the library"],
]

# Pairs built around words that sound or look alike but mean different things.
_CONFUSABLE_WORD_PAIRS = [
    ["The executive board met this morning", "I was so bored during the meeting"],
    ["Don't waste your time on this", "The dress fits perfectly at the waist"],
    ["The principal called a meeting", "It's a matter of principle"],
    ["The weather is beautiful today", "I don't know whether to go or stay"],
    ["I need a piece of the cake", "The world needs peace"],
    ["The bass was swimming in the lake", "Turn up the bass in the speaker"],
    ["The fair is in town this weekend", "That decision wasn't fair at all"],
    ["I need to address this letter", "What's your new address?"],
    ["The bank of the river is muddy", "I need to go to the bank for money"],
    ["Can you bear this weight?", "I saw a bear in the woods"],
]

# Final list, in the same order as before: general pairs, then confusable-word pairs.
examples = _GENERAL_PAIRS + _CONFUSABLE_WORD_PAIRS
# Gradio UI: two sentence inputs, a trigger button, two score fields,
# explanatory notes, and the clickable example pairs.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    # Page heading and short description
    gr.Markdown("# Advanced Text Similarity Comparison")
    gr.Markdown("""
Compare sentences using both WordLlama and TF-IDF similarity metrics.
This tool includes examples of similar words with different meanings to demonstrate semantic understanding.
""")

    # Input row: one column per sentence
    with gr.Row():
        with gr.Column():
            sentence1 = gr.Textbox(
                label="First Sentence",
                placeholder="Enter first sentence...",
                info="Type or select from examples below",
                lines=2,
            )
        with gr.Column():
            sentence2 = gr.Textbox(
                label="Second Sentence",
                placeholder="Enter second sentence...",
                info="Type or select from examples below",
                lines=2,
            )

    # Button that triggers the computation
    button = gr.Button("Calculate Similarities", variant="primary")

    # Output row: one numeric field per metric, both starting at 0.0
    with gr.Row():
        wordllama_output = gr.Number(
            value=0.0,
            label="WordLlama Similarity",
            info="Contextual similarity score (0-1)",
        )
        tfidf_output = gr.Number(
            value=0.0,
            label="TF-IDF Similarity",
            info="Term frequency-based similarity score (0-1)",
        )

    # Notes explaining the two scores
    gr.Markdown("""
### Understanding the Scores
- **WordLlama Similarity**: Measures semantic similarity considering context and meaning
- **TF-IDF Similarity**: Measures similarity based on word frequency and importance
""")

    # Heading and description for the example section
    gr.Markdown("### Example Sentence Pairs")
    gr.Markdown("""
The examples include:
- Regular sentence pairs
- Sentences with similar words but different meanings (homophones)
- Contextually related sentences
""")

    # Wire the button: read both textboxes, write both score fields
    button.click(
        fn=calculate_similarities,
        inputs=[sentence1, sentence2],
        outputs=[wordllama_output, tfidf_output],
    )

    # Example pairs that fill the two textboxes when selected
    gr.Examples(
        examples=examples,
        inputs=[sentence1, sentence2],
        label="Click on any example to load it",
    )
# Launch the interface. This statement runs whenever the module is executed or
# imported, since it is not wrapped in a __main__ guard.
# NOTE(review): share=True presumably requests a public share link (see Gradio's
# launch() documentation) — confirm this is wanted in the deployment environment.
iface.launch(share=True)