# File size: 4,400 Bytes
# 1107e2d 83b72c1 1107e2d 83b72c1 1107e2d 83b72c1 cfe8e38 83b72c1 6773f13 1107e2d 6773f13 1107e2d 83b72c1 6773f13 1107e2d 83b72c1 6773f13 83b72c1 6773f13 83b72c1 6773f13 9732442 83b72c1 6773f13 9732442 83b72c1 9732442 83b72c1 6773f13 83b72c1 9732442 6773f13 83b72c1 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
import gradio as gr
from wordllama import WordLlama
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
# Load the default WordLlama model once at import time; calculate_similarities
# reuses this single instance for every comparison.
wl = WordLlama.load()
# Initialize a TF-IDF vectorizer with scikit-learn's default settings. It is
# refit on each sentence pair inside calculate_similarities.
tfidf_vectorizer = TfidfVectorizer()
def calculate_similarities(sentence1: str, sentence2: str):
    """Score two sentences with WordLlama and with TF-IDF cosine similarity.

    Args:
        sentence1: First sentence to compare.
        sentence2: Second sentence to compare.

    Returns:
        A ``(wordllama_score, tfidf_score)`` tuple of plain Python floats,
        in the same order as the two numeric outputs wired to the button.
    """
    # WordLlama similarity
    wordllama_score = wl.similarity(sentence1, sentence2)

    # TF-IDF similarity. The vectorizer is refit on just these two sentences,
    # so the score reflects only the terms the pair has in common.
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform([sentence1, sentence2])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when neither
        # sentence produces a token, e.g. both inputs are blank or contain
        # only one-character words. With no terms at all there is nothing
        # shared, so report zero instead of failing the request.
        tfidf_score = 0.0
    else:
        tfidf_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    return float(wordllama_score), float(tfidf_score)
# Sentence pairs offered as ready-made examples: the original set followed by
# the newer pairs built around similar words with different meanings.

# Original examples
_general_pairs = [
    ["I went to the car", "I went to the pawn shop"],
    ["The cat is on the roof", "A dog is in the yard"],
    ["She loves playing tennis", "She enjoys sports"],
    ["This is a bright day", "It's a sunny morning"],
    ["I bought a new phone", "I got a new mobile"],
    ["The restaurant serves delicious food", "This place has great cuisine"],
    ["Python is a programming language", "Java is used for coding"],
    ["The movie was entertaining", "I enjoyed watching the film"],
    ["Climate change affects our planet", "Global warming is a serious issue"],
    ["Students study in the library", "People read books in the library"],
]

# New examples: each pair shares a same-sounding or same-spelled word that is
# used in a different sense in the two sentences.
_lookalike_pairs = [
    ["The executive board met this morning", "I was so bored during the meeting"],
    ["Don't waste your time on this", "The dress fits perfectly at the waist"],
    ["The principal called a meeting", "It's a matter of principle"],
    ["The weather is beautiful today", "I don't know whether to go or stay"],
    ["I need a piece of the cake", "The world needs peace"],
    ["The bass was swimming in the lake", "Turn up the bass in the speaker"],
    ["The fair is in town this weekend", "That decision wasn't fair at all"],
    ["I need to address this letter", "What's your new address?"],
    ["The bank of the river is muddy", "I need to go to the bank for money"],
    ["Can you bear this weight?", "I saw a bear in the woods"],
]

# Order matters for display: original pairs first, then the newer ones.
examples = _general_pairs + _lookalike_pairs
# Define Gradio interface with updated layout.
# The page has a title and intro, two sentence inputs, a button that runs
# calculate_similarities, two numeric score outputs, explanatory notes, and
# the clickable example pairs.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown("# Advanced Text Similarity Comparison")
    gr.Markdown("""
Compare sentences using both WordLlama and TF-IDF similarity metrics.
This tool includes examples of similar words with different meanings to demonstrate semantic understanding.
""")

    # Sentence inputs, one per column.
    with gr.Row():
        with gr.Column():
            sentence1 = gr.Textbox(
                lines=2,
                placeholder="Enter first sentence...",
                label="First Sentence",
                info="Type or select from examples below",
            )
        with gr.Column():
            sentence2 = gr.Textbox(
                lines=2,
                placeholder="Enter second sentence...",
                label="Second Sentence",
                info="Type or select from examples below",
            )

    button = gr.Button("Calculate Similarities", variant="primary")

    # Score outputs; both start at 0.0 until the button is clicked.
    with gr.Row():
        wordllama_output = gr.Number(
            label="WordLlama Similarity",
            info="Contextual similarity score (0-1)",
            value=0.0,
        )
        tfidf_output = gr.Number(
            label="TF-IDF Similarity",
            info="Term frequency-based similarity score (0-1)",
            value=0.0,
        )

    gr.Markdown("""
### Understanding the Scores
- **WordLlama Similarity**: Measures semantic similarity considering context and meaning
- **TF-IDF Similarity**: Measures similarity based on word frequency and importance
""")
    gr.Markdown("### Example Sentence Pairs")
    gr.Markdown("""
The examples include:
- Regular sentence pairs
- Sentences with similar words but different meanings (homophones)
- Contextually related sentences
""")

    # The output order must match the (wordllama, tfidf) tuple returned by
    # calculate_similarities.
    button.click(
        calculate_similarities,
        inputs=[sentence1, sentence2],
        outputs=[wordllama_output, tfidf_output],
    )

    # Clicking an example fills both textboxes; no function is attached here,
    # so scores are computed only when the button is pressed.
    gr.Examples(
        examples=examples,
        inputs=[sentence1, sentence2],
        label="Click on any example to load it",
    )

# Launch the interface only when run as a script, so importing this module
# (for tests or reuse of calculate_similarities) does not start a server.
if __name__ == "__main__":
    iface.launch(share=True)