Spaces:

DexterSptizu
/

wordllam-text-similarity

Running

File size: 4,400 Bytes

import gradio as gr
from wordllama import WordLlama
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load the default WordLlama model once, at import time, so that
# calculate_similarities reuses the same instance on every call.
wl = WordLlama.load()

# Module-level TF-IDF vectorizer, constructed with no arguments
# (i.e. scikit-learn's default settings).
tfidf_vectorizer = TfidfVectorizer()

def calculate_similarities(sentence1, sentence2):
    """Score how similar two sentences are using WordLlama and TF-IDF.

    Args:
        sentence1: First sentence to compare.
        sentence2: Second sentence to compare.

    Returns:
        A ``(wordllama_score, tfidf_score)`` tuple of plain Python floats.
        Both scores are ``0.0`` when either sentence is ``None``, empty, or
        whitespace-only. The TF-IDF score alone is ``0.0`` when neither
        sentence yields a token the vectorizer can index.
    """
    # A missing or blank sentence leaves nothing to compare; report "no
    # similarity" instead of letting the models fail on empty text.
    if not (sentence1 or "").strip() or not (sentence2 or "").strip():
        return 0.0, 0.0

    # WordLlama similarity
    wordllama_score = wl.similarity(sentence1, sentence2)

    # TF-IDF similarity. The vectorizer is refit on just these two sentences,
    # so a fresh instance per call gives the same result as reusing a shared
    # one while keeping this function free of shared mutable state.
    try:
        tfidf_matrix = TfidfVectorizer().fit_transform([sentence1, sentence2])
    except ValueError:
        # scikit-learn raises ValueError for an empty vocabulary, e.g. when
        # both sentences consist only of one-character words ("I", "a"),
        # which the default token pattern ignores.
        tfidf_score = 0.0
    else:
        tfidf_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    return float(wordllama_score), float(tfidf_score)

# Everyday sentence pairs: some are paraphrases, some share only a topic,
# and some merely share surface wording.
_GENERAL_PAIRS = [
    ["I went to the car", "I went to the pawn shop"],
    ["The cat is on the roof", "A dog is in the yard"],
    ["She loves playing tennis", "She enjoys sports"],
    ["This is a bright day", "It's a sunny morning"],
    ["I bought a new phone", "I got a new mobile"],
    ["The restaurant serves delicious food", "This place has great cuisine"],
    ["Python is a programming language", "Java is used for coding"],
    ["The movie was entertaining", "I enjoyed watching the film"],
    ["Climate change affects our planet", "Global warming is a serious issue"],
    ["Students study in the library", "People read books in the library"],
]

# Confusable-word pairs: each pair hinges on words that sound alike
# (board/bored, waste/waist, piece/peace) or are spelled alike
# (bass, fair, address, bank, bear) but carry different meanings.
_CONFUSABLE_WORD_PAIRS = [
    ["The executive board met this morning", "I was so bored during the meeting"],
    ["Don't waste your time on this", "The dress fits perfectly at the waist"],
    ["The principal called a meeting", "It's a matter of principle"],
    ["The weather is beautiful today", "I don't know whether to go or stay"],
    ["I need a piece of the cake", "The world needs peace"],
    ["The bass was swimming in the lake", "Turn up the bass in the speaker"],
    ["The fair is in town this weekend", "That decision wasn't fair at all"],
    ["I need to address this letter", "What's your new address?"],
    ["The bank of the river is muddy", "I need to go to the bank for money"],
    ["Can you bear this weight?", "I saw a bear in the woods"],
]

# Full example table: each row is [first sentence, second sentence],
# with the everyday pairs listed before the confusable-word pairs.
examples = [*_GENERAL_PAIRS, *_CONFUSABLE_WORD_PAIRS]

# Define the Gradio interface.
# Page layout, top to bottom: title and intro text, two side-by-side sentence
# inputs, the trigger button, two numeric score outputs, explanatory notes,
# and a table of clickable example pairs. Components appear in the order they
# are created below, so statement order here is the layout.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown("# Advanced Text Similarity Comparison")
    gr.Markdown("""
    Compare sentences using both WordLlama and TF-IDF similarity metrics.
    This tool includes examples of similar words with different meanings to demonstrate semantic understanding.
    """)
    
    # Inputs: one textbox per column so the two sentences sit next to each other.
    with gr.Row():
        with gr.Column():
            sentence1 = gr.Textbox(
                lines=2,
                placeholder="Enter first sentence...",
                label="First Sentence",
                info="Type or select from examples below"
            )
        with gr.Column():
            sentence2 = gr.Textbox(
                lines=2,
                placeholder="Enter second sentence...",
                label="Second Sentence",
                info="Type or select from examples below"
            )
    
    # The calculation only runs when this button is clicked (wired up below).
    button = gr.Button("Calculate Similarities", variant="primary")
    
    # Outputs: both score fields show 0.0 until the first calculation.
    # NOTE(review): the info texts advertise a 0-1 range, but
    # calculate_similarities returns the raw scores without clamping —
    # confirm WordLlama's similarity cannot be negative.
    with gr.Row():
        wordllama_output = gr.Number(
            label="WordLlama Similarity",
            info="Contextual similarity score (0-1)",
            value=0.0
        )
        tfidf_output = gr.Number(
            label="TF-IDF Similarity",
            info="Term frequency-based similarity score (0-1)",
            value=0.0
        )
    
    gr.Markdown("""
    ### Understanding the Scores
    - **WordLlama Similarity**: Measures semantic similarity considering context and meaning
    - **TF-IDF Similarity**: Measures similarity based on word frequency and importance
    """)
    
    # NOTE(review): the text below labels the second example group
    # "homophones", but that group also contains same-spelling words
    # (bank, bear, fair) — consider rewording the user-facing copy.
    gr.Markdown("### Example Sentence Pairs")
    gr.Markdown("""
    The examples include:
    - Regular sentence pairs
    - Sentences with similar words but different meanings (homophones)
    - Contextually related sentences
    """)
    
    # Wire the button: the two textbox values are passed, in order, to
    # calculate_similarities, and its two return values fill the WordLlama
    # and TF-IDF output fields in that order.
    button.click(
        calculate_similarities,
        inputs=[sentence1, sentence2],
        outputs=[wordllama_output, tfidf_output]
    )
    
    # Example table: each row of `examples` supplies one value per input
    # textbox. No function is passed here, so picking a row presumably only
    # fills the inputs and the user still has to press the button.
    gr.Examples(
        examples=examples,
        inputs=[sentence1, sentence2],
        label="Click on any example to load it"
    )

# Launch the interface. share=True asks Gradio to create a public share link
# in addition to serving locally.
# NOTE(review): this call runs at import time (there is no
# `if __name__ == "__main__":` guard), so importing this module starts the
# server — confirm nothing imports it. Also confirm share=True is still wanted
# if the app is only ever run on a hosting platform that provides its own URL.
iface.launch(share=True)