Spaces:

DexterSptizu
/

wordllam-text-similarity

Running

File size: 2,423 Bytes

import gradio as gr
from wordllama import WordLlama
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# TF-IDF vectorizer built with scikit-learn's default settings; it is
# shared at module level and used by calculate_similarities().
tfidf_vectorizer = TfidfVectorizer()

# WordLlama model obtained from load() with no arguments, i.e. the
# library's default configuration. Loaded once at import time.
wl = WordLlama.load()

def calculate_similarities(sentence1, sentence2):
    """Score how similar two sentences are with WordLlama and with TF-IDF.

    Args:
        sentence1: First sentence. ``None`` is treated as an empty string.
        sentence2: Second sentence. ``None`` is treated as an empty string.

    Returns:
        A dict with two float entries:
        ``"WordLlama Similarity"`` - the value returned by ``wl.similarity``;
        ``"TF-IDF Similarity"`` - cosine similarity of the two TF-IDF
        vectors, or ``0.0`` when no TF-IDF vocabulary can be built from
        the inputs.
    """
    # Normalize missing input so both scorers always receive strings.
    sentence1 = sentence1 or ""
    sentence2 = sentence2 or ""

    # WordLlama similarity
    wordllama_score = wl.similarity(sentence1, sentence2)

    # TF-IDF similarity: the vectorizer is refit on just this sentence pair.
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform([sentence1, sentence2])
    except ValueError:
        # fit_transform raises ValueError ("empty vocabulary") when neither
        # sentence yields a token, e.g. blank input. With no terms at all
        # there is nothing in common, so report zero instead of crashing.
        tfidf_score = 0.0
    else:
        tfidf_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    return {
        "WordLlama Similarity": float(wordllama_score),
        "TF-IDF Similarity": float(tfidf_score),
    }

# Sample sentence pairs offered in the UI. Kept as an immutable constant and
# expanded below into the list-of-lists form stored in ``examples``.
_EXAMPLE_PAIRS = (
    ("I went to the car", "I went to the pawn shop"),
    ("The cat is on the roof", "A dog is in the yard"),
    ("She loves playing tennis", "She enjoys sports"),
    ("This is a bright day", "It's a sunny morning"),
    ("I bought a new phone", "I got a new mobile"),
    ("The restaurant serves delicious food", "This place has great cuisine"),
    ("Python is a programming language", "Java is used for coding"),
    ("The movie was entertaining", "I enjoyed watching the film"),
    ("Climate change affects our planet", "Global warming is a serious issue"),
    ("Students study in the library", "People read books in the library"),
)

# One [first_sentence, second_sentence] row per example.
examples = [list(pair) for pair in _EXAMPLE_PAIRS]

# Build the Gradio UI: two sentence inputs, a button that computes both
# similarity scores, two numeric outputs, and clickable example pairs.
with gr.Blocks() as iface:
    gr.Markdown("# Text Similarity Comparison")
    gr.Markdown("Compare sentences using both WordLlama and TF-IDF similarity metrics")

    with gr.Row():
        sentence1 = gr.Textbox(lines=2, placeholder="Enter first sentence...", label="First Sentence")
        sentence2 = gr.Textbox(lines=2, placeholder="Enter second sentence...", label="Second Sentence")

    button = gr.Button("Calculate Similarities")

    with gr.Row():
        wordllama_output = gr.Number(label="WordLlama Similarity")
        tfidf_output = gr.Number(label="TF-IDF Similarity")

    def _scores_in_output_order(first, second):
        """Return the two scores positionally, matching ``outputs`` below.

        calculate_similarities() returns a dict keyed by label strings.
        Gradio fills multiple outputs from positional values (or a dict
        keyed by the components themselves), so the dict is unpacked here.
        """
        scores = calculate_similarities(first, second)
        return scores["WordLlama Similarity"], scores["TF-IDF Similarity"]

    button.click(
        _scores_in_output_order,
        inputs=[sentence1, sentence2],
        outputs=[wordllama_output, tfidf_output],
    )

    gr.Markdown("### Example Sentence Pairs")
    # Clicking an example fills the two sentence textboxes.
    gr.Examples(
        examples=examples,
        inputs=[sentence1, sentence2],
    )

# Start the app; share=True asks Gradio to create a public share link.
iface.launch(share=True)