Spaces:

DexterSptizu
/

wordllam-text-similarity

Running

App Files Files Community

wordllam-text-similarity / app.py

DexterSptizu

Update app.py

b1ec3a2 verified 2 days ago

raw

history blame

4.53 kB

	import gradio as gr
	from wordllama import WordLlama
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity
	import numpy as np

	# Load the default WordLlama model once at import time; calculate_similarities
	# reuses this single instance for every request.
	wl = WordLlama.load()

	# Shared TF-IDF vectorizer with default settings. calculate_similarities calls
	# fit_transform on it for each sentence pair, so whatever it learned from the
	# previous pair is overwritten on every call.
	tfidf_vectorizer = TfidfVectorizer()

	def calculate_similarities(sentence1, sentence2):
	    """Score two sentences with WordLlama and with TF-IDF cosine similarity.

	    Args:
	        sentence1: First sentence (``None`` is treated as empty).
	        sentence2: Second sentence (``None`` is treated as empty).

	    Returns:
	        A ``(wordllama_score, tfidf_score)`` tuple of plain floats. Both
	        scores are ``0.0`` when either sentence is blank, and the TF-IDF
	        score is ``0.0`` when no term can be extracted from the pair.
	    """
	    # The UI starts with empty textboxes; a blank sentence has nothing to
	    # compare, and TfidfVectorizer raises ValueError on an all-empty corpus.
	    if not (sentence1 or "").strip() or not (sentence2 or "").strip():
	        return 0.0, 0.0

	    # WordLlama similarity
	    wordllama_score = wl.similarity(sentence1, sentence2)

	    # TF-IDF similarity: fit on just these two sentences, then compare rows.
	    try:
	        tfidf_matrix = tfidf_vectorizer.fit_transform([sentence1, sentence2])
	    except ValueError:
	        # Empty vocabulary, e.g. the pair holds only punctuation or
	        # single-character tokens, which the default tokenizer drops.
	        tfidf_score = 0.0
	    else:
	        tfidf_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

	    return float(wordllama_score), float(tfidf_score)

	# Hand-picked sentence pairs for comparing both methods, keyed by the
	# behaviour each group is meant to illustrate.
	_EXAMPLE_GROUPS = {
	    "High similarity in both methods": (
	        ("The cat is sleeping on the couch", "The cat is resting on the sofa"),
	        ("I love eating pizza", "I enjoy eating pizza"),
	        ("The weather is sunny today", "It is a sunny day today"),
	    ),
	    "Medium similarity in both methods": (
	        ("She is reading a book", "She is holding a novel"),
	        ("The car is red", "The automobile is crimson"),
	        ("The children are playing in the park", "Kids are having fun at the playground"),
	    ),
	    "Cases where WordLlama should perform better": (
	        ("The food was great", "The meal was excellent"),
	        ("The student is studying hard", "The pupil is working diligently"),
	        ("This movie is fantastic", "This film is amazing"),
	    ),
	    "Cases where TF-IDF should perform better": (
	        ("The red car is parked", "The red car is moving"),
	        ("The book is on the table", "The book is under the table"),
	        ("She went to the store", "She went to the mall"),
	    ),
	    "Semantic similarity cases": (
	        ("The laptop is expensive", "The computer costs a lot"),
	        ("The dog is barking", "The canine is making noise"),
	        ("The house is large", "The home is spacious"),
	    ),
	    "Word order importance cases": (
	        ("The cat chased the mouse", "The mouse chased the cat"),
	        ("John gave Mary a book", "Mary gave John a book"),
	        ("The teacher helped the student", "The student helped the teacher"),
	    ),
	    "Synonym cases": (
	        ("The car is fast", "The vehicle is quick"),
	        ("The building is tall", "The structure is high"),
	        ("The food is delicious", "The cuisine is tasty"),
	    ),
	}

	# Flatten the groups, in declaration order, into one [first, second] row
	# per pair -- one value for each of the two sentence inputs.
	examples = [
	    [first, second]
	    for pairs in _EXAMPLE_GROUPS.values()
	    for first, second in pairs
	]

	# Gradio UI: two sentence inputs, a button that runs calculate_similarities,
	# two numeric score outputs, an explanatory legend, and clickable examples.
	with gr.Blocks(theme=gr.themes.Soft()) as iface:
	    gr.Markdown("# Text Similarity Comparison")
	    gr.Markdown("""
	    Compare sentences using both WordLlama and TF-IDF similarity metrics.
	    Examples are categorized to demonstrate strengths of each method.
	    """)

	    # Side-by-side sentence inputs.
	    with gr.Row():
	        with gr.Column():
	            sentence1 = gr.Textbox(
	                lines=2,
	                placeholder="Enter first sentence...",
	                label="First Sentence",
	                info="Type or select from examples below",
	            )
	        with gr.Column():
	            sentence2 = gr.Textbox(
	                lines=2,
	                placeholder="Enter second sentence...",
	                label="Second Sentence",
	                info="Type or select from examples below",
	            )

	    button = gr.Button("Calculate Similarities", variant="primary")

	    # Side-by-side score outputs, filled in by the button click below.
	    with gr.Row():
	        wordllama_output = gr.Number(
	            label="WordLlama Similarity",
	            info="Contextual similarity score (0-1)",
	            value=0.0,
	        )
	        tfidf_output = gr.Number(
	            label="TF-IDF Similarity",
	            info="Term frequency-based similarity score (0-1)",
	            value=0.0,
	        )

	    # The category list mirrors, in order, the seven groups of example
	    # pairs defined in the examples list.
	    gr.Markdown("""
	    ### Understanding the Scores
	    - WordLlama Similarity: Better at understanding semantic meaning and context
	    - TF-IDF Similarity: Better at exact word matching and frequency-based comparison

	    ### Example Categories
	    1. High Similarity: Both methods should show high scores
	    2. Medium Similarity: Both methods should show moderate scores
	    3. WordLlama-Favored Cases: WordLlama should perform better
	    4. TF-IDF-Favored Cases: TF-IDF should perform better
	    5. Semantic Similarity: WordLlama typically performs better
	    6. Word Order Cases: Shows how each method handles word order
	    7. Synonym Cases: Tests semantic understanding
	    """)

	    # The returned (wordllama, tfidf) tuple maps onto the two outputs in order.
	    button.click(
	        calculate_similarities,
	        inputs=[sentence1, sentence2],
	        outputs=[wordllama_output, tfidf_output],
	    )

	    # Clicking an example row only fills the two textboxes; the user still
	    # presses the button to compute the scores.
	    gr.Examples(
	        examples=examples,
	        inputs=[sentence1, sentence2],
	        label="Click on any example to load it",
	    )

	# Launch the interface only when run as a script, so importing this module
	# does not start a server. share=True is intentionally not passed: Hugging
	# Face Spaces already serves the app publicly and does not support share
	# links, and on a local run it would expose the app through a public tunnel.
	if __name__ == "__main__":
	    iface.launch()