Spaces:

DexterSptizu
/

wordllam-text-similarity

Running

App Files Files Community

wordllam-text-similarity / app.py

DexterSptizu

Update app.py

6773f13 verified 2 days ago

raw

history blame

4.4 kB

	import gradio as gr
	from wordllama import WordLlama
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity
	import numpy as np

	# Shared objects used by calculate_similarities; both are created once at import.

	# TF-IDF vectorizer with its default settings
	tfidf_vectorizer = TfidfVectorizer()

	# WordLlama model obtained from the library's default load() call
	wl = WordLlama.load()

	def calculate_similarities(sentence1, sentence2):
	    """Score a sentence pair with WordLlama and with TF-IDF cosine similarity.

	    Args:
	        sentence1: First sentence; ``None`` is treated as an empty string.
	        sentence2: Second sentence; ``None`` is treated as an empty string.

	    Returns:
	        A ``(wordllama_score, tfidf_score)`` tuple of plain floats. Both are
	        ``0.0`` when either sentence is blank; the TF-IDF score alone is
	        ``0.0`` when the pair produces no TF-IDF vocabulary.
	    """
	    sentence1 = sentence1 or ""
	    sentence2 = sentence2 or ""

	    # Blank input has nothing to compare, so report zero instead of passing
	    # empty text to either model.
	    if not sentence1.strip() or not sentence2.strip():
	        return 0.0, 0.0

	    # WordLlama similarity
	    wordllama_score = wl.similarity(sentence1, sentence2)

	    # TF-IDF similarity: the vectorizer is refit on just this pair, then the
	    # two resulting rows are compared.
	    try:
	        tfidf_matrix = tfidf_vectorizer.fit_transform([sentence1, sentence2])
	    except ValueError:
	        # scikit-learn raises ValueError (empty vocabulary) when no token
	        # survives tokenization, e.g. a pair made only of one-character
	        # words such as "I" vs "a". No shared terms means zero similarity.
	        tfidf_score = 0.0
	    else:
	        tfidf_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

	    return float(wordllama_score), float(tfidf_score)

	# Everyday sentence pairs, from loosely related to near-paraphrases.
	_GENERAL_PAIRS = [
	    ["I went to the car", "I went to the pawn shop"],
	    ["The cat is on the roof", "A dog is in the yard"],
	    ["She loves playing tennis", "She enjoys sports"],
	    ["This is a bright day", "It's a sunny morning"],
	    ["I bought a new phone", "I got a new mobile"],
	    ["The restaurant serves delicious food", "This place has great cuisine"],
	    ["Python is a programming language", "Java is used for coding"],
	    ["The movie was entertaining", "I enjoyed watching the film"],
	    ["Climate change affects our planet", "Global warming is a serious issue"],
	    ["Students study in the library", "People read books in the library"],
	]

	# Pairs built around words that sound or are spelled alike but differ in meaning.
	_LOOKALIKE_PAIRS = [
	    ["The executive board met this morning", "I was so bored during the meeting"],
	    ["Don't waste your time on this", "The dress fits perfectly at the waist"],
	    ["The principal called a meeting", "It's a matter of principle"],
	    ["The weather is beautiful today", "I don't know whether to go or stay"],
	    ["I need a piece of the cake", "The world needs peace"],
	    ["The bass was swimming in the lake", "Turn up the bass in the speaker"],
	    ["The fair is in town this weekend", "That decision wasn't fair at all"],
	    ["I need to address this letter", "What's your new address?"],
	    ["The bank of the river is muddy", "I need to go to the bank for money"],
	    ["Can you bear this weight?", "I saw a bear in the woods"],
	]

	# Complete example set: the general pairs first, then the look-alike pairs.
	examples = _GENERAL_PAIRS + _LOOKALIKE_PAIRS

	# Assemble the Gradio Blocks UI. Components are created in display order:
	# heading, the two input boxes, the trigger button, the two score fields,
	# the explanatory text and finally the clickable examples.
	with gr.Blocks(theme=gr.themes.Soft()) as iface:
	    # Heading and short introduction
	    gr.Markdown(value="# Advanced Text Similarity Comparison")
	    gr.Markdown(
	        value="""
	Compare sentences using both WordLlama and TF-IDF similarity metrics.
	This tool includes examples of similar words with different meanings to demonstrate semantic understanding.
	"""
	    )

	    # Two input boxes side by side, one per column
	    with gr.Row():
	        with gr.Column():
	            sentence1 = gr.Textbox(
	                label="First Sentence",
	                info="Type or select from examples below",
	                placeholder="Enter first sentence...",
	                lines=2,
	            )
	        with gr.Column():
	            sentence2 = gr.Textbox(
	                label="Second Sentence",
	                info="Type or select from examples below",
	                placeholder="Enter second sentence...",
	                lines=2,
	            )

	    # Button that triggers the comparison
	    button = gr.Button(value="Calculate Similarities", variant="primary")

	    # Numeric result fields, both starting at 0.0
	    with gr.Row():
	        wordllama_output = gr.Number(
	            value=0.0,
	            label="WordLlama Similarity",
	            info="Contextual similarity score (0-1)",
	        )
	        tfidf_output = gr.Number(
	            value=0.0,
	            label="TF-IDF Similarity",
	            info="Term frequency-based similarity score (0-1)",
	        )

	    # Explanation of the two scores
	    gr.Markdown(
	        value="""
	### Understanding the Scores
	- WordLlama Similarity: Measures semantic similarity considering context and meaning
	- TF-IDF Similarity: Measures similarity based on word frequency and importance
	"""
	    )

	    # Heading and description for the examples section
	    gr.Markdown(value="### Example Sentence Pairs")
	    gr.Markdown(
	        value="""
	The examples include:
	- Regular sentence pairs
	- Sentences with similar words but different meanings (homophones)
	- Contextually related sentences
	"""
	    )

	    # Clicking the button sends both sentences to calculate_similarities and
	    # writes its two return values into the two number fields.
	    button.click(
	        fn=calculate_similarities,
	        inputs=[sentence1, sentence2],
	        outputs=[wordllama_output, tfidf_output],
	    )

	    # Example pairs that fill both input boxes when selected
	    gr.Examples(
	        label="Click on any example to load it",
	        examples=examples,
	        inputs=[sentence1, sentence2],
	    )

	# Launch the interface only when this file is executed as a script, so that
	# importing the module (e.g. to reuse calculate_similarities) does not start
	# a server as a side effect.
	# NOTE(review): share=True requests a public share link; Gradio's docs say it
	# is not supported when hosted on Hugging Face Spaces — confirm it is still
	# wanted for local runs.
	if __name__ == "__main__":
	    iface.launch(share=True)