File size: 4,400 Bytes
1107e2d
 
83b72c1
 
 
1107e2d
 
 
 
83b72c1
 
1107e2d
83b72c1
 
 
 
 
 
 
 
cfe8e38
83b72c1
6773f13
1107e2d
6773f13
1107e2d
 
 
 
83b72c1
 
 
 
 
6773f13
 
 
 
 
 
 
 
 
 
 
 
 
1107e2d
 
83b72c1
6773f13
 
 
 
 
 
83b72c1
 
6773f13
 
 
 
 
 
 
 
 
 
 
 
 
 
83b72c1
6773f13
9732442
83b72c1
6773f13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9732442
83b72c1
 
 
 
 
9732442
83b72c1
 
6773f13
 
83b72c1
9732442
6773f13
83b72c1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import gradio as gr
from wordllama import WordLlama
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load the default WordLlama model once at import time; every call to
# calculate_similarities() reuses this instance.
wl = WordLlama.load()

# Module-level TF-IDF vectorizer, kept so existing references to this name
# keep working. calculate_similarities() does not fit it: refitting a single
# shared instance from overlapping requests would let calls overwrite each
# other's fitted vocabulary.
tfidf_vectorizer = TfidfVectorizer()


def calculate_similarities(sentence1: str, sentence2: str):
    """Score two sentences with WordLlama and with TF-IDF cosine similarity.

    Args:
        sentence1: First sentence to compare.
        sentence2: Second sentence to compare.

    Returns:
        A ``(wordllama_score, tfidf_score)`` tuple of Python floats. The
        TF-IDF score is ``0.0`` when neither sentence produces a token for
        the vectorizer (for example, when both inputs are empty).
    """
    # WordLlama similarity
    wordllama_score = wl.similarity(sentence1, sentence2)

    # TF-IDF similarity. The vectorizer is fitted on just these two
    # sentences, so a fresh instance per call gives the same result as
    # refitting a shared one while keeping calls independent of each other.
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform([sentence1, sentence2])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when no token
        # survives tokenization, e.g. both inputs are blank or contain only
        # single-character words. With no shared terms to measure, report
        # zero overlap instead of failing the request.
        tfidf_score = 0.0
    else:
        tfidf_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    return float(wordllama_score), float(tfidf_score)

# Sentence pairs offered as ready-made inputs. Each entry is a two-item list:
# [first sentence, second sentence].
examples = [
    [first, second]
    for first, second in (
        # Everyday pairs with varying amounts of topical overlap
        ("I went to the car", "I went to the pawn shop"),
        ("The cat is on the roof", "A dog is in the yard"),
        ("She loves playing tennis", "She enjoys sports"),
        ("This is a bright day", "It's a sunny morning"),
        ("I bought a new phone", "I got a new mobile"),
        ("The restaurant serves delicious food", "This place has great cuisine"),
        ("Python is a programming language", "Java is used for coding"),
        ("The movie was entertaining", "I enjoyed watching the film"),
        ("Climate change affects our planet", "Global warming is a serious issue"),
        ("Students study in the library", "People read books in the library"),
        # Pairs built around words that sound alike or are spelled alike
        # but carry different meanings
        ("The executive board met this morning", "I was so bored during the meeting"),
        ("Don't waste your time on this", "The dress fits perfectly at the waist"),
        ("The principal called a meeting", "It's a matter of principle"),
        ("The weather is beautiful today", "I don't know whether to go or stay"),
        ("I need a piece of the cake", "The world needs peace"),
        ("The bass was swimming in the lake", "Turn up the bass in the speaker"),
        ("The fair is in town this weekend", "That decision wasn't fair at all"),
        ("I need to address this letter", "What's your new address?"),
        ("The bank of the river is muddy", "I need to go to the bank for money"),
        ("Can you bear this weight?", "I saw a bear in the woods"),
    )
]

# Define Gradio interface with updated layout.
# Top to bottom: title and intro text, two sentence textboxes side by side,
# a button, two numeric score fields side by side, explanatory text, and a
# table of example pairs. Components appear in the order they are created
# here, so statement order in this block is part of the layout.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown("# Advanced Text Similarity Comparison")
    gr.Markdown("""
    Compare sentences using both WordLlama and TF-IDF similarity metrics.
    This tool includes examples of similar words with different meanings to demonstrate semantic understanding.
    """)
    
    # Input row: one column per sentence.
    with gr.Row():
        with gr.Column():
            sentence1 = gr.Textbox(
                lines=2,
                placeholder="Enter first sentence...",
                label="First Sentence",
                info="Type or select from examples below"
            )
        with gr.Column():
            sentence2 = gr.Textbox(
                lines=2,
                placeholder="Enter second sentence...",
                label="Second Sentence",
                info="Type or select from examples below"
            )
    
    button = gr.Button("Calculate Similarities", variant="primary")
    
    # Output row: both score fields start at 0.0 until the button is clicked.
    # NOTE(review): the info text promises a 0-1 range for both scores;
    # confirm wl.similarity() cannot return a negative value.
    with gr.Row():
        wordllama_output = gr.Number(
            label="WordLlama Similarity",
            info="Contextual similarity score (0-1)",
            value=0.0
        )
        tfidf_output = gr.Number(
            label="TF-IDF Similarity",
            info="Term frequency-based similarity score (0-1)",
            value=0.0
        )
    
    gr.Markdown("""
    ### Understanding the Scores
    - **WordLlama Similarity**: Measures semantic similarity considering context and meaning
    - **TF-IDF Similarity**: Measures similarity based on word frequency and importance
    """)
    
    # NOTE(review): the list below labels the look-alike pairs "homophones",
    # but several entries in `examples` (bass, fair, address, bank, bear)
    # reuse the same spelling in both sentences - consider rewording.
    gr.Markdown("### Example Sentence Pairs")
    gr.Markdown("""
    The examples include:
    - Regular sentence pairs
    - Sentences with similar words but different meanings (homophones)
    - Contextually related sentences
    """)
    
    # Wire the button: pass both textbox values to calculate_similarities and
    # route its two return values to the score fields, in this order
    # (WordLlama first, TF-IDF second).
    button.click(
        calculate_similarities,
        inputs=[sentence1, sentence2],
        outputs=[wordllama_output, tfidf_output]
    )
    
    # Example pairs target the two sentence textboxes; each row of `examples`
    # supplies [first sentence, second sentence].
    gr.Examples(
        examples=examples,
        inputs=[sentence1, sentence2],
        label="Click on any example to load it"
    )

# Launch the interface
# NOTE(review): this runs at import time (no __main__ guard), and share=True
# presumably requests a public share link - confirm both are intended for
# the environment this app is deployed in.
iface.launch(share=True)