DexterSptizu
committed on
Commit
•
b1ec3a2
1
Parent(s):
47c3959
Update app.py
Browse files
app.py
CHANGED
@@ -20,39 +20,50 @@ def calculate_similarities(sentence1, sentence2):
|
|
20 |
|
21 |
return float(wordllama_score), float(tfidf_score)
|
22 |
|
23 |
-
#
|
24 |
examples = [
|
25 |
-
#
|
26 |
-
["
|
27 |
-
["
|
28 |
-
["
|
29 |
-
["This is a bright day", "It's a sunny morning"],
|
30 |
-
["I bought a new phone", "I got a new mobile"],
|
31 |
-
["The restaurant serves delicious food", "This place has great cuisine"],
|
32 |
-
["Python is a programming language", "Java is used for coding"],
|
33 |
-
["The movie was entertaining", "I enjoyed watching the film"],
|
34 |
-
["Climate change affects our planet", "Global warming is a serious issue"],
|
35 |
-
["Students study in the library", "People read books in the library"],
|
36 |
|
37 |
-
#
|
38 |
-
["
|
39 |
-
["
|
40 |
-
["The
|
41 |
-
|
42 |
-
|
43 |
-
["The
|
44 |
-
["The
|
45 |
-
["
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
]
|
49 |
|
50 |
# Define Gradio interface with updated layout
|
51 |
with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
52 |
-
gr.Markdown("#
|
53 |
gr.Markdown("""
|
54 |
Compare sentences using both WordLlama and TF-IDF similarity metrics.
|
55 |
-
|
56 |
""")
|
57 |
|
58 |
with gr.Row():
|
@@ -87,16 +98,15 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
|
87 |
|
88 |
gr.Markdown("""
|
89 |
### Understanding the Scores
|
90 |
-
- **WordLlama Similarity**:
|
91 |
-
- **TF-IDF Similarity**:
|
92 |
-
""")
|
93 |
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
""")
|
101 |
|
102 |
button.click(
|
|
|
20 |
|
21 |
return float(wordllama_score), float(tfidf_score)
|
22 |
|
23 |
+
# Carefully selected examples to compare both methods
|
24 |
examples = [
|
25 |
+
# High similarity in both methods
|
26 |
+
["The cat is sleeping on the couch", "The cat is resting on the sofa"],
|
27 |
+
["I love eating pizza", "I enjoy eating pizza"],
|
28 |
+
["The weather is sunny today", "It is a sunny day today"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
+
# Medium similarity in both methods
|
31 |
+
["She is reading a book", "She is holding a novel"],
|
32 |
+
["The car is red", "The automobile is crimson"],
|
33 |
+
["The children are playing in the park", "Kids are having fun at the playground"],
|
34 |
+
|
35 |
+
# Cases where WordLlama should perform better
|
36 |
+
["The food was great", "The meal was excellent"],
|
37 |
+
["The student is studying hard", "The pupil is working diligently"],
|
38 |
+
["This movie is fantastic", "This film is amazing"],
|
39 |
+
|
40 |
+
# Cases where TF-IDF should perform better
|
41 |
+
["The red car is parked", "The red car is moving"],
|
42 |
+
["The book is on the table", "The book is under the table"],
|
43 |
+
["She went to the store", "She went to the mall"],
|
44 |
+
|
45 |
+
# Semantic similarity cases
|
46 |
+
["The laptop is expensive", "The computer costs a lot"],
|
47 |
+
["The dog is barking", "The canine is making noise"],
|
48 |
+
["The house is large", "The home is spacious"],
|
49 |
+
|
50 |
+
# Word order importance cases
|
51 |
+
["The cat chased the mouse", "The mouse chased the cat"],
|
52 |
+
["John gave Mary a book", "Mary gave John a book"],
|
53 |
+
["The teacher helped the student", "The student helped the teacher"],
|
54 |
+
|
55 |
+
# Synonym cases
|
56 |
+
["The car is fast", "The vehicle is quick"],
|
57 |
+
["The building is tall", "The structure is high"],
|
58 |
+
["The food is delicious", "The cuisine is tasty"]
|
59 |
]
|
60 |
|
61 |
# Define Gradio interface with updated layout
|
62 |
with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
63 |
+
gr.Markdown("# Text Similarity Comparison")
|
64 |
gr.Markdown("""
|
65 |
Compare sentences using both WordLlama and TF-IDF similarity metrics.
|
66 |
+
Examples are categorized to demonstrate strengths of each method.
|
67 |
""")
|
68 |
|
69 |
with gr.Row():
|
|
|
98 |
|
99 |
gr.Markdown("""
|
100 |
### Understanding the Scores
|
101 |
+
- **WordLlama Similarity**: Better at understanding semantic meaning and context
|
102 |
+
- **TF-IDF Similarity**: Better at exact word matching and frequency-based comparison
|
|
|
103 |
|
104 |
+
### Example Categories
|
105 |
+
1. High Similarity: Both methods should show high scores
|
106 |
+
2. Medium Similarity: Both methods should show moderate scores
|
107 |
+
3. Semantic Similarity: WordLlama typically performs better
|
108 |
+
4. Word Order Cases: Shows how each method handles word order
|
109 |
+
5. Synonym Cases: Tests semantic understanding
|
110 |
""")
|
111 |
|
112 |
button.click(
|