Spaces:

DexterSptizu
/

word-llama-embeddings-explorer

Running

App Files Community

DexterSptizu committed 2 days ago

Commit

005b8a9

•

1 Parent(s): 8afde48

Update app.py

Browse files

Files changed (1) hide show

app.py +145 -107

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import numpy as np
 from wordllama import WordLlama
 import plotly.graph_objects as go
 import plotly.express as px
-from sklearn.decomposition import PCA
 import pandas as pd
 # Page configuration
@@ -16,17 +16,6 @@ st.set_page_config(
 # Custom CSS
 st.markdown("""
     <style>
-    .main {
-        background-color: #f8f9fa;
-    }
-    .stTabs [data-baseweb="tab-list"] {
-        gap: 24px;
-    }
-    .stTabs [data-baseweb="tab"] {
-        height: 50px;
-        padding-left: 20px;
-        padding-right: 20px;
-    }
     .title-font {
         font-size: 28px !important;
         font-weight: bold;
@@ -41,104 +30,112 @@ def load_wordllama():
 wl = load_wordllama()
-def create_visualization(texts, embeddings):
-    """Create appropriate visualization based on number of samples"""
-    n_samples = len(embeddings)
-    # Create DataFrame with original embeddings
-    df = pd.DataFrame(embeddings)
-    df['text'] = texts
-    if n_samples == 2:
-        # For 2 samples, create a 2D visualization
         fig = go.Figure()
-        # Add points
         fig.add_trace(go.Scatter(
-            x=[0, 1],
-            y=[0, wl.similarity(texts[0], texts[1])],
             mode='markers+text',
             text=texts,
             textposition='top center',
-            marker=dict(size=10)
         ))
         fig.update_layout(
-            title="Text Similarity Visualization",
-            xaxis_title="Position",
-            yaxis_title="Similarity",
             height=400,
-            showlegend=False
         )
     else:
-        # For 3 or more samples, use PCA for 3D visualization
-        pca = PCA(n_components=min(3, n_samples))
-        embeddings_reduced = pca.fit_transform(embeddings)
-        # Pad with zeros if needed
-        if embeddings_reduced.shape[1] < 3:
-            padding = np.zeros((embeddings_reduced.shape[0], 3 - embeddings_reduced.shape[1]))
-            embeddings_reduced = np.hstack([embeddings_reduced, padding])
         # Create DataFrame for plotting
-        df_plot = pd.DataFrame(
-            embeddings_reduced,
             columns=['X', 'Y', 'Z']
         )
-        df_plot['text'] = texts
-        fig = px.scatter_3d(
-            df_plot, x='X', y='Y', z='Z',
-            text='text',
-            title='Text Embeddings Visualization'
-        )
-        fig.update_traces(
-            marker=dict(size=8, opacity=0.8),
-            textposition='top center'
-        )
         fig.update_layout(
             scene=dict(
-                xaxis_title='Component 1',
-                yaxis_title='Component 2',
-                zaxis_title='Component 3'
             ),
             height=700
         )
     return fig
-def create_similarity_matrix(texts):
-    n = len(texts)
-    similarity_matrix = np.zeros((n, n))
-    for i in range(n):
-        for j in range(n):
-            similarity_matrix[i][j] = wl.similarity(texts[i], texts[j])
-    fig = go.Figure(data=go.Heatmap(
-        z=similarity_matrix,
-        x=texts,
-        y=texts,
-        colorscale='Viridis',
-        text=np.round(similarity_matrix, 3),
-        texttemplate='%{text}',
-        textfont={"size": 10},
-    ))
-    fig.update_layout(
-        title="Similarity Matrix",
-        height=400
-    )
-    return fig
 def main():
-    st.title("🦙 WordLlama Embedding Explorer")
-    st.markdown("<p class='title-font'>Explore the power of WordLlama embeddings</p>",
                 unsafe_allow_html=True)
     tabs = st.tabs(["💫 Text Similarity", "🎯 Multi-Text Analysis"])
     with tabs[0]:
@@ -146,14 +143,21 @@ def main():
         col1, col2 = st.columns(2)
         with col1:
-            text1 = st.text_area("First Text", value="I love programming in Python", height=100)
         with col2:
-            text2 = st.text_area("Second Text", value="Coding with Python is amazing", height=100)
-        if st.button("Calculate Similarity", key="sim_button"):
             similarity = wl.similarity(text1, text2)
-            st.markdown("### Results")
             col1, col2 = st.columns(2)
             with col1:
@@ -164,60 +168,94 @@ def main():
                 )
                 interpretation = (
-                    "Very Similar" if similarity > 0.8
-                    else "Moderately Similar" if similarity > 0.5
-                    else "Different"
                 )
                 st.info(f"Interpretation: {interpretation}")
             with col2:
-                embeddings = wl.embed([text1, text2])
                 st.plotly_chart(
-                    create_visualization([text1, text2], embeddings),
                     use_container_width=True
                 )
     with tabs[1]:
         st.markdown("### Analyze Multiple Texts")
         num_texts = st.slider("Number of texts:", 2, 6, 3)
         texts = []
         for i in range(num_texts):
             text = st.text_area(
-                f"Text {i+1}",
-                value=f"Example text {i+1}",
                 height=100,
                 key=f"text_{i}"
             )
             texts.append(text)
         if st.button("Analyze Texts", key="analyze_button"):
-            embeddings = wl.embed(texts)
-            st.markdown("### Visualization")
             st.plotly_chart(
-                create_visualization(texts, embeddings),
                 use_container_width=True
             )
             st.markdown("### Similarity Matrix")
-            st.plotly_chart(
-                create_similarity_matrix(texts),
-                use_container_width=True
             )
-            # Pairwise similarity analysis
-            st.markdown("### Pairwise Similarities")
-            for i in range(len(texts)):
-                for j in range(i+1, len(texts)):
-                    similarity = wl.similarity(texts[i], texts[j])
-                    interpretation = (
-                        "🟢 Very Similar" if similarity > 0.8
-                        else "🟡 Moderately Similar" if similarity > 0.5
-                        else "🔴 Different"
-                    )
-                    st.write(f"{interpretation} ({similarity:.3f}): Text {i+1} vs Text {j+1}")
 if __name__ == "__main__":
     main()

 from wordllama import WordLlama
 import plotly.graph_objects as go
 import plotly.express as px
+from sklearn.manifold import MDS
 import pandas as pd
 # Page configuration
 # Custom CSS
 st.markdown("""
     <style>
     .title-font {
         font-size: 28px !important;
         font-weight: bold;
 wl = load_wordllama()
+def create_similarity_based_visualization(texts):
+    """Create visualization based on similarity distances"""
+    n = len(texts)
+    # Create similarity matrix
+    similarity_matrix = np.zeros((n, n))
+    for i in range(n):
+        for j in range(n):
+            similarity_matrix[i][j] = wl.similarity(texts[i], texts[j])
+    # Convert similarities to distances (1 - similarity)
+    distance_matrix = 1 - similarity_matrix
+    if n == 2:
+        # For 2 texts, create a simple 2D visualization
         fig = go.Figure()
+        # Place points based on similarity
+        similarity = similarity_matrix[0][1]
         fig.add_trace(go.Scatter(
+            x=[0, 1-similarity],  # Offset is 1 - similarity, so more similar texts sit closer together
+            y=[0, 0],
             mode='markers+text',
             text=texts,
             textposition='top center',
+            marker=dict(size=10, color=['blue', 'red'])
         ))
         fig.update_layout(
+            title=f"Text Similarity Visualization (Similarity: {similarity:.3f})",
+            xaxis_title="Relative Distance",
+            yaxis_title="",
             height=400,
+            showlegend=False,
+            xaxis=dict(range=[-0.1, 1.1]),
+            yaxis=dict(range=[-0.5, 0.5])
         )
     else:
+        # For 3 or more texts, use MDS for 3D visualization
+        mds = MDS(n_components=3, dissimilarity='precomputed', random_state=42)
+        coords = mds.fit_transform(distance_matrix)
         # Create DataFrame for plotting
+        df = pd.DataFrame(
+            coords,
             columns=['X', 'Y', 'Z']
         )
+        df['text'] = texts
+        # Create 3D scatter plot
+        fig = go.Figure(data=[go.Scatter3d(
+            x=df['X'],
+            y=df['Y'],
+            z=df['Z'],
+            mode='markers+text',
+            text=texts,
+            textposition='top center',
+            marker=dict(
+                size=10,
+                color=list(range(len(texts))),
+                colorscale='Viridis',
+                opacity=0.8
+            )
+        )])
+        # Add lines between points to show connections
+        for i in range(n):
+            for j in range(i+1, n):
+                fig.add_trace(go.Scatter3d(
+                    x=[coords[i,0], coords[j,0]],
+                    y=[coords[i,1], coords[j,1]],
+                    z=[coords[i,2], coords[j,2]],
+                    mode='lines',
+                    line=dict(
+                        color=f'rgba(100,100,100,{similarity_matrix[i,j]:.2f})',
+                        width=2
+                    ),
+                    showlegend=False
+                ))
         fig.update_layout(
+            title="3D Similarity Visualization",
             scene=dict(
+                xaxis_title="Dimension 1",
+                yaxis_title="Dimension 2",
+                zaxis_title="Dimension 3"
             ),
             height=700
         )
     return fig
 def main():
+    st.title("🦙 WordLlama Similarity Explorer")
+    st.markdown("<p class='title-font'>Visualize text similarities in 3D space</p>",
                 unsafe_allow_html=True)
+    with st.expander("ℹ️ How to interpret the visualization", expanded=True):
+        st.markdown("""
+        - **Distance between points** represents dissimilarity (farther = less similar)
+        - **Line opacity** indicates similarity strength (darker = more similar)
+        - **Colors** help distinguish different texts
+        - **Hover** over points to see full text content
+        """)
     tabs = st.tabs(["💫 Text Similarity", "🎯 Multi-Text Analysis"])
     with tabs[0]:
         col1, col2 = st.columns(2)
         with col1:
+            text1 = st.text_area(
+                "First Text",
+                value="I love programming in Python",
+                height=100
+            )
         with col2:
+            text2 = st.text_area(
+                "Second Text",
+                value="Coding with Python is amazing",
+                height=100
+            )
+        if st.button("Analyze Similarity", key="sim_button"):
             similarity = wl.similarity(text1, text2)
             col1, col2 = st.columns(2)
             with col1:
                 )
                 interpretation = (
+                    "🟢 Very Similar" if similarity > 0.8
+                    else "🟡 Moderately Similar" if similarity > 0.5
+                    else "🔴 Different"
                 )
                 st.info(f"Interpretation: {interpretation}")
             with col2:
                 st.plotly_chart(
+                    create_similarity_based_visualization([text1, text2]),
                     use_container_width=True
                 )
     with tabs[1]:
         st.markdown("### Analyze Multiple Texts")
+        # Example templates
+        examples = {
+            "Similar Texts": [
+                "I love programming in Python",
+                "Python programming is my passion",
+                "I enjoy coding with Python"
+            ],
+            "Mixed Similarity": [
+                "The cat sleeps on the mat",
+                "A cat is sleeping on the rug",
+                "Python is a programming language"
+            ],
+            "Different Topics": [
+                "The weather is sunny today",
+                "Python is a programming language",
+                "Cats are wonderful pets"
+            ]
+        }
+        col1, col2 = st.columns([3, 1])
+        with col1:
+            selected_example = st.selectbox(
+                "Choose an example set:",
+                list(examples.keys())
+            )
+        with col2:
+            if st.button("Load Example"):
+                st.session_state.texts = examples[selected_example]
         num_texts = st.slider("Number of texts:", 2, 6, 3)
         texts = []
         for i in range(num_texts):
+            default_text = (examples[selected_example][i]
+                          if selected_example in examples and i < len(examples[selected_example])
+                          else f"Example text {i+1}")
             text = st.text_area(
+                f"Text {i+1}",
+                value=default_text,
                 height=100,
                 key=f"text_{i}"
             )
             texts.append(text)
         if st.button("Analyze Texts", key="analyze_button"):
             st.plotly_chart(
+                create_similarity_based_visualization(texts),
                 use_container_width=True
             )
+            # Show similarity matrix
             st.markdown("### Similarity Matrix")
+            similarity_matrix = np.zeros((len(texts), len(texts)))
+            for i in range(len(texts)):
+                for j in range(len(texts)):
+                    similarity_matrix[i][j] = wl.similarity(texts[i], texts[j])
+            fig = go.Figure(data=go.Heatmap(
+                z=similarity_matrix,
+                x=[f"Text {i+1}" for i in range(len(texts))],
+                y=[f"Text {i+1}" for i in range(len(texts))],
+                colorscale='Viridis',
+                text=np.round(similarity_matrix, 3),
+                texttemplate='%{text}',
+                textfont={"size": 12},
+            ))
+            fig.update_layout(
+                title="Similarity Matrix",
+                height=400
             )
+            st.plotly_chart(fig, use_container_width=True)
 if __name__ == "__main__":
     main()