Spaces:
Sleeping
Sleeping
Commit
•
356174d
1
Parent(s):
d5f15cb
Updating topic_word
Browse files
app/pages/01_Topic_Explorer_π.py
CHANGED
@@ -44,12 +44,13 @@ def main():
|
|
44 |
The `y` axis shows which words are closest to a topic centroid. The `x` axis shows how correlated they are.""")
|
45 |
|
46 |
topic_num = st.sidebar.slider("Topic Number", 0, 19, value=0)
|
|
|
47 |
fig = go.Figure(go.Bar(
|
48 |
x=st.session_state.model.topic_word_scores_reduced[topic_num][::-1],
|
49 |
y=st.session_state.model.topic_words_reduced[topic_num][::-1],
|
50 |
orientation='h'))
|
51 |
fig.update_layout(
|
52 |
-
title=f'Words for Topic {topic_num}',
|
53 |
yaxis_title='Top 20 topic words',
|
54 |
xaxis_title='Distance to topic centroid'
|
55 |
)
|
|
|
44 |
The `y` axis shows which words are closest to a topic centroid. The `x` axis shows how correlated they are.""")
|
45 |
|
46 |
topic_num = st.sidebar.slider("Topic Number", 0, 19, value=0)
|
47 |
+
topic_num_str = f"{topic_num:02}"
|
48 |
fig = go.Figure(go.Bar(
|
49 |
x=st.session_state.model.topic_word_scores_reduced[topic_num][::-1],
|
50 |
y=st.session_state.model.topic_words_reduced[topic_num][::-1],
|
51 |
orientation='h'))
|
52 |
fig.update_layout(
|
53 |
+
title=f'Words for Topic {topic_num_str}: {st.session_state.topic_str_to_word[topic_num_str]}',
|
54 |
yaxis_title='Top 20 topic words',
|
55 |
xaxis_title='Distance to topic centroid'
|
56 |
)
|
app/pages/02_Document_Explorer_π.py
CHANGED
@@ -72,6 +72,7 @@ def main():
|
|
72 |
st.button("Reset", help="Will Reset the selected points and the selected topics", on_click=reset)
|
73 |
data_to_model = st.session_state.data.sort_values(by='topic_id',
|
74 |
ascending=True) # to make legend sorted https://bioinformatics.stackexchange.com/a/18847
|
|
|
75 |
fig = px.scatter(data_to_model, x='x', y='y', color='topic_id', template='plotly_dark',
|
76 |
hover_data=['id', 'topic_id', 'x', 'y'])
|
77 |
st.session_state.selected_points = plotly_events(fig, select_event=True, click_event=False)
|
@@ -84,7 +85,9 @@ def main():
|
|
84 |
filter_df()
|
85 |
cols = ['id', 'topic_id', 'documents']
|
86 |
data = st.session_state.selected_data[cols]
|
87 |
-
|
|
|
|
|
88 |
builder.configure_pagination()
|
89 |
go = builder.build()
|
90 |
AgGrid(st.session_state.selected_data[cols], theme='streamlit', gridOptions=go,
|
|
|
72 |
st.button("Reset", help="Will Reset the selected points and the selected topics", on_click=reset)
|
73 |
data_to_model = st.session_state.data.sort_values(by='topic_id',
|
74 |
ascending=True) # to make legend sorted https://bioinformatics.stackexchange.com/a/18847
|
75 |
+
data_to_model['topic_id'].replace(st.session_state.topic_str_to_word, inplace=True)
|
76 |
fig = px.scatter(data_to_model, x='x', y='y', color='topic_id', template='plotly_dark',
|
77 |
hover_data=['id', 'topic_id', 'x', 'y'])
|
78 |
st.session_state.selected_points = plotly_events(fig, select_event=True, click_event=False)
|
|
|
85 |
filter_df()
|
86 |
cols = ['id', 'topic_id', 'documents']
|
87 |
data = st.session_state.selected_data[cols]
|
88 |
+
data['topic_word'] = data.topic_id.replace(st.session_state.topic_str_to_word)
|
89 |
+
ordered_cols = ['id', 'topic_id', 'topic_word', 'documents']
|
90 |
+
builder = GridOptionsBuilder.from_dataframe(data[ordered_cols])
|
91 |
builder.configure_pagination()
|
92 |
go = builder.build()
|
93 |
AgGrid(st.session_state.selected_data[cols], theme='streamlit', gridOptions=go,
|
app/pages/03_Semantic_Search_π.py
CHANGED
@@ -89,7 +89,9 @@ def main():
|
|
89 |
|
90 |
with tab1:
|
91 |
cols = ['id', 'document_scores', 'topic_id', 'documents']
|
92 |
-
|
|
|
|
|
93 |
builder.configure_pagination()
|
94 |
builder.configure_column('document_scores', type=["numericColumn", "numberColumnFilter", "customNumericFormat"],
|
95 |
precision=2)
|
|
|
89 |
|
90 |
with tab1:
|
91 |
cols = ['id', 'document_scores', 'topic_id', 'documents']
|
92 |
+
data = st.session_state.data_to_model_without_point.loc[:, cols]
|
93 |
+
data['topic_word'] = data.topic_id.replace(st.session_state.topic_str_to_word)
|
94 |
+
builder = GridOptionsBuilder.from_dataframe(data)
|
95 |
builder.configure_pagination()
|
96 |
builder.configure_column('document_scores', type=["numericColumn", "numberColumnFilter", "customNumericFormat"],
|
97 |
precision=2)
|
app/utilities.py
CHANGED
@@ -35,6 +35,9 @@ def initialization():
|
|
35 |
topics = pd.read_csv(proj_dir / 'data' / 'topics.csv')
|
36 |
topics['topic_id'] = topics['topic_id'].apply(lambda x: f'{x:02d}')
|
37 |
st.session_state.topics = topics
|
|
|
|
|
|
|
38 |
|
39 |
if 'selected_points' not in st.session_state:
|
40 |
st.session_state.selected_points = []
|
|
|
35 |
topics = pd.read_csv(proj_dir / 'data' / 'topics.csv')
|
36 |
topics['topic_id'] = topics['topic_id'].apply(lambda x: f'{x:02d}')
|
37 |
st.session_state.topics = topics
|
38 |
+
topics_dict = topics[['topic_id', 'topic_0']].to_dict()
|
39 |
+
topic_str_to_word = {topics_dict['topic_id'][i]: topics_dict['topic_0'][i] for i in range(20)}
|
40 |
+
st.session_state.topic_str_to_word = topic_str_to_word
|
41 |
|
42 |
if 'selected_points' not in st.session_state:
|
43 |
st.session_state.selected_points = []
|