Clean up code
Browse files- .idea/workspace.xml +2 -1
- app.py +3 -17
- custom_renderer.py +25 -26
.idea/workspace.xml
CHANGED
@@ -2,6 +2,7 @@
|
|
2 |
<project version="4">
|
3 |
<component name="ChangeListManager">
|
4 |
<list default="true" id="57f23431-346d-451d-8d77-db859508e831" name="Changes" comment="">
|
|
|
5 |
<change beforePath="$PROJECT_DIR$/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/app.py" afterDir="false" />
|
6 |
<change beforePath="$PROJECT_DIR$/custom_renderer.py" beforeDir="false" afterPath="$PROJECT_DIR$/custom_renderer.py" afterDir="false" />
|
7 |
</list>
|
@@ -43,7 +44,7 @@
|
|
43 |
<component name="PropertiesComponent"><![CDATA[{
|
44 |
"keyToString": {
|
45 |
"last_opened_file_path": "/home/matthias/Documents/Summarization-fact-checker/HugginfaceSpace/HFSummSpace",
|
46 |
-
"settings.editor.selected.configurable": "editor.preferences.
|
47 |
}
|
48 |
}]]></component>
|
49 |
<component name="RecentsManager">
|
|
|
2 |
<project version="4">
|
3 |
<component name="ChangeListManager">
|
4 |
<list default="true" id="57f23431-346d-451d-8d77-db859508e831" name="Changes" comment="">
|
5 |
+
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
6 |
<change beforePath="$PROJECT_DIR$/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/app.py" afterDir="false" />
|
7 |
<change beforePath="$PROJECT_DIR$/custom_renderer.py" beforeDir="false" afterPath="$PROJECT_DIR$/custom_renderer.py" afterDir="false" />
|
8 |
</list>
|
|
|
44 |
<component name="PropertiesComponent"><![CDATA[{
|
45 |
"keyToString": {
|
46 |
"last_opened_file_path": "/home/matthias/Documents/Summarization-fact-checker/HugginfaceSpace/HFSummSpace",
|
47 |
+
"settings.editor.selected.configurable": "editor.preferences.folding"
|
48 |
}
|
49 |
}]]></component>
|
50 |
<component name="RecentsManager">
|
app.py
CHANGED
@@ -19,7 +19,8 @@ from transformers import pipeline
|
|
19 |
import os
|
20 |
|
21 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
22 |
-
|
|
|
23 |
|
24 |
@st.experimental_singleton
|
25 |
def get_sentence_embedding_model():
|
@@ -108,7 +109,6 @@ def fetch_dependency_svg(filename: str) -> AnyStr:
|
|
108 |
def display_summary(summary_content: str):
|
109 |
st.session_state.summary_output = summary_content
|
110 |
soup = BeautifulSoup(summary_content, features="html.parser")
|
111 |
-
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
|
112 |
return HTML_WRAPPER.format(soup)
|
113 |
|
114 |
|
@@ -149,7 +149,6 @@ def get_all_entities(text):
|
|
149 |
return list(itertools.chain.from_iterable(all_entities_per_sentence))
|
150 |
|
151 |
|
152 |
-
# TODO: this functionality can be cached (e.g. by storing html file output) if wanted (or just store list of entities idk)
|
153 |
def get_and_compare_entities():
|
154 |
# article_content = fetch_article_contents(article_name)
|
155 |
article_content = st.session_state.article_text
|
@@ -194,10 +193,6 @@ def highlight_entities():
|
|
194 |
for entity in unmatched_entities:
|
195 |
summary_content = summary_content.replace(entity, markdown_start_red + entity + markdown_end)
|
196 |
soup = BeautifulSoup(summary_content, features="html.parser")
|
197 |
-
|
198 |
-
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
|
199 |
-
margin-bottom: 2.5rem">{}</div> """
|
200 |
-
|
201 |
return HTML_WRAPPER.format(soup)
|
202 |
|
203 |
|
@@ -207,9 +202,7 @@ def render_dependency_parsing(text: Dict):
|
|
207 |
st.write(get_svg(html), unsafe_allow_html=True)
|
208 |
|
209 |
|
210 |
-
# If deps for article: True, otherwise deps for summary calc
|
211 |
def check_dependency(article: bool):
|
212 |
-
# nlp = spacy.load('en_core_web_lg')
|
213 |
if article:
|
214 |
text = st.session_state.article_text
|
215 |
all_entities = get_all_entities_per_sentence(text)
|
@@ -220,7 +213,6 @@ def check_dependency(article: bool):
|
|
220 |
# all_entities = st.session_state.entities_per_sentence_summary
|
221 |
doc = nlp(text)
|
222 |
tok_l = doc.to_json()['tokens']
|
223 |
-
# all_deps = ""
|
224 |
test_list_dict_output = []
|
225 |
|
226 |
sentences = list(doc.sents)
|
@@ -244,7 +236,6 @@ def check_dependency(article: bool):
|
|
244 |
"target_word_index": (t['head'] - sentence.start),
|
245 |
"identifier": identifier, "sentence": str(sentence)})
|
246 |
elif object_target in all_entities[i]:
|
247 |
-
# all_deps = all_deps.join(str(sentence))
|
248 |
identifier = object_here + t['dep'] + object_target
|
249 |
test_list_dict_output.append({"dep": t['dep'], "cur_word_index": (t['id'] - sentence.start),
|
250 |
"target_word_index": (t['head'] - sentence.start),
|
@@ -252,7 +243,6 @@ def check_dependency(article: bool):
|
|
252 |
else:
|
253 |
continue
|
254 |
return test_list_dict_output
|
255 |
-
# return all_deps
|
256 |
|
257 |
|
258 |
def render_svg(svg_file):
|
@@ -320,7 +310,7 @@ st.markdown("Let’s start by selecting an article text for which we want to gen
|
|
320 |
"generated from it might not be optimal, leading to suboptimal performance of the post-processing steps.")
|
321 |
|
322 |
selected_article = st.selectbox('Select an article or provide your own:',
|
323 |
-
list_all_article_names())
|
324 |
st.session_state.article_text = fetch_article_contents(selected_article)
|
325 |
article_text = st.text_area(
|
326 |
label='Full article text',
|
@@ -391,8 +381,6 @@ if summarize_button:
|
|
391 |
if st.session_state.unchanged_text:
|
392 |
entity_specific_text = fetch_entity_specific_contents(selected_article)
|
393 |
soup = BeautifulSoup(entity_specific_text, features="html.parser")
|
394 |
-
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
|
395 |
-
margin-bottom: 2.5rem">{}</div> """
|
396 |
st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
|
397 |
|
398 |
# DEPENDENCY PARSING PART
|
@@ -429,8 +417,6 @@ if summarize_button:
|
|
429 |
st.write(cur_svg_image, unsafe_allow_html=True)
|
430 |
dep_specific_text = fetch_dependency_specific_contents(selected_article)
|
431 |
soup = BeautifulSoup(dep_specific_text, features="html.parser")
|
432 |
-
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
|
433 |
-
margin-bottom: 2.5rem">{}</div> """
|
434 |
st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
|
435 |
else:
|
436 |
summary_deps = check_dependency(False)
|
|
|
19 |
import os
|
20 |
|
21 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
22 |
+
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
|
23 |
+
margin-bottom: 2.5rem">{}</div> """
|
24 |
|
25 |
@st.experimental_singleton
|
26 |
def get_sentence_embedding_model():
|
|
|
109 |
def display_summary(summary_content: str):
|
110 |
st.session_state.summary_output = summary_content
|
111 |
soup = BeautifulSoup(summary_content, features="html.parser")
|
|
|
112 |
return HTML_WRAPPER.format(soup)
|
113 |
|
114 |
|
|
|
149 |
return list(itertools.chain.from_iterable(all_entities_per_sentence))
|
150 |
|
151 |
|
|
|
152 |
def get_and_compare_entities():
|
153 |
# article_content = fetch_article_contents(article_name)
|
154 |
article_content = st.session_state.article_text
|
|
|
193 |
for entity in unmatched_entities:
|
194 |
summary_content = summary_content.replace(entity, markdown_start_red + entity + markdown_end)
|
195 |
soup = BeautifulSoup(summary_content, features="html.parser")
|
|
|
|
|
|
|
|
|
196 |
return HTML_WRAPPER.format(soup)
|
197 |
|
198 |
|
|
|
202 |
st.write(get_svg(html), unsafe_allow_html=True)
|
203 |
|
204 |
|
|
|
205 |
def check_dependency(article: bool):
|
|
|
206 |
if article:
|
207 |
text = st.session_state.article_text
|
208 |
all_entities = get_all_entities_per_sentence(text)
|
|
|
213 |
# all_entities = st.session_state.entities_per_sentence_summary
|
214 |
doc = nlp(text)
|
215 |
tok_l = doc.to_json()['tokens']
|
|
|
216 |
test_list_dict_output = []
|
217 |
|
218 |
sentences = list(doc.sents)
|
|
|
236 |
"target_word_index": (t['head'] - sentence.start),
|
237 |
"identifier": identifier, "sentence": str(sentence)})
|
238 |
elif object_target in all_entities[i]:
|
|
|
239 |
identifier = object_here + t['dep'] + object_target
|
240 |
test_list_dict_output.append({"dep": t['dep'], "cur_word_index": (t['id'] - sentence.start),
|
241 |
"target_word_index": (t['head'] - sentence.start),
|
|
|
243 |
else:
|
244 |
continue
|
245 |
return test_list_dict_output
|
|
|
246 |
|
247 |
|
248 |
def render_svg(svg_file):
|
|
|
310 |
"generated from it might not be optimal, leading to suboptimal performance of the post-processing steps.")
|
311 |
|
312 |
selected_article = st.selectbox('Select an article or provide your own:',
|
313 |
+
list_all_article_names())
|
314 |
st.session_state.article_text = fetch_article_contents(selected_article)
|
315 |
article_text = st.text_area(
|
316 |
label='Full article text',
|
|
|
381 |
if st.session_state.unchanged_text:
|
382 |
entity_specific_text = fetch_entity_specific_contents(selected_article)
|
383 |
soup = BeautifulSoup(entity_specific_text, features="html.parser")
|
|
|
|
|
384 |
st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
|
385 |
|
386 |
# DEPENDENCY PARSING PART
|
|
|
417 |
st.write(cur_svg_image, unsafe_allow_html=True)
|
418 |
dep_specific_text = fetch_dependency_specific_contents(selected_article)
|
419 |
soup = BeautifulSoup(dep_specific_text, features="html.parser")
|
|
|
|
|
420 |
st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
|
421 |
else:
|
422 |
summary_deps = check_dependency(False)
|
custom_renderer.py
CHANGED
@@ -1,6 +1,26 @@
|
|
1 |
from typing import Dict
|
2 |
from PIL import ImageFont
|
3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
|
6 |
def get_pil_text_size(text, font_size, font_name):
|
@@ -21,15 +41,7 @@ def render_arrow(
|
|
21 |
i (int): Unique ID, typically arrow index.
|
22 |
RETURNS (str): Rendered SVG markup.
|
23 |
"""
|
24 |
-
|
25 |
-
<g class="displacy-arrow">
|
26 |
-
<path class="displacy-arc" id="arrow-{id}-{i}" stroke-width="{stroke}px" d="{arc}" fill="none" stroke="red"/>
|
27 |
-
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
28 |
-
<textPath xlink:href="#arrow-{id}-{i}" class="displacy-label" startOffset="50%" side="{label_side}" fill="red" text-anchor="middle">{label}</textPath>
|
29 |
-
</text>
|
30 |
-
<path class="displacy-arrowhead" d="{head}" fill="red"/>
|
31 |
-
</g>
|
32 |
-
"""
|
33 |
arc = get_arc(start + 10, 50, 5, end + 10)
|
34 |
arrowhead = get_arrowhead(direction, start + 10, 50, end + 10)
|
35 |
label_side = "right" if direction == "rtl" else "left"
|
@@ -75,26 +87,15 @@ def get_arrowhead(direction: str, x: int, y: int, end: int) -> str:
|
|
75 |
|
76 |
|
77 |
def render_sentence_custom(unmatched_list: Dict, nlp):
|
78 |
-
TPL_DEP_WORDS = """
|
79 |
-
<text class="displacy-token" fill="currentColor" text-anchor="start" y="{y}">
|
80 |
-
<tspan class="displacy-word" fill="currentColor" x="{x}">{text}</tspan>
|
81 |
-
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="{x}">{tag}</tspan>
|
82 |
-
</text>
|
83 |
-
"""
|
84 |
-
|
85 |
-
TPL_DEP_SVG = """
|
86 |
-
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="{lang}" id="{id}" class="displacy" width="{width}" height="{height}" direction="{dir}" style="max-width: none; height: {height}px; color: {color}; background: {bg}; font-family: {font}; direction: {dir}">{content}</svg>
|
87 |
-
"""
|
88 |
arcs_svg = []
|
89 |
-
#nlp = spacy.load('en_core_web_lg')
|
90 |
doc = nlp(unmatched_list["sentence"])
|
91 |
|
92 |
x_value_counter = 10
|
93 |
index_counter = 0
|
94 |
svg_words = []
|
95 |
-
|
96 |
-
coords_test = []
|
97 |
direction_current = "rtl"
|
|
|
98 |
if unmatched_list["cur_word_index"] < unmatched_list["target_word_index"]:
|
99 |
min_index = unmatched_list["cur_word_index"]
|
100 |
max_index = unmatched_list["target_word_index"]
|
@@ -108,13 +109,13 @@ def render_sentence_custom(unmatched_list: Dict, nlp):
|
|
108 |
pixel_x_length = get_pil_text_size(word, 16, 'arial.ttf')[0]
|
109 |
svg_words.append(TPL_DEP_WORDS.format(text=word, tag="", x=x_value_counter, y=70))
|
110 |
if min_index <= index_counter <= max_index:
|
111 |
-
|
112 |
if index_counter < max_index - 1:
|
113 |
x_value_counter += 50
|
114 |
index_counter += 1
|
115 |
x_value_counter += pixel_x_length + 4
|
116 |
|
117 |
-
arcs_svg.append(render_arrow(unmatched_list['dep'],
|
118 |
|
119 |
content = "".join(svg_words) + "".join(arcs_svg)
|
120 |
|
@@ -130,5 +131,3 @@ def render_sentence_custom(unmatched_list: Dict, nlp):
|
|
130 |
lang="en",
|
131 |
)
|
132 |
return full_svg
|
133 |
-
|
134 |
-
|
|
|
1 |
from typing import Dict
|
2 |
from PIL import ImageFont
|
3 |
|
4 |
+
TPL_DEP_WORDS = """
|
5 |
+
<text class="displacy-token" fill="currentColor" text-anchor="start" y="{y}">
|
6 |
+
<tspan class="displacy-word" fill="currentColor" x="{x}">{text}</tspan>
|
7 |
+
<tspan class="displacy-tag" dy="2em" fill="currentColor" x="{x}">{tag}</tspan>
|
8 |
+
</text>
|
9 |
+
"""
|
10 |
+
|
11 |
+
TPL_DEP_SVG = """
|
12 |
+
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="{lang}" id="{id}" class="displacy" width="{width}" height="{height}" direction="{dir}" style="max-width: none; height: {height}px; color: {color}; background: {bg}; font-family: {font}; direction: {dir}">{content}</svg>
|
13 |
+
"""
|
14 |
+
|
15 |
+
TPL_DEP_ARCS = """
|
16 |
+
<g class="displacy-arrow">
|
17 |
+
<path class="displacy-arc" id="arrow-{id}-{i}" stroke-width="{stroke}px" d="{arc}" fill="none" stroke="red"/>
|
18 |
+
<text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
|
19 |
+
<textPath xlink:href="#arrow-{id}-{i}" class="displacy-label" startOffset="50%" side="{label_side}" fill="red" text-anchor="middle">{label}</textPath>
|
20 |
+
</text>
|
21 |
+
<path class="displacy-arrowhead" d="{head}" fill="red"/>
|
22 |
+
</g>
|
23 |
+
"""
|
24 |
|
25 |
|
26 |
def get_pil_text_size(text, font_size, font_name):
|
|
|
41 |
i (int): Unique ID, typically arrow index.
|
42 |
RETURNS (str): Rendered SVG markup.
|
43 |
"""
|
44 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
arc = get_arc(start + 10, 50, 5, end + 10)
|
46 |
arrowhead = get_arrowhead(direction, start + 10, 50, end + 10)
|
47 |
label_side = "right" if direction == "rtl" else "left"
|
|
|
87 |
|
88 |
|
89 |
def render_sentence_custom(unmatched_list: Dict, nlp):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
arcs_svg = []
|
|
|
91 |
doc = nlp(unmatched_list["sentence"])
|
92 |
|
93 |
x_value_counter = 10
|
94 |
index_counter = 0
|
95 |
svg_words = []
|
96 |
+
words_under_arc = []
|
|
|
97 |
direction_current = "rtl"
|
98 |
+
|
99 |
if unmatched_list["cur_word_index"] < unmatched_list["target_word_index"]:
|
100 |
min_index = unmatched_list["cur_word_index"]
|
101 |
max_index = unmatched_list["target_word_index"]
|
|
|
109 |
pixel_x_length = get_pil_text_size(word, 16, 'arial.ttf')[0]
|
110 |
svg_words.append(TPL_DEP_WORDS.format(text=word, tag="", x=x_value_counter, y=70))
|
111 |
if min_index <= index_counter <= max_index:
|
112 |
+
words_under_arc.append(x_value_counter)
|
113 |
if index_counter < max_index - 1:
|
114 |
x_value_counter += 50
|
115 |
index_counter += 1
|
116 |
x_value_counter += pixel_x_length + 4
|
117 |
|
118 |
+
arcs_svg.append(render_arrow(unmatched_list['dep'], words_under_arc[0], words_under_arc[-1], direction_current, i))
|
119 |
|
120 |
content = "".join(svg_words) + "".join(arcs_svg)
|
121 |
|
|
|
131 |
lang="en",
|
132 |
)
|
133 |
return full_svg
|
|
|
|