Spaces:

ml6team
/

post-processing-summarization

Running

App Files Community

MatthiasC committed on Apr 21, 2022

Commit

937ed20

•

1 Parent(s): 065051d

Clean up code

Browse files

Files changed (3) hide show

.idea/workspace.xml +2 -1
app.py +3 -17
custom_renderer.py +25 -26

.idea/workspace.xml CHANGED Viewed

@@ -2,6 +2,7 @@
 <project version="4">
   <component name="ChangeListManager">
     <list default="true" id="57f23431-346d-451d-8d77-db859508e831" name="Changes" comment="">
       <change beforePath="$PROJECT_DIR$/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/app.py" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/custom_renderer.py" beforeDir="false" afterPath="$PROJECT_DIR$/custom_renderer.py" afterDir="false" />
     </list>
@@ -43,7 +44,7 @@
   <component name="PropertiesComponent"><![CDATA[{
   "keyToString": {
     "last_opened_file_path": "/home/matthias/Documents/Summarization-fact-checker/HugginfaceSpace/HFSummSpace",
-    "settings.editor.selected.configurable": "editor.preferences.fonts.default"
   }
 }]]></component>
   <component name="RecentsManager">

 <project version="4">
   <component name="ChangeListManager">
     <list default="true" id="57f23431-346d-451d-8d77-db859508e831" name="Changes" comment="">
+      <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/app.py" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/custom_renderer.py" beforeDir="false" afterPath="$PROJECT_DIR$/custom_renderer.py" afterDir="false" />
     </list>
   <component name="PropertiesComponent"><![CDATA[{
   "keyToString": {
     "last_opened_file_path": "/home/matthias/Documents/Summarization-fact-checker/HugginfaceSpace/HFSummSpace",
+    "settings.editor.selected.configurable": "editor.preferences.folding"
   }
 }]]></component>
   <component name="RecentsManager">

app.py CHANGED Viewed

@@ -19,7 +19,8 @@ from transformers import pipeline
 import os
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 @st.experimental_singleton
 def get_sentence_embedding_model():
@@ -108,7 +109,6 @@ def fetch_dependency_svg(filename: str) -> AnyStr:
 def display_summary(summary_content: str):
     st.session_state.summary_output = summary_content
     soup = BeautifulSoup(summary_content, features="html.parser")
-    HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
     return HTML_WRAPPER.format(soup)
@@ -149,7 +149,6 @@ def get_all_entities(text):
     return list(itertools.chain.from_iterable(all_entities_per_sentence))
-# TODO: this functionality can be cached (e.g. by storing html file output) if wanted (or just store list of entities idk)
 def get_and_compare_entities():
     # article_content = fetch_article_contents(article_name)
     article_content = st.session_state.article_text
@@ -194,10 +193,6 @@ def highlight_entities():
     for entity in unmatched_entities:
         summary_content = summary_content.replace(entity, markdown_start_red + entity + markdown_end)
     soup = BeautifulSoup(summary_content, features="html.parser")
-    HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
-    margin-bottom: 2.5rem">{}</div> """
     return HTML_WRAPPER.format(soup)
@@ -207,9 +202,7 @@ def render_dependency_parsing(text: Dict):
     st.write(get_svg(html), unsafe_allow_html=True)
-# If deps for article: True, otherwise deps for summary calc
 def check_dependency(article: bool):
-    # nlp = spacy.load('en_core_web_lg')
     if article:
         text = st.session_state.article_text
         all_entities = get_all_entities_per_sentence(text)
@@ -220,7 +213,6 @@ def check_dependency(article: bool):
         # all_entities = st.session_state.entities_per_sentence_summary
     doc = nlp(text)
     tok_l = doc.to_json()['tokens']
-    # all_deps = ""
     test_list_dict_output = []
     sentences = list(doc.sents)
@@ -244,7 +236,6 @@ def check_dependency(article: bool):
                                                   "target_word_index": (t['head'] - sentence.start),
                                                   "identifier": identifier, "sentence": str(sentence)})
                 elif object_target in all_entities[i]:
-                    # all_deps = all_deps.join(str(sentence))
                     identifier = object_here + t['dep'] + object_target
                     test_list_dict_output.append({"dep": t['dep'], "cur_word_index": (t['id'] - sentence.start),
                                                   "target_word_index": (t['head'] - sentence.start),
@@ -252,7 +243,6 @@ def check_dependency(article: bool):
                 else:
                     continue
     return test_list_dict_output
-    # return all_deps
 def render_svg(svg_file):
@@ -320,7 +310,7 @@ st.markdown("Let’s start by selecting an article text for which we want to gen
             "generated from it might not be optimal, leading to suboptimal performance of the post-processing steps.")
 selected_article = st.selectbox('Select an article or provide your own:',
-                                list_all_article_names())  # index=0, format_func=special_internal_function, key=None, help=None, on_change=None, args=None, kwargs=None, *, disabled=False)
 st.session_state.article_text = fetch_article_contents(selected_article)
 article_text = st.text_area(
     label='Full article text',
@@ -391,8 +381,6 @@ if summarize_button:
         if st.session_state.unchanged_text:
             entity_specific_text = fetch_entity_specific_contents(selected_article)
             soup = BeautifulSoup(entity_specific_text, features="html.parser")
-            HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
-            margin-bottom: 2.5rem">{}</div> """
             st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
     # DEPENDENCY PARSING PART
@@ -429,8 +417,6 @@ if summarize_button:
                 st.write(cur_svg_image, unsafe_allow_html=True)
             dep_specific_text = fetch_dependency_specific_contents(selected_article)
             soup = BeautifulSoup(dep_specific_text, features="html.parser")
-            HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
-            margin-bottom: 2.5rem">{}</div> """
             st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
         else:
             summary_deps = check_dependency(False)

 import os
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
+margin-bottom: 2.5rem">{}</div> """
 @st.experimental_singleton
 def get_sentence_embedding_model():
 def display_summary(summary_content: str):
     st.session_state.summary_output = summary_content
     soup = BeautifulSoup(summary_content, features="html.parser")
     return HTML_WRAPPER.format(soup)
     return list(itertools.chain.from_iterable(all_entities_per_sentence))
 def get_and_compare_entities():
     # article_content = fetch_article_contents(article_name)
     article_content = st.session_state.article_text
     for entity in unmatched_entities:
         summary_content = summary_content.replace(entity, markdown_start_red + entity + markdown_end)
     soup = BeautifulSoup(summary_content, features="html.parser")
     return HTML_WRAPPER.format(soup)
     st.write(get_svg(html), unsafe_allow_html=True)
 def check_dependency(article: bool):
     if article:
         text = st.session_state.article_text
         all_entities = get_all_entities_per_sentence(text)
         # all_entities = st.session_state.entities_per_sentence_summary
     doc = nlp(text)
     tok_l = doc.to_json()['tokens']
     test_list_dict_output = []
     sentences = list(doc.sents)
                                                   "target_word_index": (t['head'] - sentence.start),
                                                   "identifier": identifier, "sentence": str(sentence)})
                 elif object_target in all_entities[i]:
                     identifier = object_here + t['dep'] + object_target
                     test_list_dict_output.append({"dep": t['dep'], "cur_word_index": (t['id'] - sentence.start),
                                                   "target_word_index": (t['head'] - sentence.start),
                 else:
                     continue
     return test_list_dict_output
 def render_svg(svg_file):
             "generated from it might not be optimal, leading to suboptimal performance of the post-processing steps.")
 selected_article = st.selectbox('Select an article or provide your own:',
+                                list_all_article_names())
 st.session_state.article_text = fetch_article_contents(selected_article)
 article_text = st.text_area(
     label='Full article text',
         if st.session_state.unchanged_text:
             entity_specific_text = fetch_entity_specific_contents(selected_article)
             soup = BeautifulSoup(entity_specific_text, features="html.parser")
             st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
     # DEPENDENCY PARSING PART
                 st.write(cur_svg_image, unsafe_allow_html=True)
             dep_specific_text = fetch_dependency_specific_contents(selected_article)
             soup = BeautifulSoup(dep_specific_text, features="html.parser")
             st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
         else:
             summary_deps = check_dependency(False)

custom_renderer.py CHANGED Viewed

@@ -1,6 +1,26 @@
 from typing import Dict
 from PIL import ImageFont
 def get_pil_text_size(text, font_size, font_name):
@@ -21,15 +41,7 @@ def render_arrow(
     i (int): Unique ID, typically arrow index.
     RETURNS (str): Rendered SVG markup.
     """
-    TPL_DEP_ARCS = """
-    <g class="displacy-arrow">
-        <path class="displacy-arc" id="arrow-{id}-{i}" stroke-width="{stroke}px" d="{arc}" fill="none" stroke="red"/>
-        <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
-            <textPath xlink:href="#arrow-{id}-{i}" class="displacy-label" startOffset="50%" side="{label_side}" fill="red" text-anchor="middle">{label}</textPath>
-        </text>
-        <path class="displacy-arrowhead" d="{head}" fill="red"/>
-    </g>
-    """
     arc = get_arc(start + 10, 50, 5, end + 10)
     arrowhead = get_arrowhead(direction, start + 10, 50, end + 10)
     label_side = "right" if direction == "rtl" else "left"
@@ -75,26 +87,15 @@ def get_arrowhead(direction: str, x: int, y: int, end: int) -> str:
 def render_sentence_custom(unmatched_list: Dict, nlp):
-    TPL_DEP_WORDS = """
-  <text class="displacy-token" fill="currentColor" text-anchor="start" y="{y}">
-      <tspan class="displacy-word" fill="currentColor" x="{x}">{text}</tspan>
-      <tspan class="displacy-tag" dy="2em" fill="currentColor" x="{x}">{tag}</tspan>
-  </text>
-  """
-    TPL_DEP_SVG = """
-  <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="{lang}" id="{id}" class="displacy" width="{width}" height="{height}" direction="{dir}" style="max-width: none; height: {height}px; color: {color}; background: {bg}; font-family: {font}; direction: {dir}">{content}</svg>
-  """
     arcs_svg = []
-    #nlp = spacy.load('en_core_web_lg')
     doc = nlp(unmatched_list["sentence"])
     x_value_counter = 10
     index_counter = 0
     svg_words = []
-    #words = unmatched_list["sentence"].split(" ")
-    coords_test = []
     direction_current = "rtl"
     if unmatched_list["cur_word_index"] < unmatched_list["target_word_index"]:
         min_index = unmatched_list["cur_word_index"]
         max_index = unmatched_list["target_word_index"]
@@ -108,13 +109,13 @@ def render_sentence_custom(unmatched_list: Dict, nlp):
         pixel_x_length = get_pil_text_size(word, 16, 'arial.ttf')[0]
         svg_words.append(TPL_DEP_WORDS.format(text=word, tag="", x=x_value_counter, y=70))
         if min_index <= index_counter <= max_index:
-            coords_test.append(x_value_counter)
             if index_counter < max_index - 1:
                 x_value_counter += 50
         index_counter += 1
         x_value_counter += pixel_x_length + 4
-    arcs_svg.append(render_arrow(unmatched_list['dep'], coords_test[0], coords_test[-1], direction_current, i))
     content = "".join(svg_words) + "".join(arcs_svg)
@@ -130,5 +131,3 @@ def render_sentence_custom(unmatched_list: Dict, nlp):
         lang="en",
     )
     return full_svg

 from typing import Dict
 from PIL import ImageFont
+TPL_DEP_WORDS = """
+<text class="displacy-token" fill="currentColor" text-anchor="start" y="{y}">
+    <tspan class="displacy-word" fill="currentColor" x="{x}">{text}</tspan>
+    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="{x}">{tag}</tspan>
+</text>
+"""
+TPL_DEP_SVG = """
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="{lang}" id="{id}" class="displacy" width="{width}" height="{height}" direction="{dir}" style="max-width: none; height: {height}px; color: {color}; background: {bg}; font-family: {font}; direction: {dir}">{content}</svg>
+"""
+TPL_DEP_ARCS = """
+<g class="displacy-arrow">
+    <path class="displacy-arc" id="arrow-{id}-{i}" stroke-width="{stroke}px" d="{arc}" fill="none" stroke="red"/>
+    <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
+        <textPath xlink:href="#arrow-{id}-{i}" class="displacy-label" startOffset="50%" side="{label_side}" fill="red" text-anchor="middle">{label}</textPath>
+    </text>
+    <path class="displacy-arrowhead" d="{head}" fill="red"/>
+</g>
+"""
 def get_pil_text_size(text, font_size, font_name):
     i (int): Unique ID, typically arrow index.
     RETURNS (str): Rendered SVG markup.
     """
     arc = get_arc(start + 10, 50, 5, end + 10)
     arrowhead = get_arrowhead(direction, start + 10, 50, end + 10)
     label_side = "right" if direction == "rtl" else "left"
 def render_sentence_custom(unmatched_list: Dict, nlp):
     arcs_svg = []
     doc = nlp(unmatched_list["sentence"])
     x_value_counter = 10
     index_counter = 0
     svg_words = []
+    words_under_arc = []
     direction_current = "rtl"
     if unmatched_list["cur_word_index"] < unmatched_list["target_word_index"]:
         min_index = unmatched_list["cur_word_index"]
         max_index = unmatched_list["target_word_index"]
         pixel_x_length = get_pil_text_size(word, 16, 'arial.ttf')[0]
         svg_words.append(TPL_DEP_WORDS.format(text=word, tag="", x=x_value_counter, y=70))
         if min_index <= index_counter <= max_index:
+            words_under_arc.append(x_value_counter)
             if index_counter < max_index - 1:
                 x_value_counter += 50
         index_counter += 1
         x_value_counter += pixel_x_length + 4
+    arcs_svg.append(render_arrow(unmatched_list['dep'], words_under_arc[0], words_under_arc[-1], direction_current, i))
     content = "".join(svg_words) + "".join(arcs_svg)
         lang="en",
     )
     return full_svg