Spaces:
Runtime error
Runtime error
Merge pull request #6 from pleonova/pleonova-patch-1
Browse files
models.py
CHANGED
@@ -15,14 +15,12 @@ def create_nest_sentences(document:str, token_max_length = 1024):
|
|
15 |
for sentence in re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', document.replace("\n", ' ')):
|
16 |
tokens_in_sentence = tokenizer(str(sentence), truncation=False, padding=False)[0] # hugging face transformer tokenizer
|
17 |
length += len(tokens_in_sentence)
|
18 |
-
|
19 |
if length < token_max_length:
|
20 |
sent.append(sentence)
|
21 |
else:
|
22 |
nested.append(sent)
|
23 |
sent = [sentence]
|
24 |
length = 0
|
25 |
-
|
26 |
if sent:
|
27 |
nested.append(sent)
|
28 |
return nested
|
@@ -42,8 +40,6 @@ def keyword_gen(kw_model, sequence:str):
|
|
42 |
top_n=10)
|
43 |
return keywords
|
44 |
|
45 |
-
|
46 |
-
|
47 |
# Reference: https://huggingface.co/facebook/bart-large-mnli
|
48 |
@st.cache_resource
|
49 |
def load_summary_model():
|
@@ -69,7 +65,6 @@ def summarizer_gen(summarizer, sequence:str, maximum_tokens:int, minimum_tokens:
|
|
69 |
no_repeat_ngram_size=3)
|
70 |
return output[0].get('summary_text')
|
71 |
|
72 |
-
|
73 |
# # Reference: https://www.datatrigger.org/post/nlp_hugging_face/
|
74 |
# # Custom summarization pipeline (to handle long articles)
|
75 |
# def summarize(text, minimum_length_of_summary = 100):
|
@@ -80,7 +75,6 @@ def summarizer_gen(summarizer, sequence:str, maximum_tokens:int, minimum_tokens:
|
|
80 |
# # Untokenize
|
81 |
# return([tokenizer_bart.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids][0])
|
82 |
|
83 |
-
|
84 |
# Reference: https://huggingface.co/spaces/team-zero-shot-nli/zero-shot-nli/blob/main/utils.py
|
85 |
@st.cache_resource
|
86 |
def load_model():
|
@@ -93,4 +87,3 @@ def load_model():
|
|
93 |
def classifier_zero(classifier, sequence:str, labels:list, multi_class:bool):
    """Run a zero-shot classifier on one sequence and return labels and scores.

    Args:
        classifier: Callable invoked as
            ``classifier(sequence, labels, multi_label=...)``. Its result must
            support lookup by the keys ``'labels'`` and ``'scores'``.
        sequence: Text to classify; passed to the classifier unchanged.
        labels: Candidate labels; passed to the classifier unchanged.
        multi_class: Forwarded as the classifier's ``multi_label`` keyword.

    Returns:
        The 2-tuple ``(outputs['labels'], outputs['scores'])`` taken from the
        classifier's result.
    """
    # The public parameter is named ``multi_class`` but the classifier is
    # called with the keyword ``multi_label``; the value is forwarded as-is.
    outputs = classifier(sequence, labels, multi_label=multi_class)
    return outputs['labels'], outputs['scores']
|
96 |
-
|
|
|
15 |
for sentence in re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', document.replace("\n", ' ')):
|
16 |
tokens_in_sentence = tokenizer(str(sentence), truncation=False, padding=False)[0] # hugging face transformer tokenizer
|
17 |
length += len(tokens_in_sentence)
|
|
|
18 |
if length < token_max_length:
|
19 |
sent.append(sentence)
|
20 |
else:
|
21 |
nested.append(sent)
|
22 |
sent = [sentence]
|
23 |
length = 0
|
|
|
24 |
if sent:
|
25 |
nested.append(sent)
|
26 |
return nested
|
|
|
40 |
top_n=10)
|
41 |
return keywords
|
42 |
|
|
|
|
|
43 |
# Reference: https://huggingface.co/facebook/bart-large-mnli
|
44 |
@st.cache_resource
|
45 |
def load_summary_model():
|
|
|
65 |
no_repeat_ngram_size=3)
|
66 |
return output[0].get('summary_text')
|
67 |
|
|
|
68 |
# # Reference: https://www.datatrigger.org/post/nlp_hugging_face/
|
69 |
# # Custom summarization pipeline (to handle long articles)
|
70 |
# def summarize(text, minimum_length_of_summary = 100):
|
|
|
75 |
# # Untokenize
|
76 |
# return([tokenizer_bart.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids][0])
|
77 |
|
|
|
78 |
# Reference: https://huggingface.co/spaces/team-zero-shot-nli/zero-shot-nli/blob/main/utils.py
|
79 |
@st.cache_resource
|
80 |
def load_model():
|
|
|
87 |
def classifier_zero(classifier, sequence:str, labels:list, multi_class:bool):
    """Run a zero-shot classifier on one sequence and return labels and scores.

    Args:
        classifier: Callable invoked as
            ``classifier(sequence, labels, multi_label=...)``. Its result must
            support lookup by the keys ``'labels'`` and ``'scores'``.
        sequence: Text to classify; passed to the classifier unchanged.
        labels: Candidate labels; passed to the classifier unchanged.
        multi_class: Forwarded as the classifier's ``multi_label`` keyword.

    Returns:
        The 2-tuple ``(outputs['labels'], outputs['scores'])`` taken from the
        classifier's result.
    """
    # The public parameter is named ``multi_class`` but the classifier is
    # called with the keyword ``multi_label``; the value is forwarded as-is.
    outputs = classifier(sequence, labels, multi_label=multi_class)
    return outputs['labels'], outputs['scores']
|
|