Spaces:
Running
Running
medmediani
committed on
Commit
•
37ec07c
1
Parent(s):
af2b7f7
Changed the model path
Browse files
- .ipynb_checkpoints/app-checkpoint.py +3 -3
- .ipynb_checkpoints/kwextractor-checkpoint.py +17 -6
- app.py +3 -3
- kwextractor.py +17 -6
.ipynb_checkpoints/app-checkpoint.py
CHANGED
@@ -20,9 +20,9 @@ def generate_kws(context,num_kw, kw_ngs):
|
|
20 |
kw_ngs=int(kw_ngs)
|
21 |
except ValueError:
|
22 |
kw_ngs=None
|
23 |
-
csv_encoded.truncate(0)
|
24 |
-
writer.writerow([context])
|
25 |
-
context=csv_encoded.getvalue()
|
26 |
return kw_ex.extract(context, num_kw, kw_ngs) or ""
|
27 |
else:
|
28 |
raise gr.Error("Please enter text in inputbox!!!!")
|
|
|
20 |
kw_ngs=int(kw_ngs)
|
21 |
except ValueError:
|
22 |
kw_ngs=None
|
23 |
+
#csv_encoded.truncate(0)
|
24 |
+
#writer.writerow([context])
|
25 |
+
#context=csv_encoded.getvalue()
|
26 |
return kw_ex.extract(context, num_kw, kw_ngs) or ""
|
27 |
else:
|
28 |
raise gr.Error("Please enter text in inputbox!!!!")
|
.ipynb_checkpoints/kwextractor-checkpoint.py
CHANGED
@@ -26,15 +26,26 @@ class KeyWordExtractor():
|
|
26 |
|
27 |
#self.kw_model.to(self.device)
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
def extract(self, ctxt, nkws=None, max_kw_ngs=None):
|
30 |
nkws= nkws if nkws is not None else self.NKW
|
31 |
max_kw_ngs=max_kw_ngs if max_kw_ngs is not None else self.MAX_KW_NGS
|
32 |
|
33 |
-
|
34 |
-
|
35 |
-
#use_maxsum=True,nr_candidates=20, top_n=5,
|
36 |
-
#use_mmr=True,
|
37 |
-
#diversity=0.8,
|
38 |
-
stop_words=None)
|
39 |
return ", ".join(w for w,_ in kw)
|
40 |
|
|
|
26 |
|
27 |
#self.kw_model.to(self.device)
|
28 |
|
29 |
+
def _extract_by_paragraph(self, ctxt, nkws=None, max_kw_ngs=None):
|
30 |
+
paragraphs=map(str.strip,ctxt.split("\n"))
|
31 |
+
kws=[]
|
32 |
+
for paragraph in paragraphs:
|
33 |
+
if paragraph:
|
34 |
+
kws.extend(self.kw_model.extract_keywords(paragraph, keyphrase_ngram_range=(1, max_kw_ngs),
|
35 |
+
top_n=nkws,
|
36 |
+
#use_maxsum=True,nr_candidates=20, top_n=5,
|
37 |
+
#use_mmr=True,
|
38 |
+
#diversity=0.8,
|
39 |
+
stop_words=None)
|
40 |
+
)
|
41 |
+
|
42 |
+
return sorted(kws, key=lambda x: x[1],reverse=True)[:nkws]
|
43 |
+
|
44 |
def extract(self, ctxt, nkws=None, max_kw_ngs=None):
|
45 |
nkws= nkws if nkws is not None else self.NKW
|
46 |
max_kw_ngs=max_kw_ngs if max_kw_ngs is not None else self.MAX_KW_NGS
|
47 |
|
48 |
+
#Since we are taking only 512 tokens, let's do by paragraph
|
49 |
+
kw=self._extract_by_paragraph(ctxt,nkw,max_kw_ngs)
|
|
|
|
|
|
|
|
|
50 |
return ", ".join(w for w,_ in kw)
|
51 |
|
app.py
CHANGED
@@ -20,9 +20,9 @@ def generate_kws(context,num_kw, kw_ngs):
|
|
20 |
kw_ngs=int(kw_ngs)
|
21 |
except ValueError:
|
22 |
kw_ngs=None
|
23 |
-
csv_encoded.truncate(0)
|
24 |
-
writer.writerow([context])
|
25 |
-
context=csv_encoded.getvalue()
|
26 |
return kw_ex.extract(context, num_kw, kw_ngs) or ""
|
27 |
else:
|
28 |
raise gr.Error("Please enter text in inputbox!!!!")
|
|
|
20 |
kw_ngs=int(kw_ngs)
|
21 |
except ValueError:
|
22 |
kw_ngs=None
|
23 |
+
#csv_encoded.truncate(0)
|
24 |
+
#writer.writerow([context])
|
25 |
+
#context=csv_encoded.getvalue()
|
26 |
return kw_ex.extract(context, num_kw, kw_ngs) or ""
|
27 |
else:
|
28 |
raise gr.Error("Please enter text in inputbox!!!!")
|
kwextractor.py
CHANGED
@@ -26,15 +26,26 @@ class KeyWordExtractor():
|
|
26 |
|
27 |
#self.kw_model.to(self.device)
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
def extract(self, ctxt, nkws=None, max_kw_ngs=None):
|
30 |
nkws= nkws if nkws is not None else self.NKW
|
31 |
max_kw_ngs=max_kw_ngs if max_kw_ngs is not None else self.MAX_KW_NGS
|
32 |
|
33 |
-
|
34 |
-
|
35 |
-
#use_maxsum=True,nr_candidates=20, top_n=5,
|
36 |
-
#use_mmr=True,
|
37 |
-
#diversity=0.8,
|
38 |
-
stop_words=None)
|
39 |
return ", ".join(w for w,_ in kw)
|
40 |
|
|
|
26 |
|
27 |
#self.kw_model.to(self.device)
|
28 |
|
29 |
+
def _extract_by_paragraph(self, ctxt, nkws=None, max_kw_ngs=None):
|
30 |
+
paragraphs=map(str.strip,ctxt.split("\n"))
|
31 |
+
kws=[]
|
32 |
+
for paragraph in paragraphs:
|
33 |
+
if paragraph:
|
34 |
+
kws.extend(self.kw_model.extract_keywords(paragraph, keyphrase_ngram_range=(1, max_kw_ngs),
|
35 |
+
top_n=nkws,
|
36 |
+
#use_maxsum=True,nr_candidates=20, top_n=5,
|
37 |
+
#use_mmr=True,
|
38 |
+
#diversity=0.8,
|
39 |
+
stop_words=None)
|
40 |
+
)
|
41 |
+
|
42 |
+
return sorted(kws, key=lambda x: x[1],reverse=True)[:nkws]
|
43 |
+
|
44 |
def extract(self, ctxt, nkws=None, max_kw_ngs=None):
|
45 |
nkws= nkws if nkws is not None else self.NKW
|
46 |
max_kw_ngs=max_kw_ngs if max_kw_ngs is not None else self.MAX_KW_NGS
|
47 |
|
48 |
+
#Since we are taking only 512 tokens, let's do by paragraph
|
49 |
+
kw=self._extract_by_paragraph(ctxt,nkw,max_kw_ngs)
|
|
|
|
|
|
|
|
|
50 |
return ", ".join(w for w,_ in kw)
|
51 |
|