medmediani committed on
Commit
37ec07c
1 Parent(s): af2b7f7

Changed the model path

Browse files
.ipynb_checkpoints/app-checkpoint.py CHANGED
@@ -20,9 +20,9 @@ def generate_kws(context,num_kw, kw_ngs):
20
  kw_ngs=int(kw_ngs)
21
  except ValueError:
22
  kw_ngs=None
23
- csv_encoded.truncate(0)
24
- writer.writerow([context])
25
- context=csv_encoded.getvalue()
26
  return kw_ex.extract(context, num_kw, kw_ngs) or ""
27
  else:
28
  raise gr.Error("Please enter text in inputbox!!!!")
 
20
  kw_ngs=int(kw_ngs)
21
  except ValueError:
22
  kw_ngs=None
23
+ #csv_encoded.truncate(0)
24
+ #writer.writerow([context])
25
+ #context=csv_encoded.getvalue()
26
  return kw_ex.extract(context, num_kw, kw_ngs) or ""
27
  else:
28
  raise gr.Error("Please enter text in inputbox!!!!")
.ipynb_checkpoints/kwextractor-checkpoint.py CHANGED
@@ -26,15 +26,26 @@ class KeyWordExtractor():
26
 
27
  #self.kw_model.to(self.device)
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def extract(self, ctxt, nkws=None, max_kw_ngs=None):
30
  nkws= nkws if nkws is not None else self.NKW
31
  max_kw_ngs=max_kw_ngs if max_kw_ngs is not None else self.MAX_KW_NGS
32
 
33
- kw=self.kw_model.extract_keywords(ctxt, keyphrase_ngram_range=(1, max_kw_ngs),
34
- top_n=nkws,
35
- #use_maxsum=True,nr_candidates=20, top_n=5,
36
- #use_mmr=True,
37
- #diversity=0.8,
38
- stop_words=None)
39
  return ", ".join(w for w,_ in kw)
40
 
 
26
 
27
  #self.kw_model.to(self.device)
28
 
29
+ def _extract_by_paragraph(self, ctxt, nkws=None, max_kw_ngs=None):
30
+ paragraphs=map(str.strip,ctxt.split("\n"))
31
+ kws=[]
32
+ for paragraph in paragraphs:
33
+ if paragraph:
34
+ kws.extend(self.kw_model.extract_keywords(paragraph, keyphrase_ngram_range=(1, max_kw_ngs),
35
+ top_n=nkws,
36
+ #use_maxsum=True,nr_candidates=20, top_n=5,
37
+ #use_mmr=True,
38
+ #diversity=0.8,
39
+ stop_words=None)
40
+ )
41
+
42
+ return sorted(kws, key=lambda x: x[1],reverse=True)[:nkws]
43
+
44
  def extract(self, ctxt, nkws=None, max_kw_ngs=None):
45
  nkws= nkws if nkws is not None else self.NKW
46
  max_kw_ngs=max_kw_ngs if max_kw_ngs is not None else self.MAX_KW_NGS
47
 
48
+ #Since we are taking only 512 tokens, let's do by paragraph
49
+ kw=self._extract_by_paragraph(ctxt,nkw,max_kw_ngs)
 
 
 
 
50
  return ", ".join(w for w,_ in kw)
51
 
app.py CHANGED
@@ -20,9 +20,9 @@ def generate_kws(context,num_kw, kw_ngs):
20
  kw_ngs=int(kw_ngs)
21
  except ValueError:
22
  kw_ngs=None
23
- csv_encoded.truncate(0)
24
- writer.writerow([context])
25
- context=csv_encoded.getvalue()
26
  return kw_ex.extract(context, num_kw, kw_ngs) or ""
27
  else:
28
  raise gr.Error("Please enter text in inputbox!!!!")
 
20
  kw_ngs=int(kw_ngs)
21
  except ValueError:
22
  kw_ngs=None
23
+ #csv_encoded.truncate(0)
24
+ #writer.writerow([context])
25
+ #context=csv_encoded.getvalue()
26
  return kw_ex.extract(context, num_kw, kw_ngs) or ""
27
  else:
28
  raise gr.Error("Please enter text in inputbox!!!!")
kwextractor.py CHANGED
@@ -26,15 +26,26 @@ class KeyWordExtractor():
26
 
27
  #self.kw_model.to(self.device)
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def extract(self, ctxt, nkws=None, max_kw_ngs=None):
30
  nkws= nkws if nkws is not None else self.NKW
31
  max_kw_ngs=max_kw_ngs if max_kw_ngs is not None else self.MAX_KW_NGS
32
 
33
- kw=self.kw_model.extract_keywords(ctxt, keyphrase_ngram_range=(1, max_kw_ngs),
34
- top_n=nkws,
35
- #use_maxsum=True,nr_candidates=20, top_n=5,
36
- #use_mmr=True,
37
- #diversity=0.8,
38
- stop_words=None)
39
  return ", ".join(w for w,_ in kw)
40
 
 
26
 
27
  #self.kw_model.to(self.device)
28
 
29
+ def _extract_by_paragraph(self, ctxt, nkws=None, max_kw_ngs=None):
30
+ paragraphs=map(str.strip,ctxt.split("\n"))
31
+ kws=[]
32
+ for paragraph in paragraphs:
33
+ if paragraph:
34
+ kws.extend(self.kw_model.extract_keywords(paragraph, keyphrase_ngram_range=(1, max_kw_ngs),
35
+ top_n=nkws,
36
+ #use_maxsum=True,nr_candidates=20, top_n=5,
37
+ #use_mmr=True,
38
+ #diversity=0.8,
39
+ stop_words=None)
40
+ )
41
+
42
+ return sorted(kws, key=lambda x: x[1],reverse=True)[:nkws]
43
+
44
  def extract(self, ctxt, nkws=None, max_kw_ngs=None):
45
  nkws= nkws if nkws is not None else self.NKW
46
  max_kw_ngs=max_kw_ngs if max_kw_ngs is not None else self.MAX_KW_NGS
47
 
48
+ #Since we are taking only 512 tokens, let's do by paragraph
49
+ kw=self._extract_by_paragraph(ctxt,nkw,max_kw_ngs)
 
 
 
 
50
  return ", ".join(w for w,_ in kw)
51