auhide committed on
Commit
9d2d46c
1 Parent(s): 84b9668

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +13 -6
README.md CHANGED
@@ -34,7 +34,10 @@ def get_keywords(
34
  # Additional keywords (might also indicate the end of a keyword sequence).
35
  # You can merge these with the beginning keyword `B-KWD`.
36
  2: "I-KWD",
37
- }
 
 
 
38
  ):
39
  # Initialize the tokenizer and model.
40
  tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -77,11 +80,11 @@ def get_keywords(
77
  "score": float(probabilities[i, idx])
78
  }
79
  for i, idx in enumerate(prediction)
80
- if idx == 1 or idx == 2
81
  ]
82
  ```
83
 
84
- Choose a text and use the model on it. For example, I've chosen to use [this](https://www.24chasa.bg/bulgaria/article/14466321) article.
85
  Then, you can call `get_keywords` on it and extract its keywords:
86
  ```python
87
  # Reading the text from a file, since it is an article, and the text is large.
@@ -94,7 +97,11 @@ pprint(keywords)
94
  ```
95
  ```sh
96
  Keywords:
97
- [{'entity': 'Пловдив', 'entity_group': 'B-KWD', 'score': 0.7669068574905396},
98
- {'entity': 'Шофьорът', 'entity_group': 'B-KWD', 'score': 0.9119699597358704},
99
- {'entity': 'катастрофа', 'entity_group': 'B-KWD', 'score': 0.8441269993782043}]
 
 
 
 
100
  ```
 
34
  # Additional keywords (might also indicate the end of a keyword sequence).
35
  # You can merge these with the beginning keyword `B-KWD`.
36
  2: "I-KWD",
37
+ },
38
+ # Probability threshold based on which the keywords will be accepted.
39
+ # If their probability is not greater than `threshold`, they won't be added to the list of keywords.
40
+ threshold=0.50
41
  ):
42
  # Initialize the tokenizer and model.
43
  tokenizer = AutoTokenizer.from_pretrained(model_id)
 
80
  "score": float(probabilities[i, idx])
81
  }
82
  for i, idx in enumerate(prediction)
83
+ if (idx == 1 or idx == 2) and float(probabilities[i, idx]) > threshold
84
  ]
85
  ```
86
 
87
+ Choose a text and use the model on it. For example, I've chosen to use [this](https://novini.bg/biznes/biznes_tehnologii/781108) article.
88
  Then, you can call `get_keywords` on it and extract its keywords:
89
  ```python
90
  # Reading the text from a file, since it is an article, and the text is large.
 
97
  ```
98
  ```sh
99
  Keywords:
100
+ [{'entity': 'Туитър', 'entity_group': 'B-KWD', 'score': 0.9278278946876526},
101
+ {'entity': 'Илон', 'entity_group': 'B-KWD', 'score': 0.5862686634063721},
102
+ {'entity': 'Мъск', 'entity_group': 'B-KWD', 'score': 0.5289096832275391},
103
+ {'entity': 'изпълнителен',
104
+ 'entity_group': 'B-KWD',
105
+ 'score': 0.679943323135376},
106
+ {'entity': 'директор', 'entity_group': 'I-KWD', 'score': 0.6161141991615295}]
107
  ```