auhide
/

keybert-bg

@@ -34,7 +34,10 @@ def get_keywords(
         # Additional keywords (might also indicate the end of a keyword sequence).
         # You can merge these with the beginning keyword `B-KWD`.
         2: "I-KWD",
-    }
 ):
     # Initialize the tokenizer and model.
     tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -77,11 +80,11 @@ def get_keywords(
             "score": float(probabilities[i, idx])
         }
         for i, idx in enumerate(prediction)
-        if idx == 1 or idx == 2
     ]
 ```
-Choose a text and use the model on it. For example, I've chosen to use [this](https://www.24chasa.bg/bulgaria/article/14466321) article.
 Then, you can call `get_keywords` on it and extract its keywords:
 ```python
 # Reading the text from a file, since it is an article, and the text is large.
@@ -94,7 +97,11 @@ pprint(keywords)
 ```
 ```sh
 Keywords:
-[{'entity': 'Пловдив', 'entity_group': 'B-KWD', 'score': 0.7669068574905396},
- {'entity': 'Шофьорът', 'entity_group': 'B-KWD', 'score': 0.9119699597358704},
- {'entity': 'катастрофа', 'entity_group': 'B-KWD', 'score': 0.8441269993782043}]
 ```

         # Additional keywords (might also indicate the end of a keyword sequence).
         # You can merge these with the beginning keyword `B-KWD`.
         2: "I-KWD",
+    },
+    # Probability threshold based on which the keywords will be accepted.
+    # If their probability does not exceed `threshold`, they won't be added to the list of keywords.
+    threshold=0.50
 ):
     # Initialize the tokenizer and model.
     tokenizer = AutoTokenizer.from_pretrained(model_id)
             "score": float(probabilities[i, idx])
         }
         for i, idx in enumerate(prediction)
+        if (idx == 1 or idx == 2) and float(probabilities[i, idx]) > threshold
     ]
 ```
+Choose a text and use the model on it. For example, I've chosen to use [this](https://novini.bg/biznes/biznes_tehnologii/781108) article.
 Then, you can call `get_keywords` on it and extract its keywords:
 ```python
 # Reading the text from a file, since it is an article, and the text is large.
 ```
 ```sh
 Keywords:
+[{'entity': 'Туитър', 'entity_group': 'B-KWD', 'score': 0.9278278946876526},
+ {'entity': 'Илон', 'entity_group': 'B-KWD', 'score': 0.5862686634063721},
+ {'entity': 'Мъск', 'entity_group': 'B-KWD', 'score': 0.5289096832275391},
+ {'entity': 'изпълнителен',
+  'entity_group': 'B-KWD',
+  'score': 0.679943323135376},
+ {'entity': 'директор', 'entity_group': 'I-KWD', 'score': 0.6161141991615295}]
 ```