Update README.md
Browse files
README.md
CHANGED
@@ -34,7 +34,10 @@ def get_keywords(
|
|
34 |
# Additional keywords (might also indicate the end of a keyword sequence).
|
35 |
# You can merge these with the beginning keyword `B-KWD`.
|
36 |
2: "I-KWD",
|
37 |
-
}
|
|
|
|
|
|
|
38 |
):
|
39 |
# Initialize the tokenizer and model.
|
40 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
@@ -77,11 +80,11 @@ def get_keywords(
|
|
77 |
"score": float(probabilities[i, idx])
|
78 |
}
|
79 |
for i, idx in enumerate(prediction)
|
80 |
-
if idx == 1 or idx == 2
|
81 |
]
|
82 |
```
|
83 |
|
84 |
-
Choose a text and use the model on it. For example, I've chosen to use [this](https://
|
85 |
Then, you can call `get_keywords` on it and extract its keywords:
|
86 |
```python
|
87 |
# Reading the text from a file, since it is an article, and the text is large.
|
@@ -94,7 +97,11 @@ pprint(keywords)
|
|
94 |
```
|
95 |
```sh
|
96 |
Keywords:
|
97 |
-
[{'entity': '
|
98 |
-
{'entity': '
|
99 |
-
{'entity': '
|
|
|
|
|
|
|
|
|
100 |
```
|
|
|
34 |
# Additional keywords (might also indicate the end of a keyword sequence).
|
35 |
# You can merge these with the beginning keyword `B-KWD`.
|
36 |
2: "I-KWD",
|
37 |
+
},
|
38 |
+
# Probability threshold based on which the keywords will be accepted.
|
39 |
+
# If their probability is not greater than `threshold`, they won't be added to the list of keywords.
|
40 |
+
threshold=0.50
|
41 |
):
|
42 |
# Initialize the tokenizer and model.
|
43 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
|
|
80 |
"score": float(probabilities[i, idx])
|
81 |
}
|
82 |
for i, idx in enumerate(prediction)
|
83 |
+
if (idx == 1 or idx == 2) and float(probabilities[i, idx]) > threshold
|
84 |
]
|
85 |
```
|
86 |
|
87 |
+
Choose a text and use the model on it. For example, I've chosen to use [this](https://novini.bg/biznes/biznes_tehnologii/781108) article.
|
88 |
Then, you can call `get_keywords` on it and extract its keywords:
|
89 |
```python
|
90 |
# Reading the text from a file, since it is an article, and the text is large.
|
|
|
97 |
```
|
98 |
```sh
|
99 |
Keywords:
|
100 |
+
[{'entity': 'Туитър', 'entity_group': 'B-KWD', 'score': 0.9278278946876526},
|
101 |
+
{'entity': 'Илон', 'entity_group': 'B-KWD', 'score': 0.5862686634063721},
|
102 |
+
{'entity': 'Мъск', 'entity_group': 'B-KWD', 'score': 0.5289096832275391},
|
103 |
+
{'entity': 'изпълнителен',
|
104 |
+
'entity_group': 'B-KWD',
|
105 |
+
'score': 0.679943323135376},
|
106 |
+
{'entity': 'директор', 'entity_group': 'I-KWD', 'score': 0.6161141991615295}]
|
107 |
```
|