Update README.md
Browse files
README.md
CHANGED
@@ -15,10 +15,10 @@ Fine-tuned on a Bulgarian subset of [wikiann](https://huggingface.co/datasets/wi
|
|
15 |
## Usage
|
16 |
Import the libraries:
|
17 |
```python
|
18 |
-
from typing import List
|
19 |
|
20 |
import torch
|
21 |
-
from transformers import AutoModelForTokenClassification, AutoTokenizer
|
22 |
```
|
23 |
|
24 |
First, define the following helper functions; they are needed because the model uses a subword tokenizer:
|
@@ -32,7 +32,7 @@ def predict(
|
|
32 |
1: "B-PER", 2: "I-PER",
|
33 |
3: "B-ORG", 4: "I-ORG",
|
34 |
5: "B-LOC", 6: "I-LOC"
|
35 |
-
}):
|
36 |
tokens_data = tokenizer(text)
|
37 |
tokens = tokenizer.convert_ids_to_tokens(tokens_data["input_ids"])
|
38 |
words = subwords_to_words(tokens)
|
@@ -75,7 +75,7 @@ def subwords_to_words(tokens: List[str]) -> List[str]:
|
|
75 |
return out_tokens
|
76 |
|
77 |
|
78 |
-
def merge_words_and_predictions(words, entities):
|
79 |
result = []
|
80 |
curr_word = []
|
81 |
|
@@ -85,7 +85,7 @@ def merge_words_and_predictions(words, entities):
|
|
85 |
curr_word = " ".join(curr_word)
|
86 |
result.append({
|
87 |
"word": curr_word,
|
88 |
-
"
|
89 |
})
|
90 |
curr_word = [word]
|
91 |
else:
|
@@ -99,7 +99,7 @@ def merge_words_and_predictions(words, entities):
|
|
99 |
curr_word = " ".join(curr_word)
|
100 |
result.append({
|
101 |
"word": curr_word,
|
102 |
-
"
|
103 |
})
|
104 |
|
105 |
curr_word = []
|
|
|
15 |
## Usage
|
16 |
Import the libraries:
|
17 |
```python
|
18 |
+
from typing import List, Dict
|
19 |
|
20 |
import torch
|
21 |
+
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline
|
22 |
```
|
23 |
|
24 |
First, define the following helper functions; they are needed because the model uses a subword tokenizer:
|
|
|
32 |
1: "B-PER", 2: "I-PER",
|
33 |
3: "B-ORG", 4: "I-ORG",
|
34 |
5: "B-LOC", 6: "I-LOC"
|
35 |
+
}) -> List[Dict[str, str]]:
|
36 |
tokens_data = tokenizer(text)
|
37 |
tokens = tokenizer.convert_ids_to_tokens(tokens_data["input_ids"])
|
38 |
words = subwords_to_words(tokens)
|
|
|
75 |
return out_tokens
|
76 |
|
77 |
|
78 |
+
def merge_words_and_predictions(words: List[str], entities: List[str]) -> List[Dict[str, str]]:
|
79 |
result = []
|
80 |
curr_word = []
|
81 |
|
|
|
85 |
curr_word = " ".join(curr_word)
|
86 |
result.append({
|
87 |
"word": curr_word,
|
88 |
+
"entity_group": entities[i][2:]
|
89 |
})
|
90 |
curr_word = [word]
|
91 |
else:
|
|
|
99 |
curr_word = " ".join(curr_word)
|
100 |
result.append({
|
101 |
"word": curr_word,
|
102 |
+
"entity_group": entities[i][2:]
|
103 |
})
|
104 |
|
105 |
curr_word = []
|