auhide committed on
Commit
6abf77b
1 Parent(s): b93da0f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -6
README.md CHANGED
@@ -15,10 +15,10 @@ Fine-tuned on a Bulgarian subset of [wikiann](https://huggingface.co/datasets/wi
15
  ## Usage
16
  Import the libraries:
17
  ```python
18
- from typing import List
19
 
20
  import torch
21
- from transformers import AutoModelForTokenClassification, AutoTokenizer
22
  ```
23
 
24
  Firstly, you'll have to define these methods, since we are using a subword Tokenizer:
@@ -32,7 +32,7 @@ def predict(
32
  1: "B-PER", 2: "I-PER",
33
  3: "B-ORG", 4: "I-ORG",
34
  5: "B-LOC", 6: "I-LOC"
35
- }):
36
  tokens_data = tokenizer(text)
37
  tokens = tokenizer.convert_ids_to_tokens(tokens_data["input_ids"])
38
  words = subwords_to_words(tokens)
@@ -75,7 +75,7 @@ def subwords_to_words(tokens: List[str]) -> List[str]:
75
  return out_tokens
76
 
77
 
78
- def merge_words_and_predictions(words, entities):
79
  result = []
80
  curr_word = []
81
 
@@ -85,7 +85,7 @@ def merge_words_and_predictions(words, entities):
85
  curr_word = " ".join(curr_word)
86
  result.append({
87
  "word": curr_word,
88
- "entity": entities[i][2:]
89
  })
90
  curr_word = [word]
91
  else:
@@ -99,7 +99,7 @@ def merge_words_and_predictions(words, entities):
99
  curr_word = " ".join(curr_word)
100
  result.append({
101
  "word": curr_word,
102
- "entity": entities[i][2:]
103
  })
104
 
105
  curr_word = []
 
15
  ## Usage
16
  Import the libraries:
17
  ```python
18
+ from typing import List, Dict
19
 
20
  import torch
21
+ from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline
22
  ```
23
 
24
  Firstly, you'll have to define these methods, since we are using a subword Tokenizer:
 
32
  1: "B-PER", 2: "I-PER",
33
  3: "B-ORG", 4: "I-ORG",
34
  5: "B-LOC", 6: "I-LOC"
35
+ }) -> List[Dict[str, str]]:
36
  tokens_data = tokenizer(text)
37
  tokens = tokenizer.convert_ids_to_tokens(tokens_data["input_ids"])
38
  words = subwords_to_words(tokens)
 
75
  return out_tokens
76
 
77
 
78
+ def merge_words_and_predictions(words: List[str], entities: List[str]) -> List[Dict[str, str]]:
79
  result = []
80
  curr_word = []
81
 
 
85
  curr_word = " ".join(curr_word)
86
  result.append({
87
  "word": curr_word,
88
+ "entity_group": entities[i][2:]
89
  })
90
  curr_word = [word]
91
  else:
 
99
  curr_word = " ".join(curr_word)
100
  result.append({
101
  "word": curr_word,
102
+ "entity_group": entities[i][2:]
103
  })
104
 
105
  curr_word = []