Update spaCy pipeline
Browse files- README.md +9 -9
- attribute_ruler/patterns +0 -0
- config.cfg +0 -2
- en_skillner-any-py3-none-any.whl +2 -2
- meta.json +33 -33
- ner/model +2 -2
- ner/moves +1 -1
- tagger/cfg +0 -1
- vocab/lookups.bin +2 -2
- vocab/strings.json +0 -0
README.md
CHANGED
@@ -14,25 +14,25 @@ model-index:
|
|
14 |
metrics:
|
15 |
- name: NER Precision
|
16 |
type: precision
|
17 |
-
value: 0.
|
18 |
- name: NER Recall
|
19 |
type: recall
|
20 |
-
value: 0.
|
21 |
- name: NER F Score
|
22 |
type: f_score
|
23 |
-
value: 0.
|
24 |
---
|
25 |
A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT from job adverts.
|
26 |
|
27 |
| Feature | Description |
|
28 |
| --- | --- |
|
29 |
| **Name** | `en_skillner` |
|
30 |
-
| **Version** | `3.
|
31 |
-
| **spaCy** | `>=3.
|
32 |
| **Default Pipeline** | `tok2vec`, `tagger`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
|
33 |
| **Components** | `tok2vec`, `tagger`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
|
34 |
| **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
|
35 |
-
| **Sources** | [OntoNotes 5](https://catalog.ldc.upenn.edu/LDC2013T19) (Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston)<br
|
36 |
| **License** | `MIT` |
|
37 |
| **Author** | [nestauk](https://explosion.ai) |
|
38 |
|
@@ -52,6 +52,6 @@ A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT
|
|
52 |
|
53 |
| Type | Score |
|
54 |
| --- | --- |
|
55 |
-
| `ENTS_P` |
|
56 |
-
| `ENTS_R` |
|
57 |
-
| `ENTS_F` |
|
|
|
14 |
metrics:
|
15 |
- name: NER Precision
|
16 |
type: precision
|
17 |
+
value: 0.5991309071
|
18 |
- name: NER Recall
|
19 |
type: recall
|
20 |
+
value: 0.5768828452
|
21 |
- name: NER F Score
|
22 |
type: f_score
|
23 |
+
value: 0.5877964295
|
24 |
---
|
25 |
A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT from job adverts.
|
26 |
|
27 |
| Feature | Description |
|
28 |
| --- | --- |
|
29 |
| **Name** | `en_skillner` |
|
30 |
+
| **Version** | `3.5.0` |
|
31 |
+
| **spaCy** | `>=3.5.3,<3.6.0` |
|
32 |
| **Default Pipeline** | `tok2vec`, `tagger`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
|
33 |
| **Components** | `tok2vec`, `tagger`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
|
34 |
| **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
|
35 |
+
| **Sources** | [OntoNotes 5](https://catalog.ldc.upenn.edu/LDC2013T19) (Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston)<br />[ClearNLP Constituent-to-Dependency Conversion](https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md) (Emory University)<br />[WordNet 3.0](https://wordnet.princeton.edu/) (Princeton University)<br />[Explosion Vectors (OSCAR 2109 + Wikipedia + OpenSubtitles + WMT News Crawl)](https://github.com/explosion/spacy-vectors-builder) (Explosion) |
|
36 |
| **License** | `MIT` |
|
37 |
| **Author** | [nestauk](https://explosion.ai) |
|
38 |
|
|
|
52 |
|
53 |
| Type | Score |
|
54 |
| --- | --- |
|
55 |
+
| `ENTS_P` | 59.91 |
|
56 |
+
| `ENTS_R` | 57.69 |
|
57 |
+
| `ENTS_F` | 58.78 |
|
attribute_ruler/patterns
CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
|
|
config.cfg
CHANGED
@@ -17,7 +17,6 @@ after_creation = null
|
|
17 |
after_pipeline_creation = null
|
18 |
batch_size = 256
|
19 |
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
|
20 |
-
vectors = {"@vectors":"spacy.Vectors.v1"}
|
21 |
|
22 |
[components]
|
23 |
|
@@ -117,7 +116,6 @@ maxout_pieces = 2
|
|
117 |
|
118 |
[components.tagger]
|
119 |
factory = "tagger"
|
120 |
-
label_smoothing = 0.0
|
121 |
neg_prefix = "!"
|
122 |
overwrite = false
|
123 |
scorer = {"@scorers":"spacy.tagger_scorer.v1"}
|
|
|
17 |
after_pipeline_creation = null
|
18 |
batch_size = 256
|
19 |
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
|
|
|
20 |
|
21 |
[components]
|
22 |
|
|
|
116 |
|
117 |
[components.tagger]
|
118 |
factory = "tagger"
|
|
|
119 |
neg_prefix = "!"
|
120 |
overwrite = false
|
121 |
scorer = {"@scorers":"spacy.tagger_scorer.v1"}
|
en_skillner-any-py3-none-any.whl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86b2de6b625e84c55050bccdba70da7d4dc1d39f272bb9debba1b74b0857c868
|
3 |
+
size 587688649
|
meta.json
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
{
|
2 |
"lang":"en",
|
3 |
"name":"skillner",
|
4 |
-
"version":"3.
|
5 |
"description":"A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT from job adverts.",
|
6 |
"author":"nestauk",
|
7 |
"email":"[email protected]",
|
8 |
"url":"https://explosion.ai",
|
9 |
"license":"MIT",
|
10 |
-
"spacy_version":">=3.
|
11 |
-
"spacy_git_version":"
|
12 |
"vectors":{
|
13 |
"width":300,
|
14 |
"vectors":514157,
|
@@ -43,45 +43,45 @@
|
|
43 |
"senter"
|
44 |
],
|
45 |
"performance":{
|
46 |
-
"ents_p":0.
|
47 |
-
"ents_r":0.
|
48 |
-
"ents_f":0.
|
49 |
"ents_per_type":{
|
50 |
"SKILL":{
|
51 |
-
"correct":
|
52 |
-
"incorrect":
|
53 |
"partial":0,
|
54 |
-
"missed":
|
55 |
-
"spurious":
|
56 |
-
"possible":
|
57 |
-
"actual":
|
58 |
-
"precision":0.
|
59 |
-
"recall":0.
|
60 |
-
"f1":0.
|
61 |
},
|
62 |
"EXPERIENCE":{
|
63 |
-
"correct":
|
64 |
-
"incorrect":
|
65 |
"partial":0,
|
66 |
-
"missed":
|
67 |
-
"spurious":
|
68 |
-
"possible":
|
69 |
-
"actual":
|
70 |
-
"precision":0.
|
71 |
-
"recall":0.
|
72 |
-
"f1":0.
|
73 |
},
|
74 |
"BENEFIT":{
|
75 |
-
"correct":
|
76 |
-
"incorrect":
|
77 |
"partial":0,
|
78 |
-
"missed":
|
79 |
-
"spurious":
|
80 |
-
"possible":
|
81 |
-
"actual":
|
82 |
-
"precision":0,
|
83 |
-
"recall":0,
|
84 |
-
"f1":0
|
85 |
}
|
86 |
}
|
87 |
},
|
|
|
1 |
{
|
2 |
"lang":"en",
|
3 |
"name":"skillner",
|
4 |
+
"version":"3.5.0",
|
5 |
"description":"A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT from job adverts.",
|
6 |
"author":"nestauk",
|
7 |
"email":"[email protected]",
|
8 |
"url":"https://explosion.ai",
|
9 |
"license":"MIT",
|
10 |
+
"spacy_version":">=3.5.3,<3.6.0",
|
11 |
+
"spacy_git_version":"9e0322de1",
|
12 |
"vectors":{
|
13 |
"width":300,
|
14 |
"vectors":514157,
|
|
|
43 |
"senter"
|
44 |
],
|
45 |
"performance":{
|
46 |
+
"ents_p":0.5991309071,
|
47 |
+
"ents_r":0.5768828452,
|
48 |
+
"ents_f":0.5877964295,
|
49 |
"ents_per_type":{
|
50 |
"SKILL":{
|
51 |
+
"correct":1208,
|
52 |
+
"incorrect":32,
|
53 |
"partial":0,
|
54 |
+
"missed":429,
|
55 |
+
"spurious":420,
|
56 |
+
"possible":1669,
|
57 |
+
"actual":1660,
|
58 |
+
"precision":0.7277108434,
|
59 |
+
"recall":0.7237866986,
|
60 |
+
"f1":0.7257434665
|
61 |
},
|
62 |
"EXPERIENCE":{
|
63 |
+
"correct":84,
|
64 |
+
"incorrect":37,
|
65 |
"partial":0,
|
66 |
+
"missed":55,
|
67 |
+
"spurious":29,
|
68 |
+
"possible":176,
|
69 |
+
"actual":150,
|
70 |
+
"precision":0.56,
|
71 |
+
"recall":0.4772727273,
|
72 |
+
"f1":0.5153374233
|
73 |
},
|
74 |
"BENEFIT":{
|
75 |
+
"correct":24,
|
76 |
+
"incorrect":3,
|
77 |
"partial":0,
|
78 |
+
"missed":40,
|
79 |
+
"spurious":4,
|
80 |
+
"possible":67,
|
81 |
+
"actual":31,
|
82 |
+
"precision":0.7741935484,
|
83 |
+
"recall":0.3582089552,
|
84 |
+
"f1":0.4897959184
|
85 |
}
|
86 |
}
|
87 |
},
|
ner/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5dd953e4cc7eeaa1052539fcb71b312a0650910a310fb84b5eac2d89e993e8c0
|
3 |
+
size 6384063
|
ner/moves
CHANGED
@@ -1 +1 @@
|
|
1 |
-
��moves
|
|
|
1 |
+
��moves��{"0":{},"1":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"EXPERIENCE":-1,"SKILL":-2,"BENEFIT":-3},"2":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"EXPERIENCE":-1,"SKILL":-2,"BENEFIT":-3},"3":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"EXPERIENCE":-1,"SKILL":-2,"BENEFIT":-3},"4":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"":1,"EXPERIENCE":-1,"SKILL":-2,"BENEFIT":-3},"5":{"":1}}�cfg��neg_key�
|
tagger/cfg
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
{
|
2 |
-
"label_smoothing":0.0,
|
3 |
"labels":[
|
4 |
"$",
|
5 |
"''",
|
|
|
1 |
{
|
|
|
2 |
"labels":[
|
3 |
"$",
|
4 |
"''",
|
vocab/lookups.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ddd140ecac6a8c4592e9146d6e30074569ffaed97ee51edc9587dc510f8934c
|
3 |
+
size 69982
|
vocab/strings.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|