lizgzil committed on
Commit
dd01874
1 Parent(s): cb74416

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -14,25 +14,25 @@ model-index:
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
- value: 0.4605714286
18
  - name: NER Recall
19
  type: recall
20
- value: 0.4574347333
21
  - name: NER F Score
22
  type: f_score
23
- value: 0.4589977221
24
  ---
25
  A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT from job adverts.
26
 
27
  | Feature | Description |
28
  | --- | --- |
29
  | **Name** | `en_skillner` |
30
- | **Version** | `3.7.1` |
31
- | **spaCy** | `>=3.7.4,<3.8.0` |
32
  | **Default Pipeline** | `tok2vec`, `tagger`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
33
  | **Components** | `tok2vec`, `tagger`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
34
  | **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
35
- | **Sources** | [OntoNotes 5](https://catalog.ldc.upenn.edu/LDC2013T19) (Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston)<br>[ClearNLP Constituent-to-Dependency Conversion](https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md) (Emory University)<br>[WordNet 3.0](https://wordnet.princeton.edu/) (Princeton University)<br>[Explosion Vectors (OSCAR 2109 + Wikipedia + OpenSubtitles + WMT News Crawl)](https://github.com/explosion/spacy-vectors-builder) (Explosion) |
36
  | **License** | `MIT` |
37
  | **Author** | [nestauk](https://explosion.ai) |
38
 
@@ -52,6 +52,6 @@ A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT
52
 
53
  | Type | Score |
54
  | --- | --- |
55
- | `ENTS_P` | 46.06 |
56
- | `ENTS_R` | 45.74 |
57
- | `ENTS_F` | 45.90 |
 
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
+ value: 0.5991309071
18
  - name: NER Recall
19
  type: recall
20
+ value: 0.5768828452
21
  - name: NER F Score
22
  type: f_score
23
+ value: 0.5877964295
24
  ---
25
  A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT from job adverts.
26
 
27
  | Feature | Description |
28
  | --- | --- |
29
  | **Name** | `en_skillner` |
30
+ | **Version** | `3.5.0` |
31
+ | **spaCy** | `>=3.5.3,<3.6.0` |
32
  | **Default Pipeline** | `tok2vec`, `tagger`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
33
  | **Components** | `tok2vec`, `tagger`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
34
  | **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
35
+ | **Sources** | [OntoNotes 5](https://catalog.ldc.upenn.edu/LDC2013T19) (Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston)<br />[ClearNLP Constituent-to-Dependency Conversion](https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md) (Emory University)<br />[WordNet 3.0](https://wordnet.princeton.edu/) (Princeton University)<br />[Explosion Vectors (OSCAR 2109 + Wikipedia + OpenSubtitles + WMT News Crawl)](https://github.com/explosion/spacy-vectors-builder) (Explosion) |
36
  | **License** | `MIT` |
37
  | **Author** | [nestauk](https://explosion.ai) |
38
 
 
52
 
53
  | Type | Score |
54
  | --- | --- |
55
+ | `ENTS_P` | 59.91 |
56
+ | `ENTS_R` | 57.69 |
57
+ | `ENTS_F` | 58.78 |
attribute_ruler/patterns CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
 
config.cfg CHANGED
@@ -17,7 +17,6 @@ after_creation = null
17
  after_pipeline_creation = null
18
  batch_size = 256
19
  tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
- vectors = {"@vectors":"spacy.Vectors.v1"}
21
 
22
  [components]
23
 
@@ -117,7 +116,6 @@ maxout_pieces = 2
117
 
118
  [components.tagger]
119
  factory = "tagger"
120
- label_smoothing = 0.0
121
  neg_prefix = "!"
122
  overwrite = false
123
  scorer = {"@scorers":"spacy.tagger_scorer.v1"}
 
17
  after_pipeline_creation = null
18
  batch_size = 256
19
  tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
 
20
 
21
  [components]
22
 
 
116
 
117
  [components.tagger]
118
  factory = "tagger"
 
119
  neg_prefix = "!"
120
  overwrite = false
121
  scorer = {"@scorers":"spacy.tagger_scorer.v1"}
en_skillner-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d6401718f0b7e4271c63c65f625a931ecadad6916c30ebfaf4a954f3c645912
3
- size 587702666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86b2de6b625e84c55050bccdba70da7d4dc1d39f272bb9debba1b74b0857c868
3
+ size 587688649
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"en",
3
  "name":"skillner",
4
- "version":"3.7.1",
5
  "description":"A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT from job adverts.",
6
  "author":"nestauk",
7
  "email":"[email protected]",
8
  "url":"https://explosion.ai",
9
  "license":"MIT",
10
- "spacy_version":">=3.7.4,<3.8.0",
11
- "spacy_git_version":"bd2c17e20",
12
  "vectors":{
13
  "width":300,
14
  "vectors":514157,
@@ -43,45 +43,45 @@
43
  "senter"
44
  ],
45
  "performance":{
46
- "ents_p":0.4605714286,
47
- "ents_r":0.4574347333,
48
- "ents_f":0.4589977221,
49
  "ents_per_type":{
50
  "SKILL":{
51
- "correct":461,
52
- "incorrect":8,
53
  "partial":0,
54
- "missed":304,
55
- "spurious":340,
56
- "possible":773,
57
- "actual":809,
58
- "precision":0.5698393078,
59
- "recall":0.596377749,
60
- "f1":0.582806574
61
  },
62
  "EXPERIENCE":{
63
- "correct":33,
64
- "incorrect":16,
65
  "partial":0,
66
- "missed":59,
67
- "spurious":17,
68
- "possible":108,
69
- "actual":66,
70
- "precision":0.5,
71
- "recall":0.3055555556,
72
- "f1":0.3793103448
73
  },
74
  "BENEFIT":{
75
- "correct":0,
76
- "incorrect":0,
77
  "partial":0,
78
- "missed":0,
79
- "spurious":0,
80
- "possible":0,
81
- "actual":0,
82
- "precision":0,
83
- "recall":0,
84
- "f1":0
85
  }
86
  }
87
  },
 
1
  {
2
  "lang":"en",
3
  "name":"skillner",
4
+ "version":"3.5.0",
5
  "description":"A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT from job adverts.",
6
  "author":"nestauk",
7
  "email":"[email protected]",
8
  "url":"https://explosion.ai",
9
  "license":"MIT",
10
+ "spacy_version":">=3.5.3,<3.6.0",
11
+ "spacy_git_version":"9e0322de1",
12
  "vectors":{
13
  "width":300,
14
  "vectors":514157,
 
43
  "senter"
44
  ],
45
  "performance":{
46
+ "ents_p":0.5991309071,
47
+ "ents_r":0.5768828452,
48
+ "ents_f":0.5877964295,
49
  "ents_per_type":{
50
  "SKILL":{
51
+ "correct":1208,
52
+ "incorrect":32,
53
  "partial":0,
54
+ "missed":429,
55
+ "spurious":420,
56
+ "possible":1669,
57
+ "actual":1660,
58
+ "precision":0.7277108434,
59
+ "recall":0.7237866986,
60
+ "f1":0.7257434665
61
  },
62
  "EXPERIENCE":{
63
+ "correct":84,
64
+ "incorrect":37,
65
  "partial":0,
66
+ "missed":55,
67
+ "spurious":29,
68
+ "possible":176,
69
+ "actual":150,
70
+ "precision":0.56,
71
+ "recall":0.4772727273,
72
+ "f1":0.5153374233
73
  },
74
  "BENEFIT":{
75
+ "correct":24,
76
+ "incorrect":3,
77
  "partial":0,
78
+ "missed":40,
79
+ "spurious":4,
80
+ "possible":67,
81
+ "actual":31,
82
+ "precision":0.7741935484,
83
+ "recall":0.3582089552,
84
+ "f1":0.4897959184
85
  }
86
  }
87
  },
ner/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00f3751c9e8131d61d924c752bc58f62e329d56e34612a544a8cea766b0bc6e5
3
- size 6383023
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dd953e4cc7eeaa1052539fcb71b312a0650910a310fb84b5eac2d89e993e8c0
3
+ size 6384063
ner/moves CHANGED
@@ -1 +1 @@
1
- ��moves�|{"0":{},"1":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"SKILL":-1,"EXPERIENCE":-2},"2":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"SKILL":-1,"EXPERIENCE":-2},"3":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"SKILL":-1,"EXPERIENCE":-2},"4":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"":1,"SKILL":-1,"EXPERIENCE":-2},"5":{"":1}}�cfg��neg_key�
 
1
+ ��moves��{"0":{},"1":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"EXPERIENCE":-1,"SKILL":-2,"BENEFIT":-3},"2":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"EXPERIENCE":-1,"SKILL":-2,"BENEFIT":-3},"3":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"EXPERIENCE":-1,"SKILL":-2,"BENEFIT":-3},"4":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"":1,"EXPERIENCE":-1,"SKILL":-2,"BENEFIT":-3},"5":{"":1}}�cfg��neg_key�
tagger/cfg CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "label_smoothing":0.0,
3
  "labels":[
4
  "$",
5
  "''",
 
1
  {
 
2
  "labels":[
3
  "$",
4
  "''",
vocab/lookups.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fce9c883c56165f29573cc938c2a1c9d417ac61bd8f56b671dd5f7996de70682
3
- size 70040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ddd140ecac6a8c4592e9146d6e30074569ffaed97ee51edc9587dc510f8934c
3
+ size 69982
vocab/strings.json CHANGED
The diff for this file is too large to render. See raw diff