Commit a0d970f
Committed by lvwerra (HF staff)
Parent: f38aec4

Update Space (evaluate main: f62ad7c8)

Files changed (2)
  1. README.md +14 -11
  2. meteor.py +50 -16
README.md CHANGED
@@ -42,9 +42,9 @@ METEOR is based on a generalized concept of unigram matching between the machine
 
 METEOR has two mandatory arguments:
 
-`predictions`: a list of predictions to score. Each prediction should be a string with tokens separated by spaces.
+`predictions`: a `list` of predictions to score. Each prediction should be a string with tokens separated by spaces.
 
-`references`: a list of references for each prediction. Each reference should be a string with tokens separated by spaces.
+`references`: a `list` of references (in the case of one `reference` per `prediction`), or a `list` of `lists` of references (in the case of multiple `references` per `prediction`). Each reference should be a string with tokens separated by spaces.
 
 It also has several optional parameters:
 
@@ -65,7 +65,10 @@ Refer to the [METEOR paper](https://aclanthology.org/W05-0909.pdf) for more info
 
 ## Output values
 
-The metric outputs a dictionary containing the METEOR score. Its values range from 0 to 1.
+The metric outputs a dictionary containing the METEOR score. Its values range from 0 to 1, e.g.:
+```
+{'meteor': 0.9999142661179699}
+```
 
 
 ### Values from popular papers
@@ -74,34 +77,34 @@ The [METEOR paper](https://aclanthology.org/W05-0909.pdf) does not report METEOR
 
 ## Examples
 
-Maximal values :
+One `reference` per `prediction`:
 
 ```python
 >>> meteor = evaluate.load('meteor')
 >>> predictions = ["It is a guide to action which ensures that the military always obeys the commands of the party"]
->>> references = ["It is a guide to action which ensures that the military always obeys the commands of the party"]
->>> results = meteor.compute(predictions=predictions, references=references)
+>>> reference = ["It is a guide to action which ensures that the military always obeys the commands of the party"]
+>>> results = meteor.compute(predictions=predictions, references=reference)
 >>> print(round(results['meteor'], 2))
 1.0
 ```
 
-Minimal values:
+Multiple `references` per `prediction`:
 
 ```python
 >>> meteor = evaluate.load('meteor')
 >>> predictions = ["It is a guide to action which ensures that the military always obeys the commands of the party"]
->>> references = ["Hello world"]
+>>> references = [['It is a guide to action which ensures that the military always obeys the commands of the party', 'It is the guiding principle which guarantees the military forces always being under the command of the Party', 'It is the practical guide for the army always to heed the directions of the party']]
 >>> results = meteor.compute(predictions=predictions, references=references)
 >>> print(round(results['meteor'], 2))
-0.0
+1.0
 ```
 
-Partial match:
+Multiple `references` per `prediction`, partial match:
 
 ```python
 >>> meteor = evaluate.load('meteor')
 >>> predictions = ["It is a guide to action which ensures that the military always obeys the commands of the party"]
->>> references = ["It is a guide to action that ensures that the military will forever heed Party commands"]
+>>> references = [['It is a guide to action that ensures that the military will forever heed Party commands', 'It is the guiding principle which guarantees the military forces always being under the command of the Party', 'It is the practical guide for the army always to heed the directions of the party']]
 >>> results = meteor.compute(predictions=predictions, references=references)
 >>> print(round(results['meteor'], 2))
 0.69
 
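The README hunk above mentions several optional parameters without listing them; the `_compute` signature in the meteor.py diff below shows them as `alpha=0.9`, `beta=3`, `gamma=0.5`. A minimal sketch of overriding them at compute time, reusing the partial-match inputs from the examples so the expected value is the documented 0.69 (in METEOR, `alpha` balances precision against recall, while `beta` and `gamma` shape the fragmentation penalty):

```python
>>> meteor = evaluate.load('meteor')
>>> predictions = ["It is a guide to action which ensures that the military always obeys the commands of the party"]
>>> references = ["It is a guide to action that ensures that the military will forever heed Party commands"]
>>> # passing the NLTK defaults explicitly; tweak them to change how matches are weighted
>>> results = meteor.compute(predictions=predictions, references=references, alpha=0.9, beta=3, gamma=0.5)
>>> print(round(results['meteor'], 2))
0.69
```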
meteor.py CHANGED
@@ -89,12 +89,20 @@ class Meteor(evaluate.Metric):
             description=_DESCRIPTION,
             citation=_CITATION,
             inputs_description=_KWARGS_DESCRIPTION,
-            features=datasets.Features(
-                {
-                    "predictions": datasets.Value("string", id="sequence"),
-                    "references": datasets.Value("string", id="sequence"),
-                }
-            ),
+            features=[
+                datasets.Features(
+                    {
+                        "predictions": datasets.Value("string", id="sequence"),
+                        "references": datasets.Sequence(datasets.Value("string", id="sequence"), id="references"),
+                    }
+                ),
+                datasets.Features(
+                    {
+                        "predictions": datasets.Value("string", id="sequence"),
+                        "references": datasets.Value("string", id="sequence"),
+                    }
+                ),
+            ],
             codebase_urls=["https://github.com/nltk/nltk/blob/develop/nltk/translate/meteor_score.py"],
             reference_urls=[
                 "https://www.nltk.org/api/nltk.translate.html#module-nltk.translate.meteor_score",
@@ -112,17 +120,43 @@ class Meteor(evaluate.Metric):
         nltk.download("omw-1.4")
 
     def _compute(self, predictions, references, alpha=0.9, beta=3, gamma=0.5):
+        multiple_refs = isinstance(references[0], list)
         if NLTK_VERSION >= version.Version("3.6.5"):
-            scores = [
-                meteor_score.single_meteor_score(
-                    word_tokenize(ref), word_tokenize(pred), alpha=alpha, beta=beta, gamma=gamma
-                )
-                for ref, pred in zip(references, predictions)
-            ]
+            # METEOR in NLTK version 3.6.5 and later expects pre-tokenized inputs
+            if multiple_refs:
+                scores = [
+                    meteor_score.meteor_score(
+                        [word_tokenize(ref) for ref in refs],
+                        word_tokenize(pred),
+                        alpha=alpha,
+                        beta=beta,
+                        gamma=gamma,
+                    )
+                    for refs, pred in zip(references, predictions)
+                ]
+            else:
+                scores = [
+                    meteor_score.single_meteor_score(
+                        word_tokenize(ref), word_tokenize(pred), alpha=alpha, beta=beta, gamma=gamma
+                    )
+                    for ref, pred in zip(references, predictions)
+                ]
         else:
-            scores = [
-                meteor_score.single_meteor_score(ref, pred, alpha=alpha, beta=beta, gamma=gamma)
-                for ref, pred in zip(references, predictions)
-            ]
+            if multiple_refs:
+                scores = [
+                    meteor_score.meteor_score(
+                        refs,
+                        pred,
+                        alpha=alpha,
+                        beta=beta,
+                        gamma=gamma,
+                    )
+                    for refs, pred in zip(references, predictions)
+                ]
+            else:
+                scores = [
+                    meteor_score.single_meteor_score(ref, pred, alpha=alpha, beta=beta, gamma=gamma)
+                    for ref, pred in zip(references, predictions)
+                ]
 
         return {"meteor": np.mean(scores)}
 
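Taken together, the two `Features` schemas and the `multiple_refs` branch above mean the updated metric accepts either a single reference string or a list of reference strings for each prediction. A minimal sketch exercising both shapes, assuming the updated metric loads via `evaluate.load('meteor')`; with multiple references, the underlying NLTK `meteor_score` keeps the score of the best-matching reference:

```python
import evaluate

meteor = evaluate.load("meteor")
predictions = ["It is a guide to action which ensures that the military always obeys the commands of the party"]

# Shape 1: one reference string per prediction (matches the second Features schema).
single_refs = ["It is a guide to action that ensures that the military will forever heed Party commands"]
print(meteor.compute(predictions=predictions, references=single_refs))

# Shape 2: a list of reference strings per prediction (matches the Sequence-based schema);
# including an exact match drives the score to ~1.0.
multi_refs = [[
    "It is a guide to action which ensures that the military always obeys the commands of the party",
    "It is a guide to action that ensures that the military will forever heed Party commands",
]]
print(meteor.compute(predictions=predictions, references=multi_refs))
```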