Spaces:
Runtime error
Runtime error
Update Space (evaluate main: f62ad7c8)
Browse files
README.md
CHANGED
@@ -42,9 +42,9 @@ METEOR is based on a generalized concept of unigram matching between the machine
|
|
42 |
|
43 |
METEOR has two mandatory arguments:
|
44 |
|
45 |
-
`predictions`: a list of predictions to score. Each prediction should be a string with tokens separated by spaces.
|
46 |
|
47 |
-
`references`: a list of references
|
48 |
|
49 |
It also has several optional parameters:
|
50 |
|
@@ -65,7 +65,10 @@ Refer to the [METEOR paper](https://aclanthology.org/W05-0909.pdf) for more info
|
|
65 |
|
66 |
## Output values
|
67 |
|
68 |
-
The metric outputs a dictionary containing the METEOR score. Its values range from 0 to 1.
|
|
|
|
|
|
|
69 |
|
70 |
|
71 |
### Values from popular papers
|
@@ -74,34 +77,34 @@ The [METEOR paper](https://aclanthology.org/W05-0909.pdf) does not report METEOR
|
|
74 |
|
75 |
## Examples
|
76 |
|
77 |
-
|
78 |
|
79 |
```python
|
80 |
>>> meteor = evaluate.load('meteor')
|
81 |
>>> predictions = ["It is a guide to action which ensures that the military always obeys the commands of the party"]
|
82 |
-
>>>
|
83 |
-
>>> results = meteor.compute(predictions=predictions, references=
|
84 |
>>> print(round(results['meteor'], 2))
|
85 |
1.0
|
86 |
```
|
87 |
|
88 |
-
|
89 |
|
90 |
```python
|
91 |
>>> meteor = evaluate.load('meteor')
|
92 |
>>> predictions = ["It is a guide to action which ensures that the military always obeys the commands of the party"]
|
93 |
-
>>> references = [
|
94 |
>>> results = meteor.compute(predictions=predictions, references=references)
|
95 |
>>> print(round(results['meteor'], 2))
|
96 |
-
|
97 |
```
|
98 |
|
99 |
-
|
100 |
|
101 |
```python
|
102 |
>>> meteor = evaluate.load('meteor')
|
103 |
>>> predictions = ["It is a guide to action which ensures that the military always obeys the commands of the party"]
|
104 |
-
>>> references = [
|
105 |
>>> results = meteor.compute(predictions=predictions, references=references)
|
106 |
>>> print(round(results['meteor'], 2))
|
107 |
0.69
|
|
|
42 |
|
43 |
METEOR has two mandatory arguments:
|
44 |
|
45 |
+
`predictions`: a `list` of predictions to score. Each prediction should be a string with tokens separated by spaces.
|
46 |
|
47 |
+
`references`: a `list` of references (in the case of one `reference` per `prediction`), or a `list` of `lists` of references (in the case of multiple `references` per `prediction`). Each reference should be a string with tokens separated by spaces.
|
48 |
|
49 |
It also has several optional parameters:
|
50 |
|
|
|
65 |
|
66 |
## Output values
|
67 |
|
68 |
+
The metric outputs a dictionary containing the METEOR score. Its values range from 0 to 1, e.g.:
|
69 |
+
```
|
70 |
+
{'meteor': 0.9999142661179699}
|
71 |
+
```
|
72 |
|
73 |
|
74 |
### Values from popular papers
|
|
|
77 |
|
78 |
## Examples
|
79 |
|
80 |
+
One `reference` per `prediction`:
|
81 |
|
82 |
```python
|
83 |
>>> meteor = evaluate.load('meteor')
|
84 |
>>> predictions = ["It is a guide to action which ensures that the military always obeys the commands of the party"]
|
85 |
+
>>> reference = ["It is a guide to action which ensures that the military always obeys the commands of the party"]
|
86 |
+
>>> results = meteor.compute(predictions=predictions, references=reference)
|
87 |
>>> print(round(results['meteor'], 2))
|
88 |
1.0
|
89 |
```
|
90 |
|
91 |
+
Multiple `references` per `prediction`:
|
92 |
|
93 |
```python
|
94 |
>>> meteor = evaluate.load('meteor')
|
95 |
>>> predictions = ["It is a guide to action which ensures that the military always obeys the commands of the party"]
|
96 |
+
>>> references = [['It is a guide to action which ensures that the military always obeys the commands of the party', 'It is a guide to action that ensures that the military will forever heed Party commands', 'It is the guiding principle which guarantees the military forces always being under the command of the Party', 'It is the practical guide for the army always to heed the directions of the party']]
|
97 |
>>> results = meteor.compute(predictions=predictions, references=references)
|
98 |
>>> print(round(results['meteor'], 2))
|
99 |
+
1.0
|
100 |
```
|
101 |
|
102 |
+
Multiple `references` per `prediction`, partial match:
|
103 |
|
104 |
```python
|
105 |
>>> meteor = evaluate.load('meteor')
|
106 |
>>> predictions = ["It is a guide to action which ensures that the military always obeys the commands of the party"]
|
107 |
+
>>> references = [['It is a guide to action that ensures that the military will forever heed Party commands', 'It is the guiding principle which guarantees the military forces always being under the command of the Party', 'It is the practical guide for the army always to heed the directions of the party']]
|
108 |
>>> results = meteor.compute(predictions=predictions, references=references)
|
109 |
>>> print(round(results['meteor'], 2))
|
110 |
0.69
|
meteor.py
CHANGED
@@ -89,12 +89,20 @@ class Meteor(evaluate.Metric):
|
|
89 |
description=_DESCRIPTION,
|
90 |
citation=_CITATION,
|
91 |
inputs_description=_KWARGS_DESCRIPTION,
|
92 |
-
features=
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
codebase_urls=["https://github.com/nltk/nltk/blob/develop/nltk/translate/meteor_score.py"],
|
99 |
reference_urls=[
|
100 |
"https://www.nltk.org/api/nltk.translate.html#module-nltk.translate.meteor_score",
|
@@ -112,17 +120,43 @@ class Meteor(evaluate.Metric):
|
|
112 |
nltk.download("omw-1.4")
|
113 |
|
114 |
def _compute(self, predictions, references, alpha=0.9, beta=3, gamma=0.5):
|
|
|
115 |
if NLTK_VERSION >= version.Version("3.6.5"):
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
else:
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
|
128 |
return {"meteor": np.mean(scores)}
|
|
|
89 |
description=_DESCRIPTION,
|
90 |
citation=_CITATION,
|
91 |
inputs_description=_KWARGS_DESCRIPTION,
|
92 |
+
features=[
|
93 |
+
datasets.Features(
|
94 |
+
{
|
95 |
+
"predictions": datasets.Value("string", id="sequence"),
|
96 |
+
"references": datasets.Sequence(datasets.Value("string", id="sequence"), id="references"),
|
97 |
+
}
|
98 |
+
),
|
99 |
+
datasets.Features(
|
100 |
+
{
|
101 |
+
"predictions": datasets.Value("string", id="sequence"),
|
102 |
+
"references": datasets.Value("string", id="sequence"),
|
103 |
+
}
|
104 |
+
),
|
105 |
+
],
|
106 |
codebase_urls=["https://github.com/nltk/nltk/blob/develop/nltk/translate/meteor_score.py"],
|
107 |
reference_urls=[
|
108 |
"https://www.nltk.org/api/nltk.translate.html#module-nltk.translate.meteor_score",
|
|
|
120 |
nltk.download("omw-1.4")
|
121 |
|
122 |
def _compute(self, predictions, references, alpha=0.9, beta=3, gamma=0.5):
    """Return the mean METEOR score over all prediction/reference pairs.

    Args:
        predictions: list of prediction strings.
        references: either a list of reference strings (one per prediction)
            or a list of lists of reference strings (several per prediction).
        alpha, beta, gamma: forwarded unchanged to the NLTK METEOR functions.

    Returns:
        A dict ``{"meteor": <np.mean of the per-prediction scores>}``.
    """
    # The layout (single vs. multiple references) is decided from the first
    # entry only; all entries are expected to share the same layout.
    multiple_refs = isinstance(references[0], list)

    if NLTK_VERSION >= version.Version("3.6.5"):
        # NLTK 3.6.5 and later expect pre-tokenized inputs.
        if multiple_refs:
            scores = [
                meteor_score.meteor_score(
                    [word_tokenize(ref) for ref in refs],
                    word_tokenize(pred),
                    alpha=alpha,
                    beta=beta,
                    gamma=gamma,
                )
                for refs, pred in zip(references, predictions)
            ]
        else:
            scores = [
                meteor_score.single_meteor_score(
                    word_tokenize(ref), word_tokenize(pred), alpha=alpha, beta=beta, gamma=gamma
                )
                for ref, pred in zip(references, predictions)
            ]
    else:
        # Older NLTK releases take raw strings, exactly as the single-reference
        # call below does, so nothing is tokenized on this path.
        if multiple_refs:
            scores = [
                # Score each prediction against its OWN reference group
                # (previously every prediction was scored against references[0]).
                meteor_score.meteor_score(refs, pred, alpha=alpha, beta=beta, gamma=gamma)
                for refs, pred in zip(references, predictions)
            ]
        else:
            scores = [
                meteor_score.single_meteor_score(ref, pred, alpha=alpha, beta=beta, gamma=gamma)
                for ref, pred in zip(references, predictions)
            ]

    return {"meteor": np.mean(scores)}
|