"""TODO: Add a description here.""" |
|
|
|
import evaluate |
|
import datasets |
|
|
|
|
|
|
|
_CITATION = """\ |
|
@InProceedings{huggingface:module, |
|
title = {A great new module}, |
|
authors={huggingface, Inc.}, |
|
year={2020} |
|
} |
|
""" |
|
|
|
|
|
_DESCRIPTION = """\ |
|
This new module is designed to solve this great ML task and is crafted with a lot of care. |
|
""" |
|
|
|
|
|
|
|
_KWARGS_DESCRIPTION = """ |
|
Calculates how good are predictions given some references, using certain scores |
|
Args: |
|
predictions: list of predictions to score. Each predictions |
|
should be a string with tokens separated by spaces. |
|
references: list of reference for each prediction. Each |
|
reference should be a string with tokens separated by spaces. |
|
Returns: |
|
accuracy: description of the first score, |
|
another_score: description of the second score, |
|
Examples: |
|
Examples should be written in doctest format, and should illustrate how |
|
to use the function. |
|
|
|
>>> metric = evaluate.load("DarrenChensformer/aciton_generation") |
|
>>> results = metric.compute(references=[0, 1], predictions=[0, 1]) |
|
>>> print(results) |
|
{'accuracy': 1.0} |
|
""" |
|
|
|
|
|
BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt" |
|
|
|

# Recognized action classes; the empty string catches tags whose class is missing or not recognized.
VALID_LABELS = [
    "/開箱",
    "/教學",
    "/表達",
    "/分享/外部資訊",
    "/分享/個人資訊",
    "/推薦/產品",
    "/推薦/服務",
    "/推薦/其他",
    ""
]


class BaseEvaluater:
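    """Shared set-based precision/recall/F1 computation over predicted and reference tags."""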

    eps = 1e-8
    valid_labels = None

    def __call__(self, preds, labels):
        return self._compute(preds, labels)

    def _compute(self, preds, labels):
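        # Micro-averaged scores: true positives, false positives and false negatives
        # are accumulated over all examples, with each example's tags treated as a set;
        # eps keeps the divisions safe when the counts are zero.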
        tp, fp, fn = 0, 0, 0
        for pred, label in zip(preds, labels):
            tp += len(set(pred) & set(label))
            fp += len(set(pred) - set(label))
            fn += len(set(label) - set(pred))
        precision = tp / (tp + fp + self.eps)
        recall = tp / (tp + fn + self.eps)
        f1 = 2 * precision * recall / (precision + recall + self.eps)

        return {
            "precision": round(precision, 4),
            "recall": round(recall, 4),
            "f1": round(f1, 4)
        }

    def _init_valid_labels(self):
        if self.valid_labels is None:
            self.valid_labels = VALID_LABELS


class ClassEvaluater(BaseEvaluater):
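    """Evaluates only the class prefix of each tag (e.g. "/推薦/產品")."""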

    def __init__(self, valid_labels=None):
        self.valid_labels = valid_labels
        self._init_valid_labels()

    def __call__(self, preds, labels):
        preds = map(self.extract_class, preds)
        labels = map(self.extract_class, labels)

        preds = list(map(self.extract_valid, preds))
        labels = list(map(self.extract_valid, labels))
        return self._compute(preds, labels)

    def extract_valid(self, tags):
        tags = list(filter(lambda tag: tag in self.valid_labels, tags))
        return tags

    def extract_class(self, tags):
        tags = map(lambda tag: tag.replace("/ ", "/"), tags)
        tags = list(map(self.batch_extract_class, tags))
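        # Deduplicate while preserving order.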
        tags = list(dict.fromkeys(tags))
        return tags

    def batch_extract_class(self, tag):
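        # A tag is expected to look like "/<class>/<phrase>" or "/<class>/<subclass>/<phrase>",
        # so splitting on "/" yields 3 or 4 parts and everything before the last part is the class.
        # Tags that do not parse, or whose class is not recognized, map to "".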
        tag = tag.split('/')
        if len(tag) == 3:
            _class = '/'.join(tag[:2])
        elif len(tag) == 4:
            _class = '/'.join(tag[:3])
        elif len(tag) == 1:
            _class = ''
        else:
            _class = None
        if _class in self.valid_labels:
            return _class
        else:
            return ""


class PhraseEvaluater(BaseEvaluater):
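    """Evaluates only the phrase part of each tag, with the class prefix stripped."""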

    def __init__(self, valid_labels=None):
        self.valid_labels = valid_labels
        self._init_valid_labels()

    def __call__(self, preds, labels):
        preds = map(self.extract_phrase, preds)
        labels = map(self.extract_phrase, labels)
        return self._compute(preds, labels)

    def extract_phrase(self, tags):
        tags = map(lambda tag: tag.replace("/ ", "/"), tags)
        tags = list(map(self.batch_extract_phrase, tags))
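        # Deduplicate while preserving order.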
        tags = list(dict.fromkeys(tags))
        return tags

    def batch_extract_phrase(self, phrase):
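        # Mirrors batch_extract_class: identify the class prefix and return the remainder
        # of the tag as the phrase. Tags whose class is not recognized map to "".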
        tag = phrase.split('/')
        if len(tag) == 3:
            _class = '/'.join(tag[:2])
        elif len(tag) == 4:
            _class = '/'.join(tag[:3])
        elif len(tag) == 1:
            _class = ''
        else:
            _class = None
        if _class in self.valid_labels:
            return phrase.replace(_class, '')
        else:
            return ""


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class action_generation(evaluate.Metric):
    """Precision/recall/F1 for generated action tags, scored at the class and phrase level."""

    def _info(self):
        return evaluate.MetricInfo(
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features({
                'predictions': datasets.Sequence(datasets.Value('string')),
                'references': datasets.Sequence(datasets.Value('string')),
            }),
            homepage="http://module.homepage",
            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
            reference_urls=["http://path.to.reference.url/new_module"]
        )

    def _download_and_prepare(self, dl_manager):
        """Optional: download external resources useful to compute the scores"""
        pass

    def _compute(self, predictions, references,
                 valid_labels=None, detailed_scores=False,
                 weights={"class": 0.8, "phrase": 0.2}
                 ):
        """Returns the weighted class/phrase scores, plus the per-level breakdown when detailed_scores=True."""
        class_eval = ClassEvaluater(valid_labels)(predictions, references)
        phrase_eval = PhraseEvaluater(valid_labels)(predictions, references)
        weight_sum = {
            key: round((class_eval[key] * weights["class"]) + (phrase_eval[key] * weights["phrase"]), 4)
            for key in class_eval
        }
        if detailed_scores:
            results = {
                "class": class_eval,
                "phrase": phrase_eval,
                "weighted_sum": weight_sum
            }
        else:
            results = weight_sum

        return results
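
# Illustrative usage sketch (the tag phrases below are made up):
#   preds = [["/開箱/手機", "/推薦/產品/手機殼"]]
#   refs = [["/開箱/手機"]]
#   ClassEvaluater()(preds, refs)    # class-level precision/recall/F1
#   PhraseEvaluater()(preds, refs)   # phrase-level precision/recall/F1
# Passing detailed_scores=True to compute() returns both levels alongside the weighted sum.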