import datasets
import evaluate
import numpy as np
from Levenshtein import distance as lev_dist

_DESCRIPTION = """
TokenEditDistance: an NLP evaluation metric that records the minimum number of token edits
(insertions, deletions, and replacements, all weighted equally) needed to make a prediction
exactly match its reference. It uses the same logic as Levenshtein edit distance, except
applied to tokens (i.e. individual ints in a list) rather than to individual characters in a string.
"""

_CITATION = "Man of a thousand and eight names"

_KWARGS_DESCRIPTION = """
TokenEditDistance:
Args:
    predictions: list of predictions to score.
        Each prediction should be tokenized into a list of tokens.
    references: list of references/ground-truth outputs to score against.
        Each reference should be tokenized into a list of tokens.
Returns:
    "avg_token_edit_distance": Float, the average Token Edit Distance over all input predictions and references.
    "token_edit_distances": List[Int], the Token Edit Distance for each input prediction/reference pair.
Examples:
    >>> token_edit_distance_metric = evaluate.load('Token Edit Distance')
    >>> references = [[15, 4243], [100, 10008]]
    >>> predictions = [[15, 4243], [100, 10009]]
    >>> results = token_edit_distance_metric.compute(predictions=predictions, references=references)
    >>> print(results)
    {'avg_token_edit_distance': 0.5, 'token_edit_distances': array([0., 1.])}
"""

class TokenEditDistance(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "predictions": datasets.features.Sequence(datasets.Value("int32")),
                    "references": datasets.features.Sequence(datasets.Value("int32")),
                }
            ),
            codebase_urls=[],
            reference_urls=[],
        )
    def _compute(self, references, predictions):
        if len(predictions) != len(references):
            raise KeyError(
                "Token Edit Distance: Compute Error: "
                "Number of predictions does not match number of references."
            )
        edit_dist_arr = np.zeros(len(predictions))
        for i in range(len(edit_dist_arr)):
            if len(predictions[i]) != len(references[i]):
                raise KeyError(
                    "Token Edit Distance: Compute Error: "
                    f"Prediction length does not match reference length for example {i} "
                    f"(prediction len: {len(predictions[i])}, reference len: {len(references[i])})."
                )
            # Token-level Levenshtein distance between the two token sequences.
            edit_dist_arr[i] = lev_dist(predictions[i], references[i])
        return {
            "avg_token_edit_distance": np.mean(edit_dist_arr),
            "token_edit_distances": edit_dist_arr,
        }
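

# Usage sketch, not part of the original Space: assumes this module is saved locally as
# "token_edit_distance.py" (the filename is an assumption) and loaded as a local metric script.
if __name__ == "__main__":
    metric = evaluate.load("token_edit_distance.py")
    results = metric.compute(
        predictions=[[15, 4243], [100, 10009]],
        references=[[15, 4243], [100, 10008]],
    )
    print(results)
    # Expected: {'avg_token_edit_distance': 0.5, 'token_edit_distances': array([0., 1.])}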