from aitoolbox.experiment.result_package.abstract_result_packages import AbstractResultPackage


class HFEvaluateResultPackage(AbstractResultPackage):
    def __init__(self, hf_evaluate_metric, use_models_additional_results=True, **kwargs):
        """HuggingFace Evaluate Metrics Result Package

        Result package wrapping around the evaluation metrics provided in the HuggingFace Evaluate package.

        All the metric result names will have '_HFEvaluate' appended at the end to help distinguish them.

        GitHub: https://github.com/huggingface/evaluate

        More info on how to use the metrics: https://huggingface.co/docs/evaluate/index

        Args:
            hf_evaluate_metric (evaluate.EvaluationModule): HF Evaluate metric to be used by the result package
            use_models_additional_results (bool): whether the additional results from the model
                (in addition to predictions and references) normally returned from the get_predictions() function
                should be added as additional input to the HF Evaluate metric when computing the evaluation
            **kwargs: additional parameters or inputs to the HF Evaluate metric being calculated. These are generally
                inputs that are already available before making model predictions and thus don't need to be gathered
                from the train/prediction loop.
        """
        AbstractResultPackage.__init__(self, pkg_name='HuggingFace Evaluate metrics', **kwargs)
        self.metric = hf_evaluate_metric
        self.use_models_additional_results = use_models_additional_results

    def prepare_results_dict(self):
        # Static metric inputs provided via **kwargs at package creation time
        additional_metric_inputs = self.package_metadata

        if self.use_models_additional_results:
            # Merge in the additional results the model returned from get_predictions()
            # during the train/prediction loop
            model_additional_results = self.additional_results['additional_results']
            additional_metric_inputs = {**additional_metric_inputs, **model_additional_results}

        metric_result = self.metric.compute(
            references=self.y_true, predictions=self.y_predicted,
            **additional_metric_inputs
        )

        # Append the '_HFEvaluate' suffix to distinguish these metric names from other result packages
        if isinstance(metric_result, dict):
            metric_result = {f'{k}_HFEvaluate': v for k, v in metric_result.items()}

        return metric_result
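

# ---------------------------------------------------------------------------
# Minimal usage sketch, not part of the library source. It assumes the
# HuggingFace `evaluate` package is installed; the 'accuracy' metric and the
# use_models_additional_results=False setting are only example choices. In
# real use the surrounding aitoolbox train/prediction loop supplies the
# predictions and references before prepare_results_dict() is called.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import evaluate

    # Load any HF Evaluate metric and wrap it in the result package
    accuracy_metric = evaluate.load('accuracy')
    result_package = HFEvaluateResultPackage(accuracy_metric, use_models_additional_results=False)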