Source code for aitoolbox.nlp.experiment_evaluation.NLP_result_package

import os

from aitoolbox.utils import dict_util
from aitoolbox.experiment.local_save.local_results_save import BaseLocalResultsSaver
from aitoolbox.experiment.result_package.abstract_result_packages import AbstractResultPackage
from aitoolbox.experiment.core_metrics.classification import AccuracyMetric
from aitoolbox.nlp.experiment_evaluation.NLP_metrics import ROUGEMetric, ROUGEPerlMetric, \
    ExactMatchTextMetric, F1TextMetric, \
    BLEUSentenceScoreMetric, BLEUCorpusScoreMetric, BLEUScoreStrTorchNLPMetric, PerplexityMetric, \
    GLUEMetric, XNLIMetric
from aitoolbox.nlp.experiment_evaluation.attention_heatmap import AttentionHeatMap


class QuestionAnswerResultPackage(AbstractResultPackage):
    def __init__(self, paragraph_text_tokens, target_actual_text=None, output_text_dir=None,
                 use_perl_rouge=False, flatten_result_dict=False, strict_content_check=False, **kwargs):
        """Question Answering task performance evaluation result package

        Args:
            paragraph_text_tokens (list):
            target_actual_text (list or None):
            output_text_dir (str or None):
            use_perl_rouge (bool):
            flatten_result_dict (bool):
            strict_content_check (bool):
            **kwargs (dict):
        """
        if use_perl_rouge is True and output_text_dir is None:
            raise ValueError('When using the perl based ROUGE definition the output_text_dir path must be given.')
        if target_actual_text is not None:
            if len(paragraph_text_tokens) != len(target_actual_text):
                raise ValueError('paragraph_text_tokens size not the same as target_actual_text.')

        AbstractResultPackage.__init__(self, pkg_name='QuestionAnswerResult',
                                       strict_content_check=strict_content_check, **kwargs)
        # todo: check if this is efficient
        self.paragraph_text_tokens = [[str(w) for w in paragraph] for paragraph in paragraph_text_tokens]
        self.target_actual_text = target_actual_text
        self.use_target_actual_text = target_actual_text is not None
        self.output_text_dir = os.path.expanduser(output_text_dir) if output_text_dir else None
        self.use_perl_rouge = use_perl_rouge
        self.flatten_result_dict = flatten_result_dict
    def prepare_results_dict(self):
        """

        Returns:
            dict: QA evaluation results combining ROUGE, Exact Match and F1 text metrics

        """
        y_span_start_true = self.y_true[:, 0]
        y_span_start_predicted = self.y_predicted[:, 0]
        y_span_end_true = self.y_true[:, 1]
        y_span_end_predicted = self.y_predicted[:, 1]

        if self.use_target_actual_text:
            true_text = self.target_actual_text
        else:
            true_text = [paragraph_text[start_span:end_span + 1]
                         for start_span, end_span, paragraph_text
                         in zip(y_span_start_true.astype('int'), y_span_end_true.astype('int'),
                                self.paragraph_text_tokens)]

        pred_text = [paragraph_text[start_span:end_span + 1]
                     for start_span, end_span, paragraph_text
                     in zip(y_span_start_predicted.astype('int'), y_span_end_predicted.astype('int'),
                            self.paragraph_text_tokens)]

        if not self.use_perl_rouge:
            rouge_metric = ROUGEMetric(true_text, pred_text, target_actual_text=self.use_target_actual_text,
                                       output_text_dir=self.output_text_dir)
        else:
            rouge_metric = ROUGEPerlMetric(true_text, pred_text, self.output_text_dir,
                                           target_actual_text=self.use_target_actual_text)

        em_metric = ExactMatchTextMetric(true_text, pred_text, target_actual_text=self.use_target_actual_text)
        f1_metric = F1TextMetric(true_text, pred_text, target_actual_text=self.use_target_actual_text)

        results_dict = rouge_metric + em_metric + f1_metric

        if self.flatten_result_dict:
            results_dict = dict_util.flatten_dict(results_dict)

        return results_dict
    def set_experiment_dir_path_for_additional_results(self, project_name, experiment_name, experiment_timestamp,
                                                       local_model_result_folder_path):
        if self.output_text_dir is not None:
            _, experiment_dir_path, _ = \
                BaseLocalResultsSaver.get_experiment_local_results_folder_paths(project_name, experiment_name,
                                                                                experiment_timestamp,
                                                                                local_model_result_folder_path)
            self.output_text_dir = os.path.join(experiment_dir_path, self.output_text_dir)
    def list_additional_results_dump_paths(self):
        if self.output_text_dir is not None:
            zip_path = self.zip_additional_results_dump(self.output_text_dir, self.output_text_dir)
            zip_file_name = os.path.basename(zip_path)
            return [[zip_file_name, zip_path]]
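
# Illustrative usage sketch, not part of the original module: it shows how QuestionAnswerResultPackage
# might be constructed and which span layout prepare_results_dict() above expects. The paragraphs and
# span arrays are made-up placeholder data, and the call that feeds y_true / y_predicted into the
# package belongs to the AbstractResultPackage machinery which is not shown in this file.
#
#     import numpy as np
#
#     paragraphs = [['The', 'cat', 'sat', 'on', 'the', 'mat'],
#                   ['Paris', 'is', 'the', 'capital', 'of', 'France']]
#     qa_pkg = QuestionAnswerResultPackage(paragraph_text_tokens=paragraphs, flatten_result_dict=True)
#
#     # y_true / y_predicted are 2-column arrays: column 0 is the answer span start index,
#     # column 1 the answer span end index (inclusive), exactly as sliced in prepare_results_dict().
#     # y_true = np.array([[1, 2], [0, 0]])
#     # y_predicted = np.array([[1, 2], [0, 3]])
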
class QuestionAnswerSpanClassificationResultPackage(AbstractResultPackage):
    def __init__(self, strict_content_check=False, **kwargs):
        """Extractive Question Answering task performance evaluation result package

        Evaluates the classification of the correct answer start and end points.

        Args:
            strict_content_check (bool):
            **kwargs (dict):
        """
        AbstractResultPackage.__init__(self, pkg_name='QuestionAnswerSpanClassificationResult',
                                       strict_content_check=strict_content_check, **kwargs)
    def prepare_results_dict(self):
        """

        Available general data:

            y_span_start_true (numpy.array or list):
            y_span_start_predicted (numpy.array or list):
            y_span_end_true (numpy.array or list):
            y_span_end_predicted (numpy.array or list):
            strict_content_check (bool):
            **kwargs (dict):

        Returns:
            dict:

        """
        y_span_start_true = self.y_true[:, 0]
        y_span_start_predicted = self.y_predicted[:, 0]
        y_span_end_true = self.y_true[:, 1]
        y_span_end_predicted = self.y_predicted[:, 1]

        span_start_accuracy = AccuracyMetric(y_span_start_true, y_span_start_predicted, positive_class_thresh=None)
        span_start_accuracy.metric_name += '_span_start'
        span_end_accuracy = AccuracyMetric(y_span_end_true, y_span_end_predicted, positive_class_thresh=None)
        span_end_accuracy.metric_name += '_span_end'

        return span_start_accuracy + span_end_accuracy
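
# Illustrative sketch, not part of the original module: the span classification package compares the
# predicted start/end indices directly, so a minimal (assumed) setup only needs 2-column index arrays.
# The arrays below are placeholder data; how they get handed to the package is part of the
# AbstractResultPackage API that is not shown here.
#
#     import numpy as np
#
#     span_pkg = QuestionAnswerSpanClassificationResultPackage()
#     # y_true = np.array([[3, 5], [0, 2]])        # [start, end] per example
#     # y_predicted = np.array([[3, 5], [1, 2]])
#     # prepare_results_dict() would then report two accuracy metrics whose names are suffixed
#     # with '_span_start' and '_span_end'.
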
class TextSummarizationResultPackage(AbstractResultPackage):
    def __init__(self, strict_content_check=False, **kwargs):
        """Text summarization task performance evaluation package

        Args:
            strict_content_check (bool):
            **kwargs (dict):
        """
        AbstractResultPackage.__init__(self, pkg_name='TextSummarizationResult',
                                       strict_content_check=strict_content_check, **kwargs)
    def prepare_results_dict(self):
        """

        Returns:
            dict:

        """
        # rouge_result = ROUGEMetric(self.y_true, self.y_predicted).get_metric_dict()
        raise NotImplementedError
class MachineTranslationResultPackage(AbstractResultPackage):
    def __init__(self, target_vocab, source_vocab=None, source_sents=None, output_text_dir=None,
                 output_attn_heatmap_dir=None, strict_content_check=False, **kwargs):
        """Machine Translation task performance evaluation package

        Args:
            target_vocab (aitoolbox.nlp.core.vocabulary.Vocabulary):
            source_vocab (aitoolbox.nlp.core.vocabulary.Vocabulary or None):
            source_sents (list or None):
            output_text_dir (str or None):
            output_attn_heatmap_dir (str or None):
            strict_content_check (bool):
            **kwargs (dict):
        """
        if output_text_dir is not None and (source_vocab is None or source_sents is None):
            raise ValueError(f'output_text_dir is not None which initiates the text results dump on disk. '
                             f'However, source_vocab or source_sents are not provided. '
                             f'To save text on disk these have to be supplied.\nCurrently:\n'
                             f'output_text_dir: {output_text_dir}\n'
                             f'source_vocab: {source_vocab}\n'
                             f'source_sents: {source_sents}\n')

        AbstractResultPackage.__init__(self, pkg_name='MachineTranslationResult',
                                       strict_content_check=strict_content_check, np_array=False, **kwargs)
        self.requires_loss = True
        self.target_vocab = target_vocab
        self.source_vocab = source_vocab
        self.source_sents = source_sents
        self.output_text_dir = output_text_dir
        self.output_attn_heatmap_dir = output_attn_heatmap_dir
        self.attention_matrices = None
        self.y_true_text = None
        self.y_predicted_text = None
    def prepare_results_dict(self):
        """

        Returns:
            dict: result dict which is a combination of different BLEU metric calculations, perplexity
                and possibly saved attention heatmap plot files

        """
        self.y_true_text = [self.target_vocab.convert_idx_sent2sent(sent, rm_default_tokens=True)
                            for sent in self.y_true]
        self.y_predicted_text = [self.target_vocab.convert_idx_sent2sent(sent, rm_default_tokens=True)
                                 for sent in self.y_predicted]

        bleu_avg_sent = BLEUSentenceScoreMetric(self.y_true_text, self.y_predicted_text,
                                                self.source_sents, self.output_text_dir)
        bleu_corpus_result = BLEUCorpusScoreMetric(self.y_true_text, self.y_predicted_text)
        # bleu_perl_result = BLEUScoreStrTorchNLPMetric(self.y_true_text, self.y_predicted_text)
        perplexity_result = PerplexityMetric(self.additional_results['additional_results']['loss'])

        results_dict = bleu_corpus_result + bleu_avg_sent + perplexity_result

        # Don't include TrainLoop objects inside the package - it makes it useful only for PyTorch,
        # not other frameworks
        if self.output_attn_heatmap_dir is not None:
            # Get this from **kwargs or find another way of getting attention matrices
            self.attention_matrices = self.additional_results['additional_results']['attention_matrices']

            source_sent_idx_tokens = self.additional_results['additional_results']['source_sent_text']
            source_sent_text = [self.source_vocab.convert_idx_sent2sent(sent, rm_default_tokens=False)
                                for sent in source_sent_idx_tokens]

            attn_heatmap_metric = AttentionHeatMap(self.attention_matrices, source_sent_text,
                                                   self.y_predicted_text, self.output_attn_heatmap_dir)

            results_dict = results_dict + attn_heatmap_metric

        return results_dict
    def set_experiment_dir_path_for_additional_results(self, project_name, experiment_name, experiment_timestamp,
                                                       local_model_result_folder_path):
        if self.output_text_dir is not None:
            _, experiment_dir_path, _ = \
                BaseLocalResultsSaver.get_experiment_local_results_folder_paths(project_name, experiment_name,
                                                                                experiment_timestamp,
                                                                                local_model_result_folder_path)
            self.output_text_dir = os.path.join(experiment_dir_path, self.output_text_dir)

        if self.output_attn_heatmap_dir is not None:
            _, experiment_dir_path, _ = \
                BaseLocalResultsSaver.get_experiment_local_results_folder_paths(project_name, experiment_name,
                                                                                experiment_timestamp,
                                                                                local_model_result_folder_path)
            self.output_attn_heatmap_dir = os.path.join(experiment_dir_path, self.output_attn_heatmap_dir)
    def list_additional_results_dump_paths(self):
        additional_results_paths = []

        if self.output_text_dir is not None:
            zip_path = self.zip_additional_results_dump(self.output_text_dir, self.output_text_dir)
            zip_file_name = os.path.basename(zip_path)
            additional_results_paths.append([zip_file_name, zip_path])

        if self.output_attn_heatmap_dir is not None:
            zip_path = self.zip_additional_results_dump(self.output_attn_heatmap_dir, self.output_attn_heatmap_dir)
            zip_file_name = os.path.basename(zip_path)
            additional_results_paths.append([zip_file_name, zip_path])

        if len(additional_results_paths) > 0:
            return additional_results_paths
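
# Illustrative sketch, not part of the original module: a minimal way MachineTranslationResultPackage
# might be constructed. target_vocab has to be an aitoolbox.nlp.core.vocabulary.Vocabulary providing
# convert_idx_sent2sent() (used in prepare_results_dict() above); the constructor arguments shown for
# it are assumptions, and the 'loss' entry under additional_results is needed because requires_loss
# is set to True.
#
#     from aitoolbox.nlp.core.vocabulary import Vocabulary
#
#     target_vocab = Vocabulary('target_lang')   # assumed constructor arguments
#     mt_pkg = MachineTranslationResultPackage(target_vocab=target_vocab)
#
#     # y_true / y_predicted are expected as lists of target-vocabulary index sequences which get
#     # converted back to text before the BLEU and perplexity metrics are computed.
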
class GLUEResultPackage(AbstractResultPackage):
    def __init__(self, task_name):
        """GLUE task result package

        Wrapper around HF Transformers ``glue_compute_metrics()``

        Args:
            task_name (str): name of the GLUE task
        """
        super().__init__('GLUE benchmark')
        self.task_name = task_name
    def prepare_results_dict(self):
        glue_result = GLUEMetric(self.y_true, self.y_predicted, self.task_name).get_metric_dict()
        glue_result = dict_util.flatten_dict(glue_result)
        return glue_result
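
# Illustrative sketch, not part of the original module: GLUEResultPackage only needs the GLUE task name,
# which is passed through to HF Transformers' glue_compute_metrics() via GLUEMetric. The task name below
# follows HF Transformers' GLUE task naming (e.g. 'mrpc') and is an assumed example value.
#
#     glue_pkg = GLUEResultPackage(task_name='mrpc')
#     # y_true / y_predicted are the per-example label arrays handed to the package by the surrounding
#     # experiment tracking machinery before prepare_results_dict() is called.
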
class XNLIResultPackage(AbstractResultPackage):
    def __init__(self):
        """XNLI task result package

        Wrapper around HF Transformers ``xnli_compute_metrics()``
        """
        super().__init__('XNLI benchmark')
    def prepare_results_dict(self):
        xnli_result = XNLIMetric(self.y_true, self.y_predicted).get_metric_dict()
        xnli_result = dict_util.flatten_dict(xnli_result)
        return xnli_result
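
# Illustrative sketch, not part of the original module: XNLIResultPackage takes no task argument and
# simply wraps XNLIMetric around the true and predicted labels. The label lists below are placeholder
# assumptions; in practice they are supplied to the package by the surrounding evaluation flow.
#
#     xnli_pkg = XNLIResultPackage()
#     # y_true = [0, 1, 2, 1]          # gold NLI labels
#     # y_predicted = [0, 1, 2, 2]     # model predictions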