Source code for aitoolbox.experiment.result_reporting.report_generator

import os
import csv
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.style as style
# Import-time side effect: switches matplotlib to the 'ggplot' style sheet, so the
# plots produced by the classes below are drawn with that style.
style.use('ggplot')


class TrainingHistoryPlotter:
    def __init__(self, experiment_results_local_path):
        """Plot the calculated performance metrics in the training history

        Args:
            experiment_results_local_path (str): path to the main experiment results folder on the local drive
        """
        self.experiment_results_local_path = experiment_results_local_path

    def generate_report(self, training_history, plots_folder_name='plots', file_format='png'):
        """Plot all the currently present performance result in the training history

        Every plot shows the progression of a single performance metric over the epochs.

        Args:
            training_history (aitoolbox.experiment.training_history.TrainingHistory): TrainLoop training history
            plots_folder_name (str): local dir name where the plots should be saved. For the 'pdf' format
                this is used as the name of the pdf file (without the extension) instead.
            file_format (str): output file format. Can be either 'png' for saving separate images or 'pdf' for combining
                all the plots into a single pdf file.

        Returns:
            list: list of saved plot paths. Every element is a two element list:
                [path inside the experiment results folder, local file path]

        Raises:
            ValueError: when ``file_format`` is neither 'png' nor 'pdf'
        """
        if file_format == 'png':
            plots_local_folder_path = os.path.join(self.experiment_results_local_path, plots_folder_name)
            # makedirs also creates missing parent folders and does not fail if the folder already exists
            os.makedirs(plots_local_folder_path, exist_ok=True)

            return self.plot_png(training_history, plots_local_folder_path, plots_folder_name)

        if file_format == 'pdf':
            return self.plot_pdf(training_history, self.experiment_results_local_path, plots_folder_name)

        raise ValueError(f"Not supported file_format: {file_format}. "
                         "Select one of the following: 'png' or 'pdf'.")

    def plot_png(self, training_history, plots_local_folder_path, plots_folder_name):
        """Save every metric plot as a separate png image

        Args:
            training_history (aitoolbox.experiment.training_history.TrainingHistory): TrainLoop training history
            plots_local_folder_path (str): existing local folder into which the images are written
            plots_folder_name (str): folder name used to build the path inside the experiment results folder

        Returns:
            list: list of [path inside the experiment results folder, local file path] for every saved image
        """
        plots_paths = []

        for metric_name, fig in self.generate_plots(training_history):
            file_name = f'{metric_name}.png'
            file_path = os.path.join(plots_local_folder_path, file_name)

            try:
                fig.savefig(file_path)
            finally:
                # Close this exact figure (not just whichever one is "current") so that figures
                # don't pile up in pyplot's registry, even when saving fails
                plt.close(fig)

            plots_paths.append([os.path.join(plots_folder_name, file_name), file_path])

        return plots_paths

    def plot_pdf(self, training_history, plots_local_folder_path, plots_file_name):
        """Save all the metric plots as pages of a single pdf file

        Args:
            training_history (aitoolbox.experiment.training_history.TrainingHistory): TrainLoop training history
            plots_local_folder_path (str): local folder into which the pdf file is written
            plots_file_name (str): name of the pdf file without the '.pdf' extension

        Returns:
            list: single element list holding [pdf file name, local pdf file path]
        """
        file_name = f'{plots_file_name}.pdf'
        file_path = os.path.join(plots_local_folder_path, file_name)

        with PdfPages(file_path) as pdf_pages:
            for _, fig in self.generate_plots(training_history):
                try:
                    pdf_pages.savefig(fig)
                finally:
                    # The page is already written into the pdf, release the figure
                    plt.close(fig)

        return [[file_name, file_path]]

    @staticmethod
    def generate_plots(training_history):
        """Lazily build the performance curve figure for every metric in the training history

        Metrics with fewer than two recorded results are skipped.

        Args:
            training_history (aitoolbox.experiment.training_history.TrainingHistory): TrainLoop training history

        Yields:
            tuple: (metric name, plot figure)
        """
        for metric_name, result_history in training_history.get_train_history_dict(flatten_dict=True).items():
            if len(result_history) > 1:
                fig = TrainingHistoryPlotter.plot_performance_curve(metric_name, result_history)
                yield metric_name, fig

    @staticmethod
    def plot_performance_curve(metric_name, result_history):
        """Plot the performance of a selected calculated metric over the epochs

        The caller is responsible for closing the returned figure.

        Args:
            metric_name (str or int): name of plotted metric
            result_history (list or np.array): results history for the selected metric

        Returns:
            plt.figure: plot figure
        """
        fig, ax = plt.subplots(figsize=(10, 8))

        # `marker` (singular) is forwarded to matplotlib and draws a point at every epoch.
        # seaborn's `markers` (plural) only applies when a `style` variable is given.
        # Passing `ax` explicitly avoids depending on pyplot's "current axes" state.
        sns.lineplot(x=list(range(len(result_history))), y=result_history,
                     marker='o', ax=ax)

        ax.set_xlabel("Epoch", size=10)
        ax.set_ylabel(metric_name, size=10)

        # Adding plot title and subtitles
        ax.text(s=metric_name, x=0.5, y=1.07, fontsize=16, weight='bold', ha='center', va='bottom',
                transform=ax.transAxes)
        ax.text(s=f'Max result: {max(result_history)} {metric_name} (at epoch {np.argmax(result_history)})',
                x=0.5, y=1.035, fontsize=8, alpha=0.75,
                ha='center', va='bottom', transform=ax.transAxes)
        ax.text(s=f'Min result: {min(result_history)} {metric_name} (at epoch {np.argmin(result_history)})',
                x=0.5, y=1.01, fontsize=8, alpha=0.75,
                ha='center', va='bottom', transform=ax.transAxes)

        return fig


class TrainingHistoryWriter:
    def __init__(self, experiment_results_local_path):
        """Write the calculated performance metrics in the training history into human-readable text file

        Args:
            experiment_results_local_path (str or None): path to the main experiment results folder on the local drive.
                It has to be set to an actual path before ``generate_report()`` is called.
        """
        self.experiment_results_local_path = experiment_results_local_path
        # Column order of the csv/tsv header written last; None until the first csv/tsv row is written
        self.metric_name_cols = None

    def generate_report(self, training_history, epoch, file_name, results_folder_name='', file_format='txt'):
        """Write all the currently present performance result in the training history into the text file

        The report is appended to the file, so repeated calls accumulate one record per call.

        Args:
            training_history (aitoolbox.experiment.training_history.TrainingHistory):
            epoch (int): current epoch
            file_name (str): output text file name
            results_folder_name (str or None): results folder path where the report file will be located.
                None is treated the same as an empty string, i.e. the main experiment results folder.
            file_format (str): output file format. Can be either 'txt' human-readable output or
                'tsv' for a tabular format or 'csv' for comma separated format.

        Returns:
            str, str: file name/path inside the experiment folder, local file_path

        Raises:
            ValueError: when ``file_format`` is not one of 'txt', 'tsv' or 'csv'
        """
        # Normalise up front so that None works for the folder creation as well as for the returned path
        if results_folder_name is None:
            results_folder_name = ''

        results_write_local_folder_path = os.path.join(self.experiment_results_local_path, results_folder_name)
        # makedirs also creates missing parent folders and does not fail if the folder already exists
        os.makedirs(results_write_local_folder_path, exist_ok=True)

        file_path = os.path.join(results_write_local_folder_path, file_name)

        if file_format == 'txt':
            self.write_txt(training_history, epoch, file_path)
        elif file_format == 'tsv':
            self.write_csv_tsv(training_history, epoch, file_path, delimiter='\t')
        elif file_format == 'csv':
            self.write_csv_tsv(training_history, epoch, file_path, delimiter=',')
        else:
            raise ValueError(f"Output format '{file_format}' is not supported. "
                             "Select one of the following: 'txt', 'tsv', 'csv'.")

        return os.path.join(results_folder_name, file_name), file_path

    @staticmethod
    def write_txt(training_history, epoch, file_path):
        """Append the latest value of every metric to the file as a human-readable block

        Args:
            training_history (aitoolbox.experiment.training_history.TrainingHistory): TrainLoop training history
            epoch (int): current epoch
            file_path (str): local path of the output text file
        """
        with open(file_path, 'a') as f:
            f.write('============================\n')
            f.write(f'Epoch: {epoch}\n')
            f.write('============================\n')
            for metric_name, result_history in training_history.get_train_history_dict(flatten_dict=True).items():
                f.write(f'{metric_name}:\t{result_history[-1]}\n')
            f.write('\n\n')

    def write_csv_tsv(self, training_history, epoch, file_path, delimiter):
        """Append the latest value of every metric to the file as a single delimited row

        The header row is written before the first data row written by this instance. When the set of
        metric names changes between calls, a 'NEW_METRICS_DETECTED' marker row followed by a new header
        row is written before the data row.

        Args:
            training_history (aitoolbox.experiment.training_history.TrainingHistory): TrainLoop training history
            epoch (int): current epoch
            file_path (str): local path of the output file
            delimiter (str): column delimiter, e.g. ',' for csv or a tab character for tsv
        """
        training_history_dict = training_history.get_train_history_dict(flatten_dict=True)
        current_metric_names = list(training_history_dict)

        # newline='' is required by the csv module, otherwise extra blank rows appear on Windows
        with open(file_path, 'a', newline='') as f:
            tsv_writer = csv.writer(f, delimiter=delimiter)

            if self.metric_name_cols is None:
                self.metric_name_cols = current_metric_names
                tsv_writer.writerow(['Epoch'] + self.metric_name_cols)
            elif set(current_metric_names) != set(self.metric_name_cols):
                # Set comparison ignores ordering and also works for metric names of mixed types
                self.metric_name_cols = current_metric_names
                tsv_writer.writerow(['NEW_METRICS_DETECTED'])
                tsv_writer.writerow(['Epoch'] + self.metric_name_cols)

            tsv_writer.writerow(
                [epoch] + [training_history_dict[metric_name][-1] for metric_name in self.metric_name_cols]
            )


class GradientPlotter:
    def __init__(self, experiment_grad_results_local_path):
        """Plot the gradient distributions for model's layers

        Args:
            experiment_grad_results_local_path (str): path to the main experiment results folder on the local drive
        """
        self.experiment_grad_results_local_path = experiment_grad_results_local_path

    def generate_report(self, model_layer_gradients, grad_plots_folder_name='grad_plots', file_format='png'):
        """Plot all the gradient distributions for the layers in the model

        Args:
            model_layer_gradients (list): list of model's gradients
            grad_plots_folder_name (str): name of the folder where gradient distribution plots will be saved.
                For the 'pdf' format this is used as the name of the pdf file (without the extension) instead.
            file_format (str): output file format. Can be either 'png' for saving separate images or 'pdf' for combining
                all the plots into a single pdf file.

        Returns:
            list: list of saved plot paths: [file_path_in_cloud_grad_results_dir, local_file_path]

        Raises:
            ValueError: when ``file_format`` is neither 'png' nor 'pdf'
        """
        if file_format == 'png':
            grad_plots_local_folder_path = os.path.join(self.experiment_grad_results_local_path,
                                                        grad_plots_folder_name)
            # makedirs also creates missing parent folders and does not fail if the folder already exists
            os.makedirs(grad_plots_local_folder_path, exist_ok=True)

            return self.plot_png(model_layer_gradients, grad_plots_local_folder_path, grad_plots_folder_name)

        if file_format == 'pdf':
            return self.plot_pdf(model_layer_gradients, self.experiment_grad_results_local_path,
                                 grad_plots_folder_name)

        raise ValueError(f"Not supported file_format: {file_format}. "
                         "Select one of the following: 'png' or 'pdf'.")

    def plot_png(self, model_layer_gradients, grad_plots_local_folder_path, plots_folder_name):
        """Save the gradient distribution plot of every layer as a separate png image

        Args:
            model_layer_gradients (list): list of model's gradients
            grad_plots_local_folder_path (str): existing local folder into which the images are written
            plots_folder_name (str): folder name used to build the first element of every returned path pair

        Returns:
            list: list of [path inside the grad results folder, local file path] for every saved image
        """
        plots_paths = []

        for layer_name, fig in self.generate_dist_plots(model_layer_gradients):
            file_name = f'layer_{layer_name}.png'
            file_path = os.path.join(grad_plots_local_folder_path, file_name)

            try:
                fig.savefig(file_path)
            finally:
                # Close this exact figure (not just whichever one is "current") so that figures
                # don't pile up in pyplot's registry, even when saving fails
                plt.close(fig)

            plots_paths.append([os.path.join(plots_folder_name, file_name), file_path])

        return plots_paths

    def plot_pdf(self, model_layer_gradients, plots_local_folder_path, plots_file_name):
        """Save the gradient distribution plots of all the layers as pages of a single pdf file

        Args:
            model_layer_gradients (list): list of model's gradients
            plots_local_folder_path (str): local folder into which the pdf file is written
            plots_file_name (str): name of the pdf file without the '.pdf' extension

        Returns:
            list: single element list holding [pdf file name, local pdf file path]
        """
        file_name = f'{plots_file_name}.pdf'
        file_path = os.path.join(plots_local_folder_path, file_name)

        with PdfPages(file_path) as pdf_pages:
            for _, fig in self.generate_dist_plots(model_layer_gradients):
                try:
                    pdf_pages.savefig(fig)
                finally:
                    # The page is already written into the pdf, release the figure
                    plt.close(fig)

        return [[file_name, file_path]]

    @staticmethod
    def generate_dist_plots(model_layer_gradients, layer_names=None):
        """Lazily build the gradient distribution figure for every layer which has gradients

        Layers whose gradients are None are reported on stdout and skipped.

        Args:
            model_layer_gradients (list): list of model's gradients
            layer_names (list or None): names of the layers aligned with ``model_layer_gradients``.
                When None, the position of the layer in the list is used as its name.

        Yields:
            tuple: (layer name or index, plot figure)
        """
        for i, gradients in enumerate(model_layer_gradients):
            layer_name = i if layer_names is None else layer_names[i]

            if gradients is None:
                print(f'Layer {layer_name} grads are None')
                continue

            yield layer_name, GradientPlotter.plot_gradient_distribution(gradients, layer_name)

    @staticmethod
    def plot_gradient_distribution(gradients, layer_name):
        """Plot the distribution of the single layer's gradients

        The caller is responsible for saving and closing the returned figure.

        Args:
            gradients (list or np.array): a flattened list  of gradients from a single layer
            layer_name (str or int): name or index of the layer

        Returns:
            plt.figure: plot figure
        """
        fig, ax = plt.subplots(figsize=(10, 8))

        # Passing `ax` explicitly avoids depending on pyplot's "current axes" state.
        # NOTE(review): distplot is deprecated in recent seaborn releases; migrating to
        # histplot(..., kde=True, stat='density') needs seaborn >= 0.11 - confirm the supported version first.
        sns.distplot(gradients, ax=ax)
        ax.set_xlabel("Gradient magnitude", size=10)

        # Adding plot title and subtitles
        ax.text(s=f'Gradient distribution for layer {layer_name}',
                x=0.5, y=1.07, fontsize=16, weight='bold', ha='center', va='bottom', transform=ax.transAxes)
        ax.text(s=f'Mean: {np.mean(gradients)}',
                x=0.5, y=1.035, fontsize=8, alpha=0.75, ha='center', va='bottom', transform=ax.transAxes)
        ax.text(s=f'Std: {np.std(gradients)}',
                x=0.5, y=1.01, fontsize=8, alpha=0.75,
                ha='center', va='bottom', transform=ax.transAxes)

        return fig