Source code for janggu.evaluation

"""Model evaluation utilities.

This module contains classes and methods for simplifying
model evaluation.
"""

import datetime
import logging
import os

import numpy
from sklearn.metrics import average_precision_score
from sklearn.metrics import explained_variance_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

from janggu.utils import ExportJson
from janggu.utils import ExportScorePlot
from janggu.utils import ExportTsv
from janggu.utils import _to_list


def _dimension_match(kerasmodel, data, layertype):
    """Check if layer dimensions match.
    The function checks whether the keras model is compatible with
    the supplied datasets.

    Parameters
    ----------
    kerasmodel : :class:`keras.Model`
        Object of type keras.Model.
    data : Dataset or list(Dataset)
        Dataset to check compatibility for.
    layertype : str
        Either 'input_layers' or 'output_layers'.

    Returns
    -------
    boolean :
        True if the keras model is compatible with the data,
        False otherwise.
    """
    if data is None and layertype == 'output_layers':
        return True

    tmpdata = _to_list(data)

    if len(kerasmodel.get_config()[layertype]) != len(tmpdata):
        return False
    # Check whether the layer dims match between model spec and data
    for datum in tmpdata:
        if datum.name not in [el[0] for el in
                              kerasmodel.get_config()[layertype]]:
            # If the layer name is not present we end up here
            return False
        layer = kerasmodel.get_layer(datum.name)
        oshape = layer.output_shape
        if isinstance(oshape, list):
            # this case is required for keras 2.4.3 and tf 2
            # which returns a list of tuples
            oshape = oshape[0]
        if not oshape[1:] == datum.shape[1:]:
            # if the layer name is present but the dimensions
            # are incorrect, we end up here.
            return False
    return True


def _reshape(data, percondition):
    """Reshape the dataset to make it compatible with the
    evaluation method.

    Parameters
    ----------
    data : dict(Dataset)
        A dictionary of datasets
    percondition : boolean
        Indicates whether to keep the condition (last) dimension or to
        flatten across all conditions.
    """

    if isinstance(data, dict):
        if percondition:
            # currently this only works for channels_last data
            data = {k: data[k][:].reshape(
                (int(numpy.prod(data[k].shape[:-1])),
                 data[k].shape[-1])) for k in data}
        else:
            data = {k: data[k][:].reshape(
                (numpy.prod(data[k].shape[:]), 1)) for k in data}
    else:
        raise ValueError('Data must be a dict not {}'.format(type(data)))

    return data

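# A short illustration of what _reshape does (the shapes below are made up
# for the sketch): for an array of shape (100, 200, 1, 4), percondition=True
# yields a (100 * 200 * 1, 4) array with one column per condition, whereas
# percondition=False flattens everything into a (100 * 200 * 1 * 4, 1) array.
#
#     >>> fake = {'peaks': numpy.zeros((100, 200, 1, 4))}
#     >>> _reshape(fake, percondition=True)['peaks'].shape
#     (20000, 4)
#     >>> _reshape(fake, percondition=False)['peaks'].shape
#     (80000, 1)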

class Scorer(object):
    """Scorer class.

    This class implements the callback interface that is used with
    :code:`Janggu.evaluate` and :code:`Janggu.predict`.
    The scorer maintains a scoring callable and an exporter callable
    which take care of determining the desired score and of writing
    the result into a desired file, e.g. json, tsv or a figure,
    respectively.

    Parameters
    ----------
    name : str
        Name of the score to be performed.
    score_fct : None or callable
        Callable that is invoked for scoring. This callable must satisfy
        the signature :code:`fct(y_true, y_pred)` if used with
        :code:`Janggu.evaluate` and :code:`fct(y_pred)` if used with
        :code:`Janggu.predict`. The returned score should be compatible
        with the exporter.
    conditions : list(str) or None
        List of strings describing the condition dimension of the dataset
        that is processed. If None, the conditions are extracted from the
        y_true Dataset, if available. Otherwise, the conditions are integers
        ranging from zero to :code:`len(conditions) - 1`.
    exporter : callable
        Exporter function that is used to export the scoring results in the
        desired manner, e.g. as json or tsv file. This function must satisfy
        the signature :code:`fct(output_path, filename_prefix, results)`.
    immediate_export : boolean
        If set to True, the exporter function will be invoked immediately
        after the evaluation of the dataset. If set to False, the results
        are kept in memory, which allows the results to be exported as a
        collection rather than individually.
    percondition : boolean
        Indicates whether the evaluation should be performed per condition
        or across all conditions. The former determines a score for each
        output condition, while the latter first flattens the array and
        then scores across conditions. Default: percondition=True.
    subdir : str
        Name of the subdirectory to store the output in. Default: None
        means the results are stored in the 'evaluation' subdirectory.
    """

    def __init__(self, name, score_fct=None, conditions=None,
                 exporter=ExportJson(), immediate_export=True,
                 percondition=True, subdir=None):

        self.score_name = name
        self.score_fct = score_fct
        self.percondition = percondition
        self.logger = logging.getLogger('scorer')
        self.results = dict()
        self._exporter = exporter
        self.immediate_export = immediate_export
        self.conditions = conditions
        if subdir is None:
            subdir = 'evaluation'
        self.subdir = subdir
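
    # A minimal construction sketch (the metric choice here is arbitrary):
    # a Scorer that computes the area under the ROC curve per condition
    # and writes the result to a tsv file.
    #
    #     >>> from sklearn.metrics import roc_auc_score
    #     >>> from janggu.utils import ExportTsv
    #     >>> auc_scorer = Scorer('auc', roc_auc_score, exporter=ExportTsv())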

    def export(self, path, collection_name, datatags=None):
        """Exporting of the results.

        When calling export, the results which have been collected in
        self.results by using the score method are written to disk by
        invoking the supplied exporter function.

        Parameters
        ----------
        path : str
            Output directory.
        collection_name : str
            Subdirectory in which the results should be stored,
            e.g. the model name.
        datatags : list(str) or None
            Optional tags describing the dataset, e.g. 'training_set'.
            Default: None.
        """
        output_path = os.path.join(path, collection_name)
        if datatags is not None:
            output_path = os.path.join(output_path, '-'.join(datatags))
        if not os.path.exists(output_path):
            os.makedirs(output_path)

        if self.results:
            # if there are some results, export them
            self.logger.info(' '.join(('exporting', self.score_name,
                                       'to', output_path)))
            self._exporter(output_path, self.score_name, self.results)
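
    # Export is normally triggered automatically after scoring when
    # immediate_export=True. Invoked manually, a sketch might look like this
    # (the paths below are made up):
    #
    #     >>> auc_scorer.export('<results-dir>', 'mymodel', datatags=['test'])
    #
    # which writes the collected results to '<results-dir>/mymodel/test/'.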

    def score(self, model, predicted, outputs=None, datatags=None):
        """Scoring of the predictions relative to the true outputs.

        When calling score, the provided score_fct is applied for each
        layer and condition separately. The resulting scores are maintained
        in a dict whose keys are built from the model name, layer name and
        condition name, and whose values are dicts of the form:
        :code:`{'date': <currenttime>, 'value': derived_score}`.

        Parameters
        ----------
        model : :class:`Janggu`
            A Janggu object representing the current model.
        predicted : dict{name: np.array}
            Predicted outputs.
        outputs : dict{name: Dataset} or None
            True output labels. When the Scorer is used with
            :code:`Janggu.evaluate`, this argument is present. With
            :code:`Janggu.predict`, it is absent.
        datatags : list(str) or None
            Optional tags describing the dataset, e.g. 'test_set'.
        """
        if not datatags:
            datatags = []

        if outputs is not None:
            _out = _reshape(outputs, self.percondition)
        _pre = _reshape(predicted, self.percondition)

        self.logger.info(' '.join(('scoring:', self.score_name)))

        score_fct = self.score_fct
        if score_fct is None and outputs is not None:
            raise ValueError('Scorer: a score_fct must be supplied '
                             'when outputs are provided.')

        if score_fct is None:
            def _dummy(value):
                return value
            score_fct = _dummy

        for layername in model.get_config()['output_layers']:

            for idx in range(_pre[layername[0]].shape[-1]):
                if outputs is None:
                    score = score_fct(_pre[layername[0]][:, idx])
                else:
                    score = score_fct(_out[layername[0]][:, idx],
                                      _pre[layername[0]][:, idx])

                if not self.percondition:
                    condition = 'across'
                elif self.conditions is not None and \
                        len(self.conditions) == _pre[layername[0]].shape[-1]:
                    # conditions were supplied manually
                    condition = self.conditions[idx]
                elif outputs is not None and \
                        hasattr(outputs[layername[0]], "conditions"):
                    # conditions are extracted from the outputs dataset
                    condition = outputs[layername[0]].conditions[idx]
                else:
                    # no conditions present, just number them.
                    condition = str(idx)

                try:
                    iter(score)
                except TypeError:
                    # if the score is a scalar value, we write it into
                    # the log file.
                    self.logger.info(' '.join((self.score_name, model.name,
                                               layername[0], condition,
                                               ":", str(score))))

                key = (condition,)
                if len(model.get_config()['output_layers']) > 1:
                    key = (layername[0],) + key
                if not self.immediate_export:
                    key = (model.name,) + key

                self.results[key] = \
                    {'date': str(datetime.datetime.utcnow()),
                     'value': score}

        if self.immediate_export:
            # export directly if required
            output_dir = os.path.join(model.outputdir, self.subdir)
            self.export(output_dir, model.name, datatags)

            # reset the results
            self.results = {}
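

# In practice, Scorer.score is not called directly; a Scorer is handed to
# Janggu.evaluate or Janggu.predict, which invokes it for each output layer.
# A rough sketch (DNA and LABELS stand for arbitrary janggu Datasets, model
# for a trained Janggu model, and the callbacks argument is assumed to accept
# Scorer objects):
#
#     >>> model.evaluate(DNA, LABELS,
#     ...                callbacks=[Scorer('auc', roc_auc_score)])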


# some standard evaluations are provided directly
# evaluation metrics from sklearn.metrics

def wrap_roc_(y_true, y_pred):
    """Helper function to determine the ROC."""
    fpr, tpr, _ = roc_curve(y_true, y_pred)
    aux = str('({:.2%})'.format(roc_auc_score(y_true, y_pred)))
    return fpr, tpr, aux


def wrap_prc_(y_true, y_pred):
    """Helper function to determine the PRC."""
    precision, recall, _ = precision_recall_curve(y_true, y_pred)
    aux = str('({:.2%})'.format(average_precision_score(y_true, y_pred)))
    return recall, precision, aux


def wrap_cor_(y_true, y_pred):
    """Helper function to determine Pearson's correlation coefficient."""
    return numpy.corrcoef(y_true, y_pred)[0, 1]


def get_scorer(scorer):
    """Function maps string names to the Scorer objects.

    This function takes a scorer by name or a Scorer object
    and returns an instantiation of a Scorer object.
    """
    if isinstance(scorer, Scorer):
        pass
    elif scorer in ['ROC', 'roc']:
        scorer = Scorer(scorer, wrap_roc_,
                        exporter=ExportScorePlot(xlabel='FPR', ylabel='TPR'))
    elif scorer in ['PRC', 'prc']:
        scorer = Scorer(scorer, wrap_prc_,
                        exporter=ExportScorePlot(xlabel='Recall',
                                                 ylabel='Precision'))
    elif scorer in ['auc', 'AUC', 'auROC', 'auroc']:
        scorer = Scorer(scorer, roc_auc_score, exporter=ExportTsv())
    elif scorer in ['auprc', 'auPRC', 'ap', 'AP']:
        scorer = Scorer(scorer, average_precision_score, exporter=ExportTsv())
    elif scorer in ['cor', 'pearson']:
        scorer = Scorer(scorer, wrap_cor_, exporter=ExportTsv())
    elif scorer in ['var_explained']:
        scorer = Scorer(scorer, explained_variance_score, exporter=ExportTsv())
    elif scorer in ['mse', 'MSE']:
        scorer = Scorer(scorer, mean_squared_error, exporter=ExportTsv())
    elif scorer in ['mae', 'MAE']:
        scorer = Scorer(scorer, mean_absolute_error, exporter=ExportTsv())
    else:
        raise ValueError("scoring callback {} unknown.".format(scorer))
    return scorer
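

# A brief usage sketch of get_scorer: string names are mapped to
# preconfigured Scorer objects, while Scorer instances are passed through
# unchanged.
#
#     >>> get_scorer('auc')    # Scorer wrapping roc_auc_score with ExportTsv
#     >>> get_scorer('cor')    # Scorer wrapping Pearson's correlation
#     >>> get_scorer(Scorer('custom', wrap_cor_))  # returned as-is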