# Source code for secure_learning.metrics

"""
Provides classification and regression metrics.
"""

import numpy as np

from tno.mpc.mpyc.secure_learning.exceptions import SecureLearnValueError
from tno.mpc.mpyc.secure_learning.utils import NumpyObjectArray, SecNumTypesTV, Vector


# Classification metrics

[docs]
def accuracy_score(
    y_real: Vector[SecNumTypesTV],
    y_pred: Vector[SecNumTypesTV],
) -> SecNumTypesTV:
    """
    Computes the fraction of labels that were predicted correctly, i.e.
    the number of matching (real, predicted) pairs divided by the number
    of real labels.

    :param y_real: Real labels (-/+ 1)
    :param y_pred: Predicted labels (-/+ 1)
    :return: Accuracy of given predictions
    """
    predicted = np.asarray(y_pred)
    actual = np.asarray(y_real)
    # For -/+ 1 labels each entry is +1 (true positive), -1 (true negative)
    # or 0 (any misclassification); its square therefore flags a correct
    # prediction, and the inner product counts them.
    half_sum = (predicted + actual) / 2
    n_correct = np.inner(half_sum, half_sum)  # type: ignore[no-untyped-call]
    return n_correct / len(y_real)




[docs]
def precision_score(
    y_real: Vector[SecNumTypesTV],
    y_pred: Vector[SecNumTypesTV],
    pos_label: int,
) -> SecNumTypesTV:
    """
    Computes the precision for category pos_label: the number of samples
    that are both predicted as and really are pos_label, divided by the
    number of samples predicted as pos_label. In other words: given that
    pos_label was predicted, how often is that prediction right.

    :param y_real: Real labels (-/+ 1)
    :param y_pred: Predicted labels (-/+ 1)
    :param pos_label: Label (value) to compute precision of
    :raise SecureLearnValueError: pos_label value must be either -1 or 1
    :return: Precision of given predictions
    """
    if pos_label not in (-1, 1):
        raise SecureLearnValueError(
            f"Expected pos_label in [-1, 1], but received {pos_label}."
        )

    actual = np.asarray(y_real)
    predicted = np.asarray(y_pred)
    if pos_label == -1:
        # Flip signs so that the label of interest is always +1 below.
        actual, predicted = -actual, -predicted

    # (a * p + a + p + 1) / 4 equals 1 only when a == p == 1, otherwise 0.
    true_positive_flags = (actual * predicted + actual + predicted + 1) / 4
    # (p + 1) / 2 maps a prediction of +1 to 1 and of -1 to 0.
    predicted_positive_flags = (predicted + 1) / 2
    return np.sum(true_positive_flags) / np.sum(predicted_positive_flags)




[docs]
def recall_score(
    y_real: Vector[SecNumTypesTV],
    y_pred: Vector[SecNumTypesTV],
    pos_label: int,
) -> SecNumTypesTV:
    """
    Computes the recall for category pos_label: the number of samples
    that are both predicted as and really are pos_label, divided by the
    number of samples whose real label is pos_label. In other words: how
    many of the actual pos_label samples were found.

    :param y_real: Real labels (-/+ 1)
    :param y_pred: Predicted labels (-/+ 1)
    :param pos_label: Label (value) to compute recall of
    :raise SecureLearnValueError: pos_label value must be either -1 or 1
    :return: Recall of given predictions
    """
    if pos_label not in (-1, 1):
        raise SecureLearnValueError(
            f"Expected pos_label in [-1, 1], but received {pos_label}."
        )

    actual = np.asarray(y_real)
    predicted = np.asarray(y_pred)
    if pos_label == -1:
        # Flip signs so that the label of interest is always +1 below.
        actual, predicted = -actual, -predicted

    # (a * p + a + p + 1) / 4 equals 1 only when a == p == 1, otherwise 0.
    true_positive_flags = (actual * predicted + actual + predicted + 1) / 4
    # (a + 1) / 2 maps a real label of +1 to 1 and of -1 to 0.
    real_positive_flags = (actual + 1) / 2
    return np.sum(true_positive_flags) / np.sum(real_positive_flags)




[docs]
def f1_score(
    y_real: Vector[SecNumTypesTV],
    y_pred: Vector[SecNumTypesTV],
    pos_label: int,
) -> SecNumTypesTV:
    """
    Computes the F1-score for category pos_label, i.e. the harmonic mean
    2 * precision * recall / (precision + recall) of the precision and
    recall of the given predictions.

    :param y_real: Real labels (-/+ 1)
    :param y_pred: Predicted labels (-/+ 1)
    :param pos_label: Label to compute f1 score of
    :raise SecureLearnValueError: pos_label value must be either -1 or 1
    :return: F1 score
    """
    if pos_label not in (-1, 1):
        raise SecureLearnValueError(
            f"Expected pos_label in [-1, 1], but received {pos_label}."
        )

    prec: SecNumTypesTV = precision_score(y_real, y_pred, pos_label)
    rec: SecNumTypesTV = recall_score(y_real, y_pred, pos_label)
    product = prec * rec
    total = prec + rec
    return 2 * product / total



# Regression metrics

[docs]
def mean_squared_error(
    y_real: Vector[SecNumTypesTV],
    y_pred: Vector[SecNumTypesTV],
) -> SecNumTypesTV:
    """
    Computes the residual mean of squares: the sum of squared differences
    between predicted and real values, divided by the number of real
    values.

    :param y_real: Real target values
    :param y_pred: Predicted target values
    :return: Residual mean of squares
    """
    residuals: NumpyObjectArray = np.asarray(y_pred) - np.asarray(y_real)
    # Inner product of the residual vector with itself = sum of squares.
    sum_of_squares = np.inner(residuals, residuals)  # type: ignore[no-untyped-call]
    return sum_of_squares / len(y_real)




[docs]
def mean_squared_model(y_real: Vector[SecNumTypesTV]) -> SecNumTypesTV:
    """
    Computes the mean of squared deviations of the input from its own
    mean: every value is centred around the mean of the vector, and the
    squares of the centred values are averaged.

    :param y_real: Input
    :return: Mean of squared deviations from the mean
    """
    values = np.asarray(y_real)
    n_values = len(values)
    centred = values - values.sum() / n_values
    # Inner product of the centred vector with itself = sum of squares.
    sum_of_squares = np.inner(centred, centred)  # type: ignore[no-untyped-call]
    return sum_of_squares / n_values




[docs]
def r2_score(
    y_real: Vector[SecNumTypesTV],
    y_pred: Vector[SecNumTypesTV],
) -> SecNumTypesTV:
    """
    Computes the R-squared value of the predictions: one minus the ratio
    of the residual mean of squares (mean_squared_error) to the mean of
    squared deviations of the real values from their mean
    (mean_squared_model).

    :param y_real: Real target values
    :param y_pred: Predicted target values
    :return: R-squared value
    """
    mean_sq_about_mean: SecNumTypesTV = mean_squared_model(y_real)
    mean_sq_residual: SecNumTypesTV = mean_squared_error(y_real, y_pred)
    unexplained_fraction = mean_sq_residual / mean_sq_about_mean
    return 1 - unexplained_fraction




[docs]
def adj_r2_score(
    y_real: Vector[SecNumTypesTV],
    y_pred: Vector[SecNumTypesTV],
    n_features: int,
) -> SecNumTypesTV:
    """
    Adjusted R-squared value for given predicted
    and real target values, computed as
    1 - (1 - R^2) * (n_samples - 1) / (n_samples - n_features - 1),
    where n_samples is the number of predictions.

    :param y_real: Real target values
    :param y_pred: Predicted target values
    :param n_features: Number of features
    :raise SecureLearnValueError: The number of samples must exceed
        n_features + 1, otherwise the adjustment factor is undefined
    :return: Adjusted R-squared value
    """
    n_samples = len(y_pred)
    residual_dof = n_samples - n_features - 1
    # Both quantities are public, so the check can be done in the clear
    # before any secure computation is started. A zero value would divide
    # by zero; a negative value would silently yield a meaningless score.
    if residual_dof <= 0:
        raise SecureLearnValueError(
            "Expected n_samples > n_features + 1, but received "
            f"n_samples={n_samples} and n_features={n_features}."
        )
    r_squared = r2_score(y_real, y_pred)
    return 1 - (1 - r_squared) * (n_samples - 1) / residual_dof