From 9947f549f869a05e00eb16d27a4ec2e1779e6480 Mon Sep 17 00:00:00 2001 From: Mustafa CAVUS <45355486+mcavs@users.noreply.github.com> Date: Sun, 20 Feb 2022 01:41:33 +0100 Subject: [PATCH 1/2] New metrics are added for imbalanced data Matthews Correlation Coefficient, Balanced Accuracy, Brier Score and log-loss metrics, which are used in case of imbalanced data, are added. --- R/model_performance.R | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/R/model_performance.R b/R/model_performance.R index 3418c1a0c..687ca0e63 100644 --- a/R/model_performance.R +++ b/R/model_performance.R @@ -97,11 +97,15 @@ model_performance <- function(explainer, ..., cutoff = 0.5) { fn = sum((observed == 1) * (predicted < cutoff)) measures <- list( - recall = model_performance_recall(tp, fp, tn, fn), - precision = model_performance_precision(tp, fp, tn, fn), - f1 = model_performance_f1(tp, fp, tn, fn), - accuracy = model_performance_accuracy(tp, fp, tn, fn), - auc = model_performance_auc(predicted, observed) + recall = model_performance_recall(tp, fp, tn, fn), + precision = model_performance_precision(tp, fp, tn, fn), + f1 = model_performance_f1(tp, fp, tn, fn), + accuracy = model_performance_accuracy(tp, fp, tn, fn), + auc = model_performance_auc(predicted, observed) + mcc = model_performance_mcc(tp, fp, tn, fn), + brier_score = model_performance_brier(predicted, observed), + log_loss = model_performance_logloss(predicted, observed), + balanced_accuracy = model_performance_bacc(tp, fp, tn, fn) ) } else if (type == "multiclass") { measures <- list( @@ -167,6 +171,23 @@ model_performance_accuracy <- function(tp, fp, tn, fn) { (tp + tn)/(tp + fp + tn + fn) } +model_performance_bacc <- function(tp, fp, tn, fn) { + ((tp / (tp + fn)) + (tn / (tn + fp))) / 2 +} + +model_performance_logloss <- function(predicted, observed) { + -mean(observed * log(predicted) + (1 - observed) * log(1 - predicted)) +} + +model_performance_brier <- 
function(predicted, observed) { + mean((predicted - observed) ^ 2) +} + +model_performance_mcc <- function(tp, fp, tn, fn) { + ((tp * tn) - (fp * fn)) / sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) +} + + model_performance_macro_f1 <- function(predicted, observed) { predicted_vectorized <- turn_probs_into_vector(predicted) confusion_matrixes <- calculate_confusion_matrixes(predicted_vectorized, observed) From 1fbfbee48c6c28b84d1963c818e2d8173174cbb9 Mon Sep 17 00:00:00 2001 From: Mustafa CAVUS <45355486+mcavs@users.noreply.github.com> Date: Mon, 21 Feb 2022 14:28:16 +0100 Subject: [PATCH 2/2] Update model_performance.R --- R/model_performance.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/model_performance.R b/R/model_performance.R index 687ca0e63..068b7c04b 100644 --- a/R/model_performance.R +++ b/R/model_performance.R @@ -101,7 +101,7 @@ model_performance <- function(explainer, ..., cutoff = 0.5) { precision = model_performance_precision(tp, fp, tn, fn), f1 = model_performance_f1(tp, fp, tn, fn), accuracy = model_performance_accuracy(tp, fp, tn, fn), - auc = model_performance_auc(predicted, observed) + auc = model_performance_auc(predicted, observed), mcc = model_performance_mcc(tp, fp, tn, fn), brier_score = model_performance_brier(predicted, observed), log_loss = model_performance_logloss(predicted, observed),