From c7a8c7ac209c15160bec8c814afb5b0ec3f7f9cb Mon Sep 17 00:00:00 2001 From: Mustafa CAVUS <45355486+mcavs@users.noreply.github.com> Date: Tue, 10 May 2022 13:02:18 +0200 Subject: [PATCH 1/5] Update model_performance.R --- R/model_performance.R | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/R/model_performance.R b/R/model_performance.R index 3418c1a0c..83d79a229 100644 --- a/R/model_performance.R +++ b/R/model_performance.R @@ -6,7 +6,7 @@ #' #' @param explainer a model to be explained, preprocessed by the \code{\link{explain}} function #' @param ... other parameters -#' @param cutoff a cutoff for classification models, needed for measures like recall, precision, ACC, F1. By default 0.5. +#' @param cutoff a cutoff for classification models, needed for measures like recall, precision, ACC, F1, MCC, Brier score, balanced ACC, and log-loss. By default 0.5. #' #' @return An object of the class \code{model_performance}. #' @@ -97,11 +97,15 @@ model_performance <- function(explainer, ..., cutoff = 0.5) { fn = sum((observed == 1) * (predicted < cutoff)) measures <- list( - recall = model_performance_recall(tp, fp, tn, fn), - precision = model_performance_precision(tp, fp, tn, fn), - f1 = model_performance_f1(tp, fp, tn, fn), - accuracy = model_performance_accuracy(tp, fp, tn, fn), - auc = model_performance_auc(predicted, observed) + recall = model_performance_recall(tp, fp, tn, fn), + precision = model_performance_precision(tp, fp, tn, fn), + f1 = model_performance_f1(tp, fp, tn, fn), + accuracy = model_performance_accuracy(tp, fp, tn, fn), + auc = model_performance_auc(predicted, observed), + mcc = model_performance_mcc(tp, fp, tn, fn), + brier_score = model_performance_brier(predicted, observed), + log_loss = model_performance_logloss(predicted, observed), + baccuracy = model_performance_baccuracy(tp, fp, tn, fn) ) } else if (type == "multiclass") { measures <- list( @@ -167,6 +171,29 @@ model_performance_accuracy 
<- function(tp, fp, tn, fn) { (tp + tn)/(tp + fp + tn + fn) } +model_performance_baccuracy <- function(tp, fp, tn, fn) { + ((tp / (tp + fn)) + (tn / (tn + fp))) / 2 +} + +model_performance_logloss <- function(predicted, observed) { + predicted[which(predicted == 0)] <- 10^-15 + predicted[which(predicted == 1)] <- 1-10^-15 + -mean((observed * log(predicted) + (1 - observed) * log(1 - predicted))) +} + +model_performance_brier <- function(predicted, observed) { + mean((predicted - observed) ^ 2) +} + +model_performance_mcc <- function(tp, fp, tn, fn) { + tp <- as.numeric(tp) + fp <- as.numeric(fp) + tn <- as.numeric(tn) + fn <- as.numeric(fn) + ((tp * tn) - (fp * fn)) / sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) +} + + model_performance_macro_f1 <- function(predicted, observed) { predicted_vectorized <- turn_probs_into_vector(predicted) confusion_matrixes <- calculate_confusion_matrixes(predicted_vectorized, observed) From 1136cddc7a467ae2fd8bec0ed577a1db23db54cc Mon Sep 17 00:00:00 2001 From: Mustafa CAVUS <45355486+mcavs@users.noreply.github.com> Date: Tue, 10 May 2022 13:07:09 +0200 Subject: [PATCH 2/5] Update model_performance.Rd --- man/model_performance.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/model_performance.Rd b/man/model_performance.Rd index d98e7b09a..7774f0dc4 100644 --- a/man/model_performance.Rd +++ b/man/model_performance.Rd @@ -11,7 +11,7 @@ model_performance(explainer, ..., cutoff = 0.5) \item{...}{other parameters} -\item{cutoff}{a cutoff for classification models, needed for measures like recall, precision, ACC, F1. By default 0.5.} +\item{cutoff}{a cutoff for classification models, needed for measures like recall, precision, ACC, F1, MCC, Brier score, balanced ACC, and log-loss. By default 0.5.} } \value{ An object of the class \code{model_performance}. 
@@ -26,7 +26,7 @@ It's a list with following fields: } \description{ Function \code{model_performance()} calculates various performance measures for classification and regression models. -For classification models following measures are calculated: F1, accuracy, recall, precision and AUC. +For classification models following measures are calculated: F1, accuracy, recall, precision, AUC, Matthews Correlation Coefficient, Brier score, balanced ACC, and log-loss. For regression models following measures are calculated: mean squared error, R squared, median absolute deviation. } \examples{ From b4cfe7da5f26e6ab849ef8ba46be6a2b3d6fd098 Mon Sep 17 00:00:00 2001 From: Mustafa CAVUS <45355486+mcavs@users.noreply.github.com> Date: Tue, 10 May 2022 13:07:49 +0200 Subject: [PATCH 3/5] Update model_performance.R --- R/model_performance.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/model_performance.R b/R/model_performance.R index 83d79a229..b1cbf17d6 100644 --- a/R/model_performance.R +++ b/R/model_performance.R @@ -6,7 +6,7 @@ #' #' @param explainer a model to be explained, preprocessed by the \code{\link{explain}} function #' @param ... other parameters -#' @param cutoff a cutoff for classification models, needed for measures like recall, precision, ACC, F1, MCC, Brier score, balanced ACC, and log-loss. By default 0.5. +#' @param cutoff a cutoff for classification models, needed for measures like recall, precision, ACC, F1, Matthews Correlation Coefficient, and balanced ACC; Brier score and log-loss do not depend on it. By default 0.5. #' #' @return An object of the class \code{model_performance}. 
#' From 2f069ec4aad4de13fc45d0c0db74602d083be211 Mon Sep 17 00:00:00 2001 From: Mustafa CAVUS <45355486+mcavs@users.noreply.github.com> Date: Tue, 10 May 2022 13:09:53 +0200 Subject: [PATCH 4/5] Update model_performance.Rd --- man/model_performance.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/model_performance.Rd b/man/model_performance.Rd index 7774f0dc4..9db1f6f32 100644 --- a/man/model_performance.Rd +++ b/man/model_performance.Rd @@ -11,7 +11,7 @@ model_performance(explainer, ..., cutoff = 0.5) \item{...}{other parameters} -\item{cutoff}{a cutoff for classification models, needed for measures like recall, precision, ACC, F1, MCC, Brier score, balanced ACC, and log-loss. By default 0.5.} +\item{cutoff}{a cutoff for classification models, needed for measures like recall, precision, ACC, F1, Matthews Correlation Coefficient, and balanced ACC; Brier score and log-loss do not depend on it. By default 0.5.} } \value{ An object of the class \code{model_performance}. From efec83b42a4981d1f829196384d82d6fd1986a62 Mon Sep 17 00:00:00 2001 From: Mustafa CAVUS <45355486+mcavs@users.noreply.github.com> Date: Tue, 10 May 2022 13:20:08 +0200 Subject: [PATCH 5/5] Update NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 86b5d161e..8bf494845 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,7 @@ DALEX (development) --------------------------------------------------------------- * changed URLs in the DESCRIPTION as requested in ([#484](https://github.com/ModelOriented/DALEX/issues/484)) * Fix model_info documentation ([#498](https://github.com/ModelOriented/DALEX/issues/498)) +* The Matthews Correlation Coefficient, Brier score, balanced ACC, and log-loss metrics were added to the `model_performance()` function for imbalanced classification tasks. DALEX 2.4.0 ---------------------------------------------------------------