Rework mutate.data.frame() to better implement .keep

tidyverse · Sep 30, 2021 · 32dec34 · 32dec34
1 parent 93fe9ef
commit 32dec34
Show file tree

Hide file tree

Showing 2 changed files with 53 additions and 25 deletions.
diff --git a/R/mutate.R b/R/mutate.R
@@ -174,37 +174,47 @@ mutate.data.frame <- function(.data, ...,
   keep <- arg_match(.keep)
 
   cols <- mutate_cols(.data, ..., caller_env = caller_env())
+  used <- attr(cols, "used")
+
   out <- dplyr_col_modify(.data, cols)
 
+  # Compact out `NULL` columns that got removed.
+  # These won't exist in `out`, but we don't want them to look "new".
+  # Note that `dplyr_col_modify()` makes it impossible to `NULL` a group column,
+  # which we rely on below.
+  cols <- compact_null(cols)
+
+  cols_data <- names(.data)
+  cols_group <- group_vars(.data)
+
+  cols_expr <- names(cols)
+  cols_expr_modified <- intersect(cols_expr, cols_data)
+  cols_expr_new <- setdiff(cols_expr, cols_expr_modified)
+
+  cols_used <- setdiff(cols_data, c(cols_group, cols_expr_modified, names(used)[!used]))
+  cols_unused <- setdiff(cols_data, c(cols_group, cols_expr_modified, names(used)[used]))
+
   .before <- enquo(.before)
   .after <- enquo(.after)
+
   if (!quo_is_null(.before) || !quo_is_null(.after)) {
     # Only change the order of new columns
-    new <- setdiff(names(cols), names(.data))
-    out <- relocate(out, !!new, .before = !!.before, .after = !!.after)
+    out <- relocate(out, all_of(cols_expr_new), .before = !!.before, .after = !!.after)
   }
 
+  cols_out <- names(out)
+
   if (keep == "all") {
-    out
-  } else if (keep == "unused") {
-    used <- attr(cols, "used")
-    unused <- names(used)[!used]
-    keep <- intersect(names(out), c(group_vars(.data), unused, names(cols)))
-    dplyr_col_select(out, keep)
+    cols_retain <- cols_out
   } else if (keep == "used") {
-    used <- attr(cols, "used")
-    used <- names(used)[used]
-    keep <- intersect(names(out), c(group_vars(.data), used, names(cols)))
-    dplyr_col_select(out, keep)
+    cols_retain <- setdiff(cols_out, cols_unused)
+  } else if (keep == "unused") {
+    cols_retain <- setdiff(cols_out, cols_used)
   } else if (keep == "none") {
-    keep <- c(
-      # ensure group vars present
-      setdiff(group_vars(.data), names(cols)),
-      # cols might contain NULLs
-      intersect(names(cols), names(out))
-    )
-    dplyr_col_select(out, keep)
+    cols_retain <- setdiff(cols_out, c(cols_used, cols_unused))
   }
+
+  dplyr_col_select(out, cols_retain)
 }
 
 #' @rdname mutate
@@ -243,9 +253,6 @@ mutate_cols <- function(.data, ..., caller_env) {
 
   rows <- mask$get_rows()
   dots <- dplyr_quosures(...)
-  if (length(dots) == 0L) {
-    return(NULL)
-  }
 
   new_columns <- set_names(list(), character())
 

diff --git a/tests/testthat/test-mutate.r b/tests/testthat/test-mutate.r
@@ -338,13 +338,13 @@ test_that(".keep = 'none' only keeps grouping variables", {
   expect_named(mutate(gf, z = 1, .keep = "none"), c("x", "z"))
 })
 
-test_that(".keep = 'none' prefers new order", {
+test_that(".keep = 'none' retains original ordering (#5967)", {
   df <- tibble(x = 1, y = 2)
-  expect_named(df %>% mutate(y = 1, x = 2, .keep = "none"), c("y", "x"))
+  expect_named(df %>% mutate(y = 1, x = 2, .keep = "none"), c("x", "y"))
 
   # even when grouped
   gf <- group_by(df, x)
-  expect_named(gf %>% mutate(y = 1, x = 2, .keep = "none"), c("y", "x"))
+  expect_named(gf %>% mutate(y = 1, x = 2, .keep = "none"), c("x", "y"))
 })
 
 test_that("can use .before and .after to control column position", {
@@ -358,6 +358,27 @@ test_that("can use .before and .after to control column position", {
   expect_named(mutate(df, x = 1, .after = y), c("x", "y"))
 })
 
+test_that(".keep and .before/.after interact correctly", {
+  df <- tibble(x = 1, y = 1, z = 1, a = 1, b = 2, c = 3) %>%
+    group_by(a, b)
+
+  expect_named(mutate(df, d = 1, x = 2, .keep = "none"), c("x", "a", "b", "d"))
+  expect_named(mutate(df, d = 1, x = 2, .keep = "none", .before = "a"), c("x", "d", "a", "b"))
+  expect_named(mutate(df, d = 1, x = 2, .keep = "none", .after = "a"), c("x", "a", "d", "b"))
+})
+
+test_that("dropping column with `NULL` then readding it retains original location", {
+  df <- tibble(x = 1, y = 2, z = 3, a = 4)
+  df <- group_by(df, z)
+
+  expect_named(mutate(df, y = NULL, y = 3, .keep = "all"), c("x", "y", "z", "a"))
+  expect_named(mutate(df, b = a, y = NULL, y = 3, .keep = "used"), c("y", "z", "a", "b"))
+  expect_named(mutate(df, b = a, y = NULL, y = 3, .keep = "unused"), c("x", "y", "z", "b"))
+
+  # It isn't treated as a "new" column
+  expect_named(mutate(df, y = NULL, y = 3, .keep = "all", .before = x), c("x", "y", "z", "a"))
+})
+
 test_that(".keep= always retains grouping variables (#5582)", {
   df <- tibble(x = 1, y = 2, z = 3) %>% group_by(z)
   expect_equal(