Skip to content

Commit

Permalink
Rework mutate.data.frame() to better implement .keep
Browse files Browse the repository at this point in the history
  • Loading branch information
DavisVaughan committed Sep 30, 2021
1 parent 93fe9ef commit 32dec34
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 25 deletions.
51 changes: 29 additions & 22 deletions R/mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -174,37 +174,47 @@ mutate.data.frame <- function(.data, ...,
keep <- arg_match(.keep)

cols <- mutate_cols(.data, ..., caller_env = caller_env())
used <- attr(cols, "used")

out <- dplyr_col_modify(.data, cols)

# Compact out `NULL` columns that got removed.
# These won't exist in `out`, but we don't want them to look "new".
# Note that `dplyr_col_modify()` makes it impossible to `NULL` a group column,
# which we rely on below.
cols <- compact_null(cols)

cols_data <- names(.data)
cols_group <- group_vars(.data)

cols_expr <- names(cols)
cols_expr_modified <- intersect(cols_expr, cols_data)
cols_expr_new <- setdiff(cols_expr, cols_expr_modified)

cols_used <- setdiff(cols_data, c(cols_group, cols_expr_modified, names(used)[!used]))
cols_unused <- setdiff(cols_data, c(cols_group, cols_expr_modified, names(used)[used]))

.before <- enquo(.before)
.after <- enquo(.after)

if (!quo_is_null(.before) || !quo_is_null(.after)) {
# Only change the order of new columns
new <- setdiff(names(cols), names(.data))
out <- relocate(out, !!new, .before = !!.before, .after = !!.after)
out <- relocate(out, all_of(cols_expr_new), .before = !!.before, .after = !!.after)
}

cols_out <- names(out)

if (keep == "all") {
out
} else if (keep == "unused") {
used <- attr(cols, "used")
unused <- names(used)[!used]
keep <- intersect(names(out), c(group_vars(.data), unused, names(cols)))
dplyr_col_select(out, keep)
cols_retain <- cols_out
} else if (keep == "used") {
used <- attr(cols, "used")
used <- names(used)[used]
keep <- intersect(names(out), c(group_vars(.data), used, names(cols)))
dplyr_col_select(out, keep)
cols_retain <- setdiff(cols_out, cols_unused)
} else if (keep == "unused") {
cols_retain <- setdiff(cols_out, cols_used)
} else if (keep == "none") {
keep <- c(
# ensure group vars present
setdiff(group_vars(.data), names(cols)),
# cols might contain NULLs
intersect(names(cols), names(out))
)
dplyr_col_select(out, keep)
cols_retain <- setdiff(cols_out, c(cols_used, cols_unused))
}

dplyr_col_select(out, cols_retain)
}

#' @rdname mutate
Expand Down Expand Up @@ -243,9 +253,6 @@ mutate_cols <- function(.data, ..., caller_env) {

rows <- mask$get_rows()
dots <- dplyr_quosures(...)
if (length(dots) == 0L) {
return(NULL)
}

new_columns <- set_names(list(), character())

Expand Down
27 changes: 24 additions & 3 deletions tests/testthat/test-mutate.r
Original file line number Diff line number Diff line change
Expand Up @@ -338,13 +338,13 @@ test_that(".keep = 'none' only keeps grouping variables", {
expect_named(mutate(gf, z = 1, .keep = "none"), c("x", "z"))
})

test_that(".keep = 'none' prefers new order", {
test_that(".keep = 'none' retains original ordering (#5967)", {
df <- tibble(x = 1, y = 2)
expect_named(df %>% mutate(y = 1, x = 2, .keep = "none"), c("y", "x"))
expect_named(df %>% mutate(y = 1, x = 2, .keep = "none"), c("x", "y"))

# even when grouped
gf <- group_by(df, x)
expect_named(gf %>% mutate(y = 1, x = 2, .keep = "none"), c("y", "x"))
expect_named(gf %>% mutate(y = 1, x = 2, .keep = "none"), c("x", "y"))
})

test_that("can use .before and .after to control column position", {
Expand All @@ -358,6 +358,27 @@ test_that("can use .before and .after to control column position", {
expect_named(mutate(df, x = 1, .after = y), c("x", "y"))
})

test_that(".keep and .before/.after interact correctly", {
df <- tibble(x = 1, y = 1, z = 1, a = 1, b = 2, c = 3) %>%
group_by(a, b)

expect_named(mutate(df, d = 1, x = 2, .keep = "none"), c("x", "a", "b", "d"))
expect_named(mutate(df, d = 1, x = 2, .keep = "none", .before = "a"), c("x", "d", "a", "b"))
expect_named(mutate(df, d = 1, x = 2, .keep = "none", .after = "a"), c("x", "a", "d", "b"))
})

test_that("dropping column with `NULL` then readding it retains original location", {
df <- tibble(x = 1, y = 2, z = 3, a = 4)
df <- group_by(df, z)

expect_named(mutate(df, y = NULL, y = 3, .keep = "all"), c("x", "y", "z", "a"))
expect_named(mutate(df, b = a, y = NULL, y = 3, .keep = "used"), c("y", "z", "a", "b"))
expect_named(mutate(df, b = a, y = NULL, y = 3, .keep = "unused"), c("x", "y", "z", "b"))

# It isn't treated as a "new" column
expect_named(mutate(df, y = NULL, y = 3, .keep = "all", .before = x), c("x", "y", "z", "a"))
})

test_that(".keep= always retains grouping variables (#5582)", {
df <- tibble(x = 1, y = 2, z = 3) %>% group_by(z)
expect_equal(
Expand Down

0 comments on commit 32dec34

Please # to comment.