Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Onewayfast #328

Merged
merged 3 commits into from
Jul 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,10 @@ export(od_aggregate_to)
export(od_coords)
export(od_coords2line)
export(od_dist)
export(od_id_max_min)
export(od_id_order)
export(od_id_szudzik)
export(od_oneway)
export(od_radiation)
export(od_to_odmatrix)
export(odmatrix_to_od)
Expand Down Expand Up @@ -154,7 +157,6 @@ export(sln2points)
export(sp_aggregate)
export(sum_network_links)
export(sum_network_routes)
export(szudzik_pairing)
export(table2matrix)
export(toptail_buff)
export(toptailgs)
Expand Down
191 changes: 162 additions & 29 deletions R/oneway.R
Original file line number Diff line number Diff line change
Expand Up @@ -158,56 +158,189 @@ od_id_order <- function(x, id1 = names(x)[1], id2 = names(x)[2]) {
)
)
}
#' Combines two ID values to create a single ID number
#' Combine two ID values to create a single ID number
#'
#' @details
#' In OD data it is common to have many flows from "A to B" and "B to A".
#' In OD data it is common to have many 'oneway' flows from "A to B" and "B to A".
#' It can be useful to group these an have a single ID that represents pairs of IDs
#' with or without directionality.
#' with or without directionality, so they contain 'twoway' or bi-directional values.
#'
#' This function implements the Szudzik pairing function, on two vectors of equal
#' `od_id*` functions take two vectors of equal length and return a vector of IDs,
#' which are unique for each combination but the same for twoway flows.
#'
#' - the Szudzik pairing function, on two vectors of equal
#' length. It returns a vector of ID numbers.
#'
#' This function superseeds od_id_order as it is faster on large datasets
#'
#' @param val1 a vector of numeric, character, or factor values
#' @param val2 a vector of numeric, character, or factor values
#' @param x a vector of numeric, character, or factor values
#' @param y a vector of numeric, character, or factor values
#' @param ordermatters logical, does the order of values matter to pairing, default = FALSE
#'
#' @name od_id
#' @examples
#'
#' szudzik_pairing(od_data_sample[[1]], od_data_sample[[2]])
#' od_id_order(od_data_sample)
#' head(od_id_order(od_data_sample), 9)
#' od_id_szudzik(od_data_sample[[1]], od_data_sample[[2]])
#' od_id_max_min(od_data_sample[[1]], od_data_sample[[2]])
#' n = 1000
#' ids <- as.character(runif(n, 1e4, 1e7 - 1))
#' x <- data.frame(id1 = rep(ids, times = n),
#' id2 = rep(ids, each = n),
#' val = 1,
#' stringsAsFactors = FALSE)
#' system.time(od_id_order(x))
#' system.time(szudzik_pairing(x$id1, x$id2))
#' # benchmark of methods:
#' # x <- data.frame(id1 = rep(ids, times = n),
#' # id2 = rep(ids, each = n),
#' # val = 1,
#' # stringsAsFactors = FALSE)
#' # bench::mark(
#' # check = FALSE,
#' # od_id_order(x),
#' # od_id_szudzik(x$id1, x$id2),
#' # od_id_max_min(x$id1, x$id2)
#' # )
NULL
#' @rdname od_id
#' @export
szudzik_pairing <- function(val1, val2, ordermatters = FALSE) {
if(length(val1) != length(val2)){
stop("val1 and val2 are not of equal length")
od_id_szudzik <- function(x, y, ordermatters = FALSE) {
if(length(x) != length(y)){
stop("x and y are not of equal length")
}

if(class(val1) == "factor"){
val1 <- as.character(val1)
if(class(x) == "factor"){
x <- as.character(x)
}
if(class(val2) == "factor"){
val2 <- as.character(val2)
if(class(y) == "factor"){
y <- as.character(y)
}
lvls <- unique(c(val1, val2))
val1 <- as.integer(factor(val1, levels = lvls))
val2 <- as.integer(factor(val2, levels = lvls))
lvls <- unique(c(x, y))
x <- as.integer(factor(x, levels = lvls))
y <- as.integer(factor(y, levels = lvls))
if(ordermatters){
ismax <- val1 > val2
stplanr.key <- (ismax * 1) * (val1^2 + val1 + val2) + ((!ismax) * 1) * (val2^2 + val1)
ismax <- x > y
stplanr.key <- (ismax * 1) * (x^2 + x + y) + ((!ismax) * 1) * (y^2 + x)
}else{
a <- ifelse(val1 > val2, val2, val1)
b <- ifelse(val1 > val2, val1, val2)
a <- ifelse(x > y, y, x)
b <- ifelse(x > y, x, y)
stplanr.key <- b^2 + a
}
return(stplanr.key)
}
#' @export
#' @rdname od_id
od_id_max_min <- function(x, y) {
d <- convert_to_numeric(x, y)
a <- pmax(d$x, d$y)
b <- pmin(d$x, d$y)
a * (a + 1) / 2 + b
}

convert_to_numeric <- function(x, y) {
if (length(x) != length(y)) stop("x and y are not of equal length")
if (class(x) == "factor") x <- as.character(x)
if (class(y) == "factor") y <- as.character(y)
lvls <- unique(c(x, y))
x <- as.integer(factor(x, levels = lvls))
y <- as.integer(factor(y, levels = lvls))
list(x = x, y = y)
}

od_id_order_base <- function(x, y) {
d <- convert_to_numeric(x, y)
x <- d$x
y <- d$y
paste(pmin(x, y), pmax(x, y))
}

not_duplicated <- function(x) {
!duplicated(x)
}
#' Aggregate od pairs they become non-directional
#'
#' For example, sum total travel in both directions.
#' @param x A data frame or SpatialLinesDataFrame, representing an OD matrix
#' @param attrib A vector of column numbers or names
#' for deciding which attribute(s) of class numeric to
#' aggregate
#' @param id1 Optional (it is assumed to be the first column)
#' text string referring to the name of the variable containing
#' the unique id of the origin
#' @param id2 Optional (it is assumed to be the second column)
#' text string referring to the name of the variable containing
#' the unique id of the destination
#' @return `oneway` outputs a data frame (or `sf` data frame) with rows containing
#' results for the user-selected attribute values that have been aggregated.
#' @family lines
#' @export
#' @details
#' Flow data often contains movement in two directions: from point A to point B
#' and then from B to A. This can be problematic for transport planning, because
#' the magnitude of flow along a route can be masked by flows the other direction.
#' If only the largest flow in either direction is captured in an analysis, for
#' example, the true extent of travel will by heavily under-estimated for
#' OD pairs which have similar amounts of travel in both directions.
#' Flows in both direction are often represented by overlapping lines with
#' identical geometries (see [flowlines()]) which can be confusing
#' for users and are difficult to plot.
#' @examples
#' flow_oneway <- od_oneway(flow, attrib = 3)
#' nrow(flow_oneway) < nrow(flow) # result has fewer rows
#' sum(flow$All) == sum(flow_oneway$All) # but the same total flow
#' # using names instead of index for attribute
#' od_oneway(flow, attrib = "All")
#' # using many attributes to aggregate
#' attrib <- which(vapply(flow, is.numeric, TRUE))
#' flow_oneway <- od_oneway(flow, attrib = attrib)
#' colSums(flow_oneway[attrib]) == colSums(flow[attrib]) # test if the colSums are equal
#' # Demonstrate the results from oneway and onewaygeo are identical
#' flow_oneway_geo <- onewaygeo(flowlines, attrib = attrib)
#' plot(flow_oneway$All, flow_oneway_geo$All)
#' flow_oneway_sf <- od_oneway(flowlines_sf, 3)
#' plot(flow_oneway_geo, lwd = flow_oneway_geo$All / mean(flow_oneway_geo$All))
#' plot(flow_oneway_sf$geometry, lwd = flow_oneway_sf$All / mean(flow_oneway_sf$All))
#' # benchmark performance
#' # bench::mark(check = F,
#' # onewayid(flowlines_sf, 3),
#' # od_oneway(flowlines_sf, 3)
#' # )
#' @export
od_oneway <- function(x, attrib, id1 = names(x)[1], id2 = names(x)[2]) {

stplanr.key = od_id_max_min(x[[id1]], x[[id2]])

if (is.numeric(attrib)) {
attrib_names <- names(x)[attrib]
} else {
attrib_names <- attrib
attrib <- which(names(x) %in% attrib)
}

# separate geometry for sf objects
is_sf <- is(x, "sf")
if(is_sf) {
x_sf <- sf::st_sf(data.frame(stplanr.key), geometry = sf::st_geometry(x))
x <- sf::st_drop_geometry(x)
}

x <- dplyr::bind_cols(x, stplanr.key = stplanr.key)

x_oneway_numeric <- dplyr::group_by(x, stplanr.key) %>%
dplyr::summarise_at(attrib, sum)

x_oneway_character <- x %>%
dplyr::group_by(stplanr.key) %>%
dplyr::summarise(id1 = dplyr::first(!!rlang::sym(id1)), id2 = dplyr::first(!!rlang::sym(id2))) %>%
dplyr::select(-stplanr.key)

x_oneway <- dplyr::bind_cols(
x_oneway_character,
x_oneway_numeric
)

if(is_sf) {
x_sf <- x_sf[!duplicated(x_sf$stplanr.key), ]
x_oneway <- sf::st_as_sf(dplyr::inner_join(x_oneway, x_sf))
# class(x_oneway) # sf
}

x_oneway$stplanr.key <- NULL
names(x_oneway)[1:2] <- c(id1, id2)

return(x_oneway)
}
7 changes: 4 additions & 3 deletions man/angle_diff.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions man/geo_toptail.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions man/is_linepoint.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions man/line2df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions man/line_bearing.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions man/line_match.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions man/line_midpoint.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions man/line_sample.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions man/line_segment.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions man/line_via.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions man/mats2line.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading