diff --git a/NEWS.md b/NEWS.md index 50a377ded..f750c88a7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# soilDB 2.6.4 (2021-07-23) +# soilDB 2.6.4 (2021-08-06) * `fetchNASIS(from="pedons")` now supports `fill=TRUE` argument just like `from="components"` to include pedons that have no horizon records * `createStaticNASIS()`: column order should match NASIS, even if data types require reorder for ODBC driver * `fetchSoilGrids()` bug fixes, updates to metadata and references in documentation (https://github.com/ncss-tech/soilDB/issues/201) @@ -7,7 +7,8 @@ * Improved error handling * Added `progress` and `verbose` arguments for text progress bar and additional message output * Added support for {sf} and {sp} POINT geometry inputs - + * Add `get_SDA_coecoclass` SOD-style method for mapunit/component level summaries of ecological site and other vegetation class information + # soilDB 2.6.3 (2021-07-22) * `SDA_query()` and all functions that call `SDA_query()` get proper column class handling (related to #190), however: - be careful with the use of CAST(): unknown datatypes may not be correctly interpreted diff --git a/R/SDA_coecoclass.R b/R/SDA_coecoclass.R new file mode 100644 index 000000000..0c8f1bb08 --- /dev/null +++ b/R/SDA_coecoclass.R @@ -0,0 +1,89 @@ +#' Get mapunit ecological sites from Soil Data Access +#' +#' @details When `method="Dominant Condition"` an additional field `ecoclasspct_r` is returned in the result with the sum of `comppct_r` that have the dominant condition `ecoclassid`. The component with the greatest `comppct_r` is returned for the `component` and `coecosite` level information. +#' +#' Note that if there are multiple `coecoclasskey` per `ecoclassid` there may be more than one record per component. +#' +#' @param method aggregation method. One of: "Dominant Component", "Dominant Condition", "None". If "None" is selected one row will be returned per component, otherwise one row will be returned per map unit. +#' @param areasymbols vector of soil survey area symbols +#' @param mukeys vector of map unit keys +#' @param query_string Default: `FALSE`; if `TRUE` return a character string containing query that would be sent to SDA via `SDA_query` +#' @param sources One of more of `"coecosite"` or `"coothvegclass"`. If `NULL` no constraint on `sourcesdwtablephysicalname` is used in the query. +#' @param not_rated_value Default: `"Not assigned"` +#' @param miscellaneous_areas Include miscellaneous areas (non-soil components)? +get_SDA_coecoclass <- function(method = "None", + areasymbols = NULL, mukeys = NULL, + query_string = FALSE, + sources = "coecosite", + not_rated_value = "Not assigned", + miscellaneous_areas = TRUE) { + + if (is.null(sources)) { + sources <- c('coecosite', 'coothvegclass') + } + + method <- match.arg(toupper(method), c('NONE', "DOMINANT COMPONENT", "DOMINANT CONDITION")) + sources <- match.arg(tolower(sources), c('coecosite', 'coothvegclass'), several.ok = TRUE) + + stopifnot(!is.null(areasymbols) || !is.null(mukeys)) + + if (!is.null(areasymbols)) { + areasymbols <- soilDB::format_SQL_in_statement(areasymbols) + } + + if (!is.null(mukeys)) { + mukeys <- soilDB::format_SQL_in_statement(mukeys) + } + + if (!is.null(sources)) { + sources <- soilDB::format_SQL_in_statement(sources) + } + + base_query <- "SELECT * FROM legend l + LEFT JOIN mapunit mu ON l.lkey = mu.lkey + LEFT JOIN component c ON mu.mukey = c.mukey %s + LEFT JOIN coecoclass ce ON c.cokey = ce.cokey %s + WHERE %s" + + include_misc <- ifelse(miscellaneous_areas, "", " AND compkind != 'miscellaneous area'") + + include_src <- ifelse(is.null(sources), "", sprintf(" AND sourcesdwtablephysicalname IN %s", sources)) + + where_clause <- switch(as.character(is.null(areasymbols)), + "TRUE" = sprintf("mu.mukey IN %s", mukeys), + "FALSE" = sprintf("l.areasymbol IN %s", areasymbols)) + + q <- sprintf(base_query, include_misc, include_src, where_clause) + + if (query_string) + return(q) + + res <- SDA_query(q) + + .I <- NULL + idx <- NULL + comppct_r <- NULL + ecoclasspct_r <- NULL + + if (method == "NONE") { + return(res) + } else if (method == "DOMINANT COMPONENT") { + # dominant component + idx1 <- data.table::data.table(res)[, .I[which.max(comppct_r)], by = "mukey"]$V1 + return(res[idx1, ]) + } + + res$ecoclassid[is.na(res$ecoclassid)] <- not_rated_value + + + idx2 <- data.table::data.table(res)[, list(idx = .I[which.max(comppct_r)], + ecoclasspct_r = sum(comppct_r)), + by = c("mukey", "ecoclassid")][, + list(idx = idx[which.max(ecoclasspct_r)], + ecoclasspct_r = ecoclasspct_r[which.max(ecoclasspct_r)]), + by = "mukey"] + res2 <- res[idx2$idx, ] + res2$ecoclasspct_r <- idx2$ecoclasspct_r + res2 +} + diff --git a/man/get_SDA_coecoclass.Rd b/man/get_SDA_coecoclass.Rd new file mode 100644 index 000000000..a0f49ebd5 --- /dev/null +++ b/man/get_SDA_coecoclass.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SDA_coecoclass.R +\name{get_SDA_coecoclass} +\alias{get_SDA_coecoclass} +\title{Get mapunit ecological sites from Soil Data Access} +\usage{ +get_SDA_coecoclass( + method = "None", + areasymbols = NULL, + mukeys = NULL, + query_string = FALSE, + sources = "coecosite", + not_rated_value = "Not assigned", + miscellaneous_areas = TRUE +) +} +\arguments{ +\item{method}{aggregation method. One of: "Dominant Component", "Dominant Condition", "None". If "None" is selected one row will be returned per component, otherwise one row will be returned per map unit.} + +\item{areasymbols}{vector of soil survey area symbols} + +\item{mukeys}{vector of map unit keys} + +\item{query_string}{Default: \code{FALSE}; if \code{TRUE} return a character string containing query that would be sent to SDA via \code{SDA_query}} + +\item{sources}{One of more of \code{"coecosite"} or \code{"coothvegclass"}. If \code{NULL} no constraint on \code{sourcesdwtablephysicalname} is used in the query.} + +\item{not_rated_value}{Default: \code{"Not assigned"}} + +\item{miscellaneous_areas}{Include miscellaneous areas (non-soil components)?} +} +\description{ +Get mapunit ecological sites from Soil Data Access +} +\details{ +When \code{method="Dominant Condition"} an additional field \code{ecoclasspct_r} is returned in the result with the sum of \code{comppct_r} that have the dominant condition \code{ecoclassid}. The component with the greatest \code{comppct_r} is returned for the \code{component} and \code{coecosite} level information. + +Note that if there are multiple \code{coecoclasskey} per \code{ecoclassid} there may be more than one record per component. +} diff --git a/tests/testthat/test-SDA_coecoclass.R b/tests/testthat/test-SDA_coecoclass.R new file mode 100644 index 000000000..e869b7a38 --- /dev/null +++ b/tests/testthat/test-SDA_coecoclass.R @@ -0,0 +1,39 @@ +test_that("get_SDA_coecoclass works", { + skip_if_offline() + + skip_on_cran() + + res1 <- get_SDA_coecoclass(method = "dominant condition", + areasymbols = c("CA077", "CA630")) + expect_equal(length(unique(res1$mukey)), nrow(res1)) + + res2 <- get_SDA_coecoclass(method = "dominant component", + areasymbols = c("CA077", "CA630")) + expect_equal(length(unique(res2$mukey)), nrow(res2)) + + res3 <- get_SDA_coecoclass(method = "dominant condition", + areasymbols = c("CA077", "CA630"), + miscellaneous_areas = FALSE) + expect_equal(length(unique(res3$mukey)), nrow(res3)) + + res4 <- get_SDA_coecoclass(method = "dominant component", + areasymbols = c("CA077", "CA630"), + miscellaneous_areas = FALSE) + expect_equal(length(unique(res4$mukey)), nrow(res4)) + + res5 <- get_SDA_coecoclass(areasymbols = c("CA077", "CA630")) + expect_equal(nrow(unique(res5[,c("cokey","coecoclasskey")])), nrow(res5)) + + res6 <- get_SDA_coecoclass(method = "dominant component", + mukeys = c(461994, 461995)) + expect_equal(length(unique(res6$mukey)), 2) + + res7 <- get_SDA_coecoclass(mukeys = c(461994, 461995)) + expect_equal(length(unique(res7$cokey)), nrow(res7)) + + res8 <- get_SDA_coecoclass(method = "dominant component", + areasymbols = c("CA077", "CA630"), + sources = "coothvegclass") + expect_equal(length(unique(res8$mukey)), nrow(res8)) + +})