Skip to content

Commit

Permalink
Fix bug where output_type_id data type was being incorrectly determin…
Browse files Browse the repository at this point in the history
…ed as Date instead of character.
  • Loading branch information
annakrystalli committed Oct 2, 2024
1 parent c8204c5 commit 252237f
Show file tree
Hide file tree
Showing 5 changed files with 317 additions and 5 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: hubData
Title: Tools for accessing and working with hubverse data
Version: 1.2.2
Version: 1.2.2.9000
Authors@R:
c(person("Anna", "Krystalli", , "annakrystalli@googlemail.com", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-2378-4915")),
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# hubData (development version)

* Fix bug in `create_hub_schema()` where `output_type_id` data type was being incorrectly determined as `Date` instead of `character` (Reported in https://github.com/reichlab/variant-nowcast-hub/pull/87#issuecomment-2387372238).

# hubData 1.2.2

* Remove dependency on development version of `arrow` package and bump required version to 17.0.0.
Expand Down
11 changes: 7 additions & 4 deletions R/create_hub_schema.R
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,8 @@ get_output_type_id_type <- function(config_tasks) {
unlist(recursive = FALSE) %>%
purrr::map(
function(x) {
purrr::pluck(x, config_tid)
purrr::pluck(x, config_tid) %>%
purrr::modify_if(~ inherits(.x, "Date"), as.character)
}
) %>%
unlist()
Expand All @@ -188,10 +189,10 @@ get_output_type_id_type <- function(config_tasks) {
) %>%
unlist() %>%
# Instead of using R data type coercion by combining vectors of output type
# id values, create zero length vectors of sample output type id types
# id values, create length 1 vectors of sample output type id types
# using the function specified by output_type_id_params type.
# Get the appropriate function using `get`.
purrr::map(~ get(.x)()) %>%
purrr::map(~ get(.x)(length = 1L)) %>%
unlist()

get_data_type(c(values, sample_values))
Expand Down Expand Up @@ -238,7 +239,9 @@ coerce_datatype <- function(types) {
}

# Test whether every non-missing element of `x` can be parsed as a date.
#
# `as.Date()` chooses its format from the first non-NA element and silently
# converts any later element that does not match into NA, so a conversion
# that merely "succeeds" does not prove that all values are dates. The
# conversion is therefore only accepted when it introduces no new NAs, i.e.
# the NA positions of the input and of the converted vector are the same.
#
# @param x Vector of candidate values (typically character).
# @return A single `TRUE` if `as.Date(x)` succeeds without creating new NAs,
#   otherwise `FALSE`.
test_iso_date <- function(x) {
  to_date <- try(as.Date(x), silent = TRUE)
  # No element could be interpreted as a date at all.
  if (inherits(to_date, "try-error")) {
    return(FALSE)
  }
  # Reject partial conversions: an NA that was not already in `x` means at
  # least one value is not a date.
  isTRUE(all.equal(which(is.na(x)), which(is.na(to_date))))
}

get_partition_r_datatype <- function(partitions, arrow_datatypes) {
Expand Down
8 changes: 8 additions & 0 deletions tests/testthat/test-create_hub_schema.R
Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,12 @@ test_that("create_hub_schema works with config output_type_id_datatype", {
)$GetFieldByName("output_type_id")$ToString(),
"output_type_id: double"
)
expect_equal(
create_hub_schema(
hubUtils::read_config_file(
testthat::test_path("testdata", "configs", "nowcast-tasks.json")
)
)$ToString(),
"nowcast_date: date32[day]\ntarget_date: date32[day]\nlocation: string\nclade: string\noutput_type: string\noutput_type_id: string\nvalue: double\nmodel_id: string"
)
})
297 changes: 297 additions & 0 deletions tests/testthat/testdata/configs/nowcast-tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,297 @@
{
"schema_version": "https://raw.githubusercontent.com/hubverse-org/schemas/main/v3.0.1/tasks-schema.json",
"rounds": [
{
"round_id_from_variable": true,
"round_id": "nowcast_date",
"model_tasks": [
{
"task_ids": {
"nowcast_date": {
"required": [
"2024-09-11"
],
"optional": null
},
"target_date": {
"required": null,
"optional": ["2024-08-11", "2024-08-12", "2024-08-13", "2024-08-14", "2024-08-15", "2024-08-16", "2024-08-17", "2024-08-18", "2024-08-19", "2024-08-20", "2024-08-21", "2024-08-22", "2024-08-23", "2024-08-24", "2024-08-25", "2024-08-26", "2024-08-27", "2024-08-28", "2024-08-29", "2024-08-30", "2024-08-31", "2024-09-01", "2024-09-02", "2024-09-03", "2024-09-04", "2024-09-05", "2024-09-06", "2024-09-07", "2024-09-08", "2024-09-09", "2024-09-10", "2024-09-11", "2024-09-12", "2024-09-13", "2024-09-14", "2024-09-15", "2024-09-16", "2024-09-17", "2024-09-18", "2024-09-19", "2024-09-20", "2024-09-21"]
},
"location": {
"required": null,
"optional": ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "DC", "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY", "PR"]
},
"clade": {
"required": ["24A", "24B", "24C", "other", "recombinant"],
"optional": null
}
},
"output_type": {
"mean": {
"output_type_id": {
"required": null,
"optional": [
"NA"
]
},
"value": {
"type": "double",
"minimum": 0,
"maximum": 1
}
},
"sample": {
"output_type_id_params": {
"is_required": false,
"type": "character",
"max_length": 15,
"min_samples_per_task": 100,
"max_samples_per_task": 100
},
"value": {
"type": "double",
"minimum": 0,
"maximum": 1
}
}
},
"target_metadata": [
{
"target_id": "clade prop",
"target_name": "Daily nowcasted clade proportions",
"target_units": "proportion",
"target_keys": null,
"target_type": "compositional",
"is_step_ahead": true,
"time_unit": "day"
}
]
}
],
"submissions_due": {
"relative_to": "nowcast_date",
"start": -2,
"end": 1
}
},
{
"round_id_from_variable": true,
"round_id": "nowcast_date",
"model_tasks": [
{
"task_ids": {
"nowcast_date": {
"required": [
"2024-09-18"
],
"optional": null
},
"target_date": {
"required": null,
"optional": ["2024-08-18", "2024-08-19", "2024-08-20", "2024-08-21", "2024-08-22", "2024-08-23", "2024-08-24", "2024-08-25", "2024-08-26", "2024-08-27", "2024-08-28", "2024-08-29", "2024-08-30", "2024-08-31", "2024-09-01", "2024-09-02", "2024-09-03", "2024-09-04", "2024-09-05", "2024-09-06", "2024-09-07", "2024-09-08", "2024-09-09", "2024-09-10", "2024-09-11", "2024-09-12", "2024-09-13", "2024-09-14", "2024-09-15", "2024-09-16", "2024-09-17", "2024-09-18", "2024-09-19", "2024-09-20", "2024-09-21", "2024-09-22", "2024-09-23", "2024-09-24", "2024-09-25", "2024-09-26", "2024-09-27", "2024-09-28"]
},
"location": {
"required": null,
"optional": ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "DC", "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY", "PR"]
},
"clade": {
"required": ["24A", "24B", "24C", "other", "recombinant"],
"optional": null
}
},
"output_type": {
"mean": {
"output_type_id": {
"required": null,
"optional": [
"NA"
]
},
"value": {
"type": "double",
"minimum": 0,
"maximum": 1
}
},
"sample": {
"output_type_id_params": {
"is_required": false,
"type": "character",
"max_length": 15,
"min_samples_per_task": 100,
"max_samples_per_task": 100
},
"value": {
"type": "double",
"minimum": 0,
"maximum": 1
}
}
},
"target_metadata": [
{
"target_id": "clade prop",
"target_name": "Daily nowcasted clade proportions",
"target_units": "proportion",
"target_keys": null,
"target_type": "compositional",
"is_step_ahead": true,
"time_unit": "day"
}
]
}
],
"submissions_due": {
"relative_to": "nowcast_date",
"start": -2,
"end": 1
}
},
{
"round_id_from_variable": true,
"round_id": "nowcast_date",
"model_tasks": [
{
"task_ids": {
"nowcast_date": {
"required": [
"2024-09-25"
],
"optional": null
},
"target_date": {
"required": null,
"optional": ["2024-08-25", "2024-08-26", "2024-08-27", "2024-08-28", "2024-08-29", "2024-08-30", "2024-08-31", "2024-09-01", "2024-09-02", "2024-09-03", "2024-09-04", "2024-09-05", "2024-09-06", "2024-09-07", "2024-09-08", "2024-09-09", "2024-09-10", "2024-09-11", "2024-09-12", "2024-09-13", "2024-09-14", "2024-09-15", "2024-09-16", "2024-09-17", "2024-09-18", "2024-09-19", "2024-09-20", "2024-09-21", "2024-09-22", "2024-09-23", "2024-09-24", "2024-09-25", "2024-09-26", "2024-09-27", "2024-09-28", "2024-09-29", "2024-09-30", "2024-10-01", "2024-10-02", "2024-10-03", "2024-10-04", "2024-10-05"]
},
"location": {
"required": null,
"optional": ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "DC", "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY", "PR"]
},
"clade": {
"required": ["24A", "24B", "24C", "other", "recombinant"],
"optional": null
}
},
"output_type": {
"mean": {
"output_type_id": {
"required": null,
"optional": [
"NA"
]
},
"value": {
"type": "double",
"minimum": 0,
"maximum": 1
}
},
"sample": {
"output_type_id_params": {
"is_required": false,
"type": "character",
"max_length": 15,
"min_samples_per_task": 100,
"max_samples_per_task": 100
},
"value": {
"type": "double",
"minimum": 0,
"maximum": 1
}
}
},
"target_metadata": [
{
"target_id": "clade prop",
"target_name": "Daily nowcasted clade proportions",
"target_units": "proportion",
"target_keys": null,
"target_type": "compositional",
"is_step_ahead": true,
"time_unit": "day"
}
]
}
],
"submissions_due": {
"relative_to": "nowcast_date",
"start": -2,
"end": 1
}
},
{
"round_id_from_variable": true,
"round_id": "nowcast_date",
"model_tasks": [
{
"task_ids": {
"nowcast_date": {
"required": [
"2024-10-02"
],
"optional": null
},
"target_date": {
"required": null,
"optional": ["2024-09-01", "2024-09-02", "2024-09-03", "2024-09-04", "2024-09-05", "2024-09-06", "2024-09-07", "2024-09-08", "2024-09-09", "2024-09-10", "2024-09-11", "2024-09-12", "2024-09-13", "2024-09-14", "2024-09-15", "2024-09-16", "2024-09-17", "2024-09-18", "2024-09-19", "2024-09-20", "2024-09-21", "2024-09-22", "2024-09-23", "2024-09-24", "2024-09-25", "2024-09-26", "2024-09-27", "2024-09-28", "2024-09-29", "2024-09-30", "2024-10-01", "2024-10-02", "2024-10-03", "2024-10-04", "2024-10-05", "2024-10-06", "2024-10-07", "2024-10-08", "2024-10-09", "2024-10-10", "2024-10-11", "2024-10-12"]
},
"location": {
"required": null,
"optional": ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "DC", "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY", "PR"]
},
"clade": {
"required": ["24A", "24B", "24C", "24E", "other", "recombinant"],
"optional": null
}
},
"output_type": {
"mean": {
"output_type_id": {
"required": null,
"optional": [
"NA"
]
},
"value": {
"type": "double",
"minimum": 0,
"maximum": 1
}
},
"sample": {
"output_type_id_params": {
"is_required": false,
"type": "character",
"max_length": 15,
"min_samples_per_task": 100,
"max_samples_per_task": 100
},
"value": {
"type": "double",
"minimum": 0,
"maximum": 1
}
}
},
"target_metadata": [
{
"target_id": "clade prop",
"target_name": "Daily nowcasted clade proportions",
"target_units": "proportion",
"target_keys": null,
"target_type": "compositional",
"is_step_ahead": true,
"time_unit": "day"
}
]
}
],
"submissions_due": {
"relative_to": "nowcast_date",
"start": -2,
"end": 1
}
}
]
}

0 comments on commit 252237f

Please sign in to comment.