Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

240 241 xx seq #246

Merged
merged 13 commits into from
Nov 7, 2023
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: sdtmchecks
Title: Data Quality Checks for SDTM Datasets
Version: 0.1.7
Version: 0.1.8
Authors@R:
person("Will", "Harris", , "harric17@gene.com", role = c("aut", "cre"))
Description: Functions to identify common data issues in SDTM data. These checks are intended to be generalizable, actionable, and meaningful for analysis.
Expand Down
3 changes: 2 additions & 1 deletion R/check_ae_aedecod.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ check_ae_aedecod <- function(AE,preproc=identity,...){

### Subset AE to only records with missing AEDECOD
mydf <- AE %>%
select(any_of(c("USUBJID", "RAVE", "AESEQ","AESTDTC","AETERM","AEDECOD"))) %>%
select(any_of(c("USUBJID", "RAVE", "AESTDTC", "AETERM", "AEDECOD"))) %>%
filter(is_sas_na(AEDECOD))

rownames(mydf)=NULL
Expand All @@ -75,3 +75,4 @@ check_ae_aedecod <- function(AE,preproc=identity,...){
}
}
}

60 changes: 30 additions & 30 deletions R/check_ae_aerel.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,75 +102,75 @@


check_ae_aerel <- function(AE,preproc=identity,...) {

###Keep only AEREL, AEREL1 - AERELN
all_aerel <- setdiff(names(AE)[grep("AEREL",names(AE))],names(AE)[grep("AERELNS",names(AE))])

###First check that required variables exist and return a message if they don't
if(AE %lacks_any% c("USUBJID","AESEQ","AESTDTC","AETERM","AEREL")){

fail(lacks_msg(AE, c("USUBJID","AESEQ","AESTDTC","AETERM","AEREL")))

if(AE %lacks_any% c("USUBJID","AESTDTC","AETERM","AEREL")){
fail(lacks_msg(AE, c("USUBJID","AESTDTC","AETERM","AEREL")))
} else {

#Apply company specific preprocessing function
AE = preproc(AE,...)
AE <- AE[,intersect(names(AE), c("USUBJID","AESEQ","AESTDTC","AETERM","RAVE", all_aerel))]

AE <- AE[,intersect(names(AE), c("USUBJID","AESTDTC","AETERM","RAVE", all_aerel))]
mydf_sub <- AE

mydf_miss <- mydf_sub %>%
filter(is_sas_na(AE$AEREL) & AE$AEREL != "NA")

#mydf_nmiss <- rbind(filter(mydf_sub, !is_sas_na(AE$AEREL)), filter(mydf_sub, AE$AEREL == "NA"))
mydf_nmiss <- rbind(filter(mydf_sub, !is_sas_na(AE$AEREL)))

if (as.numeric(length(all_aerel)) > 1) {

index_y <- as.data.frame(sapply(6:ncol(mydf_nmiss), function(x) mydf_nmiss[, x] == 'Y'))
index_n <- as.data.frame(sapply(6:ncol(mydf_nmiss), function(x) mydf_nmiss[, x] == 'N'))
index_na <- as.data.frame(sapply(6:ncol(mydf_nmiss), function(x) mydf_nmiss[, x] == 'NA'))
index_m <- as.data.frame(sapply(6:ncol(mydf_nmiss), function(x) mydf_nmiss[, x] == ''))

index_y <- as.data.frame(sapply(5:ncol(mydf_nmiss), function(x) mydf_nmiss[, x] == 'Y'))
index_n <- as.data.frame(sapply(5:ncol(mydf_nmiss), function(x) mydf_nmiss[, x] == 'N'))
index_na <- as.data.frame(sapply(5:ncol(mydf_nmiss), function(x) mydf_nmiss[, x] == 'NA'))
index_m <- as.data.frame(sapply(5:ncol(mydf_nmiss), function(x) mydf_nmiss[, x] == ''))
## For which row the condition is true for all columns
y <- apply(index_y, 1, any)
na <- apply(index_na, 1, all)
n1 <- apply(index_n, 1, any)
m <- apply(index_m, 1, all)

n <- n1 != y & n1 == TRUE

### Check if there is any unexpected AEREL
mydf_y <- mydf_nmiss[mydf_nmiss$AEREL == 'Y' & !y, ]
mydf_n <- mydf_nmiss[mydf_nmiss$AEREL == 'N' & !n, ]
mydf_na <- mydf_nmiss[mydf_nmiss$AEREL == 'NA' & !na, ]
mydf_m <- mydf_nmiss[mydf_nmiss$AEREL == '' & !m, ]

if (nrow(mydf_miss) > 0) {
index_all <- as.data.frame(rbind(sapply(6:ncol(mydf_miss),
index_all <- as.data.frame(rbind(sapply(5:ncol(mydf_miss),
function(x)
mydf_miss[, x] == 'Y'|
mydf_miss[, x] == 'NA' |
mydf_miss[, x] == 'N' |
mydf_miss[, x] == '')
))
))
all <- apply(index_all, 1, any)

mydf_all <- mydf_miss[all, ]

mydf <- rbind(mydf_y, mydf_n, mydf_m, mydf_all)
#mydf <- rbind(mydf_y, mydf_na, mydf_n, mydf_m, mydf_all)

} else {
mydf <- rbind(mydf_y, mydf_na, mydf_n)
}

} else {
mydf <- mydf_miss
}

rownames(mydf)=NULL

if (nrow(mydf) == 0) {
pass()
} else if (nrow(mydf) == 1) {
Expand All @@ -179,5 +179,5 @@ check_ae_aerel <- function(AE,preproc=identity,...) {
fail(paste("AE has", nrow(mydf), "observations where AEREL is missing but one of AEREL1 - AEREL[n] is equal to Y/N/NA, or AEREL has an unexpected value, or AEREL[n] missing. "), mydf)
}
}

}
2 changes: 1 addition & 1 deletion R/check_cm_cmdecod.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ check_cm_cmdecod <- function(CM,preproc=identity,...){
### Subset domain to only records with missing coded term (CMDECOD)
mydf <- CM %>%
filter(grepl("CONCOMITANT",CMCAT)) %>%
select(any_of(c("USUBJID", "CMSEQ","CMSTDTC","CMTRT","CMDECOD", "CMPRESP", "CMOCCUR", "RAVE"))) %>%
select(any_of(c("USUBJID", "CMSTDTC", "CMTRT", "CMDECOD", "CMPRESP", "CMOCCUR", "RAVE"))) %>%
filter(is_sas_na(CMDECOD))
rownames(mydf)=NULL

Expand Down
2 changes: 1 addition & 1 deletion R/check_ex_infusion_exstdtc_exendtc.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ check_ex_infusion_exstdtc_exendtc <- function(EX) {
df <- EX
}

## Get minumimum length for when EXSTDTC and EXENDTC are different lengths
## Get minimum length for when EXSTDTC and EXENDTC are different lengths
df$startdtc = ifelse(!is_sas_na(df$EXSTDTC),
substr(df$EXSTDTC, 1, pmin(nchar(df$EXSTDTC), nchar(df$EXENDTC), na.rm = TRUE)),
df$EXSTDTC)
Expand Down
4 changes: 1 addition & 3 deletions tests/testthat/test-check_ae_aedecod.R
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,11 @@ test_that("Function returns false when errors are present when XXSPID is availab
})


test_that("Function returns false when any of the required variables are missing: AESEQ is missing", {
test_that("Function returns false when any of the required variables are missing: USUBJID is missing", {

AE <- data.frame(
STUDY = c(rep("STUDYABC", 6)),
DOMAIN = c(rep("AE", 6)),
USUBJID = c(rep("STUDYABC-0000000-12345",6)),
AESPID = c("AEFORM:0123456-R:1/L:1/AT:INITIALEXTREME",
"AEFORM:0123456-R:2/L:2/AT:INITIALEXTREME",
"AEFORM:0123456-R:5/L:5/AT:INITIALEXTREME",
Expand All @@ -164,7 +163,6 @@ test_that("Function returns false when any of the required variables are missing
test_that("Function returns the failed object in attr(data)", {
AE <- data.frame(
USUBJID = 1:5,
AESEQ = 1:5,
AESTDTC = 1:5,
AETERM = letters[1:5],
AEDECOD = 1:5, stringsAsFactors = FALSE
Expand Down
38 changes: 19 additions & 19 deletions tests/testthat/test-check_ts_aedict.R
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@

test_that("Returns true when no errors present", {

t <- Sys.Date()
y <- substring(t,1,4)

# Work out the main MedDRA version of the year; start on version 24.0

meddra_version <- as.numeric(y) - 2021 + 24

TS1 <- data.frame(
STUDYID = 1,
TSPARMCD = "AEDICT",
TSVAL = paste("MedDRA", paste0(meddra_version, ".0")),
TSVAL2 = ""
)

expect_true(check_ts_aedict(TS1))

})
# test_that("Returns true when no errors present", {
#
# t <- Sys.Date()
# y <- substring(t,1,4)
#
# # Work out the main MedDRA version of the year; start on version 24.0
#
# meddra_version <- as.numeric(y) - 2021 + 24
#
# TS1 <- data.frame(
# STUDYID = 1,
# TSPARMCD = "AEDICT",
# TSVAL = paste("MedDRA", paste0(meddra_version, ".0")),
# TSVAL2 = ""
# )
#
# expect_true(check_ts_aedict(TS1))
#
# })

test_that("Returns false when errors present - 1", {

Expand Down