forked from metrumresearchgroup/metrumrg
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck.subjects.R
55 lines (52 loc) · 2.17 KB
/
check.subjects.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#' Print per-subject diagnostics for a data set
#'
#' Prints, one line at a time:
#' \enumerate{
#'   \item the number of records and of distinct subjects;
#'   \item for every column containing `NA`, how many subjects have only
#'     missing values, and (when some subjects mix missing and non-missing
#'     values) how many are complete or partially missing;
#'   \item for every column, whether its non-missing values all parse as
#'     numbers, plus its distinct values (fewer than 10) or its range;
#'   \item for every column other than `subject`, whether its non-missing
#'     values are constant within each subject.
#' }
#'
#' @param x A data frame, or an object coercible by `as.data.frame()`.
#' @param subject Name (length-one character) of the column of `x` that
#'   identifies subjects.
#' @return `NULL`, invisibly. The function is called for its printed output.
check.subjects <- function(x, subject) {
  # Coercing up front makes `[[` extraction behave the same for tibbles and
  # other data-frame subclasses as for plain data frames.
  x <- as.data.frame(x)
  columnNames <- names(x)
  if (!is.character(subject) || length(subject) != 1L ||
      !subject %in% columnNames) {
    stop("'subject' must name a single column of 'x'", call. = FALSE)
  }

  ids <- x[[subject]]
  nsubAll <- length(unique(ids))
  print(paste(length(ids), "records from", nsubAll, "subjects"))

  # All percentages are expressed relative to the total number of subjects.
  pct <- function(n) round(100 * n / nsubAll, 1)

  # ---- Missingness by subject ----
  for (columnName in columnNames) {
    isMissing <- is.na(x[[columnName]])
    if (!any(isMissing)) {
      next
    }
    nsubNA <- length(unique(ids[isMissing]))  # subjects with >= 1 missing value
    nsub <- length(unique(ids[!isMissing]))   # subjects with >= 1 observed value
    if (nsubAll == nsub + nsubNA) {
      # No subject has both missing and observed values, so every subject
      # with a missing value has only missing values.
      print(paste0(columnName, ": ", nsubNA, " (", pct(nsubNA),
                   "%) of subjects have only missing values"))
    } else {
      nComplete <- nsubAll - nsubNA
      nOnlyNA <- nsubAll - nsub
      nMixed <- nsubNA + nsub - nsubAll
      # paste0 keeps this function independent of any external
      # paste-without-separator helper.
      print(paste0(columnName, ": ", nComplete, " (", pct(nComplete),
                   "%) of subjects have all x;"))
      print(paste0(" ", nOnlyNA, " (", pct(nOnlyNA),
                   "%) of subjects have only missing values;"))
      print(paste0(" ", nMixed, " (", pct(nMixed),
                   "%) of subjects have some missing values"))
    }
  }

  # ---- Numeric check, levels and range ----
  for (columnName in columnNames) {
    values <- x[[columnName]]
    values <- values[!is.na(values)]
    # A failed conversion is the test itself, so the "NAs introduced by
    # coercion" warning carries no information here.
    asNumber <- suppressWarnings(as.double(as.character(values)))
    isNumeric <- !anyNA(asNumber)
    distinct <- unique(values)
    if (!isNumeric) {
      print(paste(columnName, "is not a numeric variable"))
    }
    if (length(distinct) < 10) {
      print(paste(columnName, "levels:", paste(sort(distinct), collapse = ",")))
    } else if (isNumeric) {
      # Numeric storage keeps its native formatting; numeric-looking factors
      # or strings use the parsed numbers, since range() errors on factors
      # and is lexicographic on character vectors.
      limits <- if (is.numeric(values)) range(values) else range(asNumber)
      print(paste(columnName, "range:", limits[1], "-", limits[2]))
    }
  }

  # ---- Constancy within subject ----
  for (columnName in columnNames[columnNames != subject]) {
    values <- x[[columnName]]
    observed <- !is.na(values)
    subjectIds <- ids[observed]
    # More distinct (subject, value) pairs than subjects means at least one
    # subject has more than one distinct value.
    pairs <- paste(subjectIds, values[observed])
    if (length(unique(subjectIds)) != length(unique(pairs))) {
      print(paste(columnName, ": vary with time within some subjects"))
    } else {
      print(paste(columnName, ": constant within all subjects"))
    }
  }

  invisible(NULL)
}