-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathTutorial2_
88 lines (81 loc) · 2.87 KB
/
Tutorial2_
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
library("Rcmdr")
library(ggplot2)
library(dplyr)
## Question 1a
# Column-by-column summary of the labs data set.
summary(labs)

# Quick base-graphics look at the distribution of waiting times.
hist(labs$WaitTime)

# Same distribution with ggplot2, colouring the bars by whether the wait
# exceeded 70. No bins/binwidth is given, so ggplot2's default binning applies.
# (A bare ggplot(labs) call was dropped: without aesthetics or geoms it only
# renders an empty panel. ggplot2 is already attached at the top of the file.)
ggplot(labs, aes(WaitTime)) +
  geom_histogram(aes(fill = WaitTime > 70))
## Question 1b
# gender & svcsat
# Count satisfied ("Y") / not satisfied ("N") respondents per gender directly
# from the data instead of typing the counts in by hand, so the charts cannot
# drift out of sync with labs. Explicit factor levels guarantee that every
# cell exists (as 0) even when a combination never occurs; values other than
# Y/N or F/M become NA and are left out of the table.
sat_levels <- c("Y", "N")
gender_tab <- table(
  SvcSat = factor(labs$SvcSat, levels = sat_levels),
  Gender = factor(labs$Gender, levels = c("F", "M"))
)
gender_tab

# Wide layout: one row per satisfaction answer, one column per gender.
dat <- data.frame(
  SvcSat = sat_levels,
  Female = as.vector(gender_tab[, "F"]),
  Male = as.vector(gender_tab[, "M"])
)

# reshape2 supplies melt(); it stays attached for the rest of the script.
library(reshape2)
# Long layout (SvcSat, variable, value) as expected by ggplot2.
dat.m <- melt(dat, id.vars = "SvcSat")

# Side-by-side bars: gender on the x axis, split by satisfaction.
ggplot(dat.m, aes(variable, value)) +
  geom_bar(aes(fill = SvcSat), stat = "identity", position = "dodge")

# One panel per gender, satisfaction on the x axis.
ggplot(dat.m, aes(x = SvcSat, y = value)) +
  geom_bar(aes(fill = SvcSat), stat = "identity") +
  facet_grid(. ~ variable)
# age groups & svcsat
# Bucket AGE into right-closed intervals (10,20], (20,30], ..., (60,Inf] and
# cross-tabulate against satisfaction. Computing the table from the data keeps
# every bucket present, including 51-60, which the hand-entered version left
# out. Ages of 10 or below fall outside all intervals and are not counted.
age_labels <- c(
  "Age_below_20", "Age_21to30", "Age_31to40",
  "Age_41to50", "Age_51to60", "Age_above_60"
)
age_group <- cut(
  labs$AGE,
  breaks = c(10, 20, 30, 40, 50, 60, Inf),
  labels = age_labels
)
age_tab <- table(
  SvcSat = factor(labs$SvcSat, levels = c("Y", "N")),
  AgeGroup = age_group
)
age_tab

# Wide layout: one row per satisfaction answer, one column per age bucket.
dm <- data.frame(
  SvcSat = rownames(age_tab),
  unclass(age_tab),
  row.names = NULL
)
# Long layout (SvcSat, variable, value) for plotting; melt() is from reshape2.
mm <- melt(dm, id.vars = "SvcSat")

# Side-by-side bars: age bucket on the x axis, split by satisfaction.
ggplot(mm, aes(variable, value)) +
  geom_bar(aes(fill = SvcSat), stat = "identity", position = "dodge")
## Question 1c
# Body-mass index = weight / height^2, stored on labs for later use.
# NOTE(review): assumes WT is in kg and HT in metres - confirm against the data.
labs$BMI <- labs$WT / (labs$HT * labs$HT)

# Two-column frame used only for the correlation analysis.
bmi <- data.frame(labs$BMI, labs$TC)
# NOTE(review): as.numeric() on a factor returns level codes, not the printed
# values - confirm TC is not stored as a factor at this point.
bmi$labs.TC <- as.numeric(bmi$labs.TC)

# Readable labels for the correlation matrices. The rename must target `bmi`
# (the object built above), and each column needs its own name.
colnames(bmi) <- c("BMI", "Total Cholesterol")

# Correlation between BMI and total cholesterol under three coefficients;
# rows containing a missing value are dropped.
cor(bmi, use = "complete.obs", method = "kendall")
cor(bmi, use = "complete.obs", method = "pearson")
cor(bmi, use = "complete.obs", method = "spearman")
## Question 1d
# Assume a linear model.
# Dummy-code the categorical variables: one 0/1 indicator column per
# ethnicity code (C, M, I) and per gender code (M, F).
# Vectorised with %in% rather than a row-by-row loop: %in% never yields NA, so
# a missing ETHNIC/Gender value becomes 0 in every indicator instead of
# aborting an if() test, and an empty data frame is handled without error.
# as.numeric() keeps the columns as doubles (0 or 1).
labs$Chinese <- as.numeric(labs$ETHNIC %in% "C")
labs$Malay <- as.numeric(labs$ETHNIC %in% "M")
labs$Indian <- as.numeric(labs$ETHNIC %in% "I")
labs$Male <- as.numeric(labs$Gender %in% "M")
labs$Female <- as.numeric(labs$Gender %in% "F")
# Modelling frame: the response (BMI), three continuous predictors and the
# five 0/1 indicator columns, pulled out of labs under their plain names.
reg <- with(
  labs,
  data.frame(BMI, AGE, SBP, TC, Male, Female, Chinese, Malay, Indian)
)

# Force the continuous columns to numeric.
# NOTE(review): as.numeric() on a factor yields level codes - confirm none of
# these columns is a factor.
reg[c("TC", "BMI", "AGE", "SBP")] <- lapply(
  reg[c("TC", "BMI", "AGE", "SBP")],
  as.numeric
)

# Ordinary least squares of BMI on all predictors.
# NOTE(review): Male and Female sum to 1 whenever every Gender is "M" or "F"
# (similarly the ethnicity indicators if no other codes exist); lm() then
# reports NA for the aliased term - confirm this is intended.
fit <- lm(
  BMI ~ AGE + SBP + TC + Male + Female + Chinese + Malay + Indian,
  data = reg
)
summary(fit)

# Hand-computed prediction from typed-in coefficient values.
# NOTE(review): the numbers are presumably rounded estimates read off
# summary(fit); verify them, and which indicators the last two terms refer to.
prediction <- local({
  age_term <- 0.027 * 45
  sbp_term <- 0.065 * 135
  tc_term <- 0.057 * 150
  intercept <- 12.78
  indicator_a <- -0.0825
  indicator_b <- -1.624
  age_term + sbp_term + tc_term + intercept + indicator_a + indicator_b
})
prediction
# Answer: 29.61