## Create directories first
t=format(Sys.time(), "%d_%b_%Y")
dir.create(paste("C:/MinuteClinic/Dynamic_time_warping/modeling_work_",t,sep=""))
setwd(paste("C:/MinuteClinic/Dynamic_time_warping/modeling_work_",t,sep=""))
subDir=paste(getwd())
version_path=paste(subDir,"/version_name",sep="")
ifelse(file.exists(version_path),,dir.create(file.path(version_path)))
indata_path=paste(subDir,"/dtw_indata",sep="")
ifelse(file.exists(indata_path),,dir.create(file.path(indata_path)))
modelpath=version_path
outpath=paste(subDir,"/version_name/output",sep="")
ifelse(file.exists(outpath),,dir.create(file.path(outpath)))
#create figures folder
#figures=paste(subDir,"/version_name/figures",sep="")
#ifelse(file.exists(figures),,dir.create(file.path(figures)))
list.files()
setwd(modelpath)
list.files()
TRUE
TRUE
TRUE
- 'dtw_indata'
- 'version_name'
'output'
Starting with visit level data. Create an hourly aggregated data set for analysis.
require(dplyr)
require(lubridate)
# Develop best version hourly data ------
options(warn=0)
visit_level=read.csv('C:/MinuteClinic/Data/MC_data_01JAN2016_to_02JUL2018.csv')
visit_level$Employee=ifelse(visit_level$VisitOrigin=='Employee',1,0)
visit_level$Hmpil=ifelse(visit_level$VisitOrigin=='Hmpil',1,0)
visit_level$Scheduled=ifelse(visit_level$VisitOrigin=='Scheduled',ifelse(visit_level$ScheduleVisitType=="PSS",1,0),0)
visit_level$WalkIn=ifelse(visit_level$VisitOrigin=='Walk-In',1,0)
visit_level$RevenueGenerating=ifelse(visit_level$VisitStatus=='Revenue Generating',1,0)
visit_level$CompleteNonGenerating=ifelse(visit_level$VisitStatus=='Complete Non Revenue',1,0)
visit_level$ReferOutNoCharge=ifelse(visit_level$VisitStatus=='Refer Out No Charge',1,0)
visit_level$Canceled=ifelse(visit_level$VisitStatus=='Canceled',1,0)
visit_level$year=substr(visit_level$AppointmentDate,1,4)
visit_level$day=substr(visit_level$AppointmentDate,5,6)
visit_level$week=week(ymd(visit_level$AppointmentDate))
visit_level$Wait.Time=ifelse(visit_level$Canceled==1,0.01,visit_level$Wait.Time)
#subset to after Q4 of 2017
visit_level=filter(visit_level, AppointmentDate>=20171001)
visit_level$Hour=visit_level$AppointmentTimeOfDay/10000
visit_level=filter(visit_level, Hour>8 & Hour<=20)
visit_level$Hour=round(visit_level$Hour)
head(visit_level)
Loading required package: dplyr
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Loading required package: lubridate
Attaching package: 'lubridate'
The following object is masked from 'package:base':
date
AppointmentDate | ClinicNumber | ProviderEpicid | VisitStatus | AppointmentTimeOfDay | AppointmentMadeTimeOfDay | Wait.Time | ScheduleVisitType | VisitOrigin | ScheduleTimeOfDay | ... | Scheduled | WalkIn | RevenueGenerating | CompleteNonGenerating | ReferOutNoCharge | Canceled | year | day | week | Hour |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
20171001 | 02497 | 3323 | Revenue Generating | 104000 | 103714 | 0.033333 | Walk-In | 1040 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 2017 | 10 | 40 | 10 | |
20171001 | 02497 | 3323 | Revenue Generating | 121000 | 120056 | 0.066666 | Walk-In | 1210 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 2017 | 10 | 40 | 12 | |
20171003 | 07017 | 1079 | Revenue Generating | 122500 | 121610 | 20.150000 | Walk-In | NA | ... | 0 | 1 | 1 | 0 | 0 | 0 | 2017 | 10 | 40 | 12 | |
20171005 | 22063 | 1398 | Revenue Generating | 113000 | 112358 | 0.066666 | Walk-In | 1130 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 2017 | 10 | 40 | 11 | |
20171005 | 22063 | 1398 | Revenue Generating | 131000 | 130313 | 0.066666 | Walk-In | 1310 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 2017 | 10 | 40 | 13 | |
20171005 | 22063 | 1398 | Revenue Generating | 142000 | 141007 | 0.050000 | Walk-In | 1420 | ... | 0 | 1 | 1 | 0 | 0 | 0 | 2017 | 10 | 40 | 14 |
#convert visit level to daily to calc percent visits per hour
hourly=data.frame(visit_level%>%group_by(ClinicNumber,AppointmentDate,Hour)%>%summarise(provider.count=n_distinct(ProviderEpicid),
Employee_Sum=sum(Employee),Hmpil_Sum=sum(Hmpil),Scheduled_Sum=sum(Scheduled),WalkIn_sum=sum(WalkIn),
ReferOutNoCharge_Sum=sum(ReferOutNoCharge),Canceled_Sum=sum(Canceled),wait_time_avg=mean(Wait.Time,na.rm=T)))
head(hourly)
ClinicNumber | AppointmentDate | Hour | provider.count | Employee_Sum | Hmpil_Sum | Scheduled_Sum | WalkIn_sum | ReferOutNoCharge_Sum | Canceled_Sum | wait_time_avg |
---|---|---|---|---|---|---|---|---|---|---|
00012 | 20171001 | 10 | 1 | 0 | 0 | 0 | 7 | 0 | 0 | 7.128571 |
00012 | 20171001 | 11 | 2 | 0 | 1 | 0 | 2 | 0 | 1 | 5.231111 |
00012 | 20171001 | 12 | 1 | 0 | 0 | 0 | 2 | 0 | 0 | 1.424999 |
00012 | 20171001 | 13 | 1 | 0 | 0 | 0 | 2 | 0 | 0 | 23.191666 |
00012 | 20171001 | 14 | 1 | 0 | 0 | 0 | 5 | 0 | 0 | 40.020000 |
00012 | 20171001 | 15 | 1 | 0 | 0 | 0 | 2 | 0 | 0 | 3.666666 |
hourly$AllVisits=hourly$Employee_Sum+hourly$Hmpil_Sum+hourly$Scheduled_Sum+hourly$WalkIn_sum
head(hourly)
daily=hourly%>%group_by(ClinicNumber, AppointmentDate)%>%summarise(daily_sum=sum(AllVisits))
head(daily)
ClinicNumber | AppointmentDate | Hour | provider.count | Employee_Sum | Hmpil_Sum | Scheduled_Sum | WalkIn_sum | ReferOutNoCharge_Sum | Canceled_Sum | wait_time_avg | AllVisits |
---|---|---|---|---|---|---|---|---|---|---|---|
00012 | 20171001 | 10 | 1 | 0 | 0 | 0 | 7 | 0 | 0 | 7.128571 | 7 |
00012 | 20171001 | 11 | 2 | 0 | 1 | 0 | 2 | 0 | 1 | 5.231111 | 3 |
00012 | 20171001 | 12 | 1 | 0 | 0 | 0 | 2 | 0 | 0 | 1.424999 | 2 |
00012 | 20171001 | 13 | 1 | 0 | 0 | 0 | 2 | 0 | 0 | 23.191666 | 2 |
00012 | 20171001 | 14 | 1 | 0 | 0 | 0 | 5 | 0 | 0 | 40.020000 | 5 |
00012 | 20171001 | 15 | 1 | 0 | 0 | 0 | 2 | 0 | 0 | 3.666666 | 2 |
ClinicNumber | AppointmentDate | daily_sum |
---|---|---|
00012 | 20171001 | 25 |
00012 | 20171002 | 40 |
00012 | 20171003 | 23 |
00012 | 20171004 | 35 |
00012 | 20171005 | 34 |
00012 | 20171006 | 38 |
hrly=left_join(select(hourly,ClinicNumber, AppointmentDate,Hour,AllVisits), daily, by=c("ClinicNumber","AppointmentDate"))
hrly=unique(hrly)
hrly$PerVisits=hrly$AllVisits/hrly$daily_sum
head(hrly)
ClinicNumber | AppointmentDate | Hour | AllVisits | daily_sum | PerVisits |
---|---|---|---|---|---|
00012 | 20171001 | 10 | 7 | 25 | 0.28 |
00012 | 20171001 | 11 | 3 | 25 | 0.12 |
00012 | 20171001 | 12 | 2 | 25 | 0.08 |
00012 | 20171001 | 13 | 2 | 25 | 0.08 |
00012 | 20171001 | 14 | 5 | 25 | 0.20 |
00012 | 20171001 | 15 | 2 | 25 | 0.08 |
#check the data
range(hrly$AppointmentDate)
hrly=filter(hrly, AppointmentDate<20180101)
hrly_out=data.frame(hrly%>%group_by(ClinicNumber,Hour)%>%summarise(per_visits=mean(PerVisits, na.rm=T)))
summary(hrly_out)
write.csv(hrly_out, 'C:/MinuteClinic/Dynamic_time_warping/DTW_Input_data/hourly_percent_visits_2018.csv', row.names=F)
- 20171001
- 20171231
ClinicNumber Hour per_visits
00012 : 13 Min. : 8.00 Min. :0.00000
00015 : 13 1st Qu.:11.00 1st Qu.:0.08849
00017 : 13 Median :14.00 Median :0.11101
00019 : 13 Mean :13.74 Mean :0.10735
00033 : 13 3rd Qu.:17.00 3rd Qu.:0.12753
00069 : 13 Max. :20.00 Max. :1.00000
(Other):13567 NA's :7
memory.limit()
rm(list=ls())
gc()
7603
used | (Mb) | gc trigger | (Mb) | max used | (Mb) | |
---|---|---|---|---|---|---|
Ncells | 634472 | 33.9 | 3763080 | 201.0 | 4703850 | 251.3 |
Vcells | 3063194 | 23.4 | 283334255 | 2161.7 | 354149013 | 2702.0 |
require(lubridate)
hrly=read.csv('C:/MinuteClinic/Dynamic_time_warping/DTW_Input_data/hourly_percent_visits_2018.csv')
head(hrly)
hrly=filter(hrly, Hour>=9 & Hour <19)
#install.packages('Hmisc')
Hmisc::describe(hrly)
ClinicNumber | Hour | per_visits |
---|---|---|
00012 | 8 | 0.04038461 |
00012 | 9 | 0.07950237 |
00012 | 10 | 0.10117204 |
00012 | 11 | 0.11112025 |
00012 | 12 | 0.12879019 |
00012 | 13 | 0.06822577 |
hrly
3 Variables 11221 Observations
--------------------------------------------------------------------------------
ClinicNumber
n missing distinct
11221 0 1133
lowest : 00012 00015 00017 00019 00022, highest: 22063 22067 MC26 MC960 MC98
--------------------------------------------------------------------------------
Hour
n missing distinct Info Mean Gmd .05 .10
11221 0 10 0.99 13.49 3.298 9 9
.25 .50 .75 .90 .95
11 13 16 17 18
Value 9 10 11 12 13 14 15 16 17 18
Frequency 1127 1125 1125 1123 1122 1123 1125 1122 1119 1110
Proportion 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.100 0.099
--------------------------------------------------------------------------------
per_visits
n missing distinct Info Mean Gmd .05 .10
11214 7 11207 1 0.1179 0.02845 0.07580 0.08813
.25 .50 .75 .90 .95
0.10193 0.11667 0.13057 0.14605 0.15838
lowest : 0.03225806 0.03311966 0.03333333 0.03367003 0.03846154
highest: 0.43598821 0.50000000 0.66666667 0.75000000 1.00000000
--------------------------------------------------------------------------------
#check for missing hour
counts=data.frame(table(hrly$ClinicNumber))
#counts
#create base size dataframe
clinic_list=as.character(unique(hrly$ClinicNumber))
df1=data.frame(rep(clinic_list,each=10),rep(9:18,length(clinic_list)))
head(df1)
names(df1)<-c("ClinicNumber","Hour")
df1$ClinicNumber=as.character(df1$ClinicNumber)
hrly$ClinicNumber=as.character(hrly$ClinicNumber)
df2=left_join(df1,hrly, by=c("ClinicNumber", "Hour"))
head(df2)
rep.clinic_list..each...10. | rep.9.18..length.clinic_list.. |
---|---|
00012 | 9 |
00012 | 10 |
00012 | 11 |
00012 | 12 |
00012 | 13 |
00012 | 14 |
ClinicNumber | Hour | per_visits |
---|---|---|
00012 | 9 | 0.07950237 |
00012 | 10 | 0.10117204 |
00012 | 11 | 0.11112025 |
00012 | 12 | 0.12879019 |
00012 | 13 | 0.06822577 |
00012 | 14 | 0.08772773 |
df2[is.na(df2)] <- 0
Hmisc::describe(df2)
df2
3 Variables 11330 Observations
--------------------------------------------------------------------------------
ClinicNumber
n missing distinct
11330 0 1133
lowest : 00012 00015 00017 00019 00022, highest: 22063 22067 MC26 MC960 MC98
--------------------------------------------------------------------------------
Hour
n missing distinct Info Mean Gmd .05 .10
11330 0 10 0.99 13.5 3.3 9.0 9.9
.25 .50 .75 .90 .95
11.0 13.5 16.0 17.1 18.0
Value 9 10 11 12 13 14 15 16 17 18
Frequency 1133 1133 1133 1133 1133 1133 1133 1133 1133 1133
Proportion 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1
--------------------------------------------------------------------------------
per_visits
n missing distinct Info Mean Gmd .05 .10
11330 0 11208 1 0.1167 0.03026 0.07315 0.08674
.25 .50 .75 .90 .95
0.10135 0.11640 0.13044 0.14590 0.15818
lowest : 0.00000000 0.03225806 0.03311966 0.03333333 0.03367003
highest: 0.43598821 0.50000000 0.66666667 0.75000000 1.00000000
--------------------------------------------------------------------------------
#full clean dataset
write.csv(df2,'C:/MinuteClinic/Dynamic_time_warping/DTW_Input_data/hourly_percent_visits_Q42017_clean.csv', row.names=F)
head(df2)
ClinicNumber | Hour | per_visits |
---|---|---|
00012 | 9 | 0.07950237 |
00012 | 10 | 0.10117204 |
00012 | 11 | 0.11112025 |
00012 | 12 | 0.12879019 |
00012 | 13 | 0.06822577 |
00012 | 14 | 0.08772773 |
indata=data.frame(matrix(df2$per_visits,nrow = (nrow(df2)/1133),ncol = 1133, byrow = F))
head(indata)
dim(indata)
names(indata)<-clinic_list
head(indata)
write.csv(indata,'C:/MinuteClinic/Dynamic_time_warping/DTW_Input_data/hourly_percent_visits_Q42017_column_wise.csv', row.names=F)
X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 | X9 | X10 | ... | X1124 | X1125 | X1126 | X1127 | X1128 | X1129 | X1130 | X1131 | X1132 | X1133 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0.07950237 | 0.08660912 | 0.09182722 | 0.09255609 | 0.2100011 | 0.08324442 | 0.09708343 | 0.10065626 | 0.08714902 | 0.10045744 | ... | 0.09186785 | 0.1050256 | 0.08468987 | 0.4166667 | 0.2000000 | 0.1153846 | 0.062500 | 0.1910700 | 0.1944730 | 0.2063808 |
0.10117204 | 0.11938805 | 0.12762900 | 0.12588859 | 0.2179389 | 0.11400472 | 0.12718215 | 0.12386541 | 0.12393098 | 0.12556233 | ... | 0.10410890 | 0.1351720 | 0.10708249 | 0.3928571 | 0.0000000 | 0.2307692 | 0.156250 | 0.2075086 | 0.1831905 | 0.2195078 |
0.11112025 | 0.10078731 | 0.10567166 | 0.10958468 | 0.1934753 | 0.10677515 | 0.10940930 | 0.12006374 | 0.11926979 | 0.10829557 | ... | 0.10850045 | 0.1458302 | 0.14667971 | 0.0000000 | 0.0000000 | 0.2692308 | 0.125000 | 0.1708053 | 0.1517759 | 0.1650625 |
0.12879019 | 0.11806513 | 0.12734586 | 0.12343345 | 0.1695541 | 0.13091189 | 0.12049299 | 0.12076218 | 0.11667445 | 0.10636356 | ... | 0.15694842 | 0.1386013 | 0.18225043 | 0.0000000 | 0.6666667 | 0.0000000 | 0.171875 | 0.2303108 | 0.1668078 | 0.2484773 |
0.06822577 | 0.05750729 | 0.07647690 | 0.06655861 | 0.2203184 | 0.06758471 | 0.07530850 | 0.08395549 | 0.07342130 | 0.08027248 | ... | 0.12346782 | 0.1014109 | 0.09603915 | 0.0000000 | 0.0000000 | 0.1538462 | 0.062500 | 0.2072596 | 0.1178391 | 0.1468982 |
0.08772773 | 0.09535318 | 0.10385227 | 0.10551818 | 0.1745796 | 0.09538082 | 0.13993317 | 0.15060871 | 0.11085450 | 0.12323391 | ... | 0.15328175 | 0.1472172 | 0.13435042 | 0.0000000 | 0.0000000 | 0.2307692 | 0.171875 | 0.2210729 | 0.1590159 | 0.1734248 |
- 10
- 1133
00012 | 00015 | 00017 | 00019 | 00022 | 00033 | 00038 | 00041 | 00069 | 00073 | ... | 17661 | 17663 | 17665 | 22052 | 22053 | 22063 | 22067 | MC26 | MC960 | MC98 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0.07950237 | 0.08660912 | 0.09182722 | 0.09255609 | 0.2100011 | 0.08324442 | 0.09708343 | 0.10065626 | 0.08714902 | 0.10045744 | ... | 0.09186785 | 0.1050256 | 0.08468987 | 0.4166667 | 0.2000000 | 0.1153846 | 0.062500 | 0.1910700 | 0.1944730 | 0.2063808 |
0.10117204 | 0.11938805 | 0.12762900 | 0.12588859 | 0.2179389 | 0.11400472 | 0.12718215 | 0.12386541 | 0.12393098 | 0.12556233 | ... | 0.10410890 | 0.1351720 | 0.10708249 | 0.3928571 | 0.0000000 | 0.2307692 | 0.156250 | 0.2075086 | 0.1831905 | 0.2195078 |
0.11112025 | 0.10078731 | 0.10567166 | 0.10958468 | 0.1934753 | 0.10677515 | 0.10940930 | 0.12006374 | 0.11926979 | 0.10829557 | ... | 0.10850045 | 0.1458302 | 0.14667971 | 0.0000000 | 0.0000000 | 0.2692308 | 0.125000 | 0.1708053 | 0.1517759 | 0.1650625 |
0.12879019 | 0.11806513 | 0.12734586 | 0.12343345 | 0.1695541 | 0.13091189 | 0.12049299 | 0.12076218 | 0.11667445 | 0.10636356 | ... | 0.15694842 | 0.1386013 | 0.18225043 | 0.0000000 | 0.6666667 | 0.0000000 | 0.171875 | 0.2303108 | 0.1668078 | 0.2484773 |
0.06822577 | 0.05750729 | 0.07647690 | 0.06655861 | 0.2203184 | 0.06758471 | 0.07530850 | 0.08395549 | 0.07342130 | 0.08027248 | ... | 0.12346782 | 0.1014109 | 0.09603915 | 0.0000000 | 0.0000000 | 0.1538462 | 0.062500 | 0.2072596 | 0.1178391 | 0.1468982 |
0.08772773 | 0.09535318 | 0.10385227 | 0.10551818 | 0.1745796 | 0.09538082 | 0.13993317 | 0.15060871 | 0.11085450 | 0.12323391 | ... | 0.15328175 | 0.1472172 | 0.13435042 | 0.0000000 | 0.0000000 | 0.2307692 | 0.171875 | 0.2210729 | 0.1590159 | 0.1734248 |
#let's create the dataset as rowwise with hour across the top
rdata=data.frame(matrix(df2$per_visits,nrow =1133,ncol = 10, byrow = F))
head(rdata)
dim(rdata)
X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 | X9 | X10 |
---|---|---|---|---|---|---|---|---|---|
0.07950237 | 0.13953903 | 0.1294059 | 0.1049971 | 0.10328918 | 0.1154648 | 0.11143828 | 0.12894046 | 0.08395457 | 0.12655779 |
0.10117204 | 0.06502419 | 0.1250171 | 0.1055075 | 0.11953988 | 0.1106251 | 0.11387425 | 0.12945934 | 0.11490577 | 0.10256979 |
0.11112025 | 0.10560313 | 0.1034505 | 0.1369112 | 0.06265098 | 0.1230172 | 0.09043376 | 0.12664228 | 0.11448791 | 0.10633368 |
0.12879019 | 0.11710577 | 0.1137967 | 0.1256704 | 0.09814131 | 0.0944434 | 0.11907518 | 0.07421062 | 0.12783914 | 0.09441497 |
0.06822577 | 0.11897637 | 0.1270601 | 0.1263911 | 0.10574567 | 0.1090297 | 0.11621321 | 0.09758311 | 0.10665360 | 0.12550554 |
0.08772773 | 0.10618040 | 0.1511323 | 0.1076241 | 0.12465133 | 0.1031845 | 0.11686510 | 0.11184903 | 0.12159527 | 0.12267545 |
- 1133
- 10
#install.packages('dtw')
require(dtw)
dmatrix=(dist(rdata, method="DTW"))
clusters<-hclust(dmatrix,method="average")
str(clusters)
List of 7
$ merge : int [1:1132, 1:2] -286 -116 -138 -373 -93 -141 -703 -951 -186 -421 ...
$ height : num [1:1132] 0.0557 0.057 0.0572 0.0608 0.0614 ...
$ order : int [1:1133] 1077 270 271 1073 1080 1064 1065 971 520 235 ...
$ labels : NULL
$ method : chr "average"
$ call : language hclust(d = dmatrix, method = "average")
$ dist.method: chr "DTW"
- attr(*, "class")= chr "hclust"
dim(dmatrix)
plot(clusters)
- 1133
- 1133
# Cut tree into 10 groups
sub_grp <- cutree(clusters, k = 10)
sub_grp2<- cutree(clusters, k = 2)
sub_grp3 <- cutree(clusters, k = 3)
sub_grp4 <- cutree(clusters, k = 4)
sub_grp5 <- cutree(clusters, k = 5)
sub_grp6<- cutree(clusters, k = 6)
sub_grp7 <- cutree(clusters, k = 7)
sub_grp8 <- cutree(clusters, k = 8)
sub_grp9 <- cutree(clusters, k = 9)
# Number of members in each cluster
table(sub_grp)
table(sub_grp3)
table(sub_grp5)
sub_grp
1 2 3 4 5 6 7 8 9 10
1118 3 1 2 1 1 1 1 4 1
sub_grp3
1 2 3
1131 1 1
sub_grp5
1 2 3 4 5
1124 6 1 1 1
clustdf=rdata %>%
mutate(cluster10 = sub_grp, cluster2=sub_grp2,cluster3 = sub_grp3, cluster4=sub_grp4,cluster5 = sub_grp5, cluster6=sub_grp6,
cluster7= sub_grp7, cluster8=sub_grp8,cluster9 = sub_grp9)
clustdf$ClinicNumber<-clinic_list
head(clustdf)
write.csv(clustdf,'C:/MinuteClinic/Dynamic_time_warping/hourly_percent_visits_Q42017_clusters.csv')
#table(clustdf$cluster,clustdf$ClinicNumber)
X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 | X9 | X10 | cluster10 | cluster2 | cluster3 | cluster4 | cluster5 | cluster6 | cluster7 | cluster8 | cluster9 | ClinicNumber |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0.07950237 | 0.13953903 | 0.1294059 | 0.1049971 | 0.10328918 | 0.1154648 | 0.11143828 | 0.12894046 | 0.08395457 | 0.12655779 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 00012 |
0.10117204 | 0.06502419 | 0.1250171 | 0.1055075 | 0.11953988 | 0.1106251 | 0.11387425 | 0.12945934 | 0.11490577 | 0.10256979 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 00015 |
0.11112025 | 0.10560313 | 0.1034505 | 0.1369112 | 0.06265098 | 0.1230172 | 0.09043376 | 0.12664228 | 0.11448791 | 0.10633368 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 00017 |
0.12879019 | 0.11710577 | 0.1137967 | 0.1256704 | 0.09814131 | 0.0944434 | 0.11907518 | 0.07421062 | 0.12783914 | 0.09441497 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 00019 |
0.06822577 | 0.11897637 | 0.1270601 | 0.1263911 | 0.10574567 | 0.1090297 | 0.11621321 | 0.09758311 | 0.10665360 | 0.12550554 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 00022 |
0.08772773 | 0.10618040 | 0.1511323 | 0.1076241 | 0.12465133 | 0.1031845 | 0.11686510 | 0.11184903 | 0.12159527 | 0.12267545 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 00033 |
#create long data
dflong=left_join(df2,select(clustdf,cluster10,cluster2,cluster3,cluster4,cluster5,cluster6,cluster7,cluster8,
cluster9,ClinicNumber), by="ClinicNumber")
head(dflong)
#plot the cluster groups with a few number of clusters by clinic by hour
write.csv(dflong,'C:/MinuteClinic/Dynamic_time_warping/hourly_percent_visits_Q42017_clusters_longformat.csv')
ClinicNumber | Hour | per_visits | cluster10 | cluster2 | cluster3 | cluster4 | cluster5 | cluster6 | cluster7 | cluster8 | cluster9 |
---|---|---|---|---|---|---|---|---|---|---|---|
00012 | 9 | 0.07950237 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
00012 | 10 | 0.10117204 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
00012 | 11 | 0.11112025 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
00012 | 12 | 0.12879019 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
00012 | 13 | 0.06822577 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
00012 | 14 | 0.08772773 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
ggplot(dflong, aes(x=Hour, y=per_visits)) +
geom_line(aes(colour=as.factor(cluster10), group=as.factor(cluster10))) + # colour, group both depend on cond2
geom_point(aes(colour=as.factor(cluster10)), # colour depends on cond2
size=3)+facet_wrap(~cluster10)+theme_bw()
#install.packages("factoextra")
#library(factoextra)
fviz_nbclust(rdata, FUN = hcut, method = "wss")
Warning message in stats::dist(x):
"NAs introduced by coercion"Warning message in stats::dist(x, method = method, ...):
"NAs introduced by coercion"Warning message in stats::dist(x, method = method, ...):
"NAs introduced by coercion"Warning message in stats::dist(x, method = method, ...):
"NAs introduced by coercion"Warning message in stats::dist(x, method = method, ...):
"NAs introduced by coercion"Warning message in stats::dist(x, method = method, ...):
"NAs introduced by coercion"Warning message in stats::dist(x, method = method, ...):
"NAs introduced by coercion"Warning message in stats::dist(x, method = method, ...):
"NAs introduced by coercion"Warning message in stats::dist(x, method = method, ...):
"NAs introduced by coercion"Warning message in stats::dist(x, method = method, ...):
"NAs introduced by coercion"Warning message in stats::dist(x, method = method, ...):
"NAs introduced by coercion"
#install.packages('gplots')
#library(gplots)
y <- matrix(rnorm(500), 100, 5, dimnames=list(paste("g", 1:100, sep=""), paste("t", 1:5, sep="")))
#head(y)
#heatmap.2(y)
heatmap.2(as.matrix(rdata))
plot(as.dendrogram(clusters), edgePar=list(col=3, lwd=4), horiz=T)
install.packages('')
library(pheatmap); library("RColorBrewer")
pheatmap(clusterdf, color=brewer.pal(9,"Blues"))
Error in library(pheatmap): there is no package called 'pheatmap'
Traceback:
1. library(pheatmap)
2. stop(txt, domain = NA)