-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAdjusted_DPM_UPDATED.R
116 lines (90 loc) · 3.55 KB
/
Adjusted_DPM_UPDATED.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
install.packages("rstan", type = "source")
install.packages("ggforce")
install.packages("ggrepel")
install.packages("openxlsx")
library(rstan)
library(readr)
library(dplyr)
library(ggplot2)
library(ggforce)
library(ggrepel)
library(openxlsx)
options(mc.cores = parallel::detectCores())
#rstan_options(auto_write = TRUE)
GLP1Persistency_FM <- read_csv("GLP1Persistency_sampled_data.csv")
View(GLP1Persistency_FM)
print(mean(GLP1Persistency_FM$MemberAge))
print(mean(GLP1Persistency_FM$DaysToFirstMiss))
##### Selecting columns for the model
data_matrix <- as.matrix(GLP1Persistency_FM[, c("DaysToFirstMiss", "MemberAge")]) # Adjust column names as needed
#### Prepare your data for Stan
stan_data <- list(N = nrow(data_matrix), D = ncol(data_matrix), K = 20, y = data_matrix) # Adjust K as necessary
##### Run the Stan model
fit <- stan(
file = "2Adjusted_DPM.stan", # Specify the path to your Stan model file
data = stan_data,
chains = 4,
iter = 4000 # Increased max_treedepth to avoid warning
)
##### Check diagnostics
print(fit)
check_hmc_diagnostics(fit)
plot(fit)
####Extract fits to data frames####
posterior_samples <- extract(fit)
cluster_assignments <- posterior_samples$cluster_assignments
write.xlsx(final_cluster_assignments, file = "Final_20_cluster_assignments.xlsx", rowNames=TRUE)
####CLuster Assignment Probability Analysis######
install.packages("pheatmap")
library(pheatmap)
library(tidyr)
library(ggplot2)
cluster20_prob<-read_csv("Cluste_20_only_proba.csv")
cluster13_prob<-read_csv("Cluster13_only_Prob.csv")
# Extracting patient IDs
patient_ids <- cluster20_prob$`Patient ID`
# Creating a matrix for the heatmap, excluding the patient ID column
prob_matrix <- as.matrix(cluster20_prob[,-1])
prob_matrix2 <- as.matrix(cluster20_prob_sub[,-1])
# Setting row names of the matrix to patient IDs
rownames(prob_matrix) <- patient_ids
rownames(prob_matrix2)< patient_ids2
#####Heatmap Probabilities#########
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("ComplexHeatmap")
library(ComplexHeatmap)
install.packages("clue")
library(clue)
library(circlize)
# Assuming prob_matrix is your probability matrix
# Transpose it if necessary to align individuals on the y-axis and clusters on the x-axis
prob_matrix <- t(prob_matrix)
color_gradient <- colorRamp2(c(0, 0.5, 1), c("blue", "white", "red"))
# Create the heatmap
Heatmap(prob_matrix,
name = "Probability",
col = color_gradient,
show_row_names = TRUE,
show_column_names = TRUE,
column_names_rot = 45, # Rotate cluster names for better visibility
row_names_side = "left", # Patient IDs on the left
column_title = "Cluster Probability",
row_title = "Patient ID")
# Generate the heatmap Subsets
prob_matrix2_subset <- prob_matrix[1:30, ]
Heatmap(prob_matrix2_subset,
name = "Probability",
col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red")), # Customize colors as needed
clustering_distance_rows = "euclidean",
clustering_distance_columns = "euclidean",
clustering_method_rows = "complete",
clustering_method_columns = "complete",
show_row_names = TRUE,
show_column_names = TRUE,
column_names_side = "top", # Display cluster IDs at the top
row_names_side = "left", # Display individual IDs on the left
row_title = "Individuals",
column_title = "Clusters",
heatmap_legend_param = list(title = "Membership Probability", at = c(0, 0.25, 0.5, 0.75, 1), labels = c("0%", "25%", "50%", "75%", "100%"))
)