-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathfigure-neuroblastoma-predictions.R
114 lines (107 loc) · 3.43 KB
/
figure-neuroblastoma-predictions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
source("packages.R")
data(neuroblastoma)
data(neuroblastomaProcessed)
profiles.dt <- data.table(neuroblastoma$profiles)
all.feature.mat <- penaltyLearning::featureMatrix(
profiles.dt,
problem.vars=c("profile.id", "chromosome"),
data.var="logratio")
train.target.mat <- neuroblastomaProcessed$target.mat
train.pid.chr <- sub("[.]", " ", rownames(train.target.mat))
train.feature.mat <- all.feature.mat[train.pid.chr, ]
fit <- IntervalRegressionCV(train.feature.mat, train.target.mat)
mean.mat <- matrix(
colMeans(train.feature.mat),
nrow(all.feature.mat),
ncol(all.feature.mat),
byrow=TRUE)
pred.feature.mat <- all.feature.mat
is.bad <- !is.finite(all.feature.mat)
pred.feature.mat[is.bad] <- mean.mat[is.bad]
pred.log.pen.mat <- predict(fit, pred.feature.mat)
pred.change.dt <- profiles.dt[, {
pid.chr <- paste0(profile.id, " ", chromosome)
penalty <- exp(pred.log.pen.mat[pid.chr,])
fit.fpop <- fpop::Fpop(logratio, penalty)
end <- fit.fpop$t.est
before.change <- end[-length(end)]
after.change <- before.change+1L
list(
model="predicted",
change=as.integer((position[before.change]+position[after.change])/2))
}, by=list(profile.id, chromosome)]
label.dt <- data.table(neuroblastoma$annotations)
error.list <- labelError(
label.dt[, data.table(
model="predicted", profile.id, chromosome)],
label.dt, pred.change.dt,
change.var="change",
model.vars="model",
problem.vars=c("profile.id", "chromosome"))
tit <- error.list$model.errors[, sprintf(
paste(
"Predicted IntervalRegressionCV penalty,",
"%d/%d incorrect labels",
"= %.1f%% train errors,",
"%d/%d FP = %.1f%%,",
"%d/%d FN = %.1f%%"),
sum(errors), .N, sum(errors)/.N*100,
sum(fp), sum(possible.fp), sum(fp)/sum(possible.fp)*100,
sum(fn), sum(possible.fn), sum(fn)/sum(possible.fn)*100)]
print(tit)
show.pid.vec <- 1:10
show.pid.vec <- unique(profiles.dt$profile.id)
show.changes <- pred.change.dt[profile.id %in% show.pid.vec]
show.profiles <- profiles.dt[profile.id %in% show.pid.vec]
show.labels <- label.dt[profile.id %in% show.pid.vec]
show.errors <- error.list$label.errors[profile.id %in% show.pid.vec]
gg.supervised <- ggplot()+
ggtitle(tit)+
theme_bw()+
theme(
panel.margin=grid::unit(0, "lines"),
legend.position="bottom",
legend.box="horizontal"
)+
facet_grid(profile.id ~ chromosome, scales="free", space="free_x")+
penaltyLearning::geom_tallrect(aes(
xmin=min/1e6, xmax=max/1e6, fill=annotation),
alpha=0.5,
color=NA,
data=show.labels)+
geom_point(aes(position/1e6, logratio),
data=show.profiles,
color="grey40",
shape=1)+
scale_x_continuous(
"position on chromosome (mega bases)",
breaks=c(100, 200))+
scale_y_continuous(
"logratio (approximate DNA copy number)",
breaks=c(-1, 0, 1),
limits=c(-1,1)*1.3)+
scale_fill_manual("label", values=c(
"1change"="#ff7d7d",
breakpoint="#a445ee",
normal="#f6c48f"
))+
penaltyLearning::geom_tallrect(aes(
xmin=min/1e6,
xmax=max/1e6,
linetype=status),
fill=NA,
data=show.errors)+
scale_linetype_manual("error type", values=c(
correct=0,
"false negative"=3,
"false positive"=1))+
geom_vline(aes(
xintercept=change/1e6),
data=show.changes,
color="green",
size=0.7,
linetype="dashed")
n.profiles <- length(unique(show.profiles$profile.id))
png("figure-neuroblastoma-predictions.png", 1000, 110+30*n.profiles)
print(gg.supervised)
dev.off()