-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSurrogsForKonza_t.R
201 lines (176 loc) · 6.13 KB
/
SurrogsForKonza_t.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
###***Note from Shya***
#Two different soil types: f, t.
#Each common species time series along each column of the matrix except for the
#last column which indicates the intermediate and rare ones merged into a pseudo sp.
#
#Matrix data saved as .RDS from this file (Line 257) with a given soil type (at Line 164)
#https://github.com/sghosh89/Copula_spaceavg/blob/master/data_cleaning_and_npa_for_KNZ.R
#
#Thanks,
#Shyamolina.
#***End note from Shya***
#***the data to work on
datloc_knz<-"./Results/knz_results/skewness_results/"
d<-readRDS(file=paste0(datloc_knz,"ts_CP_knz_soiltype_t.RDS"))
#***setup for the chunk
library(matrixcalc)
library(mvtnorm)
# functions needed
source("PPSurrogObjFun.R")
source("pwlin.R")
source("getmap.R")
source("alignranks.R")
# result folder to save surrogates and function plots
resloc_surrog_knz<-"./Results/knz_results/skewness_results/pp_surrogs_knz_t_CP/"
if (!dir.exists(resloc_surrog_knz)){
dir.create(resloc_surrog_knz)
}
#***for each pair of species, create the function from N-copula space
#to covariance space
set.seed(102)
numpts<-18
mapy<-array(NA,c(dim(d)[2],dim(d)[2],numpts))
for (iind in 1:(dim(d)[2]-1))
{
for (jind in (iind+1):(dim(d)[2]))
{
print(paste0("iind: ",iind,"; jind: ",jind))
plotnm<-paste0(resloc_surrog_knz,"Map_",iind,"_",jind)
thisres<-getmap(d[,c(iind,jind)],numpts=numpts,numsims=500,plotnm=plotnm)
mapx<-thisres$fit_parameters$x
y<-thisres$fit_parameters$y
if (any(diff(y)<0)){stop("Error in make_surrogs_CP_knz: non-monotonic piecewise linear interpolation")}
mapy[iind,jind,]<-y
}
}
save(mapx,mapy,file=paste0(resloc_surrog_knz,"MapsRes.RData"))
#***now do a constrained optimization
#first just test things by calling the objective function on the start vector
pijstart<-cor(d)
pijstart<-pijstart[upper.tri(pijstart)]
PPSurrogObj(pij=pijstart,cijmatd=cijmatd,mapx=mapx,mapy=mapy,saveloc=resloc_surrog_knz)
#set up the constraints - see the math saved in PPSurrog202001
#getting ready
cijmatd<-cov(d)
n<-dim(mapy)[1] #number of species
epsil<-0.1
yijbot<-mapy[,,1]
yijbot<-yijbot[upper.tri(yijbot)]
yijtop<-mapy[,,dim(mapy)[3]]
yijtop<-yijtop[upper.tri(yijtop)]
#first set of constraints
ui1<-diag(length(pijstart))
ci1<-yijbot
#second set of constraints
ui2<-(-1*diag(length(pijstart)))
ci2<-(-yijtop)
#third set of constraints (actually just one constraint)
h<-outer(diag(cijmatd),diag(cijmatd))
h<-sqrt(h[upper.tri(h)])
ui3<-matrix(h,1,length(h))
synccomp<-sum(cijmatd[upper.tri(cijmatd)])
if (synccomp>=0)
{
ci3<-(1-epsil)*synccomp
} else
{
ci3<-(1+epsil)*synccomp
}
#fourth set of constraints
ui4<-(-ui3)
if (synccomp>=0)
{
ci4<-(-(1+epsil))*synccomp
} else
{
ci4<-(-(1-epsil))*synccomp
}
#combine them all
ui<-rbind(ui1,ui2,ui3,ui4)
ci<-c(ci1,ci2,ci3,ci4)
#make sure the start vector satisfies the constrain
sum(ui%*%pijstart-ci>=0)
length(ci)
#Now just explore the objective function a bit, by looking at slices
for (indcounter in 1:length(pijstart))
{
print(paste0("indcounter ",indcounter," of ",length(pijstart)))
bdbot1<-yijbot[indcounter]
h<-pijstart*ui3
if (synccomp>=0)
{
bdbot2<-((1-epsil)*sum(cijmatd[upper.tri(cijmatd)])-sum(h[-indcounter]))/(ui3[1,indcounter])
} else
{
bdbot2<-((1+epsil)*sum(cijmatd[upper.tri(cijmatd)])-sum(h[-indcounter]))/(ui3[1,indcounter])
}
bdbot<-max(bdbot1,bdbot2)
bdtop1<-yijtop[indcounter]
if (synccomp>=0)
{
bdtop2<-((1+epsil)*sum(cijmatd[upper.tri(cijmatd)])-sum(h[-indcounter]))/(ui3[1,indcounter])
} else
{
bdtop2<-((1-epsil)*sum(cijmatd[upper.tri(cijmatd)])-sum(h[-indcounter]))/(ui3[1,indcounter])
}
bdtop<-min(bdtop1,bdtop2)
x<-seq(from=bdbot,to=bdtop,length.out=100)
x<-sort(cbind(x,pijstart[indcounter]))
thisres<-NA*numeric(length(x))
pij<-pijstart
for (xcounter in 1:length(x))
{
pij[indcounter]<-x[xcounter]
thisres[xcounter]<-PPSurrogObj(pij=pij,cijmatd=cijmatd,mapx=mapx,mapy=mapy,saveloc=resloc_surrog_knz)
}
pdf(paste0(resloc_surrog_knz,"Slice_",indcounter,".pdf"))
plot(x,thisres,type="b",pch=20,cex=.5)
points(x[x==pijstart[indcounter]],thisres[x==pijstart[indcounter]],col="red")
lines(rep(pijstart[indcounter],2),c(-1000,1000),type="l",col="red")
dev.off()
}
#Now do the optimization. The goal is not actually to get all the way to
#the optimum, rather it is to get to the first pos def mat you come to.
#The try function is used for that purpose.
#now run the optimization
res<-NA
try(res<-constrOptim(theta=pijstart,f=PPSurrogObj,grad=NULL,ui=ui,ci=ci,
cijmatd=cijmatd,mapx=mapx,mapy=mapy,saveloc=resloc_surrog_knz,
control=list(fnscale=-1,trace=0,maxit=10000)),silent=TRUE)
#***now if it succeeded in finding a pos def mat, then it saved a result, and
#we can pull the result back in from the disk and examine it
if (!is.na(res))
{
stop("Error in make_surrogs_CP_knz_t: optimization to find good Pearson preserving surrogates failed")
}
pijmat<-readRDS(paste0(resloc_surrog_knz,"FirstSuccess_pijmat.RDS"))
nijmat<-readRDS(paste0(resloc_surrog_knz,"FirstSuccess_nijmat.RDS"))
pij<-pijmat[upper.tri(pijmat)]
pdf(file=paste0(resloc_surrog_knz,"ActualVsOptimizedSurrogatePearsons.pdf"))
plot(pijstart,pij,type='p',xlab="vij",ylab="pij",xlim=c(-1,1),ylim=c(-1,1))
lines(c(-1,1),c(-1,1))
text(1,1,cor(pijstart,pij),adj=c(1,1))
dev.off()
#make surrogates using nijmat, and compute their CV_com^2 and
#compare to the actual CV_com^2 of the real data
numsurrog<-100000
sims<-rmvnorm(numsurrog*(dim(d)[1]),sigma=nijmat)
dim(sims)<-c(dim(d)[1],numsurrog,dim(d)[2])
sims<-aperm(sims,c(1,3,2))
dsort<-apply(FUN=sort,X=d,MARGIN=2)
surrogs<-alignranks(dsort,sims)
saveRDS(surrogs,file=paste0(resloc_surrog_knz,"KNZtSurrogates.RDS"))
totpops<-apply(FUN=sum,MARGIN=c(1,3),X=surrogs)
survars<-apply(FUN=var,X=totpops,MARGIN=2)
surmns<-apply(FUN=mean,X=totpops,MARGIN=2)
CVcom2_sur<-survars/(surmns^2)
totd<-apply(FUN=sum,X=d,MARGIN=1)
dvar<-var(totd)
dmn<-mean(totd)
CVcom2_d<-dvar/(dmn^2)
pdf(file=paste0(resloc_surrog_knz,"CVcom2_For_Surr_And_Dat.pdf"))
hist(CVcom2_sur,main=sum(CVcom2_sur<CVcom2_d)/numsurrog)
points(CVcom2_d,0,col="red")
dev.off()
surrogs_CP_KNZ_t<-surrogs # this is the pearson preserving surrogs for
#use in subsequent chunks