-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathSpatialFiltering.R
343 lines (295 loc) · 12.3 KB
/
SpatialFiltering.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
SpatialFiltering <- function (formula, lagformula=NULL, data=list(), na.action=na.fail, nb=NULL,
glist=NULL, style="C", zero.policy=NULL, tol=0.1, zerovalue = 0.0001,
ExactEV=FALSE, symmetric=TRUE, alpha=NULL, alternative="two.sided",
verbose=NULL) {
#
# tol: tolerance value for convergence of spatial filtering (Moran's I).
# The search for eigenvector terminates, once the residual
# autocorrelation falls below abs(Moran's I) < tol. For positive
# spatial autocorrelation in the residuals of the basic unfiltered model,
# only those eigenvectors associated with positive autocorrelation are in
# the selection set. Vice versa, for negative autocorrelation in the
# regression residuals.
#
# zerovalue: eigenvectors with eigenvalues smaller than zerovalue
# will be excluded in eigenvector search. Allows to restrict the
# search set of eigenvectors to those with extreme autocorrelation levels.
#
# ExactEV: In some incidences the approximation of using the expectation
# and variance of Moran's I from the previous iteration will lead
# to inversions. Set ExactEV=TRUE in this situation to use exact
# expectations and variances
# alpha: Added for Pedro Peres-Neto to explore its consequences as
# compared to tol= as a stopping rule.
#
# Authors: Yongwan Chun and Michael Tiefelsdorf
# Dept. of Geography - The Ohio State University
# Columbus, Ohio 43210
# emails: chun.49@osu.edu and tiefelsdorf.1@osu.edu
# Modified by Roger Bivand
#
# Reference: Tiefelsdorf M, Griffith DA. Semiparametric Filtering of Spatial
# Autocorrelation: The Eigenvector Approach. Environment and Planning A
# 2007, 39 (5) 1193 - 1221
#
# Version 0.9.1 - September 11, 2004
# Adaptation to formula format Roger Bivand December 2005
if (is.null(nb)) stop("Neighbour list argument missing")
if (missing(formula)) stop("Formula argument missing")
if (is.null(verbose)) verbose <- get("verbose", envir = .spatialregOptions)
stopifnot(is.logical(verbose))
if (is.null(zero.policy))
zero.policy <- get("zeroPolicy", envir = .spatialregOptions)
stopifnot(is.logical(zero.policy))
# Generate Eigenvectors if eigen vectors are not given
# (M1 for no SAR, MX for SAR)
if (!inherits(formula, "formula")) formula <- as.formula(formula)
# if (missing(data)) data <- environment(formula)
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "na.action"), names(mf), 0)
mf <- mf[c(1, m)]
mf$drop.unused.levels <- TRUE
mf[[1]] <- quote(stats::model.frame)
mf <- eval(mf, parent.frame())
mt <- attr(mf, "terms")
y <- model.extract(mf, "response")
if (any(is.na(y))) stop("NAs in dependent variable")
xsar <- model.matrix(mt, mf)
if (any(is.na(xsar))) stop("NAs in independent variable")
lw <- nb2listw(nb, glist=glist, style=style, zero.policy=zero.policy)
na.act <- attr(mf, "na.action")
if (!is.null(na.act)) {
subset <- !(1:length(lw$neighbours) %in% na.act)
lw <- subset(lw, subset, zero.policy=zero.policy)
}
if (NROW(xsar) != length(lw$neighbours))
stop("Input data and neighbourhood list have different dimensions")
if (symmetric) lw <- listw2U(lw)
S <- listw2mat(lw)
a <- sum(S)
S <- nrow(S)/a*S
nofreg <- nrow(S) # number of observations
mx <- diag(1,nofreg) - xsar %*% qr.solve(crossprod(xsar), t(xsar))
S <- mx %*% S %*% mx
#Get EigenVectors and EigenValues
eigens <- eigen(S,symmetric=symmetric)
val <- as.matrix(eigens$values)
vec <- as.matrix(eigens$vectors)
if (is.null(lagformula)) X <- xsar
else {
xlag <- model.matrix(lagformula, data=data)
isIntercept <- match("(Intercept)", colnames(xlag))
if (!is.na(isIntercept)) xlag <- xlag[,-(isIntercept), drop=FALSE]
X <- cbind(xsar, xlag)
}
coll_test <- lm(y ~ X - 1)
if (any(is.na(coefficients(coll_test)))) stop("Collinear RHS variable detected")
y <- as.matrix(y)
# Xorg <- X
# X will be augmented by the selected eigenvectors
#Total sum of squares for R2
TSS <- sum((y - mean(y))^2)
#Compute first Moran Expectation and Variance
nofexo <- ncol(X)
# Number of exogenous variables (incl. const)
degfree <- nofreg - nofexo
M <- diag(1,nofreg) - X %*% solve(crossprod(X),t(X))
MSM <- M %*% S %*% M
MStat <- GetMoranStat(MSM, degfree)
E <- MStat$Mean
V <- MStat$Var
#Matrix storing the iteration history:
# [1] Step counter of the selection procedure
# [2] number of selected eigenvector (sorted descending)
# [3] its associated eigenvalue
# [4] value Moran's I for residual autocorrelation
# [5] standardized value of Moran's I assuming a normal approximation
# [6] p-value of [5] for given alternative
# [7] R^2 of the model including exogenous variables and eigenvectors
# c("Step","SelEvec","Eval","MinMi","ZMinMi","R2","gamma")
#Store the results at Step 0 (i.e., no eigenvector selected yet)
cyMy <- crossprod(y, M) %*% y
cyMSMy <- crossprod(y, MSM) %*% y
IthisTime <- (cyMSMy) / (cyMy)
zIthisTime <- (IthisTime - E) / sqrt(V)
altfunc <- function(ZI, alternative="two.sided") {
if (alternative == "two.sided")
PrI <- 2 * pnorm(abs(ZI), lower.tail=FALSE)
else if (alternative == "greater")
PrI <- pnorm(ZI, lower.tail=FALSE)
else PrI <- pnorm(ZI)
PrI
}
out <- c( 0,
0,
0,
IthisTime,
zIthisTime,
altfunc(zIthisTime, alternative=alternative),
1 - ((cyMy) / TSS)
)
if (verbose) cat("Step", out[1], "SelEvec", out[2], "MinMi", out[4],
"ZMinMi", out[5],"Pr(ZI)", out[6], "\n")
Aout <- out
#Define search eigenvalue range
#The search range is restricted into a sign range based on Moran's I
#Put a sign for eigenvectors associated with their eigenvalues
#if val > zerovalue (e.g. if val > 0.0001), then 1
#if val < zerovalue (e.g. if val < -0.0001), then -1
#otherwise 0
sel <- cbind(row(y)[,1],val,matrix(0,nofreg,1))
# sel[,3] <- (val > zerovalue) - (val < -zerovalue)
sel[,3] <- (val > abs(zerovalue)) - (val < -abs(zerovalue))
#Compute the Moran's I of the aspatial model (without any eigenvector)
#i.e., the sign of autocorrelation
#if MI is positive, then acsign = 1
#if MI is negative, then acsign = -1
res <- y - X %*% solve(crossprod(X), crossprod(X, y))
acsign <- 1
if (((crossprod(res, S) %*% res) / crossprod(res)) < 0) acsign <- -1
#If only sar model is applied or just the intercept,
#Compute and save coefficients for all eigenvectors
is.onlysar <- FALSE
# if (missing(xlag) & !missing(xsar)) # changed by MT
if (missing(lagformula)) {
is.onlysar <- TRUE
Xcoeffs <- solve(crossprod(X), crossprod(X, y))
gamma4eigenvec <- cbind(row(y)[,1],matrix(0,nofreg,1))
# Only SAR the first parameter estimation for all eigenvectors
# Due to orthogonality each coefficient can be estimate individually
for (j in 1:nofreg) { #Loop
if (sel[j,3] == acsign ) { #Use only feasible unselected evecs
gamma4eigenvec[j,2] <- solve(crossprod(vec[,j]),
crossprod(vec[,j], y))
}
}
}
# Here the actual search starts - The inner loop check each candidate -
# The outer loop selects eigenvectors until the residual autocorrelation
# falls below 'tol'
# Loop over all eigenvectors with positive or negative eigenvalue
oldZMinMi <- Inf
for (i in 1:nofreg) { #Outer Loop
z <- Inf
idx <- 0
for (j in 1:nofreg) { #Inner Loop - Find next eigenvector
if (sel[j,3] == acsign ) { #Use only feasible unselected evecs
xe <- cbind(X, vec[,j]) #Add test eigenvector
#Based on whether it is an only SAR model or not
if (is.onlysar)
res <- y - xe %*% as.matrix(rbind(Xcoeffs,
gamma4eigenvec[j,2]))
else
res <- y - xe %*% solve(crossprod(xe), crossprod(xe, y))
mi <- (crossprod(res, S) %*% res) / crossprod(res)
if (ExactEV) {
M <- diag(1,nofreg) - xe %*% solve(crossprod(xe),t(xe))
degfree <- nofreg - ncol(xe)
MSM <- M %*% S %*% M
MStat <- GetMoranStat(MSM, degfree)
E <- MStat$Mean
V <- MStat$Var
}
if (abs((mi - E) / sqrt(V)) < z) { #Identify min z(Moran)
MinMi = mi
z <- (MinMi - E) / sqrt(V)
idx =j
}
}
} #End inner loop
#Update design matrix permanently by selected eigenvector
X <- cbind(X,vec[,idx])
if (is.onlysar) Xcoeffs <- (rbind(Xcoeffs,gamma4eigenvec[idx,2]))
M <- diag(1,nofreg) - X %*% solve(crossprod(X),t(X))
degfree <- nofreg - ncol(X)
#Update Expectation and Variance
MSM <- M %*% S %*% M
MStat <- GetMoranStat(MSM, degfree)
E <- MStat$Mean
V <- MStat$Var
ZMinMi <- ((MinMi - E) / sqrt(V))
#Add results of i-th step
out <- c(i, idx, val[idx],MinMi,ZMinMi, altfunc(ZMinMi,
alternative=alternative), (1 - (crossprod(y, M) %*% y / TSS)))
if (verbose) cat("Step", out[1], "SelEvec", out[2], "MinMi",
out[4], "ZMinMi", out[5],"Pr(ZI)", out[6], "\n")
Aout <- rbind(Aout, out)
#To exclude the selected eigenvector in the next loop
sel[idx,3] <- 0
if (is.null(alpha)) {
if (abs(ZMinMi) < tol) {
break
} else if (abs(ZMinMi) > abs(oldZMinMi)) {
if (!ExactEV) {
cat(" An inversion has been detected. The procedure will terminate now.\n")
cat(" It is suggested to use the exact expectation and variance of Moran's I\n")
cat(" by setting the option ExactEV to TRUE.\n")
}
break
}
} else {
if (altfunc(ZMinMi, alternative=alternative) >= alpha) break
}
if (!ExactEV) {
if (abs(ZMinMi) > abs(oldZMinMi)) {
cat(" An inversion has been detected. The procedure will terminate now.\n")
cat(" It is suggested to use the exact expectation and variance of Moran's I\n")
cat(" by setting the option ExactEV to TRUE.\n")
break
}
}
oldZMinMi <- ZMinMi
} # End Outer Loop
# Regression coefficients of selected eigenvectors
betagam <- solve(crossprod(X),crossprod(X,y))
gammas <- as.matrix(betagam[(nofexo+1):(nrow(betagam)),1])
#Formatting the output
gammas <- rbind(0, gammas) # Add 0 for iteration zero
out <- cbind(Aout,gammas)
colnames(out) <- c("Step","SelEvec","Eval","MinMi","ZMinMi","Pr(ZI)","R2","gamma")
rownames(out) <- out[,1]
selVec <- vec[,out[,2], drop=FALSE]
colnames(selVec) <- c(paste("vec",out[2:nrow(out),2],sep=""))
#Generating a result object
SFResult <- list(selection=out, dataset=selVec)
if (!is.null(na.act))
SFResult$na.action <- na.act
class(SFResult) <- "SfResult"
return(SFResult)
}
print.SfResult <- function(x, ...) {
print(x$selection, ...)
}
fitted.SfResult <- function(object, ...) {
if (is.null(object$na.action)) {
res <- object$dataset
} else {
omitted_rows <- unname(object$na.action)
res <- matrix(as.numeric(NA), ncol=ncol(object$dataset),
nrow=length(omitted_rows)+nrow(object$dataset))
i <- j <- k <- 1
while (i <= nrow(res)) {
if (j <= length(omitted_rows) && i == omitted_rows[j]) {
i <- i+1
j <- j+1
} else {
res[i,] <- object$dataset[k,]
i <- i+1
k <- k+1
}
}
}
res
}
GetMoranStat <- function(MSM, degfree) {
#MSM : M %*% S %*% M matrix
# M : projection matrix
# S : coded symmetric spatial link matrix
#degfree: degrees of freedom
MSM <- as.matrix(MSM)
t1 <- sum(diag(MSM))
t2 <- sum(diag(MSM %*% MSM))
E <- t1 / degfree
V <- 2 * (degfree * t2 - t1 * t1)/(degfree * degfree * (degfree + 2))
return(list(Mean=E,Var=V))
}