-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathurban_water_conservation_policies_si.Rnw
1733 lines (1421 loc) · 82.8 KB
/
urban_water_conservation_policies_si.Rnw
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Supporting Information
%% (Optional)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% OVERVIEW
%
% Please note that all supporting information will be peer reviewed with your manuscript.
% In general, the purpose of the supporting information is to enable
% authors to provide and archive auxiliary information such as data
% tables, method information, figures, video, or computer software,
% in digital formats so that other scientists can use it.
% The key criteria are that the data:
% 1. supplement the main scientific conclusions of the paper but are not essential to the conclusions (with the exception of
% including data so the experiment can be reproducible);
% 2. are likely to be usable or used by other scientists working in the field;
% 3. are described with sufficient precision that other scientists can understand them, and
% 4. are not exe files.
%
% All Supporting text and figures should be included in this document.
% Data sets, large tables, movie files,
% and audio files should be uploaded separately, following AGU naming
% conventions. Include their captions in this document and list the
% file name with the caption. You will be prompted to upload these
% files on the Upload Files tab during the submission process, using
% file type “Supporting Information (SI)”
\documentclass[draft]{agujournal}
%\documentclass{agujournal}
% \usepackage{jglucida}
\usepackage[hyphens]{url}
% \usepackage[hidelinks]{hyperref}
\usepackage{amsmath}
\usepackage{float}
% Please type in the journal name: \journalname{<Journal Name>}
% ie,
\journalname{Earth's Future}
%% Choose from this list of Journals:
%
% Journal of Geophysical Research
% JGR-Biogeosciences
% JGR-Earth Surface
% JGR-Planets
% JGR-Solid Earth
% JGR-Space Physics
% Global Biochemical Cycles
% Geophysical Research Letters
% Paleoceanography
% Radio Science
% Reviews of Geophysics
% Tectonics
% Space Weather
% Water Resource Research
% Geochemistry, Geophysics, Geosystems
% Journal of Advances in Modeling Earth Systems (JAMES)
% Earth's Future
% Earth and Space Science
<<knitr_options, cache=F, echo=F, include=F, eval=T, message=F, warning=F, error=F, results='hide'>>=
library(knitr)
my_tex_chunk_hook <- function(x, options) {
ai = knitr:::output_asis(x, options)
size = if (options$size == 'normalsize') '' else sprintf('\\%s', options$size)
if (!ai) x = sprintf('%% jg_tex_chunk_hook\n%s\n%s\n', size, x)
if (options$split) {
name = fig_path('.tex', options, NULL)
if (!file.exists(dirname(name)))
dir.create(dirname(name))
cat(x, file = name)
sprintf('\\input{%s}', name)
} else x
}
my_tex_output_hook <- function(x, options) {
if (knitr:::output_asis(x, options)) {
x
} else knitr:::.verb.hook(x)
}
my_tex_plot_hook <- function(x, options) {
knitr:::hook_plot_tex(x, options)
}
knit_hooks$set(chunk = my_tex_chunk_hook, output = my_tex_output_hook,
plot = my_tex_plot_hook)
opts_knit$set(progress = TRUE, verbose = TRUE,
header = '',
self.contained="FALSE"
)
do_cache = FALSE
clean_output <- FALSE
if (clean_output) {
cache_dir <- "cache_si_clean/"
si_output_dir <- 'si_files_clean/'
figures_dir <- 'figures_si_clean/'
} else {
cache_dir <- "cache_si/"
si_output_dir <- 'si_files/'
figures_dir <- 'figures_si/'
}
opts_chunk$set(cache = do_cache,
echo = FALSE,
message = FALSE, warning = TRUE,
error = FALSE, # stop knitting on errors
out.width="0.8\\linewidth",
dev = 'pdf', dpi = 600,
cache.path=cache_dir,
fig.path=figures_dir)
@
<<si_options, cache=F, include=F, echo=F, eval=T, results='hide'>>=
if (!dir.exists(si_output_dir)) dir.create(si_output_dir, recursive = TRUE)
if (!dir.exists(file.path(si_output_dir, 'tables'))) dir.create(file.path(si_output_dir, 'tables'), recursive = TRUE)
if (!dir.exists(file.path(si_output_dir, 'figures'))) dir.create(file.path(si_output_dir, 'figures'), recursive = TRUE)
set.seed(477668150)
random_alpha <- TRUE # random effects on state-level intercept.
sigma_sigma_delta <- 2.5 # for partial-pooling at state-level.
pop_target_year <- 2014
mu_phi_vwci <- 50
sigma_phi_vwci <- 20
mu_phi_rr <- 15
sigma_phi_rr <- 10
pop_target_year <- 2014
bea_year <- 2014
@
<<setup, echo=F, include=F, eval=T, cache=F, results='hide'>>=
library(pacman)
p_load(tidyverse, rlang, readxl, stringr, xtable,
extrafont, ggrepel, gridExtra, ggthemes, viridis,
cowplot, egg)
p_load(rstan, loo)
p_load_gh('jonathan-g/jgmcmc@jgmcmc')
p_load_gh('jonathan-g/jgally@jgally')
figure_font <- choose_font('CM Sans')
if (figure_font == '') {
font_install('fontcm')
figure_font <- choose_font(c('CM Sans', 'Helvetica', 'Arial', 'sans'), quiet = FALSE)
}
loadfonts(device = 'pdf', quiet = TRUE)
if (Sys.info()['sysname'] == 'Windows') {
loadfonts(device = 'win', quiet = TRUE)
}
# theme_set(theme_bw(base_size = 15))
theme_set(
theme_bw(base_size = 15, base_family = figure_font) +
theme(plot.title = element_text(size = rel(1)),
axis.text.x = element_text(size = rel(1)),
panel.grid.major = element_line(size = 0.5, colour = "gray90"),
panel.grid.minor = element_line(size = 0.25, colour = "gray90")
)
)
thick_ci <- c(0.34 / 2., 1. - 0.34 / 2.)
thin_ci <- c(0.05 / 2., 1. - 0.05 / 2.)
number_names <- c("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten")
if(FALSE) {
opts_chunk$set(dev.args = c(opts_chunk$dev.args,
list(family = figure_font)))
}
opts_chunk$set(dev.args = c(opts_chunk$dev.args,
list(pointsize = 8)))
opts_chunk$get() %>% str_c(names(.), ., sep = '=', collapse = ', ') %>% message()
@
<<set_processing_vars, echo=F, eval=T, results='hide'>>=
script_dir <- 'scripts'
data_dir <- 'data'
@
<<load_scripts, echo=F, eval=T, results='hide', cache.extra=c(file.mtime(file.path(script_dir, 'fit_model.R')))>>=
source(file.path(script_dir, 'fit_model.R'))
@
<<load_data, include = FALSE, cache=FALSE>>=
filtered_data <- read_rds(file.path(data_dir, 'filtered_data.Rds'))
msa_vars <- vars_1$msa_vars
# state_vars <- vars_1$state_vars %>% set_names(.) %>% map(~str_replace_all(.x, fixed('state.'), ''))
state_vars <- vars_1$state_vars %>% str_replace_all(fixed('state.'), '')
msa_data <- filtered_data$msa_data %>%
dplyr::select_(~city, ~state, ~msa.name, ~vwci, ~reqtotal, ~rebtotal, ~pop,
.dots = msa_vars) %>%
rename(state.abb = state, requirements = reqtotal, rebates = rebtotal) %>%
mutate(vwci.rank = rank(-vwci, ties.method = 'min'),
msa.name = str_extract(msa.name, "^(.+?)(?=([[:space:]]*Metro +Area)?$)"))
state_data <- filtered_data$state_data %>%
dplyr::select_(~state, ~state.name, ~state.fips,
.dots = state_vars) %>%
rename(state.abb = state)
n_cities <- nrow(msa_data)
n_states <- length(unique(msa_data$state.abb))
message("Reading model fits...")
model_fits <- read_rds(file.path(data_dir, "model_fits.Rds"))
message("...finished reading model fits")
message("Reading test model fits...")
loo_model_fits <- read_rds(file.path(data_dir, "test_model_fits.Rds"))
message("...finished reading test model fits")
message("Reading pooled model fits...")
pooled_loo_model_fits <- read_rds(file.path(data_dir, "test_model_fits_pooled.Rds"))
message("...finished reading pooled model fits")
@
\begin{document}
%% This command needs article title as argument to \supportinginfo{}:
\supportinginfo{Urban Water Conservation Policies in the United States}
\authors{Jonathan M. Gilligan\affil{1,2,3}, Christopher A. Wold\affil{3}, Scott C. Worland\affil{2}, John J. Nay\affil{3,4,5}, David J. Hess\affil{3,6}, George M. Hornberger\affil{1,2,3}}
\affiliation{1}{Department of Earth \& Environmental Sciences, Vanderbilt University, Nashville, Tennessee, USA}
\affiliation{2}{Department of Civil \& Environmental Engineering, Vanderbilt University, Nashville, Tennessee, USA}
\affiliation{3}{Vanderbilt Institute for Energy and Environment, Vanderbilt University, Nashville, Tennessee, USA}
\affiliation{4}{Information Law Institute, New York University, New York, New York, USA}
\affiliation{5}{Berkman Klein Center, Harvard University, Cambridge, Massachusetts, USA}
\affiliation{6}{Department of Sociology, Vanderbilt University, Nashville, Tennessee, USA}
% \affiliation{6}{U.S. Geological Survey, Nashville, Tennessee, USA}
\correspondingauthor{Jonathan M. Gilligan}{jonathan.gilligan@vanderbilt.edu}
\section*{Contents}
%%%Remove or add items as needed%%%
\begin{enumerate}
\item Text S1
\item Figures S1 to S10
\item Tables S1 to S17
%if Tables are larger than 1 page, upload as separate excel file
\end{enumerate}
\section*{Additional Supporting Information}
\begin{enumerate}
\item Captions for Datasets S1 to S4
\item Data Analysis Scripts S1
\end{enumerate}
\section*{Introduction}
This supporting information document presents additional details of the data and analysis.
\section*{SI Text}
\subsection*{Data}
We used VWCI data for \Sexpr{n_cities}~cities in \Sexpr{n_states}~states, as shown in Table~S\ref{tab:vwci}.
At the MSA level (Dataset~S1, Table~S\ref{tab:vwci}), our regression analysis used the following six covariates: $\ln(\text{population})$, population growth rate between 2010 and \Sexpr{pop_target_year}, the K\"oppen aridity index, the fraction of the municipal water supply coming from surface water (henceforth, surface-water fraction), the Cook Partisan Voting Index (PVI), and the per-capita real personal income (RPI) for \Sexpr{bea_year} normalized for inflation and regional variations in the cost of living. We used the natural logarithm of the population rather than the raw population because the raw population was
skewed, with a sharp peak near \Sexpr{filtered_data$msa_data %>% summarize(median(pop)) %>% simplify() %>% signif(1) %>% scales::comma() %>% unname()}%
, and a long tail at higher populations
(Figure~S\ref{fig:msa_vars_distribution}).
At the state level (Dataset~S2, Table~S\ref{tab:state}), our analysis used the following four covariates: PVI, RPI, the K\"oppen aridity index, and the surface-water fraction.
\subsection*{Analysis}
\subsubsection*{Diagnostics}
\iftrue
Our Monte Carlo analysis sampled four Markov chains,
allowing each chain to warm-up and tune sampling parameters for
the first 1000 iterations
and then sampling each chain for 1000 more iterations,
yielding a total of 4000 samples.
Each sample is a vector of length \Sexpr{1 + length(msa_vars) + length(state_vars) + n_states - 1 + 2},
with values for each of the parameters
$\alpha_0$, $\beta_j$, $\gamma_k$, $\delta_{\text{state}}$, $\sigma$, and $\phi$,
where $j$ indexes over the \Sexpr{number_names[length(msa_vars)]} MSA-level covariates,
$k$ indexes over the \Sexpr{number_names[length(state_vars)]} state-level covariates,
and \emph{state\/} indexes over
\Sexpr{n_states - 1} of the \Sexpr{n_states} states (leaving one out for
identifiability).
The samples approximate random draws from the joint posterior probability
distribution of the parameters, given the priors and the observed
data. Thus, the statistics of the sampled values approximate the statistics
of the joint posterior distribution.
Collinearity among the predictor variables is diagnosed by observing correlations
in the joint posterior probability distributions of the regression coefficients
\citep[pp.~288--293]{stan:manual:2015}.
Inefficient sampling due to varying curvature in the log-probability manifold or
poorly chosen priors can be diagnosed by irregularities in joint posterior
distributions \citep[pp.~316--321]{stan:manual:2015}. Pairwise correlation
plots of the Monte-Carlo samples for the regression coefficients in our models
of VWCI, requirements, and rebates
(Figures~S\ref{fig:vwci_pairs_plot}--S\ref{fig:reb_pairs_plot}) are smooth with
little correlation and give no cause for concern.
In addition, the Hamiltonian Monte Carlo calculations proceeded without any
divergences or exceessive tree depths after warm-up, and the
Gelman-Rubin $\hat R$ potential
scale-reduction factor converged to $\le 1.02$ for each parameter
\citep{stan:manual:2015}.
\else
Pairwise correlation plots of the posterior probability distributions of
regressions parameters (Figures~S\ref{fig:vwci_pairs_plot}--S\ref{fig:reb_pairs_plot})
are smooth and show little correlation.
The Hamiltonian Monte Carlo calculations
proceeded without any divergences or exceessive tree depths after warm-up, and the
Gelman-Rubin
$\hat R$ potential scale-reduction factor
(Tabs~S\ref{tab:vwci_posterior}--S\ref{tab:reb_posterior}) converged to
$\le 1.02$ for each parameter.
\fi
\subsubsection*{Model Selection}
\iftrue
We used several model-selection criteria in deciding whether to model the VWCI,
requirements, and rebates as binomial or beta-binomial processes. At each joint
sample of the model parameters in the Monte-Carlo process, we both computed the
log-likelihood of the observed data under the sampled parameters and also
generated posterior predictions obtained by drawing simulated observations from
binomial or beta-binomial distribution at each joint sample of the model parameters.
Visual comparisons of distributions of posterior predictions to observed data and
comparisons of the posterior predictions of mean, maximum, and minimum VWCI over
the cities in our data set showed better agreement for the overdispersed
$\beta$-binomial process than for a purely binomial one \citep{gelman:bda:2014}.
A separate test for overdispersion, which accounts for the danger of overfitting
by introducing new free parameters, assesses the predictive accuracy of different
models using
Leave-One-Out cross-validation Information Criterion (LOO-IC)
or the
Widely Available Information Criterion (WAIC, also known as the
Watanabe-Aikake Information Criterion),
obtained by Pareto-smoothed importance sampling \citep{gelman:predictive:2014,vehtari:loo:2016}.
Both information criteria favored the overdispersed beta-binomial distribution
over a pure binomial, and also strongly favored hierarchical over single-level
models (Tables~S\ref{tab:loo.years}--S\ref{tab:waic.vars}).
Our choice to use very weakly informative priors in our model reduces the
accuracy of our estimates of LOO-IC and WAIC \citep{vehtari:loo:2016},
but we do not worry overly about this potential inaccuracy both because the
posterior prediction test yields the same results and because a pure binomial
model gives very similar results to those presented here.
\else
Leave-one-out cross-validation (Table~S\ref{tab:loo.years}--S\ref{tab:loo.vars})
and the Widely Applicable Information Criterion
(Table~S\ref{tab:waic.years}--S\ref{tab:waic.vars}) were used for model selection
(overdispersed beta-binomial versus binomial and hierarchical versus single-level
regressions).
\fi
\subsubsection*{Results}
Results of the analysis are summarized in
Tables~S\ref{tab:vwci_posterior}--S\ref{tab:reb_posterior}.
\subsubsection*{Robustness Tests}
We chose our explanatory variables based on theoretical considerations, as
described in \citet{hess:drought:2016}. To test the robustness of our analysis,
we compared the results described above to several kinds of alternate regression
analyses for the VWCI, using the LOO-IC and WAIC information criteria to
assess the predictive accuracy of the different analyses
\citep{gelman:predictive:2014,vehtari:loo:2016}.
In the first series, we varied the interval over which we averaged
the K\"oppen aridity index, considering the 30-year period 1985--2014,
the 45 year period 1970--2014,
the 20 year period 1995--2014, and the 10-year period 2005--2014. This tests
for sensitivity to recent extreme events versus the longer-term average
climate.
There were no significant differences between the regressions using the
four different intervals: the posterior distributions were nearly identical
(Figure~S5) and the information criteria differed by less than one tenth
of the standard error (Tables~S3 and S6).
Second, we performed regressions with additional or different explanatory
variables (Figures~S6--S7 and Tables~S4--S5 and S7--S8).
Population density has been found to correlate well with
voting patterns, and thus might affect water conservation policies
\citep{rodden:geographic:2010}.
We substituted 2010 population-weighted population density
and the rate of change of population density from 2000--2010
\citep{wilson:pop.density:2012}
for total population.
The results were similar to those of our original analysis,
and produced slightly, but insignificantly, inferior information criteria scores.
We also considered that the area of an MSA might be important to collecting and
distributing water, so we conducted regressions that included an additional
explanatory variable representing the total area of the MSA as reported
in the 2010 U.S. Census
\citep{wilson:pop.density:2012}, the coefficient
for area was consistent with zero and the information criteria scores were
slightly and insignificantly inferior to our original analysis.
We also considered that in addition to mean personal
income and relative purchasing power, the distribution of income might be
important, so we performed regressions that included Gini indices of
income inequality at both the MSA and the state levels,
taken from the 2014 American Community Survey \citep{acs:gini:2017}.
The Gini index lies in the range
zero (complete equality, with everyone receiving the same income)
to one (complete inequality, with one person receiving all of the income and
everyone else receiving nothing).
In these regressions the coefficient for the state-level Gini index was
positive and of comparable magnitude to the state-PVI coefficient,
and the coefficient for the MSA-level Gini index was very small and consistent
with zero.
The information criteria scores were slightly and insignificantly inferior
to our original model.
In order to test alternative model structures, we introduced interaction terms
between aridity and PVI at both the state and MSA levels. As with the previous
tests, introducing this term did not change the posterior distributions of the
coefficients for the other covariates by very much and the information criteria
scores were slightly, but insignificantly, worse.
All of these different analyses of VWCI consistently found that
at the state level, the largest coefficients were for aridity and PVI,
and at the MSA-level PVI, population (or population density), and population
(or population-density) growth rates were positive and of comparable magnitude.
The variations in coefficients across all of the alternate analyses
were well within the 95\% highest-density
intervals of the posterior probability distribution.
In order to test that aridity and PVI were, in fact, significant We also performed
regressions leaving out the aridity, leaving out PVI, and leaving out PVI
but replacing population with population density.
These regressions produced information criteria scores that were inferior to the
original regression by slightly less than one standard error in the case of PVI
and by about one-third of the standard error in the case of aridity.
We also observed that removing either one of these covariates did not significantly
change the coefficients for any of the other covariates. This reinforces the evidence
of the pairwise correlation plots that there are no important problems with
interdependence or multicollinearity among the covariates.
In all of these analyses, the MSA-level PVI, population, and population
growth coefficients were positive and distinct from zero. The values and the
ranking of these three coefficients changed, but by amounts that were well
within the posterior probability distributions. The remaining MSA-level
variables were consistent with zero.
The posterior distributions were considerably narrower than the prior
distributions and lay well within those prior distributions,
which indicates that they are not constrained by the priors.
We tested this by varying the scales of the priors and by
replacing the Cauchy priors on $\alpha_0$, $\beta$, and $\gamma$ with normal
priors.
The results were very similar to and consistent with the original analysis.
We also tested alternative regressions that used different normalizations for the
MSA-level variables: In this alternative normalization, instead of taking the
differences between the MSA-level variables and the state-level variables, and
scaling this difference to the state-level scales, we took the raw values for
each MSA-level variable and scaled them to have zero-mean and a standard deviation
of 0.5 across all of the MSAs, without referring them to the state-level variables.
We repeated all of the regressions described above using this alternative scaling
of the MSA-variables (Figures~S8--10 and Tables~9--14).
The information criteria scores for these regressions were generally slightly,
but insignificantly, inferior to the corresponding regressions using the original
MSA-scaling and the posterior distributions of the regression coefficients
differed in one important way: The coefficient for state-level PVI shifted to
lower values, becoming consistent with zero, and its
median was roughly one third of its value for the original scaling.
The other regresion coefficients did not change much under the new scaling.
This result reveals an ambiguity in interpreting the role of PVI:
Under the original scaling, PVI was important at both the state and MSA levels,
leading to the interpretation that both the state-level PVI and the difference between
the state-level and MSA-level PVI were well corelated with the propensity to adopt
water-conservation policies. Under the alternative scaling, the
coefficient for state-level PVI is consistent with zero
(with an 84\% probability of being positive)
and the coefficient for MSA-level PVI is clearly positive, with an almost
identical distribution as with the original scaling.
Because the information criteria scores are only slightly different between
the two analyses (the difference is roughly 5\% of the standard error on the LOO-IC),
there is no good reason to prefer one to the other and from a statistical perspective
two alternative interpretations are equally plausible: That both the state-level PVI
and the difference between the state- and MSA-level PVI are independently significant,
or that the effect of the state-level PVI is not significantly different from zero,
but that the MSA-level PVI is significant.
We conclude from this that the results of our analysis are robust against
many changes of time-spans, explanatory variables, and assumptions about priors,
except for the ambituity over the importance of state-level PVI.
Aside from state-level PVI, the effects of state-level aridity and
MSA-level PVI, population, and population growth are robust and stable.
Alternate model structures and alternate specifications of covariates were either
markedly inferior (as measured by information criteria scores) or produced
regression coefficients that were consistent with the original analysis.
Although many of the differences between information criteria were small,
our original analysis produced the best score.
There are myriad other potential explanatory variables, but our concern that
further exploration of alternative models might
unintentionally become an exercise in ``$p$-hacking'' due to
``garden of forking paths'' effects \citep{gelman:forking.paths:2014}
led us to confine this analysis to our original set of variables, which
we had previously chosen for theoretical reasons \citep{hess:drought:2016}.
\section*{Figures S1--S10}
<<msa_vars_distribution, include=TRUE, fig.cap = "Kernel-density distribution of MSA-level covariates. Population in millions and RPI in thousands of chained 2009 dollars.", fig.width=15, fig.height=8, out.width="6in", fig.pos='H'>>=
sci_10 <- function(x) {
parse(text=gsub("e\\+?","%*% 10^",x))
}
sc_breaks <- function(x) {
message(str_c(x, collapse = ', '))
default = waiver(x)
ifelse(x[[2]] <= 1000, default, default[c(1, 3, 5)])
}
sel_vars <- c('pvi', 'aridity', 'rpi', 'surface.water', 'pop', 'pop.growth') %>%
keep(~.x %in% names(msa_data))
msa_data %>% dplyr::select(one_of(sel_vars)) %>%
mutate(log.pop = log(pop), pop = pop / 1E+6, rpi = rpi / 1000) %>%
gather(key = covariate, value = value) %>%
mutate(covariate =
str_replace_all(covariate,
fixed(c('pvi' = 'PVI', 'rpi' = 'RPI', 'rpp' = 'RPP',
'pop.growth' = 'pop growth',
'surface.water' = 'surface water',
'log.pop' = 'log pop',
'log.RPI' = 'log RPI'))) %>%
ordered(levels = c('pop', 'log pop', 'pop growth',
'aridity', 'surface water',
'PVI', 'RPI'
))) %>%
ggplot(aes(x = value)) + geom_density() +
facet_wrap(~covariate, scales = 'free', ncol = 3) +
scale_x_continuous(label=sci_10) +
scale_y_continuous(label=sci_10) +
labs( x = "Value", y = "Density") -> p2
ggsave(file.path(si_output_dir, 'figures', 'fig_S1.pdf'), p2, 'pdf', height=8, width = 15)
print(p2)
@
<<vwci_pairs_plot_function, cache=do_cache, include = FALSE>>=
plot_vwci_pairs <- function(g) {
g <- g %>% mutate(ParameterOriginal = str_replace_all(ParameterOriginal, '[\\[\\]_]+', '.'))
pl_1 <- g %>% dplyr::select(Parameter, ParameterOriginal) %>% distinct() %>%
mutate(Parameter = str_replace_all(Parameter, 'alpha[._]0', 'alpha[0]')) %>%
spread(key = ParameterOriginal, value = Parameter) %>% simplify()
gs <- g %>%
dplyr::select(Chain, Iteration, ParameterOriginal, value) %>%
spread(key = ParameterOriginal, value = value) %>%
dplyr::select(-Chain, -Iteration) # %>% dplyr::sample_n(200)
pl_2 <- pl_1 %>% str_replace_all(fixed(c('surface water' = 'SW', 'pop growth' = 'growth',
'pvi' = 'PVI', 'rpi' = 'RPI', 'rpp' = 'RPP',
'affordability' = 'afford.')))
plt <- ggpairs(gs, upper=list(continuous = wrap("points", alpha = 0.01, size=0.1), discrete = "blank", na = "blank"),
diag=list(continuous = "densityDiag", discrete = "blankDiag", na = "blankDiag"),
lower=list(continuous = wrap("density", size=0.2), discrete="blank", na="blank"),
columnLabels = pl_2, axisLabels = "show",
labeller = "label_parsed"
) +
theme(axis.text.x = element_text(size = 5, family = figure_font, angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 5, family = figure_font, angle = 0),
strip.text.x = element_text(size = 7.5, family = figure_font, angle = 45, hjust = 0.5, vjust = 0),
strip.text.y = element_text(size = 9, family = figure_font, angle = 0, hjust = 0, vjust = 0.5),
strip.background = element_blank())
plt
}
@
<<vwci_pairs_plot, cache=do_cache, include = TRUE, fig.cap="Correlation plot of posterior probability distribution of regression coefficients $\\alpha$, $\\beta$, and $\\gamma$ for VWCI. The diagonal panels show the probability density for each coefficient, panels in the upper triangle show scatterplots of 4000 HMC samples, and panels in the lower triangle show joint probability density contours corresponding to the scatterplot in the upper triangle. Slight correlations are apparent, as between $\\gamma_{\\text{aridity}}$ and $\\gamma_{\\text{SW}}$, $\\gamma_{\\text{PVI}}$ and $\\gamma_{\\text{RPI}}$, and $\\beta_{\\text{SW}}$ and $\\alpha_0$, but these are small enough not to pose problems apart from slightly increasing the uncertainty in the parameter estimates.", fig.height=6, fig.width=6, out.width="6.25in", dev = "png", cache.extra=c(model_fits)>>=
plot_vwci_pairs(model_fits$ggs$ggs_1_ml_beta_alpha) -> p3
ggsave(file.path(si_output_dir, 'figures', 'fig_S2.png'), p3, 'png', height=6, width = 6, dpi = 600)
print(p3)
@
<<req_pairs_plot, cache=do_cache, include = TRUE, fig.cap="Correlation plot of posterior probability distribution of regression coefficients $\\alpha$, $\\beta$, and $\\gamma$ for requirements.", fig.height=6, fig.width=6, out.width="6.25in", dev = "png", cache.extra=c(model_fits, plot_vwci_pairs)>>=
plot_vwci_pairs(model_fits$ggs$ggs_1_ml_beta_alpha_req) -> p4
ggsave(file.path(si_output_dir, 'figures', 'fig_S3.png'), p4, 'png', height=6, width = 6, dpi = 600)
print(p4)
@
<<reb_pairs_plot, cache=do_cache, include = TRUE, fig.cap="Correlation plot of posterior probability distribution of regression coefficients $\\alpha$, $\\beta$, and $\\gamma$ for rebates.", fig.height=6, fig.width=6, out.width="6.25in", dev = "png", cache.extra=c(model_fits, plot_vwci_pairs)>>=
plot_vwci_pairs(model_fits$ggs$ggs_1_ml_beta_alpha_reb) -> p5
ggsave(file.path(si_output_dir, 'figures', 'fig_S4.png'), p5, 'png', height=6, width = 6, dpi = 600)
print(p5)
@
<<setup_cat_plots, cache=do_cache, include=FALSE>>=
source('scripts/gen_cat_plot.R')
model_indices <- c(1:11)
make_indexed_cat_plot <- function(ggs_list, model_vars, limits, param_levels,
index, subplot_label = "", label_suffix = "",
use_legend = TRUE,
fig_type = c("years", "vars"),
dep_var = c("vwci", "req", "reb")) {
fig_type = match.arg(fig_type)
dep_var = match.arg(dep_var)
ggs <- ggs_list[[str_c("ggs_", index, "_ml_beta_alpha")]]
v <- model_vars[[str_c("vars_", index)]]
fig_index = str_c(fig_type, "_fig")
title <- str_trim(subplot_label)
if (str_length(title) > 0) {
title <- str_c(" ", title, " ")
}
suffix <- str_trim(label_suffix)
if (str_length(suffix) > 0) {
suffix <- str_c(" ", suffix)
}
title <- str_c(title, v$captions[[fig_index]], suffix)
p <- generate_cat_plot(ggs, dep_var, title,
limits = limits, levels = param_levels,
legend = use_legend) +
theme(axis.title.x = element_text(size = rel(0.8)),
plot.title = element_text(size = rel(0.8)),
axis.text.x = element_text(size = rel(0.7)),
axis.text.y = element_text(size = rel(0.6))
)
p
}
@
<<load_ggs, cache=do_cache, include=FALSE>>=
model_ggs <- loo_model_fits$ggs %>% discard(str_detect(names(.), '_re[qb]$')) %>%
keep(str_detect(names(.), '_ml_beta_alpha$'))
for (i in seq_along(model_ggs)) {
old_levels <- levels(model_ggs[[i]]$Parameter)
new_levels <- old_levels %>% set_names(.) %>% str_replace_all(c("aridity_[0-9]+" = "aridity", "gini" = "Gini"))
model_ggs[[i]]$Parameter <- ordered(model_ggs[[i]]$Parameter, levels = old_levels, labels = new_levels)
}
param_levels <- levels(model_ggs$ggs_1_ml_beta_alpha$Parameter)
dens_levels <- levels(model_ggs$ggs_2_ml_beta_alpha$Parameter) %>% setdiff(param_levels) %>%
set_names(str_replace(., " *dens", ""))
extra_levels <- model_ggs %>% keep(str_detect(names(.), '^ggs_[0-9]+')) %>%
map(~levels(.x$Parameter)) %>% simplify() %>% unname() %>% unique() %>%
setdiff(param_levels) %>% setdiff(dens_levels)
for (i in seq_along(dens_levels)) {
index <- which(param_levels == names(dens_levels)[i])
param_levels <- c(head(param_levels, index), dens_levels[i], tail(param_levels, -index))
}
gamma_levels <- param_levels %>% keep(str_detect(., "^gamma"))
beta_levels <- param_levels %>% keep(str_detect(., "^beta"))
other_levels <- param_levels %>% discard(str_detect(., "^beta|gamma"))
gamma_levels <- c(extra_levels %>% keep(str_detect(., "^gamma")), gamma_levels)
beta_levels <- c(extra_levels %>% keep(str_detect(., "^beta")), beta_levels)
param_levels <- c(other_levels, beta_levels, gamma_levels)
for (i in seq_along(model_ggs)) {
model_ggs[[i]]$Parameter <- ordered(model_ggs[[i]]$Parameter, levels = param_levels)
}
limits <- tibble(lower = 0.0, upper = 0.0)
for (model_num in c(model_indices)) {
limits <- model_ggs[[str_c('ggs', model_num, 'ml_beta_alpha', sep = "_")]] %>%
dplyr::filter(str_detect(Parameter, '^(beta|gamma)')) %>%
group_by(Parameter) %>% summarize(lower = quantile(value, 0.025), upper = quantile(value,0.975)) %>%
ungroup() %>% bind_rows(limits)
}
limits <- limits %>%
summarize(lower = min(lower), upper = max(upper)) %>%
mutate_all(funs(./4)) %>%
unlist()
@
<<vwci_years_cat_plots, cache=do_cache, include=TRUE, fig.cap="Regression coefficients for VWCI, averaging state and MSA aridity over different intervals.", fig.height=8, fig.width=6, out.width="6.25in", cache.extra=c(loo_model_fits)>>=
p1 <- make_indexed_cat_plot(model_ggs, model_vars, limits, param_levels,
index = 1, subplot_label = "(a)", label_suffix = "",
use_legend = TRUE,
fig_type = "years", dep_var = "vwci") +
theme(legend.position = c(0.99,0.01),
axis.title.x = element_blank()
)
p2 <- make_indexed_cat_plot(model_ggs, model_vars, limits, param_levels,
2, "(b)", "", FALSE,
"years", "vwci") +
theme(axis.title.x = element_blank()
)
p3 <- make_indexed_cat_plot(model_ggs, model_vars, limits, param_levels,
3, "(c)", "", FALSE,
"years", "vwci") +
theme(axis.title.x = element_blank()
)
p4 <- make_indexed_cat_plot(model_ggs, model_vars, limits, param_levels,
4, "(d)", "", FALSE,
"years", "vwci")
p_cat_years <- egg::ggarrange(p1, p2, p3, p4, ncol = 1, draw = FALSE)
ggsave(file.path(si_output_dir, 'figures', 'fig_S5.pdf'), p_cat_years, 'pdf', height=9, width = 6)
print(p_cat_years)
@
<<vwci_vars_cat_plots, cache=do_cache, include=TRUE, dependson="vwci_years_cat_plots", fig.cap="Regression coefficients for VWCI, with different covariates.", fig.height=8, fig.width=6, out.width="6.25in">>=
p1 <- make_indexed_cat_plot(model_ggs, model_vars, limits, param_levels,
1, "(a)", "", TRUE,
"vars", "vwci") +
theme(legend.position = c(0.99,0.01),
axis.title.x = element_blank()
)
p5 <- make_indexed_cat_plot(model_ggs, model_vars, limits, param_levels,
5, "(b)", "", FALSE,
"vars", "vwci")
p6 <- make_indexed_cat_plot(model_ggs, model_vars, limits, param_levels,
6, "(c)", "", FALSE,
"vars", "vwci") +
theme(axis.title.x = element_blank()
)
p7 <- make_indexed_cat_plot(model_ggs, model_vars, limits, param_levels,
7, "(d)", "", FALSE,
"vars", "vwci")
p_cat_vars <- egg::ggarrange(p1, p5, p6, p7, ncol = 1, draw = FALSE)
ggsave(file.path(si_output_dir, 'figures', 'fig_S6.pdf'), p_cat_vars, 'pdf', height=9, width = 6)
print(p_cat_vars)
@
<<vwci_pvi_cat_plots, cache=do_cache, include=TRUE, dependson="vwci_years_cat_plots", fig.cap="Regression coefficients for VWCI, with different covariates.", fig.height=8, fig.width=5, out.width="6.25in">>=
p1 <- make_indexed_cat_plot(model_ggs, model_vars, limits, param_levels,
1, "(a)", "", TRUE,
"vars", "vwci") +
theme(legend.position = c(0.01, 0.01),
legend.justification = c(0,0),
axis.title.x = element_blank()
)
p8 <- make_indexed_cat_plot(model_ggs, model_vars, limits, param_levels,
8, "(b)", "", FALSE,
"vars", "vwci") +
theme(axis.title.x = element_blank()
)
p9 <- make_indexed_cat_plot(model_ggs, model_vars, limits, param_levels,
9, "(d)", "", FALSE,
"vars", "vwci") +
theme(axis.title.x = element_blank()
)
p10 <- make_indexed_cat_plot(model_ggs, model_vars, limits, param_levels,
10, "(e)", "", FALSE,
"vars", "vwci")
p11 <- make_indexed_cat_plot(model_ggs, model_vars, limits, param_levels,
11, "(c)", "", FALSE,
"vars", "vwci") +
theme(axis.title.x = element_blank()
)
p_cat_pvi <- egg::ggarrange(p1, p8, p11, p9, p10, ncol = 1, draw = FALSE)
ggsave(file.path(si_output_dir, 'figures', 'fig_S7.pdf'), p_cat_pvi, 'pdf', height=9, width = 5)
print(p_cat_pvi)
@
<<load_pooled_vwci, cache=do_cache, include=FALSE>>=
pooled_model_ggs <- pooled_loo_model_fits$ggs %>% discard(str_detect(names(.), '_re[qb]$')) %>%
keep(str_detect(names(.), '_ml_beta_alpha$'))
for (i in seq_along(pooled_model_ggs)) {
old_levels <- levels(pooled_model_ggs[[i]]$Parameter)
new_levels <- old_levels %>% set_names(.) %>% str_replace_all(c("aridity_[0-9]+" = "aridity", "gini" = "Gini"))
pooled_model_ggs[[i]]$Parameter <- ordered(pooled_model_ggs[[i]]$Parameter, levels = old_levels, labels = new_levels)
}
pooled_param_levels <- levels(pooled_model_ggs$ggs_1_ml_beta_alpha$Parameter)
pooled_dens_levels <- levels(pooled_model_ggs$ggs_2_ml_beta_alpha$Parameter) %>% setdiff(pooled_param_levels) %>%
set_names(str_replace(., " *dens", ""))
pooled_extra_levels <- pooled_model_ggs %>% keep(str_detect(names(.), '^ggs_[0-9]+')) %>%
map(~levels(.x$Parameter)) %>% simplify() %>% unname() %>% unique() %>%
setdiff(pooled_param_levels) %>% setdiff(pooled_dens_levels)
for (i in seq_along(pooled_dens_levels)) {
index <- which(param_levels == names(pooled_dens_levels)[i])
pooled_param_levels <- c(head(pooled_param_levels, index), pooled_dens_levels[i], tail(pooled_param_levels, -index))
}
pooled_gamma_levels <- pooled_param_levels %>% keep(str_detect(., "^gamma"))
pooled_beta_levels <- pooled_param_levels %>% keep(str_detect(., "^beta"))
pooled_other_levels <- pooled_param_levels %>% discard(str_detect(., "^beta|gamma"))
pooled_gamma_levels <- c(pooled_extra_levels %>% keep(str_detect(., "^gamma")), pooled_gamma_levels)
pooled_beta_levels <- c(pooled_extra_levels %>% keep(str_detect(., "^beta")), pooled_beta_levels)
pooled_param_levels <- c(pooled_other_levels, pooled_beta_levels, pooled_gamma_levels)
for (i in seq_along(pooled_model_ggs)) {
# To keep things consistent with the previous figures, I am not changing the levels between
# so I use param_levels, not pooled_param_levels
pooled_model_ggs[[i]]$Parameter <- ordered(pooled_model_ggs[[i]]$Parameter, levels = param_levels)
}
pooled_limits <- tibble(lower = 0.0, upper = 0.0)
for (model_num in c(model_indices)) {
pooled_limits <- pooled_model_ggs[[str_c('ggs', model_num, 'ml_beta_alpha', sep = "_")]] %>%
dplyr::filter(str_detect(Parameter, '^(beta|gamma)')) %>%
group_by(Parameter) %>% summarize(lower = quantile(value, 0.025), upper = quantile(value,0.975)) %>%
ungroup() %>% bind_rows(pooled_limits)
}
pooled_limits <- pooled_limits %>%
summarize(lower = min(lower), upper = max(upper)) %>%
mutate_all(funs(./4)) %>%
unlist()
@
<<pooled_vwci_years_cat_plots, cache=do_cache, include=TRUE, fig.cap="Regression coefficients for VWCI, averaging state and MSA aridity over different intervals, using alternative scaling of MSA-level variables.", fig.height=8, fig.width=6, out.width="6.25in", cache.extra=c(loo_model_fits)>>=
pp1 <- make_indexed_cat_plot(pooled_model_ggs, model_vars, pooled_limits, param_levels,
1, "(a)", "(alt. scaling)", TRUE,
"years", "vwci") +
theme(legend.position = c(0.01,0.01), legend.justification = c(0,0),
axis.title.x = element_blank()
)
pp2 <- make_indexed_cat_plot(pooled_model_ggs, model_vars, pooled_limits, param_levels,
2, "(b)", "(alt. scaling)", FALSE,
"years", "vwci") +
theme(axis.title.x = element_blank()
)
pp3 <- make_indexed_cat_plot(pooled_model_ggs, model_vars, pooled_limits, param_levels,
3, "(c)", "(alt. scaling)", FALSE,
"years", "vwci") +
theme(axis.title.x = element_blank()
)
pp4 <- make_indexed_cat_plot(pooled_model_ggs, model_vars, pooled_limits, param_levels,
4, "(d)", "(alt. scaling)", FALSE,
"years", "vwci")
pp_cat_years <- egg::ggarrange(pp1, pp2, pp3, pp4, ncol = 1, draw = FALSE)
ggsave(file.path(si_output_dir, 'figures', 'fig_S9.pdf'), pp_cat_years, 'pdf', height=9, width = 6)
print(pp_cat_years)
@
<<pooled_vwci_vars_cat_plots, cache=do_cache, include=TRUE, dependson="pooled_vwci_years_cat_plots", fig.cap="Regression coefficients for VWCI, with different covariates, using alternative scaling of MSA-level variables.", fig.height=8, fig.width=6, out.width="6.25in">>=
pp1 <- make_indexed_cat_plot(pooled_model_ggs, model_vars, pooled_limits, param_levels,
1, "(a)", "(alt. scaling)", TRUE,
"vars", "vwci") +
theme(legend.position = c(0.01,0.01), legend.justification = c(0,0),
axis.title.x = element_blank()
)
pp5 <- make_indexed_cat_plot(pooled_model_ggs, model_vars, pooled_limits, param_levels,
5, "(b)", "(alt. scaling)", FALSE,
"vars", "vwci") +
theme(axis.title.x = element_blank()
)
pp6 <- make_indexed_cat_plot(pooled_model_ggs, model_vars, pooled_limits, param_levels,
6, "(c)", "(alt. scaling)", FALSE,
"vars", "vwci") +
theme(axis.title.x = element_blank()
)
pp7 <- make_indexed_cat_plot(pooled_model_ggs, model_vars, pooled_limits, param_levels,
7, "(d)", "(alt. scaling)", FALSE,
"vars", "vwci")
pp_cat_vars <- egg::ggarrange(pp1, pp5, pp7, pp7, ncol = 1, draw = FALSE)
ggsave(file.path(si_output_dir, 'figures', 'fig_S10.pdf'), pp_cat_vars, 'pdf', height=9, width = 6)
print(pp_cat_vars)
@
<<pooled_vwci_pvi_cat_plots, cache=do_cache, include=TRUE, dependson="pooled_vwci_years_cat_plots", fig.cap="Regression coefficients for VWCI, with different covariates, using alternative scaling of MSA-level variables.", fig.height=8, fig.width=5, out.width="6.25in">>=
pp1 <- make_indexed_cat_plot(pooled_model_ggs, model_vars, pooled_limits, param_levels,
1, "(a)", "(alt. scaling)", TRUE,
"vars", "vwci") +
theme(legend.position = c(0.01,0.01), legend.justification = c(0,0),
axis.title.x = element_blank()
)
pp8 <- make_indexed_cat_plot(pooled_model_ggs, model_vars, pooled_limits, param_levels,
8, "(b)", "(alt. scaling)", FALSE,
"vars", "vwci") +
theme(axis.title.x = element_blank()
)
pp9 <- make_indexed_cat_plot(pooled_model_ggs, model_vars, pooled_limits, param_levels,
9, "(d)", "(alt. scaling)", FALSE,
"vars", "vwci") +
theme(axis.title.x = element_blank()
)
pp10 <- make_indexed_cat_plot(pooled_model_ggs, model_vars, pooled_limits, param_levels,
10, "(e)", "(alt. scaling)", FALSE,
"vars", "vwci")
pp11 <- make_indexed_cat_plot(pooled_model_ggs, model_vars, pooled_limits, param_levels,
11, "(c)", "(alt. scaling)", FALSE,
"vars", "vwci") +
theme(axis.title.x = element_blank()
)
pp_cat_pvi <- egg::ggarrange(pp1, pp8, pp11, pp9, pp10, ncol = 1, draw = FALSE)
ggsave(file.path(si_output_dir, 'figures', 'fig_S11.pdf'), pp_cat_pvi, 'pdf', height=9, width = 5)
print(pp_cat_pvi)
@
\clearpage
\section{Tables S1--S17}
<<vwci_table, cache=do_cache, include = TRUE, results = "asis">>=
table_counter <- 1
vwci_table_num <- table_counter
cat(str_c("\\subsection*{Table S", vwci_table_num, " Caption}", "\n"))
vwci_tbl <- msa_data %>% arrange(city, state.abb) %>%
mutate(pvi = round(pvi, 2), aridity = round(aridity, 1),
pop.growth = round(100 * pop.growth, 3),
# surface.water = round(100 * surface.water, 1),
pop = round(pop / 1000., 0), rpi = round(rpi / 1000., 1)) %>%
dplyr::select(City = city, State = state.abb, Rank = vwci.rank, VWCI = vwci, Req. = requirements, Reb. = rebates,
PVI = pvi, Aridity = aridity,
RPI = rpi, # RPI = rpi, # Afford. = affordability,
Pop. = pop, `Growth (%)` = pop.growth # , `Surf. W. (%)` = surface.water
)
vwci_caption <- list(str_c("Conservation scores and covariates for ", nrow(tbl), " cities: VWCI = Vanderbilt Water Conservation Index (total \\# of conservation measures), Req.\\ = \\# requirements, Reb.\\ = \\# rebates, PVI = Cook Partisan Voting Index, Aridity = K\\\"oppen aridity index, RPI\\ = per-capita real personal income (thousands of regionally adjusted chained 2009 dollars), Pop.\\ = population (thousands), Growth = population growth rate (2010--", pop_target_year, "), Surf.\\ W.\\ = surface-water fraction."),
str_c("Conservation scores for ", nrow(tbl), " cities.")
)
write_csv(vwci_tbl, path = file.path(si_output_dir, 'tables', str_c("Table_S", table_counter, ".csv")))
cat(str_c("\\begin{table}[H]\n\\centering\nLarge Tables are available from \\citet{gilligan:vwci.data:2018} at \\url{https://doi.org/10.6084/m9.figshare.5714944}.\n\\caption{", vwci_caption[[1]], "}\n\\label{tab:vwci}\n\\end{table}\n"))
table_counter <- table_counter + 1
@
<<state_covar_table, include = TRUE, results="asis">>=
state_table_num <- table_counter
cat(str_c("\\subsection*{Table S", state_table_num, " Caption}", "\n"))
state_tbl <- state_data %>% arrange(state.name) %>%
mutate(
# surface.water = round(100 * surface.water, 3),
pvi = round(pvi, 2), aridity = round(aridity, 1),