generated from kstreet13/assignment
-
Notifications
You must be signed in to change notification settings - Fork 0
/
submit.qmd
176 lines (138 loc) · 3.94 KB
/
submit.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
---
title: "Lab 4"
author: "Karisa Ke"
format: html
editor: visual
embed-resources: true
---
## 1. Read in the data
```{r}
# Install R.utils only when it is missing. An unconditional install.packages()
# call reinstalls the package on every render and errors out in a
# non-interactive session that has no CRAN mirror configured.
if (!requireNamespace("R.utils", quietly = TRUE)) {
  install.packages("R.utils", repos = "https://cloud.r-project.org")
}
library(tidyverse)
library(data.table)
library(ggplot2)
library(R.utils)
```
```{r}
# Download the gzipped MET file once; later renders reuse the local copy.
if (!file.exists("met_all.gz")) {
  # download.file() has no `timeout` argument (it would be silently absorbed
  # by `...`). The limit is read from the `timeout` option, so raise it there
  # (never lowering a larger user setting) and restore it afterwards.
  old_opts <- options(timeout = max(60, getOption("timeout")))
  tryCatch(
    download.file(
      url = "https://raw.githubusercontent.com/USCbiostats/data-science-data/master/02_met/met_all.gz",
      destfile = "met_all.gz",
      method = "libcurl"
    ),
    error = function(e) {
      # Remove a partial file so the next run retries the download instead of
      # trying to read a truncated archive.
      unlink("met_all.gz")
      stop(e)
    },
    finally = options(old_opts)
  )
}
# Read the data into a data.table named `met`.
met <- data.table::fread("met_all.gz")
```
## 2. Prepare the data
- Remove temperatures less than -17C
```{r}
# Keep only the rows whose temperature is above -17
met <- met[-17 < temp, ]
```
- Make sure there are no missing data in the key variables coded as 9999, 999, etc
```{r}
# Recode the 9999 sentinel in `elev` as missing. Only the `elev` column is
# touched: assigning NA to whole rows (`met[cond] <- NA`) would also wipe the
# station id, coordinates and every measurement of those observations.
met[elev == 9999, elev := NA]
```
```{r}
# Five-number summary (plus mean and NA count) of the dew point column
summary(met[["dew.point"]])
```
- Generate a date variable using the function `as.Date()`
```{r}
# Build a Date column from the year / month / day columns ("Y-M-D" strings)
met$date <- with(met, as.Date(paste(year, month, day, sep = "-")))
```
```{r}
# Week-of-year index of each observation; data.table is attached after
# tidyverse in this file, so the bare `week()` call resolves to data.table's.
met$week <- data.table::week(met$date)
```
```{r}
# Keep observations with week below 33 and day-of-month below 8
met <- met %>%
  filter(week < 33, day < 8)
```
```{r}
# Print the filtered table to inspect the result of the preparation steps
met
```
```{r}
# Collapse to one row per station (USAFID) holding the mean of each variable.
# lat and lon are averaged without na.rm; all other means ignore NAs.
met_avg <- met[
  ,
  .(
    temp      = mean(temp, na.rm = TRUE),
    rh        = mean(rh, na.rm = TRUE),
    wind.sp   = mean(wind.sp, na.rm = TRUE),
    vis.dist  = mean(vis.dist, na.rm = TRUE),
    dew.point = mean(dew.point, na.rm = TRUE),
    lat       = mean(lat),
    lon       = mean(lon),
    elev      = mean(elev, na.rm = TRUE)
  ),
  by = "USAFID"
]
```
```{r}
# Assign each station to a quadrant split at lat = 39.71 and lon = -98.
# Latitude decides N/S and longitude decides E/W independently, with the
# boundary values included on the N / E side. With strict `>` and `<` on both
# axes, a station lying exactly on either boundary matched none of the first
# three tests and was labelled "SW" regardless of where it actually is.
# A missing lat or lon yields NA.
met_avg$region <- ifelse(
  met_avg$lat >= 39.71,
  ifelse(met_avg$lon >= -98, "NE", "NW"),
  ifelse(met_avg$lon >= -98, "SE", "SW")
)
```
```{r}
# Label stations whose mean elevation exceeds 252 as "high", the rest as "low"
met_avg$elev_cat <- with(met_avg, ifelse(elev > 252, "high", "low"))
```
```{r}
# Number of stations in each region
table(met_avg[["region"]])
```
## 3. **Use `geom_violin` to examine the wind speed and dew point by region**
```{r}
# Violin plot of mean wind speed, one panel per region (NA regions dropped)
met_avg %>%
  filter(!is.na(region)) %>%
  ggplot(mapping = aes(x = 1, y = wind.sp)) +
  geom_violin() +
  facet_wrap(~region, nrow = 2)
```
```{r}
# Violin plot of mean dew point, one panel per region (NA regions dropped)
met_avg %>%
  filter(!is.na(region)) %>%
  ggplot(mapping = aes(x = 1, y = dew.point)) +
  geom_violin() +
  facet_wrap(~region, nrow = 2)
```
## 4. **Use `geom_jitter` with `stat_smooth` to examine the association between dew point and wind speed by region**
```{r}
# Jittered scatter of wind speed against dew point, coloured by region, with
# a linear fit per region; stations without a region are excluded.
ggplot(
  data = filter(met_avg, !is.na(region)),
  mapping = aes(x = dew.point, y = wind.sp, color = region)
) +
  geom_jitter() +
  stat_smooth(method = "lm")
```
## 5. **Use `geom_bar` to create barplots of the weather stations by elevation category colored by region**
```{r}
# Side-by-side bars counting stations per elevation category, filled by
# region; stations without a region are excluded.
ggplot(
  data = filter(met_avg, !is.na(region)),
  mapping = aes(x = elev_cat, fill = region)
) +
  geom_bar(position = "dodge") +
  scale_fill_brewer(palette = "PuOr") +
  labs(
    title = "Number of weather stations by elevation category and region",
    x = "Elevation Category",
    y = "Count"
  ) +
  theme_bw()
```
## 6. **Use `stat_summary` to examine mean dew point and wind speed by region with standard deviation error bars**
```{r}
# Install Hmisc only when it is missing. An unconditional install.packages()
# call reinstalls on every render and fails without a configured CRAN mirror.
if (!requireNamespace("Hmisc", quietly = TRUE)) {
  install.packages("Hmisc", repos = "https://cloud.r-project.org")
}
library(Hmisc)
```
```{r}
# Mean dew point per region with error bars of one standard deviation.
# mean_sdl() defaults to mult = 2 (mean +/- 2 SD), so mult = 1 is passed
# explicitly to draw the standard-deviation bars the section title asks for.
met_avg %>%
  filter(!is.na(region)) %>%
  ggplot(mapping = aes(x = region, y = dew.point)) +
  stat_summary(
    fun.data = "mean_sdl",
    fun.args = list(mult = 1),
    geom = "errorbar"
  ) +
  stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 1))
```
```{r}
# Mean wind speed per region with error bars of one standard deviation.
# mean_sdl() defaults to mult = 2 (mean +/- 2 SD), so mult = 1 is passed
# explicitly to draw the standard-deviation bars the section title asks for.
met_avg %>%
  filter(!is.na(region)) %>%
  ggplot(mapping = aes(x = region, y = wind.sp)) +
  stat_summary(
    fun.data = "mean_sdl",
    fun.args = list(mult = 1),
    geom = "errorbar"
  ) +
  stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 1))
```
```{r}
```
## 7. **Make a map showing the spatial trend in relative humidity in the US**
```{r}
library(leaflet)

# Stations that have a mean relative humidity value
met_avg2 <- met_avg[!is.na(rh), ]
# Stations whose descending rh rank is at most 10; these get markers
tops <- met_avg2[rank(-rh) <= 10, ]
# Continuous red -> yellow -> blue palette over the observed rh values
rh_pal <- colorNumeric(
  palette = c("red", "yellow", "blue"),
  domain = met_avg2$rh
)

leaflet(met_avg2) %>%
  addProviderTiles(provider = "OpenStreetMap") %>%
  addCircles(
    lng = ~lon,
    lat = ~lat,
    radius = 500,
    color = ~rh_pal(rh),
    opacity = 1,
    fillOpacity = 1,
    label = ~paste0(round(rh, 2), " rh")
  ) %>%
  addMarkers(
    lng = ~lon,
    lat = ~lat,
    label = ~paste0(round(rh, 2), " rh"),
    data = tops
  ) %>%
  addLegend(
    position = "bottomleft",
    pal = rh_pal,
    values = met_avg2$rh,
    title = "Relative Humidity",
    opacity = 1
  )
```
## 8. Use a ggplot extension
```{r}
# Install ggforce only when it is missing. An unconditional install.packages()
# call reinstalls on every render and fails without a configured CRAN mirror.
if (!requireNamespace("ggforce", quietly = TRUE)) {
  install.packages("ggforce", repos = "https://cloud.r-project.org")
}
```
```{r}
library(ggforce)

# Dew point against wind speed, coloured by region, with a zoomed panel on the
# x-range covered by the "NE" stations. The zoom condition must reference a
# column of met_avg: `Species == "versicolor"` comes from the iris example and
# fails here because met_avg has no `Species` column.
# Stations without a region are dropped so the zoom condition is never NA.
met_avg %>%
  filter(!is.na(region)) %>%
  ggplot(aes(wind.sp, dew.point, colour = region)) +
  geom_point() +
  facet_zoom(x = region == "NE")
```