From e84bdee1cd92926c5d255329ec9220add962b3ef Mon Sep 17 00:00:00 2001 From: Yunqing Ke Date: Wed, 20 Sep 2023 18:39:23 -0700 Subject: [PATCH] Lab 4 --- submit.qmd | 165 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 151 insertions(+), 14 deletions(-) diff --git a/submit.qmd b/submit.qmd index d4b907f..da558b6 100644 --- a/submit.qmd +++ b/submit.qmd @@ -1,28 +1,165 @@ --- -title: "" -author: "" -format: - html: - embed-resources: true +title: "Lab 4_v2" +author: "Karisa Ke" +format: html +editor: visual +embed-resources: true --- -## Quarto +## 1. Read in the data -Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see . +```{r} + + +if (!file.exists("met_all.gz")) + download.file( + url = "https://raw.githubusercontent.com/USCbiostats/data-science-data/master/02_met/met_all.gz", + destfile = "met_all.gz", + method = "libcurl", + timeout = 60 + ) +met <- data.table::fread("met_all.gz") +``` + +## 2. Prepare the data + +- Remove temperatures less than -17C + +```{r} +met <- met[temp>-17] +``` + +- Make sure there are no missing data in the key variables coded as 9999, 999, etc + +```{r} +met[met$elev==9999.0] <- NA +``` + +```{r} +summary(met$dew.point) +``` + +- Generate a date variable using the functions `as.Date()` + +```{r} +met$date <- as.Date(paste(met$year, met$month, met$day, sep = "-")) +``` + +```{r} +met$week = week(met$date) +``` + +```{r} +met =filter(met, week < 33 & day < 8) +``` + +```{r} +met +``` + +```{r} +met_avg <- met[,.(temp=mean(temp,na.rm=TRUE), rh=mean(rh,na.rm=TRUE), wind.sp=mean(wind.sp,na.rm=TRUE), vis.dist=mean(vis.dist,na.rm=TRUE), dew.point = mean(dew.point, na.rm=TRUE), lat=mean(lat), lon=mean(lon), elev=mean(elev,na.rm=TRUE)), by="USAFID"] +``` + +```{r} +met_avg$region <- ifelse(met_avg$lon > -98 & met_avg$lat >39.71, "NE", ifelse(met_avg$lon > -98 & met_avg$lat < 39.71, "SE", ifelse(met_avg$lon < -98 & met_avg$lat >39.71, "NW", "SW"))) +``` -## Running Code +```{r} +met_avg$elev_cat <- ifelse(met_avg$elev> 252, "high", "low") +``` + +```{r} +table(met_avg$region) +``` + +## 3. **Use `geom_violin` to examine the wind speed and dew point by region** + +```{r} +met_avg %>% + filter(!(region %in% NA)) %>% +ggplot()+ + geom_violin(mapping = aes(y=wind.sp, x=1)) + + facet_wrap(~region, nrow=2) +``` -When you click the **Render** button a document will be generated that includes both content and the output of embedded code. You can embed code like this: +```{r} +met_avg %>% + filter(!(region %in% NA)) %>% +ggplot()+ + geom_violin(mapping = aes(y=dew.point, x=1)) + + facet_wrap(~region, nrow=2) +``` + +## 4. **Use `geom_jitter` with `stat_smooth` to examine the association between dew point and wind speed by region** ```{r} -1 + 1 +met_avg %>% +filter(!(region %in% NA)) %>% + ggplot(mapping = aes(x=dew.point, y=wind.sp, color=region))+ + geom_jitter() + + stat_smooth(method=lm) ``` -You can add options to executable code like this +## 5. **Use `geom_bar` to create barplots of the weather stations by elevation category colored by region** ```{r} -#| echo: false -2 * 2 +met_avg %>% +filter(!(region %in% NA)) %>% + ggplot()+ + geom_bar(mapping=aes(x=elev_cat,fill=region), position = "dodge")+ + scale_fill_brewer(palette = "PuOr")+ + labs(title="Number of weather stations by elevation category and region", x="Elevation Category", y= "Count")+ + theme_bw() ``` -The `echo: false` option disables the printing of code (only output is displayed). +## 6. **Use `stat_summary` to examine mean dew point and wind speed by region with standard deviation error bars** + +```{r} +met_avg %>% +filter(!(region %in% NA)) %>% + ggplot(mapping=aes(x=region, y=dew.point)) + + stat_summary(fun.data="mean_sdl", geom="errorbar") + + stat_summary(fun.data="mean_sdl")Ye1 +``` + +```{r} +met_avg %>% +filter(!(region %in% NA)) %>% + ggplot(mapping=aes(x=region, y=wind.sp)) + + stat_summary(fun.data="mean_sdl", geom="errorbar") + + stat_summary(fun.data="mean_sdl") +``` + +```{r} + +``` + +## 7. **Make a map showing the spatial trend in relative humidity in the US** + +```{r} +library(leaflet) +met_avg2<-met_avg[!is.na(rh)] + +tops <- met_avg2[rank(-rh) <= 10] + +rh_pal = colorNumeric(c('red','yellow','blue'), domain=met_avg2$rh) +leaflet(met_avg2) %>% + addProviderTiles('OpenStreetMap') %>% + addCircles(lat=~lat, lng=~lon, color=~rh_pal(rh), label=~paste0(round(rh,2), ' rh'), opacity=1,fillOpacity=1, radius=500) %>% + addMarkers(lat=~lat, lng=~lon, label=~paste0(round(rh,2), ' rh'), data = tops) %>% + addLegend('bottomleft',pal=rh_pal, values=met_avg2$rh, title="Relative Humidity", opacity=1) +``` + +## 8. Use a ggplot extension + +```{r} +install.packages("ggforce") +``` + +```{r} +library(ggforce) +ggplot(met_avg, aes(wind.sp, dew.point, colour = region)) + + geom_point() + + facet_zoom(x = Species == "versicolor") +```