-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_exploration_sample.R
34 lines (28 loc) · 1022 Bytes
/
data_exploration_sample.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# loading libraries
library(dplyr)
library(ggplot2)
library(ggpubr)
library(tidyverse)
library(plotly)
library(lubridate)
library(RColorBrewer)
library(scales)
# Default plot size: set the repr.plot.* options to 5 wide by 3 tall.
# NOTE(review): these options are presumably read by the repr package when the
# script runs in a Jupyter/IRkernel notebook -- confirm the target environment.
options(
  repr.plot.width = 5,
  repr.plot.height = 3
)
# Read the three CSV inputs (these are CSV files, not Excel workbooks):
# the flight records plus the airline and airport lookup tables.
flights0 <- read_csv("flight_data/bos_flights_only.csv")
airline <- read_csv("flight_data/airlines.csv")
airports <- read_csv("flight_data/airports.csv")

# Build `flights` by enriching every flight row with lookup data. Both joins
# are left joins, so a flight with no match in a lookup table is kept and its
# looked-up columns are NA.
#   1. airport details, matched on DESTINATION_AIRPORT == airports$IATA_CODE
#   2. airline details, matched on AIRLINE == airline$IATA_CODE
# NOTE(review): if airlines.csv also contains a column named AIRLINE, the
# second join will disambiguate the clash with .x/.y suffixes -- confirm
# against the file's header.
flights <- left_join(
  flights0,
  airports,
  by = c(DESTINATION_AIRPORT = "IATA_CODE")
)
flights <- left_join(
  flights,
  airline,
  by = c(AIRLINE = "IATA_CODE")
)
# How many airports can you fly to from Boston, and which were the most
# popular destinations?
# Produces one row per distinct (AIRPORT, CITY) pair with its flight count `n`,
# largest count first; the number of rows is the number of distinct pairs.
# Flights whose destination had no match in the airports lookup share a
# single NA/NA row.
count(flights, AIRPORT, CITY, sort = TRUE)
# How many flights are out of Boston each month?
# Bar chart of the number of rows in `flights` for each MONTH value.
# The counts are deliberately not sorted: the bars are positioned by the level
# order of factor(MONTH), so sorting the counted table by `n` has no effect on
# the chart.
flights %>%
  count(MONTH) %>%
  ggplot(aes(x = factor(MONTH), y = n)) +
  geom_col(fill = "#E6AB02") +
  # Rotate the month labels to vertical on the x axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Flights By Month", x = "month", y = "count of flights")