-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBANA7025 Moduel 5 Lab Ang Zhang.Rmd
100 lines (88 loc) · 3.52 KB
/
BANA7025 Moduel 5 Lab Ang Zhang.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---
title: "BANA7025 Module 5 Lab"
author: "Ang Zhang"
output:
  html_document:
    df_print: paged
---
```{r message=FALSE, warning=FALSE}
# Packages attached for the whole document. Other packages used further down
# are called with an explicit `pkg::` prefix and are therefore not attached.
packages <- c("ggplot2", "dplyr", "stringr", "completejourney", "lubridate")

# Attach each package by name. `character.only = TRUE` is required because the
# names are passed as strings; `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned. The result is stored in `pkg` only so that
# the list returned by lapply() is not printed.
pkg <- lapply(packages, library, character.only = TRUE)
```
```{r message=FALSE, warning=FALSE}
# Fetch the full transaction and promotion tables via completejourney.
transactions <- get_transactions()
promotions <- get_promotions()

# Goal: investigate the effect of promotions on sales value and quantity.
# Every transaction row is flagged by whether the product was on an in-store
# display and/or in the mailer.
#
# Promotions are recorded per product, store AND week, so `week` has to be part
# of the join key. Joining on product and store alone repeats each transaction
# row once per promoted week and flags purchases made in weeks when the product
# was not promoted at all.
# NOTE(review): the row counts and shares reported further down were written
# against the old join; re-check any hard-coded narrative text after this fix.
df <- transactions %>%
  left_join(products, by = "product_id") %>%
  left_join(promotions, by = c("product_id", "store_id", "week")) %>%
  mutate(
    # NA means no promotion row matched; the code "0" is treated as
    # "not on display" / "not in mailer", as in the original comparison.
    on_display = !is.na(display_location) & display_location != "0",
    on_mailer = !is.na(mailer_location) & mailer_location != "0",
    # Hour of day of the purchase, used for the time-of-day chart.
    hour = lubridate::hour(transaction_timestamp)
  )
```
```{r}
# Bar chart of how many transaction rows fall into each hour of the day.
# count() yields one row per hour with the tally in column `n`, which drives
# both the bar height and the fill shade.
ggplot(count(df, hour), aes(x = hour, y = n, fill = n)) +
  geom_col() +
  labs(
    title = "Customer Shopping Behavior Investigation",
    subtitle = "number of shopping across time of the day\nshoppers peak around 5pm while few were seen before 10am. stores should staff accordingly.",
    x = "time (hour) of the day",
    y = "total number of transactions observed"
  ) +
  # The complete theme goes first so the subtitle tweak below is not overwritten.
  theme_minimal() +
  theme(plot.subtitle = element_text(hjust = 0, size = 10))
```
```{r}
# Treemap of total sales value: tiles are grouped by brand first and then
# subdivided by department, with tile area proportional to total sales.
plot1 <- df %>%
  group_by(brand, department) %>%
  # Drop the grouping once aggregated so treemap() receives a plain table and
  # summarize() does not emit its "grouped output" message into the report.
  summarize(total_sales = sum(sales_value), .groups = "drop") %>%
  treemap::treemap(
    index = c("brand", "department"),
    vSize = "total_sales",
    title = "Total sales generated by different departments overlaid by brand type"
  )
```
```{r}
# Draw one pie chart showing how a measure splits between products that were
# on display and products that were not.
#
# data      : one row per `on_display` value (logical column) plus the measure.
# value_col : name (string) of the numeric column that sizes the slices.
# title     : plot title.
#
# The slice label is derived from `on_display` itself rather than from a
# hard-coded vector, so the text cannot end up on the wrong group (a logical
# grouping key sorts FALSE first) and a single-group table still plots.
# Labels are centred with position_stack(), which uses the same stacking order
# as the bars, so each label lands inside its own slice.
display_share_pie <- function(data, value_col, title) {
  labelled <- data %>%
    mutate(slice_label = ifelse(on_display, "on display", "not on display"))

  ggplot(
    labelled,
    aes(x = "", y = .data[[value_col]], fill = on_display, group = on_display)
  ) +
    geom_bar(stat = "identity", width = 1, color = "white") +
    # Wrapping the single stacked bar around the y axis turns it into a pie.
    coord_polar("y", start = 0) +
    theme_void() +
    theme(legend.position = "none") +
    geom_text(
      aes(label = slice_label),
      position = position_stack(vjust = 0.5)
    ) +
    ggtitle(title)
}

# Number of distinct products seen with / without an in-store display.
plot1_data <- df %>%
  group_by(on_display) %>%
  summarize(product_count = n_distinct(product_id))
plot1 <- display_share_pie(plot1_data, "product_count", "Product constitution")

# Number of transaction rows with / without an in-store display.
plot2_data <- df %>%
  group_by(on_display) %>%
  summarize(transaction_count = n())
plot2 <- display_share_pie(
  plot2_data, "transaction_count", "Transaction contribution"
)

# Total sales value with / without an in-store display.
plot3_data <- df %>%
  group_by(on_display) %>%
  summarize(total_sales = sum(sales_value))
plot3 <- display_share_pie(plot3_data, "total_sales", "Sales contribution")

# Stack a bold heading, a one-line takeaway and the three pies side by side.
# The takeaway is fixed text, not computed from the data shown beneath it.
gridExtra::grid.arrange(
  grid::textGrob(
    "On display product contribution in sales & transaction",
    gp = grid::gpar(fontsize = 15, fontface = "bold")
  ),
  grid::textGrob(
    "On display products account for less than 1/4 but contributed nearly half of total sales & transaction",
    gp = grid::gpar(fontsize = 10)
  ),
  gridExtra::arrangeGrob(plot1, plot2, plot3, nrow = 1),
  heights = c(0.1, 0.05, 0.60)
)
```