-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEDA.sql
275 lines (237 loc) · 5.15 KB
/
EDA.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
SELECT *
FROM SalesProject.Sales;
-- How many unique cities does the data have?
SELECT
DISTINCT city
FROM Sales;
-- In which city is each branch?
SELECT
DISTINCT branch,
city
FROM Sales
ORDER BY branch;
-- How many unique product lines does the data have?
SELECT
COUNT(DISTINCT(product_line))
FROM Sales;
-- What is the most common payment method?
SELECT
payment_method,
COUNT(payment_method) AS Numbers
FROM Sales
GROUP BY payment_method
ORDER BY Numbers DESC;
-- What is the most selling product line?
SELECT
product_line,
COUNT(product_line) AS most_sales
FROM Sales
GROUP BY product_line
ORDER BY most_sales DESC;
-- What is the total revenue by month?
SELECT
month,
SUM(unit_price * quantity) AS rev_month
FROM Sales
GROUP BY month;
SELECT
month,
SUM(total) AS total_rev
FROM Sales
GROUP BY month;
-- What month had the largest cost of goods?
SELECT
month,
sum(cost_of_goods) AS CostofGoods
FROM Sales
GROUP BY month;
-- What product line had the largest revenue?
SELECT
product_line,
SUM(total) AS total_rev
FROM Sales
GROUP BY product_line
ORDER BY total_rev DESC;
-- What city had the largest revenue?
SELECT
city,
SUM(total) AS total_rev
FROM Sales
GROUP BY city
ORDER BY total_rev DESC;
-- What product line has the largest VAT?
SELECT
product_line,
SUM(tax) AS VAT
FROM Sales
GROUP BY product_line
ORDER BY VAT DESC;
SELECT
product_line,
AVG(tax) AS VAT
FROM Sales
GROUP BY product_line
ORDER BY VAT DESC;
-- Fetch each product line and add a column to those product line showing "Good", "Bad". Good if its greater than average sales
SELECT
product_line,
ROUND(AVG(quantity), 3) AS avg_qty,
CASE
WHEN AVG(quantity) > 5.5 THEN "good"
ELSE "bad"
END AS comment
FROM sales
GROUP BY product_line;
-- Which branch sold more products than average product sold?
SELECT
branch,
SUM(quantity) AS products_sold
FROM sales
GROUP BY branch
ORDER BY products_sold DESC
SELECT
branch,
SUM(quantity) AS products_sold
FROM sales
GROUP BY branch
HAVING SUM(quantity) > (SELECT AVG(quantity) FROM sales);
-- What is the most common product line by gender?
SELECT
gender,
product_line,
COUNT(gender) AS qty
FROM sales
GROUP BY gender, product_line
ORDER BY qty DESC;
-- What is the average rating of each product line?
SELECT
product_line,
ROUND(AVG(rating), 3) AS avg_rating
FROM sales
GROUP BY product_line
ORDER BY avg_rating DESC;
-- Number of sales made in each time of the day per weekday?
SELECT
weekday,
time_of_day,
COUNT(invoice_id) AS sales_made
FROM sales
GROUP BY weekday, time_of_day
ORDER BY weekday
SELECT
time_of_day,
COUNT(invoice_id) AS sales_made
FROM sales
GROUP BY time_of_day
SELECT
time_of_day,
COUNT(invoice_id) AS sales_made
FROM sales
WHERE weekday = "Monday"
GROUP BY time_of_day
-- Which of the customer types brings the most revenue?
SELECT
customer_type,
ROUND(SUM(total), 3) AS revenue
FROM sales
GROUP BY customer_type
ORDER BY revenue DESC
-- Which city has the largest tax percent/ VAT (Value Added Tax)?
SELECT
city,
ROUND(SUM(tax), 3) vat_percent
FROM sales
GROUP BY city
ORDER BY vat_percent DESC
SELECT
city,
ROUND(AVG(tax), 3) vat_percent
FROM sales
GROUP BY city
ORDER BY vat_percent DESC
-- Which customer type pays the most in VAT?
SELECT
customer_type,
ROUND(SUM(tax), 3) AS vat_payment
FROM sales
GROUP BY customer_type
ORDER By vat_payment DESC
-- How many unique customer types does the data have?
SELECT
customer_type,
COUNT(customer_type) AS customers
FROM sales
GROUP BY customer_type
-- How many unique payment methods does the data have?
SELECT
payment_method,
COUNT(payment_method) AS Total
FROM sales
GROUP BY payment_method
-- What is the most common customer type?
SELECT
customer_type,
COUNT(customer_type) AS total
FROM sales
GROUP BY customer_type
ORDER BY total DESC
-- Which customer type buys the most?
SELECT
customer_type,
SUM(quantity) AS total_purchase
FROM sales
GROUP BY customer_type
-- What is the gender of most of the customers?
SELECT
customer_type,
gender,
COUNT(gender) AS gender_count
FROM sales
GROUP BY customer_type, gender
ORDER BY gender_count DESC
-- What is the gender distribution per branch?
SELECT
branch,
gender,
COUNT(gender) AS gender_count
FROM sales
GROUP BY branch, gender
ORDER BY branch ASC
-- Which time of the day do customers give most ratings?
SELECT
time_of_day,
COUNT(rating) AS ratings
FROM sales
GROUP BY time_of_day
ORDER BY ratings DESC
-- Which time of the day do customers give most ratings per branch?
SELECT
branch,
time_of_day,
COUNT(rating) AS ratings
FROM sales
GROUP BY branch, time_of_day
ORDER BY branch, time_of_day
-- Which day of the week has the best avg ratings?
SELECT
weekday,
AVG(rating) AS avg_rating
FROM sales
GROUP BY weekday
ORDER BY avg_rating DESC
-- Which day of the week has the best average ratings per branch?
SELECT
branch,
weekday,
AVG(rating) AS avg_rating
FROM sales
GROUP BY branch, weekday
ORDER BY avg_rating DESC
SELECT
branch,
weekday,
AVG(rating) AS avg_rating
FROM sales
WHERE branch = "B"
GROUP BY weekday
ORDER BY avg_rating DESC