-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWalmart Analysis Query.sql
More file actions
238 lines (185 loc) · 5.91 KB
/
Walmart Analysis Query.sql
File metadata and controls
238 lines (185 loc) · 5.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
-- Create database
CREATE DATABASE walmartSales;
-- Create table
CREATE TABLE sales(
invoice_id VARCHAR(30) NOT NULL PRIMARY KEY,
branch VARCHAR(10) NOT NULL,
city VARCHAR(30) NOT NULL,
customer_type VARCHAR(30) NOT NULL,
gender VARCHAR(30) NOT NULL,
product_line VARCHAR(100) NOT NULL,
unit_price NUMERIC(10,2) NOT NULL,
quantity INT NOT NULL,
tax_pct NUMERIC(6,4) NOT NULL,
total NUMERIC(12, 4) NOT NULL,
date TIMESTAMP NOT NULL,
time TIME NOT NULL,
payment VARCHAR(15) NOT NULL,
cogs NUMERIC(10,2) NOT NULL,
gross_margin_pct NUMERIC(11,9),
gross_income NUMERIC(12, 4),
rating NUMERIC(2, 1)
);
-- Add the time_of_day column
ALTER TABLE sales
ADD COLUMN time_of_day VARCHAR(20);
UPDATE sales
SET time_of_day = (
CASE
WHEN time BETWEEN '00:00:00' AND '12:00:00' THEN 'Morning'
WHEN time BETWEEN '12:01:00' AND '16:00:00' THEN 'Afternoon'
ELSE 'Evening'
END
);
-- Add day_name column
ALTER TABLE sales
ADD COLUMN day_name VARCHAR(10);
UPDATE sales
SET day_name = to_char(date,'Day')
-- Add month_name column
ALTER TABLE sales
ADD COLUMN month_name VARCHAR(10);
UPDATE sales
SET month_name = to_char(date,'Month');
-- --------------------------------------------------------------------
-- ---------------------------- Product -------------------------------
-- --------------------------------------------------------------------
-- How many unique product lines does the data have?
SELECT Count(DISTINCT product_line)
FROM sales;
-- Which is the most common payment method?
SELECT payment_method, count(payment_method) as cnt
FROM sales
GROUP BY payment_method
ORDER BY cnt DESC;
-- Which is the most selling product line?
SELECT SUM(quantity) as qty, product_line
FROM sales
GROUP BY product_line
ORDER BY qty DESC;
-- What is the total revenue by month?
SELECT month_name AS month, SUM(total) AS total_revenue
FROM sales
GROUP BY month_name
ORDER BY total_revenue;
-- Which month had the largest COGS?
SELECT month_name AS month, SUM(cogs) AS cogs
FROM sales
GROUP BY month_name
ORDER BY cogs desc;
-- Which product line had the largest revenue?
SELECT product_line, SUM(total) as total_revenue
FROM sales
GROUP BY product_line
ORDER BY total_revenue DESC;
-- City with the largest revenue?
SELECT city, SUM(total) AS total_revenue
FROM sales
GROUP BY city
ORDER BY total_revenue;
-- Which product line had the largest VAT?
SELECT product_line, AVG(tax_pct) as avg_tax
FROM sales
GROUP BY product_line
ORDER BY avg_tax DESC;
-- Fetch each product line and its average quantity. Add a column to those product
-- line showing "Good", "Bad". Good if its greater than average sales
SELECT product_line, AVG(quantity)
CASE
WHEN AVG(quantity) > (SELECT AVG(quantity) FROM sales) THEN 'Good'
ELSE 'Bad'
END AS remark
FROM sales
GROUP BY product_line;
-- Which branch sold more products than average product sold?
SELECT branch, SUM(quantity)
FROM sales
GROUP BY branch
HAVING SUM(quantity) > (SELECT AVG(quantity) FROM sales);
-- Which is the most common product line by gender?
SELECT product_line, gender, COUNT(gender) AS total_cnt
FROM sales
GROUP BY product_line, gender
ORDER BY total_cnt DESC;
-- What is the average rating of each product line?
SELECT product_line, ROUND(AVG(rating), 2) as avg_rating,
FROM sales
GROUP BY product_line
ORDER BY avg_rating DESC;
-- --------------------------------------------------------------------
-- -------------------------- Customers -------------------------------
-- --------------------------------------------------------------------
-- How many unique customer types does the data have?
SELECT DISTINCT customer_type
FROM sales;
-- How many unique payment methods does the data have?
SELECT DISTINCT payment
FROM sales;
-- Which is the most common customer type?
SELECT customer_type, count(*) as cnt
FROM sales
GROUP BY customer_type
ORDER BY cnt DESC;
-- Which customer type buys the most?
SELECT customer_type, COUNT(*)
FROM sales
GROUP BY customer_type;
-- What is the gender of most of the customers?
SELECT gender, COUNT(*) as gender_cnt
FROM sales
GROUP BY gender
ORDER BY gender_cnt DESC;
-- What is the gender distribution for branch C?
SELECT gender, COUNT(*) as gender_cnt
FROM sales
WHERE branch = "C"
GROUP BY gender
ORDER BY gender_cnt DESC;
-- Which time of the day do customers give most ratings?
SELECT time_of_day, AVG(rating) AS avg_rating
FROM sales
GROUP BY time_of_day
ORDER BY avg_rating DESC;
-- Which time of the day do customers give most ratings for branch A?
SELECT time_of_day, AVG(rating) AS avg_rating
FROM sales
WHERE branch = "A"
GROUP BY time_of_day
ORDER BY avg_rating DESC;
-- Which day of the week has the best avg ratings?
SELECT day_name, AVG(rating) AS avg_rating
FROM sales
GROUP BY day_name
ORDER BY avg_rating DESC;
-- Which day of the week has the best average ratings for branch C?
SELECT day_name, AVG(rating) AS avg_rating
FROM sales
WHERE branch = "C"
GROUP BY day_name
ORDER BY avg_rating DESC;
-- --------------------------------------------------------------------
-- ---------------------------- Sales ---------------------------------
-- --------------------------------------------------------------------
-- Number of sales made in each time of the day on Sunday?
SELECT time_of_day, COUNT(*) AS total_sales
FROM sales
WHERE day_name = "Sunday"
GROUP BY time_of_day
ORDER BY total_sales DESC;
-- Which of the customer types brings the most revenue?
SELECT customer_type, SUM(total) AS total_revenue
FROM sales
GROUP BY customer_type
ORDER BY total_revenue;
-- Which city has the largest tax/VAT percent?
SELECT city, ROUND(AVG(tax_pct),2) AS avg_tax_pct
FROM sales
GROUP BY city
ORDER BY avg_tax_pct DESC;
-- Which customer type pays the most in VAT?
SELECT customer_type, AVG(tax_pct) AS total_tax
FROM sales
GROUP BY customer_type
ORDER BY total_tax;
-- --------------------------------------------------------------------
-- --------------------------------------------------------------------