-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathqueries.sql
More file actions
100 lines (79 loc) · 3.3 KB
/
queries.sql
File metadata and controls
100 lines (79 loc) · 3.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
-- use database
USE sql_project_db;
-- =====================================================================================
-- 1. data cleaning queries
-- =====================================================================================
-- check for nulls
SELECT
COUNT(*) AS total_rows,
SUM(CASE WHEN sale_amount IS NULL THEN 1 ELSE 0 END) AS null_sale_amounts,
SUM(CASE WHEN assessed_value IS NULL THEN 1 ELSE 0 END) AS null_assessed_values,
FROM real_estate_sales;
-- remove any rows with negative/zero sales
DELETE FROM real_estate_sales
WHERE sale_amount <= 0;
DELETE FROM real_estate_sales
WHERE assessed_value <= 0;
-- trim spaces + uppercase town names
UPDATE real_estate_sales
SET town = UPPER(TRIM(town));
-- ======================================================================================
-- ======================================================================================
-- 2. explore the data
-- ======================================================================================
-- preview first 10 rows
SELECT * FROM real_estate_sales
LIMIT 10;
-- count the total sales records
SELECT COUNT(*) AS total_sales
FROM real_estate_sales;
-- average sale amount across all years
SELECT ROUND(AVG(sale_amount), 2) AS avg_sale_amount
FROM real_estate_sales;
-- sales ratio distribution, the average by town
SELECT town, ROUND(AVG(sales_ratio), 4) AS avg_sales_ratio
FROM real_estate_sales
GROUP BY town
ORDER BY avg_sales_ratio DESC
LIMIT 10;
-- count any null values per column
SELECT
SUM(CASE WHEN serial_number IS NULL THEN 1 ELSE 0 END) AS null_serial_number,
SUM(CASE WHEN list_year IS NULL THEN 1 ELSE 0 END) AS null_list_year,
SUM(CASE WHEN date_recorded IS NULL THEN 1 ELSE 0 END) AS null_date_recorded,
SUM(CASE WHEN town IS NULL THEN 1 ELSE 0 END) AS null_town,
SUM(CASE WHEN address IS NULL THEN 1 ELSE 0 END) AS null_address,
SUM(CASE WHEN assessed_value IS NULL THEN 1 ELSE 0 END) AS null_assessed_value,
SUM(CASE WHEN sale_amount IS NULL THEN 1 ELSE 0 END) AS null_sale_amount,
SUM(CASE WHEN sales_ratio IS NULL THEN 1 ELSE 0 END) AS null_sales_ratio,
SUM(CASE WHEN property_type IS NULL THEN 1 ELSE 0 END) AS null_property_type,
SUM(CASE WHEN residential_type IS NULL THEN 1 ELSE 0 END) AS null_residential_type,
FROM real_estate_sales;
-- find the min & max sale amounts
SELECT
MIN(sale_amount) AS min_sale_amount,
MAX(sale_amount) AS max_sale_amount
FROM real_estate_sales;
-- find distinct property types
SELECT DISTINCT property_type
FROM real_estate_sales;
-- ======================================================================================
-- ======================================================================================
-- 3. analyze the data
-- ======================================================================================
-- top 10 towns with the highest number of sales
SELECT town, COUNT(*) AS sales_count
FROM real_estate_sales
GROUP BY town
ORDER BY sales_count DESC
LIMIT 10;
-- average sale price by year
SELECT list_year, ROUND(AVG(sale_price), 2) AS avg_sale_price
FROM real_estate_sales
GROUP BY list_year
ORDER BY list_year;
-- highest valued property sold; based on assessed value
SELECT serial_number, town, address, assessed_value, sale_amount
FROM real_estate_sales
ORDER BY assessed_value DESC
LIMIT 1;