-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path02_intermediate_sql_queries.sql
More file actions
340 lines (305 loc) · 10.8 KB
/
Copy path02_intermediate_sql_queries.sql
File metadata and controls
340 lines (305 loc) · 10.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
-- ============================================
-- INTERMEDIATE SQL QUERIES REFERENCE
-- ============================================
-- ==========================================
-- 1. ADVANCED JOINS AND COMPLEX QUERIES
-- ==========================================
-- Three-way join with aggregations
SELECT
p.Passenger_fname,
p.Passenger_lname,
t.Train_name,
b.Total_amount,
CASE
WHEN p.Passenger_age <= 12 THEN "Kids"
WHEN p.Passenger_age >= 13 AND p.Passenger_age <= 21 THEN "Teenagers"
WHEN p.Passenger_age >= 22 AND p.Passenger_age <= 64 THEN "Adults"
WHEN p.Passenger_age >= 65 THEN "Elders"
END AS age_category
FROM PASSENGERS p
INNER JOIN TRAINS t ON p.Train_id = t.Train_id
INNER JOIN BOOKINGS b ON p.Passenger_id = b.Passenger_id;
-- Revenue analysis by age group
SELECT
CONCAT(ROUND(SUM(b.Total_amount), 2), ' $') AS generated_amount,
CASE
WHEN p.Passenger_age <= 12 THEN "Kids"
WHEN p.Passenger_age >= 13 AND p.Passenger_age <= 21 THEN "Teenagers"
WHEN p.Passenger_age >= 22 AND p.Passenger_age <= 64 THEN "Adults"
WHEN p.Passenger_age >= 65 THEN "Elders"
END AS age_category,
COUNT(*) AS passenger_count,
AVG(b.Total_amount) AS avg_amount_per_passenger
FROM PASSENGERS p
INNER JOIN TRAINS t ON p.Train_id = t.Train_id
INNER JOIN BOOKINGS b ON p.Passenger_id = b.Passenger_id
GROUP BY age_category
ORDER BY SUM(b.Total_amount) DESC;
-- ==========================================
-- 2. COMPLEX AGGREGATIONS AND ANALYTICS
-- ==========================================
-- Daily revenue analysis with running totals
SELECT
DATE(booking_date) AS booking_date,
ROUND(SUM(total_amount), 2) AS daily_revenue,
COUNT(*) AS bookings_count,
AVG(total_amount) AS avg_booking_amount,
MAX(total_amount) AS max_booking_amount,
MIN(total_amount) AS min_booking_amount
FROM BOOKINGS
GROUP BY DATE(booking_date)
ORDER BY booking_date;
-- Monthly revenue trends
SELECT
YEAR(booking_date) AS booking_year,
MONTH(booking_date) AS booking_month,
MONTHNAME(booking_date) AS month_name,
COUNT(*) AS total_bookings,
SUM(total_amount) AS monthly_revenue,
AVG(total_amount) AS avg_booking_value,
SUM(total_amount) / COUNT(*) AS revenue_per_booking
FROM BOOKINGS
GROUP BY YEAR(booking_date), MONTH(booking_date)
ORDER BY booking_year, booking_month;
-- Train utilization analysis
SELECT
t.Train_name,
t.Train_type,
COUNT(p.Passenger_id) AS passenger_count,
AVG(b.Total_amount) AS avg_revenue_per_passenger,
SUM(b.Total_amount) AS total_revenue,
t.Length_of_travel
FROM TRAINS t
LEFT JOIN PASSENGERS p ON t.Train_id = p.Train_id
LEFT JOIN BOOKINGS b ON p.Passenger_id = b.Passenger_id
GROUP BY t.Train_id, t.Train_name, t.Train_type, t.Length_of_travel
ORDER BY total_revenue DESC;
-- ==========================================
-- 3. ADVANCED SUBQUERIES AND CORRELATED QUERIES
-- ==========================================
-- Find passengers who spent more than average
SELECT
p.Passenger_fname,
p.Passenger_lname,
b.Total_amount,
(SELECT AVG(Total_amount) FROM BOOKINGS) AS system_avg
FROM PASSENGERS p
INNER JOIN BOOKINGS b ON p.Passenger_id = b.Passenger_id
WHERE b.Total_amount > (SELECT AVG(Total_amount) FROM BOOKINGS)
ORDER BY b.Total_amount DESC;
-- Correlated subquery: Find trains with above-average passengers
SELECT
t.Train_name,
t.Train_type,
(SELECT COUNT(*) FROM PASSENGERS p WHERE p.Train_id = t.Train_id) AS passenger_count
FROM TRAINS t
WHERE (SELECT COUNT(*) FROM PASSENGERS p WHERE p.Train_id = t.Train_id) >
(SELECT AVG(passenger_count) FROM
(SELECT COUNT(*) AS passenger_count FROM PASSENGERS GROUP BY Train_id) AS avg_calc);
-- Exists subquery example
SELECT p.Passenger_fname, p.Passenger_lname, p.Passenger_age
FROM PASSENGERS p
WHERE EXISTS (
SELECT 1 FROM BOOKINGS b
WHERE b.Passenger_id = p.Passenger_id
AND b.Total_amount > 500
);
-- ==========================================
-- 4. WINDOW FUNCTIONS AND RANKING
-- ==========================================
-- Ranking bookings by amount
SELECT
b.Booking_no,
p.Passenger_fname,
p.Passenger_lname,
b.Total_amount,
RANK() OVER (ORDER BY b.Total_amount DESC) AS amount_rank,
DENSE_RANK() OVER (ORDER BY b.Total_amount DESC) AS dense_rank,
ROW_NUMBER() OVER (ORDER BY b.Total_amount DESC) AS row_num
FROM BOOKINGS b
INNER JOIN PASSENGERS p ON b.Passenger_id = p.Passenger_id;
-- Running totals and moving averages
SELECT
DATE(booking_date) AS booking_date,
SUM(total_amount) AS daily_revenue,
SUM(SUM(total_amount)) OVER (ORDER BY DATE(booking_date)) AS running_total,
AVG(SUM(total_amount)) OVER (ORDER BY DATE(booking_date) ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS moving_avg_7_days
FROM BOOKINGS
GROUP BY DATE(booking_date)
ORDER BY booking_date;
-- Percentile analysis
SELECT
p.Passenger_fname,
p.Passenger_lname,
b.Total_amount,
NTILE(4) OVER (ORDER BY b.Total_amount) AS quartile,
PERCENT_RANK() OVER (ORDER BY b.Total_amount) AS percent_rank,
CUME_DIST() OVER (ORDER BY b.Total_amount) AS cumulative_dist
FROM BOOKINGS b
INNER JOIN PASSENGERS p ON b.Passenger_id = p.Passenger_id;
-- ==========================================
-- 5. COMPLEX CONDITIONAL LOGIC
-- ==========================================
-- Multi-level categorization
SELECT
p.Passenger_fname,
p.Passenger_lname,
p.Passenger_age,
b.Total_amount,
CASE
WHEN p.Passenger_age < 18 THEN 'Minor'
WHEN p.Passenger_age >= 18 AND p.Passenger_age < 65 THEN 'Adult'
ELSE 'Senior'
END AS age_category,
CASE
WHEN b.Total_amount < 100 THEN 'Budget'
WHEN b.Total_amount >= 100 AND b.Total_amount < 300 THEN 'Standard'
WHEN b.Total_amount >= 300 AND b.Total_amount < 500 THEN 'Premium'
ELSE 'Luxury'
END AS spending_category,
CASE
WHEN p.Passenger_age < 18 AND b.Total_amount < 100 THEN 'Young Budget Traveler'
WHEN p.Passenger_age >= 65 AND b.Total_amount > 300 THEN 'Senior Premium Traveler'
WHEN p.Passenger_age BETWEEN 25 AND 40 AND b.Total_amount > 200 THEN 'Professional Traveler'
ELSE 'Regular Traveler'
END AS traveler_profile
FROM PASSENGERS p
INNER JOIN BOOKINGS b ON p.Passenger_id = b.Passenger_id;
-- ==========================================
-- 6. DATE AND TIME ANALYSIS
-- ==========================================
-- Travel time analysis
SELECT
Train_name,
Train_type,
First_station,
Last_station,
LENGTH_OF_TRAVEL,
CASE
WHEN LENGTH_OF_TRAVEL <= '02:00:00' THEN 'Short Journey'
WHEN LENGTH_OF_TRAVEL <= '06:00:00' THEN 'Medium Journey'
ELSE 'Long Journey'
END AS journey_type,
HOUR(LENGTH_OF_TRAVEL) AS hours,
MINUTE(LENGTH_OF_TRAVEL) AS minutes
FROM TRAINS
WHERE LENGTH_OF_TRAVEL IS NOT NULL
ORDER BY LENGTH_OF_TRAVEL;
-- Peak booking times analysis
SELECT
HOUR(booking_date) AS booking_hour,
COUNT(*) AS bookings_count,
AVG(total_amount) AS avg_amount,
SUM(total_amount) AS total_revenue
FROM BOOKINGS
GROUP BY HOUR(booking_date)
ORDER BY bookings_count DESC;
-- Day of week analysis
SELECT
DAYNAME(booking_date) AS day_name,
WEEKDAY(booking_date) AS day_number,
COUNT(*) AS bookings_count,
AVG(total_amount) AS avg_booking_amount,
SUM(total_amount) AS total_revenue
FROM BOOKINGS
GROUP BY DAYNAME(booking_date), WEEKDAY(booking_date)
ORDER BY WEEKDAY(booking_date);
-- ==========================================
-- 7. ADVANCED STRING MANIPULATION
-- ==========================================
-- Advanced name formatting
SELECT
Passenger_id,
Passenger_fname,
Passenger_lname,
CONCAT(
UPPER(LEFT(Passenger_fname, 1)),
LOWER(SUBSTRING(Passenger_fname, 2)),
' ',
UPPER(LEFT(Passenger_lname, 1)),
LOWER(SUBSTRING(Passenger_lname, 2))
) AS formatted_name,
CONCAT(
LEFT(Passenger_fname, 1),
'.',
LEFT(Passenger_lname, 1),
'.'
) AS initials,
LENGTH(CONCAT(Passenger_fname, Passenger_lname)) AS name_length
FROM PASSENGERS;
-- Station name analysis
SELECT
Train_name,
First_station,
Last_station,
CONCAT(First_station, ' → ', Last_station) AS route,
CASE
WHEN First_station LIKE '%Central%' OR Last_station LIKE '%Central%' THEN 'Central Station Route'
WHEN First_station LIKE '%Airport%' OR Last_station LIKE '%Airport%' THEN 'Airport Route'
ELSE 'Regular Route'
END AS route_type
FROM TRAINS;
-- ==========================================
-- 8. COMPLEX FILTERING AND SEARCH
-- ==========================================
-- Multi-criteria search
SELECT
p.Passenger_fname,
p.Passenger_lname,
p.Passenger_age,
b.Total_amount,
t.Train_name,
b.Booking_date
FROM PASSENGERS p
INNER JOIN BOOKINGS b ON p.Passenger_id = b.Passenger_id
INNER JOIN TRAINS t ON p.Train_id = t.Train_id
WHERE p.Passenger_age BETWEEN 25 AND 45
AND b.Total_amount > 200
AND t.Train_type = 'Express'
AND DATE(b.Booking_date) >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
ORDER BY b.Total_amount DESC;
-- Pattern matching for passenger names
SELECT
Passenger_fname,
Passenger_lname,
CASE
WHEN Passenger_fname REGEXP '^[A-D]' THEN 'Group A-D'
WHEN Passenger_fname REGEXP '^[E-H]' THEN 'Group E-H'
WHEN Passenger_fname REGEXP '^[I-M]' THEN 'Group I-M'
WHEN Passenger_fname REGEXP '^[N-R]' THEN 'Group N-R'
WHEN Passenger_fname REGEXP '^[S-Z]' THEN 'Group S-Z'
ELSE 'Other'
END AS name_group
FROM PASSENGERS
WHERE Passenger_fname REGEXP '^[A-Za-z]'
ORDER BY name_group, Passenger_fname;
-- ==========================================
-- 9. BACKUP AND MAINTENANCE QUERIES
-- ==========================================
-- Create backup tables with data
CREATE TABLE passengers_backup AS SELECT * FROM PASSENGERS;
CREATE TABLE trains_backup AS SELECT * FROM TRAINS;
CREATE TABLE bookings_backup AS SELECT * FROM BOOKINGS;
-- Compare tables for data integrity
SELECT 'PASSENGERS' AS table_name, COUNT(*) AS record_count FROM PASSENGERS
UNION ALL
SELECT 'PASSENGERS_BACKUP' AS table_name, COUNT(*) AS record_count FROM passengers_backup
UNION ALL
SELECT 'TRAINS' AS table_name, COUNT(*) AS record_count FROM TRAINS
UNION ALL
SELECT 'TRAINS_BACKUP' AS table_name, COUNT(*) AS record_count FROM trains_backup;
-- Find orphaned records
SELECT 'Passengers without bookings' AS issue_type, COUNT(*) AS count
FROM PASSENGERS p
LEFT JOIN BOOKINGS b ON p.Passenger_id = b.Passenger_id
WHERE b.Passenger_id IS NULL
UNION ALL
SELECT 'Bookings without passengers' AS issue_type, COUNT(*) AS count
FROM BOOKINGS b
LEFT JOIN PASSENGERS p ON b.Passenger_id = p.Passenger_id
WHERE p.Passenger_id IS NULL
UNION ALL
SELECT 'Passengers without trains' AS issue_type, COUNT(*) AS count
FROM PASSENGERS p
LEFT JOIN TRAINS t ON p.Train_id = t.Train_id
WHERE t.Train_id IS NULL;