-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathstats.html
More file actions
215 lines (183 loc) · 11 KB
/
stats.html
File metadata and controls
215 lines (183 loc) · 11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<title>Dataset statistics – 2015 Flight Delays and Cancellations</title>
<script src="https://d3js.org/d3.v5.js"></script>
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.1/css/bootstrap.min.css" integrity="sha384-WskhaSGFgHYWDcbwN70/dfYBj47jz9qbsMId/iRN3ewGhXQFZCSftd1LZCfmhktB" crossorigin="anonymous">
<script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.3/umd/popper.min.js" integrity="sha384-ZMP7rVo3mIykV+2+9J3UJ46jBk0WLaUAdn689aCwoqbBJiSnjAK/l8WvCWPIPm49" crossorigin="anonymous"></script>
<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.1.1/js/bootstrap.min.js" integrity="sha384-smHYKdLADwkXOn1EmN1qk/HfnUcbVRZyYmZ4qpPea6sjB/pTJ0euyQp0Mk8ck+5T" crossorigin="anonymous"></script>
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.0.13/css/all.css" integrity="sha384-DNOHZ68U8hZfKXOrtjWvjxusGo9WQnrNx2sqG0tfsghAvtVlRW3tvkXWZh58N9jp" crossorigin="anonymous">
<link rel="stylesheet" href="main.css">
<script src="utils.js"></script>
<script src="common.js"></script>
<script src="charts.js"></script>
</head>
<body>
<div class="container">
<nav class="navbar navbar-expand-lg navbar-light bg-light">
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarNavDropdown" aria-controls="navbarNavDropdown" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbarNavDropdown" class="navbar-collapse collapse">
<ul class="navbar-nav mr-auto">
<li class="nav-item"><a class="nav-link" href="index.html">Main page</a></li>
<li class="nav-item active"><a class="nav-link" href="stats.html">Dataset statistics</a></li>
</ul>
<ul class="navbar-nav">
<li class="nav-item" style="font-size: 80%"><a class="nav-link" href="badgood.html">Bad/good charts</a></li>
</ul>
</div>
</nav>
<p>
<a href="https://www.kaggle.com/usdot/flight-delays">Kaggle 2015 Flight Delays and Cancellations</a> dataset
(with <a href="https://www.kaggle.com/smiller933/fixing-airport-codes">this fix</a> for some airport codes).
It has information about flights for major US airlines in 2015 (arrival/departure times, delay, flight ID, distance, ...)
and information about airport cities/coordinates, airlines.
</p>
<p>
The data was retrieved via SQL queries from a PostgreSQL database, using <code class="code">ROW_TO_JSON</code> for export.
</p>
<p>The dataset contains <b>5 819 079</b> flights, <b>322</b> airports and <b>14</b> airlines.</p>
<table class="table table-bordered" id="attributesTable">
<thead>
<tr>
<th scope="col">Attribute</th>
<th scope="col">Description</th>
<th scope="col">Type</th>
<th scope="col">Min</th>
<th scope="col">Max</th>
<th scope="col">Average</th>
<th scope="col">Median</th>
</tr>
</thead>
<tbody>
</tbody>
</table>
<p><i>Attribute types according to these definitions: <a href="http://isoconsultantpune.com/statistics-in-quality/">isoconsultantpune.com/statistics-in-quality/</a></i></p>
<div class="code-spoiler">
<a data-toggle="collapse" role="button">SQL</a>
<div class="collapse">
<div class="card card-body code-panel">
<pre class="code"><code>SELECT
MIN(distance) as min_distance, MAX(distance) as max_distance, ROUND(AVG(distance), 2) as avg_distance, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER by distance) as median_distance,
MIN(departure_delay) as min_departure_delay, MAX(departure_delay) as max_departure_delay, ROUND(AVG(departure_delay), 2) as avg_departure_delay, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER by departure_delay) as median_departure_delay,
MIN(arrival_delay) as min_arrival_delay, MAX(arrival_delay) as max_arrival_delay, ROUND(AVG(arrival_delay), 2) as avg_arrival_delay, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER by arrival_delay) as median_arrival_delay,
MIN(air_time) as min_air_time, MAX(air_time) as max_air_time, ROUND(AVG(air_time), 2) as avg_air_time, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER by air_time) as median_air_time,
MIN(scheduled_time) as min_scheduled_time, MAX(scheduled_time) as max_scheduled_time, ROUND(AVG(scheduled_time), 2) as avg_scheduled_time, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER by scheduled_time) as median_scheduled_time,
MIN(elapsed_time) as min_elapsed_time, MAX(elapsed_time) as max_elapsed_time, ROUND(AVG(elapsed_time), 2) as avg_elapsed_time, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER by elapsed_time) as median_elapsed_time,
MIN(air_system_delay) as min_air_system_delay, MAX(air_system_delay) as max_air_system_delay, ROUND(AVG(air_system_delay), 2) as avg_air_system_delay, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER by air_system_delay) as median_air_system_delay,
MIN(security_delay) as min_security_delay, MAX(security_delay) as max_security_delay, ROUND(AVG(security_delay), 2) as avg_security_delay, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER by security_delay) as median_security_delay,
MIN(airline_delay) as min_airline_delay, MAX(airline_delay) as max_airline_delay, ROUND(AVG(airline_delay), 2) as avg_airline_delay, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER by airline_delay) as median_airline_delay,
MIN(late_aircraft_delay) as min_late_aircraft_delay, MAX(late_aircraft_delay) as max_late_aircraft_delay, ROUND(AVG(late_aircraft_delay), 2) as avg_late_aircraft_delay, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER by late_aircraft_delay) as median_late_aircraft_delay,
MIN(weather_delay) as min_weather_delay, MAX(weather_delay) as max_weather_delay, ROUND(AVG(weather_delay), 2) as avg_weather_delay, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER by weather_delay) as median_weather_delay,
MIN(taxi_in) as min_taxi_in, MAX(taxi_in) as max_taxi_in, ROUND(AVG(taxi_in), 2) as avg_taxi_in, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER by taxi_in) as median_taxi_in,
MIN(taxi_out) as min_taxi_out, MAX(taxi_out) as max_taxi_out, ROUND(AVG(taxi_out), 2) as avg_taxi_out, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER by taxi_out) as median_taxi_out
FROM flights</code></pre>
</div>
</div>
</div>
<h3>Cancellations</h3>
<p>Fewer than 2% of flights were cancelled or diverted, and most cancellations were caused by weather.</p>
<svg id="cancellationsPlot" class="plot pie" width="460" height="460"></svg>
<svg id="cancellationReasonPlot" class="plot pie" width="460" height="460"></svg>
<div class="code-spoiler" style="margin-top: -40px">
<a data-toggle="collapse" role="button">SQL</a>
<div class="collapse">
<div class="card card-body code-panel">
<pre class="code"><code>SELECT
COUNT(*) filter (WHERE cancelled = 1) as cancelled_count,
COUNT(*) filter (WHERE diverted = 1) as diverted_count,
COUNT(*) filter (WHERE cancelled = 0 AND diverted = 0) as normal_count
FROM flights;
SELECT cancellation_reason, COUNT(*)
FROM flights
WHERE cancellation_reason is not null
GROUP BY cancellation_reason</code></pre>
</div>
</div>
</div>
<h3>Seasonality</h3>
<p>
There is a huge drop in activity on Saturdays, probably related to business trips.
</p>
<p>
There also seem to be more flights in the summer (especially July–August), probably because of holidays,
as well as some increase at the beginning of spring (Easter, spring break?).
The least active period is from mid-January to mid-February.
</p>
<svg id="datesPlot" class="plot" width="900" height="300"></svg>
<svg id="weeksPlot" class="plot" width="900" height="500"></svg>
<div class="code-spoiler">
<a data-toggle="collapse" role="button">SQL</a>
<div class="collapse">
<div class="card card-body code-panel">
<pre class="code"><code>SELECT make_date(year, month, day) as date, count(*)
FROM flights
GROUP BY date</code></pre>
</div>
</div>
</div>
<footer>
<hr/>
<a href="https://github.com/AlexP11223/D3FlightsDataVisualization">Source code</a>
</footer>
</div>
<script>
/**
 * Fills the <tbody> of #attributesTable with one row per dataset attribute.
 *
 * Loads data/attributes.json and, for every entry, appends a <tr> holding a
 * row-header cell with the attribute name followed by cells for its
 * description, type, min, max, average and median.
 */
async function makeAttributesTable() {
    const attributes = await d3.json('data/attributes.json');

    // Text accessors for the plain <td> cells, listed in the same order as the
    // <thead> columns that follow "Attribute".
    const cellText = [
        attr => attr.description,
        attr => capitalize(attr.type),
        attr => attr.min,
        attr => attr.max,
        attr => attr.avg,
        attr => attr.median,
    ];

    const rows = d3.select('#attributesTable')
        .select('tbody')
        .selectAll('tr')
        .data(attributes)
        .enter()
        .append('tr');

    // The leading cell is a row header: underscores in the attribute name are
    // replaced by spaces and the result is passed through capitalize().
    rows.append('th')
        .attr('scope', 'row')
        .text(attr => capitalize(attr.name.replace(/_/g, ' ')));

    for (const toText of cellText) {
        rows.append('td').text(toText);
    }
}
/**
 * Draws the two pie charts of the "Cancellations" section.
 *
 * Loads data/cancellations.json and renders its `states` counts into
 * #cancellationsPlot and its `cancellation_reasons` counts into
 * #cancellationReasonPlot, both using the d3.schemeAccent palette.
 */
async function makeCancellationPlots() {
    // Display labels for the single-letter cancellation reason codes.
    const reasonLabels = {
        'A': 'Airline',
        'B': 'Weather',
        'C': 'NAS',
        'D': 'Security'
    };

    const {states, cancellation_reasons: reasonCounts} = await d3.json('data/cancellations.json');

    // PieChart is fed {name, value} pairs; only the label lookup differs between the charts.
    const toSlices = (entries, labelOf) =>
        entries.map(entry => ({name: labelOf(entry), value: entry.count}));

    const charts = [
        ['#cancellationsPlot', toSlices(states, entry => entry.name)],
        ['#cancellationReasonPlot', toSlices(reasonCounts, entry => reasonLabels[entry.name])],
    ];

    for (const [selector, slices] of charts) {
        new PieChart(slices, d3.select(selector)).colors(d3.schemeAccent).draw();
    }
}
/**
 * Draws the two charts of the "Seasonality" section from data/dates.json.
 *
 * - #datesPlot: a DatesLineChart of the flight count for every date, with a
 *   two-line tooltip (date with weekday, number of flights).
 * - #weeksPlot: a MultiLineChart overlaying a handful of sample weeks, each a
 *   run of consecutive days cut out of the same series.
 */
async function makeDatesPlot() {
    const data = (await d3.json('data/dates.json'))
        .map((it) => ({date: new Date(Date.parse(it.date)), value: it.count}))
        // The week sampling below addresses days by position, so the series has
        // to be chronological. The export query (GROUP BY date, no ORDER BY)
        // does not guarantee row order, so sort here rather than trusting the
        // file. This is a no-op when the file is already ordered.
        .sort((a, b) => a.date - b.date);

    new DatesLineChart(data, d3.select('#datesPlot'))
        .tooltipLines([d => dateWithWeekday(d.date), d => `${d.value} flights`])
        .draw();

    const sampleWeekNumbers = [2, 3, 6, 8, 17, 22, 26, 30, 40, 42];
    // Index offset added to every sample's start.
    // NOTE(review): presumably aligns samples to the first Monday in the data — confirm.
    const firstIndex = 4;
    // Days per sample: one full week plus the first two days of the next.
    const sampleLength = 9;

    const sampleWeeks = sampleWeekNumbers.map(week => {
        const start = firstIndex + week * 7;
        return {
            name: `Week ${week}`,
            values: data.slice(start, start + sampleLength)
                // `index` is the day's position inside the sample, not the week number.
                .map((d, dayIndex) => ({name: weekDayNameShort(d.date), value: d.value, index: dayIndex}))
        };
    });

    new MultiLineChart(sampleWeeks, d3.select('#weeksPlot')).draw();
}
</script>
<!--suppress JSIgnoredPromiseFromCall -->
<script>
// Kick off every section renderer. Each one is async and independent of the
// others, so the returned promises are deliberately not awaited.
for (const render of [makeAttributesTable, makeCancellationPlots, makeDatesPlot]) {
    render();
}
</script>
</body>
</html>