Overview

Row

Row

Vehicles

Row

Row

People and Predictions

Row

<
Chicago Crashes Model
Dependent variable:
is_severe
(1) (2) (3)
four_way 0.639*** 0.639*** 0.633***
(0.009) (0.009) (0.009)
bad_weather 0.125*** 0.225***
(0.009) (0.010)
wet_road -0.299***
(0.024)
bad_weatherTRUE:wet_road -0.120***
(0.030)
Constant -0.520*** -0.534*** -0.529***
(0.003) (0.003) (0.003)
Observations 226,336 226,336 226,336
Log Likelihood -140,163.200 -140,056.900 -139,721.900
Akaike Inf. Crit. 280,330.300 280,119.800 279,453.900
Note: p<0.1; p<0.05; p<0.01

Row

---
title: "Chicago Traffic Accidents"
output: 
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: scroll
    source_code: embed
    theme:
      version: 4
      bg: '#edf2f4'
      fg: '#edf2f4'
      primary: "#1d3557"
      navbar-bg: '#1d3557'
      heading_font:
        google: Roboto
      
---

```{r setup & libraries, include=FALSE}
# Load Packages
library(flexdashboard)
library(tidyverse)
library(lubridate)
library(sf)
library(tmap)
library(ggraph)
library(calendR)
library(tidygraph)
library(ggmosaic)
library(scales)

# Used for WD
path <- '/Users/SG/Documents/Programming/dac2022/closing_celebration/'

```

```{r function declarations}
# Scaling function because calendaR for some reason only takes values < 365
scale_fn <- function(x) { x / sqrt(sum(x^2)) }


# You already know
`%!in%` <- Negate(`%in%`)

# Quick summary
count_summary <- function(data){
  data %>%
  filter(maneuver %!in% c('UNKNOWN/NA', 'NA'),
         area_hit %!in% c('UNKNOWN', 'NA')) %>%
  drop_na(maneuver, area_hit) %>%
  group_by(maneuver, area_hit) %>%
  summarise(n = n())
}
```

```{r colors & theme}
# Theme and Colors from Coolor
flag_colors <- c('#e63946', '#f1faee', '#a8dadc', '#457b9d', '#1d3557')

grey_red <- c('#2b2d42', '#8d99ae', '#edf2f4', '#ef233c', '#d90429')

bg_color <- '#f8f9fa'

bg_photo <- paste(path,'pedro-lastra-Nyvq2juw4_o-unsplash.jpg', sep = '')

# Minimalist theme Coolor bg color
theme_cel <- function(base_size = 11,
                      base_family = 'serif'){
    theme(
      # Rect and Line
      rect = element_rect(fill = bg_color, colour = NA, linetype = 0),
      line = element_line(color = grey_red[1]),
      
      # Remove y axis / Adjust x axis
      axis.ticks = element_line(),
      axis.ticks.y = element_blank(),
      axis.line = element_line(linetype = 1, colour = grey_red[1]),
      
      # Change Legend background color 
      legend.key = element_rect(linetype = 0, fill = bg_color),
      
      # Clear background and change color
      panel.background = element_rect(linetype = 0, fill = bg_color),
      panel.border = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      strip.background = element_rect(fill = bg_color, colour = NA, linetype = 0),
      plot.background = element_rect(fill = bg_color, colour = NA))
}
```

```{r load data}
# Download Data from Chicago Open Data
crashes <- janitor::clean_names(read_csv(paste(path, 'Traffic_Crashes_-_Crashes.csv', sep = '')))

people <- janitor::clean_names(read_csv(paste(path,'Traffic_Crashes_-_People.csv', sep = '')))

vehicles <- janitor::clean_names(read_csv(paste(path, 'Traffic_Crashes_-_Vehicles.csv', sep = '')))
```

```{r munging & joining}
# Some preliminary cleaning and joining Vehicles + Crashes
vehicles <- vehicles %>%
  
  # Select Rows
  select(id = crash_record_id,
         date = crash_date,
         make,
         model,
         travel_direction,
         maneuver,
         occupants = occupant_cnt,
         area_hit = first_contact_point) %>%
  
  # Char to Date
  mutate(date = mdy_hms(date),
         
         # Some models have silly names
         model = replace(model, str_detect(model,'SENTRA'), 'SENTRA'),
         model = replace(model, str_detect(model,'MALIBU'), 'MALIBU'),
         model = replace(model, str_detect(model,'RAV4'), 'RAV4'),
         model = replace(model, str_detect(model,'ALTIMA'), 'ALTIMA'))

people <- people %>%
  
  # Select relevant columns
  select(id = crash_record_id,
         action = driver_action,
         zipcode,
         sex,
         age,
         injury = injury_classification,
         airbag = airbag_deployed)

crashes <- crashes %>%
  
  # Select columns
  select(id = crash_record_id,
         street = street_name,
         weather = weather_condition,
         roadtype = trafficway_type,
         road_cond = roadway_surface_cond,
         crash_type,
         damage,
         latitude,
         longitude,
         location)

# Join data and filter, for interactive remove filter
traffic <- left_join(vehicles, crashes, by = 'id') %>% filter(year(date) == 2021)
```



Overview
==================

Row
-----------------------------------------------------------------------

###
```{r calendar heatmap, fig.width = 15, fig.height = 8}

# Grouping by date and scaling accidents, calendR can only take values < 365
date_traffic <- traffic %>%
  filter(year(date) == 2021) %>%
  group_by(date = as.Date(date)) %>%
  summarise(accidents = n()) %>%
  mutate(scale_acc = scale_fn(accidents))


# Only relevant for reactive dashboard.  special.days must be length 365 and current year is incomplete
if (unique(year(date_traffic$date)) == year(Sys.Date())){
    
    days <- rep(min(date_traffic$scale_acc) - 0.05, 365)
    days[1:nrow(date_traffic)] <- date_traffic$scale_acc
} else {
    days <- date_traffic$scale_acc
}


# Create calendar
calendR(year = as.numeric(unique(format(date_traffic$date, format = '%Y'))),
        special.days = days,
        gradient = TRUE,
        low.col = '#edf2f4',
        special.col = '#e63946',
        bg.img = bg_photo,
        title = paste('Heatmap of Traffic Accidents Year ', unique(year(date_traffic$date))),
        title.size = 20,
        title.col = grey_red[1],
        lty = 1,
        lwd = 0.1,
        col = grey_red[1],
        days.col = grey_red[1],
        day.size = 3,
        subtitle = 'Chicago, IL | Photo: Pedro Lastra',
        subtitle.col = grey_red[1],
        weeknames = c("Mo", "Tu",  
                      "We", "Th",
                      "Fr", "Sa",
                      "Su"),
        weeknames.col = 'black',
        weeknames.size = 4,
        mbg.col = bg_color,
        font.family = 'sans')
```

Row
-----------------------------------------------------------------------

###

```{r tmap, echo = FALSE, message = FALSE, warning = FALSE, fig.height = 8}
# Pull Chicago Shapefile
chi_sf <- st_read(paste(path, 'chicago_tracts_2010.shp', sep = ''), quiet = TRUE)
chi_sf <- st_transform(chi_sf, 4326)

# Convert lat/long from original data to sf
traffic_sf <- traffic %>% 
  filter(year(date) == 2021 &
         month(date) == month(Sys.Date())) %>%
  drop_na(c(latitude, longitude)) %>%
  st_as_sf(coords = c('longitude', 'latitude'), crs = 4326)

# Join the two shape files
sf_join <- chi_sf %>% st_join(traffic_sf, left = TRUE, join = st_intersects)


# Find the accidents per GEOID
sf_group <- sf_join %>%
  group_by(geoid) %>%
  summarise(accidents = n())


# tmap
tmap_mode(mode = 'view')
tm_shape(sf_group) +
  tm_fill('accidents', id = 'accidents', alpha = 0.6)  +
          tm_borders(col = 'grey40', alpha = 0.5, lwd = .4) +
   tm_layout(aes.palette = list(seq = "-RdBu")) +
  tm_basemap('Esri.WorldTopoMap') +
  tm_layout(title = 'Chicago Traffic Accidents 2021',
            outer.bg.color = bg_color,
            legend.bg.color = bg_color,
            title.bg.color = bg_color,
            bg.color = bg_color)
```

###
```{r clock plot}

# group by time and summarise
traffic %>%
  filter(year(date) == 2021) %>%
  mutate(year = year(date)) %>%
  
  # get accidents in each hour
  group_by(time = hour(date)) %>%
  summarise(accidents = n(), across(year)) %>%
  
  # Dynamically calculate accidents / year
  mutate(accident_hour = if_else(year == year(Sys.Date()),
                                 accidents / (as.numeric(Sys.Date()) - as.numeric(floor_date(Sys.Date()))), 
                                 accidents / 365)) %>%
  ungroup() %>%
  
  # regroup to summarise with accident / hour
  group_by(time, accidents, accident_hour) %>%
  summarise() %>%
  
  # ggplot
  ggplot() +
  
  # first night rectangle
  geom_rect(aes(xmin = -0.5, xmax = 6, ymin = 0, ymax = Inf), fill = flag_colors[3], alpha = 0.02) +
  
  # day rectangle
  geom_rect(aes(xmin = 6, xmax = 18, ymin = 0, ymax = Inf), fill = '#ffb703', alpha = 0.02) +
  
  # second night rectangle
  geom_rect(aes(xmin = 18, xmax = 24, ymin = 0, ymax = Inf), fill = flag_colors[3], alpha = 0.02) +
  
  # simple bar plot
  geom_col(aes(x = time, y = accident_hour, fill = accident_hour)) +
  
  # custom theme
  theme_cel() +
  
  # now polar start gets it more centered and clock-ish
  coord_polar(start = 6.21, direction = 1) +
  
  # aesthetics
  scale_fill_gradient2(low = grey_red[3],
                       high = flag_colors[5])  +
  scale_x_continuous(breaks = (seq(0,23,1))) +
  theme(axis.line = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y = element_blank(),
        axis.text.x = element_text(vjust = 1)) +
  labs(title = '24 Hour Clock of When Accidents Occur',
        subtitle = 'Daily Chicago, IL 2021',
       fill = 'Accidents per Hour') +
  xlab('') +
  ylab('')
```

Vehicles
==================

Row {data-height=650}
-------------------------------------

```{r network graph, fig.width = 16, fig.height = 6.75}

# filter and proportion of crashes
traffic_connect_prop <- traffic %>%
  filter(year(date) == 2021) %>%
  mutate(area_hit = replace(area_hit, area_hit == 'OTHER', 'OTHER AREA'),
         area_hit = replace(area_hit, area_hit == 'TOTAL (ALL AREAS)', 'TOTAL')) %>%
  count_summary() %>%
  mutate(percent = n/sum(n) * 100) %>%
  rename(crashes = n)

# ggraph network graph
traffic_connect_prop %>%
  ggraph(layout = "linear", circular = TRUE) +
  
  # controls color / alpha / width of 'arms'
  geom_edge_arc(aes(edge_alpha = percent, edge_width = percent), color = flag_colors[1], fold = TRUE, lineend = 'round') +
  
  # text for area hit
  geom_node_text(aes(filter = name %in% traffic_connect_prop$area_hit, label = name), size = 3, hjust = .5, repel = T) +
  
  # labels for maneuver
  geom_node_label(aes(filter = name %in% traffic_connect_prop$maneuver, label = name), size = 3, fill = flag_colors[2], repel = T) +
  
  # aesthetics
  theme_cel() +
  theme(axis.line = element_blank(),
        axis.ticks = element_blank(),
        legend.position = 'bottom') +
  labs(title = 'Where is the First Point of Contact, Given a Maneuver?',
       subtitle = 'Labels are Maneuvers, Thickness is % of Accidents Hitting Area')
```

Row {data-height=350}
-------------------------------------

```{r lollipop, fig.width = 8}
# Summarise by make
make_data <- traffic %>%
  filter(!is.na(make),
         make != 'UNKNOWN',
         year(date) == 2021) %>%
  mutate(make = fct_lump(make, n = 20)) %>%
  group_by(make) %>%
  summarise(n = n(), 
            date = year(date)) %>%
  mutate(make = str_to_title(make))

# Lillipop chart of car makes
ggplot(make_data) +
  
  # sticks of lollipop
  geom_segment(aes(x = fct_reorder(make, n), xend = make, y = 0, yend = n), color = grey_red[2]) +
  
  # circles on end of segments
  geom_point(aes(x = make, y = n), size = 5, color = flag_colors[4]) +
  coord_flip() +
  
  # aesthetics
  theme_cel() +
  xlab('') +
  ylab('') +
  scale_y_continuous(expand = c(0,0)) +
  labs(title = 'Number of Accidents by Car Make in Thousands',
       subtitle = 'Chicago, IL 2021') +
  theme(legend.position = 'none',
        axis.text.x = element_text(hjust = .75)) + 
  
  # prevents from being cut off if not in reactive environment
  expand_limits(y = c(0, ceiling(max(make_data$n)) + 1000))
```

```{r make bar, fig.width = 8}
# Simple bar plot of top 12 make / model

# summarise
traffic %>%
  filter(!is.na(model),
         make %!in% c('UNKNOWN', 'MOTORIZED'),
         model %!in% c('OTHER (EXPLAIN IN NARRATIVE)', 'UNKNOWN'),
         year(date) == 2021) %>%
  group_by(model, make) %>%
  summarise('count' = n()) %>%
  arrange(desc(count)) %>%
  head(12) %>%
  
# plot  
  ggplot() +
  geom_col(aes(x = fct_rev(fct_reorder(model, count)), y = count, fill = make), color = 'gray') +
  
  # aesthetics
  scale_fill_manual(values = c(flag_colors, grey_red)) +
  labs(title = 'What make and model cars are getting in the most accidents?',
       subtitle = '...or maybe simply which are popular',
       fill = 'Make') +
  ylab('Number of Accidents') +
  xlab('') +
  theme_cel() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
```

People and Predictions
==================

Row {data-height=550}
-------------------------------------
###

```{r mosaic}
# Creating age group bins
people_range <- people %>%
  drop_na(sex, age) %>%
  filter(sex %!in% 'X') %>%
  mutate(age_range = case_when(age < 25 ~'under 25',
                               age < 45 ~'25-45',
                               age < 65 ~'45-65',
                               TRUE ~'over 65'),
         sex = case_when(sex == 'F' ~'Female',
                         sex == 'M' ~'Male'))

# Rearranging factors for display
people_range$age_range <- factor(people_range$age_range, levels = c('under 25', '25-45', '45-65', 'over 65'))

# Mosaic plot
ggplot(people_range) +
  geom_mosaic(aes(x = product(sex), fill = age_range)) +
  
  # aesthetics
  theme_cel() +
  theme(axis.line = element_blank(),
        axis.ticks.x = element_blank()) +
  labs(title = 'Who is getting into accidents in Chicago?',
       fill = 'Age Range',
       caption = 'All Data from Chicago Open Data') +
  ylab('') +
  xlab('') +
  scale_fill_manual(values = flag_colors[-2],
                    guide=guide_legend(reverse=T)) 
```

###

```{r model hidden, results = 'asis'}
# Here is the actual model, but I pulled out the html and adjusted some of the aesthetics

#traffic_model <- traffic %>%
#   mutate(is_severe = if_else(str_detect(crash_type, 'TOW'), TRUE, FALSE),
#         four_way = if_else(roadtype == 'FOUR WAY', TRUE, FALSE),
#         bad_weather = if_else(weather %in% c('RAIN', 'FREEZING RAIN/DRIZZLE', 'SLEET/HAIL', 'SNOW'), TRUE, FALSE),
#         wet_road = if_else(str_detect(road_cond, 'SNOW | WET | ICE'), TRUE, FALSE)) %>%
 # select(is_severe, four_way, bad_weather, wet_road)


#model1 <- glm(is_severe~ four_way, data = traffic_model, binomial(link = 'probit'))

#model2 <- glm(is_severe~ four_way + bad_weather, data = traffic_model, binomial(link = 'probit'))

#model3 <- glm(is_severe~ four_way + bad_weather * wet_road, data = traffic_model, binomial(link = 'probit'))

#stargazer::stargazer(model1, model2, model3, type = 'html', title = 'Chicago Crashes Model')
```
<
Chicago Crashes Model
Dependent variable:
is_severe
(1)(2)(3)
four_way0.639***0.639***0.633***
(0.009)(0.009)(0.009)
bad_weather0.125***0.225***
(0.009)(0.010)
wet_road-0.299***
(0.024)
bad_weatherTRUE:wet_road-0.120***
(0.030)
Constant-0.520***-0.534***-0.529***
(0.003)(0.003)(0.003)
Observations226,336226,336226,336
Log Likelihood-140,163.200-140,056.900-139,721.900
Akaike Inf. Crit.280,330.300280,119.800279,453.900
Note:*p<0.1; **p<0.05; ***p<0.01
Row {data-height=350} ------------------------------------- ### ```{r injury bar, fig.width = 16} # Simple bar plot regarding injuries people %>% filter(action %!in% c('UNKNOWN', 'NONE', NA, 'OTHER'), !is.na(injury), injury %!in% c('NO INDICATION OF INJURY')) %>% ggplot(aes(x = fct_rev(action), fill = injury)) + geom_bar(position = 'fill') + # aesthetics theme_cel() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), panel.background = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_blank(), legend.position = 'bottom', legend.text = element_text(size = 7), legend.key.size = unit(.5, 'cm'), legend.title = element_blank(), axis.ticks.y = element_blank(), axis.text.y = element_text(size = 9)) + scale_y_continuous(expand = c(0,0)) + xlab('') + ylab('') + labs(title = 'What Actions Lead to Serious Injury?', subtitle = 'If Injury and Action Recorded', caption = 'https://data.cityofchicago.org/Transportation/Traffic-Crashes-Crashes/85ca-t3if') + scale_fill_manual(values = flag_colors[-2]) + coord_flip() ```