StatsBombR Tutorial: Comparing Player Shots vs. Goals and Shots vs. Key Passes per 90 in the Champions League and UEFA Euros
Stay Curious!
Introduction to the "StatsBombR" Package
All credit for this tutorial goes to StatsBomb and the webinar they recently hosted which went through this code. This tutorial is a step-by-step guide on how to use the StatsBombR package to analyse the available data. You can check out all free data available by clicking here.
Load packages
library(tidyverse)
library(StatsBombR)
Pull the free StatsBomb competition data into the environment
# Fetch the free StatsBomb competitions table and keep it as `Comps`.
# NOTE(review): `Comps` is not referenced again in the visible script; the
# sections below call FreeCompetitions() again instead of reusing it.
Comps <- FreeCompetitions()
Champions League
# Keep only the competition with id 16 (the Champions League section of this
# tutorial) from the free competitions table.
cl <- FreeCompetitions() %>%
  filter(competition_id == 16)

# Pull all the matches for the selected competition.
cl_matches <- FreeMatches(cl)

# Pull all the event data for those matches. TRUE is spelled out because `T`
# is an ordinary variable that can be reassigned.
CL_Stats_Bomb_Data <- free_allevents(MatchesDF = cl_matches, Parallel = TRUE)

# Clean the raw events; this extracts lots of relevant information such as
# x/y coordinates. More information can be found in the package info.
cl_data_clean <- allclean(CL_Stats_Bomb_Data)
# Per-player shot and goal totals ----
# Group the cleaned events by player so the summary is computed one player at
# a time, then collapse to one row per player:
#   shots = number of events whose type.name is "Shot"
#   goals = number of events whose shot.outcome.name is "Goal"
# A comparison against NA yields NA, so na.rm = TRUE drops those rows from the
# sums. .groups = "drop" returns an ungrouped table; without it the result
# stays grouped by player.name and later verbs would run group by group.
# These are raw totals: the per-90 rates are derived after the minutes join.
cl_shots_goals_per90 <- cl_data_clean %>%
  group_by(player.name, player.id) %>%
  summarise(
    shots = sum(type.name == "Shot", na.rm = TRUE),
    goals = sum(shot.outcome.name == "Goal", na.rm = TRUE),
    .groups = "drop"
  )
# Total minutes per player ----
# get.minutesplayed() gives the minutes played in each match by every player
# in the dataset; summing MinutesPlayed within player.id turns that into each
# player's total minutes played.
cl_player_minutes <- cl_data_clean %>%
  get.minutesplayed() %>%
  group_by(player.id) %>%
  summarise(minutes = sum(MinutesPlayed))
# Per-90 shot and goal rates ----
# Attach each player's total minutes to the shots/goals table. The join key is
# named explicitly so the join never depends on whichever columns the two
# tables happen to share.
cl_player_shots_goals_per90 <- cl_shots_goals_per90 %>%
  left_join(cl_player_minutes, by = "player.id") %>%
  mutate(
    # Number of 90-minute units the player has played.
    nineties = minutes / 90,
    # Totals divided by 90s played give the per-90 rates.
    shots_per90 = shots / nineties,
    gls_per90 = goals / nineties,
    # Sum of the two per-90 columns.
    shots_gls_per90 = shots_per90 + gls_per90
  ) %>%
  # Keep players with more than 180 minutes (strictly greater, matching the
  # ">180 minutes" note in the plot subtitle) for a fair comparison.
  filter(minutes > 180)
################################################################
# Per-player shot and key-pass totals ----
# Group the cleaned events by player, then collapse to one row per player:
#   shots     = number of events whose type.name is "Shot"
#   keypasses = number of events where pass.shot_assist is TRUE
# na.rm = TRUE drops the NA comparisons from the sums. .groups = "drop"
# returns an ungrouped table instead of one still grouped by player.name.
cl_player_shots_keypasses <- cl_data_clean %>%
  group_by(player.name, player.id) %>%
  summarise(
    shots = sum(type.name == "Shot", na.rm = TRUE),
    keypasses = sum(pass.shot_assist == TRUE, na.rm = TRUE),
    .groups = "drop"
  )
# Total minutes per player ----
# get.minutesplayed() gives the minutes played in each match by every player
# in the dataset; summing MinutesPlayed within player.id turns that into each
# player's total minutes played.
cl_player_minutes <- cl_data_clean %>%
  get.minutesplayed() %>%
  group_by(player.id) %>%
  summarise(minutes = sum(MinutesPlayed))
# Per-90 shot and key-pass rates ----
# Attach each player's total minutes to the shots/key-passes table. The join
# key is named explicitly so the join never depends on whichever columns the
# two tables happen to share.
cl_player_shots_keypasses <- cl_player_shots_keypasses %>%
  left_join(cl_player_minutes, by = "player.id") %>%
  mutate(
    # Number of 90-minute units the player has played.
    nineties = minutes / 90,
    # Totals divided by 90s played give the per-90 rates.
    shots_per90 = shots / nineties,
    kp_per90 = keypasses / nineties,
    # Sum of the two per-90 columns.
    shots_kp_per90 = shots_per90 + kp_per90
  ) %>%
  # Keep players with more than 180 minutes (strictly greater, matching the
  # ">180 minutes" note in the plot subtitle) for a fair comparison.
  filter(minutes > 180)
Load packages
library(ggplot2)
#1: This is the package we use to create our plots.
library(ggrepel)
#2: This is the package we use to add text labels that repel each other (geom_text_repel).
library(SBpitch)
#3: This is the package we use to create our pitch.
library(scales)
#4: This is the package we use to modify our scales.
library(prismatic)
#5: This is the package we use to modify our colours.
Plotting player shots and goals
# Scatter of goals per 90 against shots per 90, one labelled point per player.
ggplot(
  cl_player_shots_goals_per90,
  aes(x = shots_per90, y = gls_per90, label = player.name)
) +
  # Linear regression line: method = "lm" fits a linear model, and color/fill
  # set the line and its band to green.
  geom_smooth(method = "lm", color = "green", fill = "green") +
  # Fillable circles (shape 21), slightly transparent, with an outline that is
  # the fill colour darkened by 0.3 once the fill scale has been applied.
  geom_point(
    aes(fill = "green", color = after_scale(clr_darken(fill, 0.3))),
    shape = 21,
    alpha = .75,
    size = 3
  ) +
  # "green" inside aes() is treated as data, so the default discrete fill
  # palette would replace it; the identity scale makes ggplot2 use the literal
  # colour name, giving the green points the code asks for.
  scale_fill_identity() +
  # White player-name labels that repel each other; segments shorter than
  # 0.1 lines are not drawn.
  geom_text_repel(
    size = 2.5,
    color = "white",
    min.segment.length = unit(0.1, "lines")
  ) +
  # Purple canvas with white axes and text; no legend, no minor grid.
  theme(
    legend.position = "none",
    plot.background = element_rect(fill = "purple", colour = "purple"),
    panel.background = element_rect(fill = "purple", colour = "purple"),
    panel.grid.major = element_line(colour = "purple"),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "white"),
    axis.text = element_text(colour = "white"),
    axis.title = element_text(colour = "white"),
    plot.title = element_text(colour = "white", hjust = .5, face = "bold", size = 15),
    plot.subtitle = element_text(colour = "white", hjust = .5, face = "bold", size = 8)
  ) +
  # The subtitle string deliberately spans two lines (it contains a newline).
  labs(
    x = "Shots per 90",
    y = "Goals per 90",
    title = "Player Shots and Goals per 90",
    subtitle = "Champions League
Data: StatsBombR Free Data (>180 minutes of data)| @lorcanmason"
  )
Plotting player shots and key passes
# Scatter of key passes per 90 against shots per 90, one labelled point per
# player.
ggplot(
  cl_player_shots_keypasses,
  aes(x = shots_per90, y = kp_per90, label = player.name)
) +
  # Linear regression line with a green line and band.
  geom_smooth(method = "lm", color = "green", fill = "green") +
  # Fillable circles (shape 21), slightly transparent, with an outline that is
  # the fill colour darkened by 0.3 once the fill scale has been applied.
  geom_point(
    aes(fill = "green", color = after_scale(clr_darken(fill, 0.3))),
    shape = 21,
    alpha = .75,
    size = 3
  ) +
  # "green" inside aes() is treated as data, so the default discrete fill
  # palette would replace it; the identity scale makes ggplot2 use the literal
  # colour name, giving the green points the code asks for.
  scale_fill_identity() +
  # White player-name labels that repel each other.
  geom_text_repel(
    size = 2.5,
    color = "white",
    min.segment.length = unit(0.1, "lines")
  ) +
  # Purple canvas with white axes and text; no legend, no minor grid.
  theme(
    legend.position = "none",
    plot.background = element_rect(fill = "purple", colour = "purple"),
    panel.background = element_rect(fill = "purple", colour = "purple"),
    panel.grid.major = element_line(colour = "purple"),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "white"),
    axis.text = element_text(colour = "white"),
    axis.title = element_text(colour = "white"),
    plot.title = element_text(colour = "white", hjust = .5, face = "bold", size = 15),
    plot.subtitle = element_text(colour = "white", hjust = .5, face = "bold", size = 8)
  ) +
  # The subtitle string deliberately spans two lines (it contains a newline).
  labs(
    x = "Shots per 90",
    y = "Key Passes per 90",
    title = "Shots and Key Passes per 90",
    subtitle = "Champions League
Data: StatsBombR Free Data (>180 minutes of data)| @lorcanmason"
  )
UEFA Euros
# Keep only the competition with id 55 (the UEFA Euros section of this
# tutorial) from the free competitions table.
euros <- FreeCompetitions() %>%
  filter(competition_id == 55)

# Pull all the matches for the selected competition.
euros_matches <- FreeMatches(euros)

# Pull all the event data for those matches. TRUE is spelled out because `T`
# is an ordinary variable that can be reassigned.
EUROS_Stats_Bomb_Data <- free_allevents(MatchesDF = euros_matches, Parallel = TRUE)

# Clean the raw events; this extracts lots of relevant information such as
# x/y coordinates. More information can be found in the package info.
euros_data_clean <- allclean(EUROS_Stats_Bomb_Data)
# Per-player shot and goal totals ----
# Group the cleaned events by player so the summary is computed one player at
# a time, then collapse to one row per player:
#   shots = number of events whose type.name is "Shot"
#   goals = number of events whose shot.outcome.name is "Goal"
# A comparison against NA yields NA, so na.rm = TRUE drops those rows from the
# sums. .groups = "drop" returns an ungrouped table; without it the result
# stays grouped by player.name and later verbs would run group by group.
# These are raw totals: the per-90 rates are derived after the minutes join.
euros_shots_goals_per90 <- euros_data_clean %>%
  group_by(player.name, player.id) %>%
  summarise(
    shots = sum(type.name == "Shot", na.rm = TRUE),
    goals = sum(shot.outcome.name == "Goal", na.rm = TRUE),
    .groups = "drop"
  )
# Total minutes per player ----
# get.minutesplayed() gives the minutes played in each match by every player
# in the dataset; summing MinutesPlayed within player.id turns that into each
# player's total minutes played.
euros_player_minutes <- euros_data_clean %>%
  get.minutesplayed() %>%
  group_by(player.id) %>%
  summarise(minutes = sum(MinutesPlayed))
# Per-90 shot and goal rates ----
# Attach each player's total minutes to the shots/goals table. The join key is
# named explicitly so the join never depends on whichever columns the two
# tables happen to share.
euros_player_shots_goals_per90 <- euros_shots_goals_per90 %>%
  left_join(euros_player_minutes, by = "player.id") %>%
  mutate(
    # Number of 90-minute units the player has played.
    nineties = minutes / 90,
    # Totals divided by 90s played give the per-90 rates.
    shots_per90 = shots / nineties,
    gls_per90 = goals / nineties,
    # Sum of the two per-90 columns.
    shots_gls_per90 = shots_per90 + gls_per90
  ) %>%
  # Keep players with more than 360 minutes (strictly greater, matching the
  # ">360 minutes" note in the plot subtitle) for a fair comparison.
  filter(minutes > 360)
################################################################
# Per-player shot and key-pass totals ----
# Group the cleaned events by player, then collapse to one row per player:
#   shots     = number of events whose type.name is "Shot"
#   keypasses = number of events where pass.shot_assist is TRUE
# na.rm = TRUE drops the NA comparisons from the sums. .groups = "drop"
# returns an ungrouped table instead of one still grouped by player.name.
euros_player_shots_keypasses <- euros_data_clean %>%
  group_by(player.name, player.id) %>%
  summarise(
    shots = sum(type.name == "Shot", na.rm = TRUE),
    keypasses = sum(pass.shot_assist == TRUE, na.rm = TRUE),
    .groups = "drop"
  )
# Total minutes per player ----
# get.minutesplayed() gives the minutes played in each match by every player
# in the dataset; summing MinutesPlayed within player.id turns that into each
# player's total minutes played.
euros_player_minutes <- euros_data_clean %>%
  get.minutesplayed() %>%
  group_by(player.id) %>%
  summarise(minutes = sum(MinutesPlayed))
# Per-90 shot and key-pass rates ----
# Attach each player's total minutes to the shots/key-passes table. The join
# key is named explicitly so the join never depends on whichever columns the
# two tables happen to share.
euros_player_shots_keypasses <- euros_player_shots_keypasses %>%
  left_join(euros_player_minutes, by = "player.id") %>%
  mutate(
    # Number of 90-minute units the player has played.
    nineties = minutes / 90,
    # Totals divided by 90s played give the per-90 rates.
    shots_per90 = shots / nineties,
    kp_per90 = keypasses / nineties,
    # Sum of the two per-90 columns.
    shots_kp_per90 = shots_per90 + kp_per90
  ) %>%
  # Keep players with more than 360 minutes (strictly greater, matching the
  # ">360 minutes" note in the plot subtitle) for a fair comparison.
  filter(minutes > 360)
Load packages
library(ggplot2)
#1: This is the package we use to create our plots.
library(ggrepel)
#2: This is the package we use to add text labels that repel each other (geom_text_repel).
library(SBpitch)
#3: This is the package we use to create our pitch.
library(scales)
#4: This is the package we use to modify our scales.
library(prismatic)
#5: This is the package we use to modify our colours.
Plotting player shots and goals
# Scatter of goals per 90 against shots per 90, one labelled point per player.
ggplot(
  euros_player_shots_goals_per90,
  aes(x = shots_per90, y = gls_per90, label = player.name)
) +
  # Linear regression line: method = "lm" fits a linear model, and color/fill
  # set the line and its band to green.
  geom_smooth(method = "lm", color = "green", fill = "green") +
  # Fillable circles (shape 21), slightly transparent, with an outline that is
  # the fill colour darkened by 0.3 once the fill scale has been applied.
  geom_point(
    aes(fill = "green", color = after_scale(clr_darken(fill, 0.3))),
    shape = 21,
    alpha = .75,
    size = 3
  ) +
  # "green" inside aes() is treated as data, so the default discrete fill
  # palette would replace it; the identity scale makes ggplot2 use the literal
  # colour name, giving the green points the code asks for.
  scale_fill_identity() +
  # White player-name labels that repel each other; segments shorter than
  # 0.1 lines are not drawn.
  geom_text_repel(
    size = 2.5,
    color = "white",
    min.segment.length = unit(0.1, "lines")
  ) +
  # Purple canvas with white axes and text; no legend, no minor grid.
  theme(
    legend.position = "none",
    plot.background = element_rect(fill = "purple", colour = "purple"),
    panel.background = element_rect(fill = "purple", colour = "purple"),
    panel.grid.major = element_line(colour = "purple"),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "white"),
    axis.text = element_text(colour = "white"),
    axis.title = element_text(colour = "white"),
    plot.title = element_text(colour = "white", hjust = .5, face = "bold", size = 15),
    plot.subtitle = element_text(colour = "white", hjust = .5, face = "bold", size = 8)
  ) +
  # The subtitle string deliberately spans two lines (it contains a newline).
  labs(
    x = "Shots per 90",
    y = "Goals per 90",
    title = "Player Shots and Goals per 90",
    subtitle = "UEFA Euros
Data: StatsBombR Free Data (>360 minutes of data)| @lorcanmason"
  )
Plotting player shots and key passes
# Scatter of key passes per 90 against shots per 90, one labelled point per
# player.
ggplot(
  euros_player_shots_keypasses,
  aes(x = shots_per90, y = kp_per90, label = player.name)
) +
  # Linear regression line with a green line and band.
  geom_smooth(method = "lm", color = "green", fill = "green") +
  # Fillable circles (shape 21), slightly transparent, with an outline that is
  # the fill colour darkened by 0.3 once the fill scale has been applied.
  geom_point(
    aes(fill = "green", color = after_scale(clr_darken(fill, 0.3))),
    shape = 21,
    alpha = .75,
    size = 3
  ) +
  # "green" inside aes() is treated as data, so the default discrete fill
  # palette would replace it; the identity scale makes ggplot2 use the literal
  # colour name, giving the green points the code asks for.
  scale_fill_identity() +
  # White player-name labels that repel each other.
  geom_text_repel(
    size = 2.5,
    color = "white",
    min.segment.length = unit(0.1, "lines")
  ) +
  # Purple canvas with white axes and text; no legend, no minor grid.
  theme(
    legend.position = "none",
    plot.background = element_rect(fill = "purple", colour = "purple"),
    panel.background = element_rect(fill = "purple", colour = "purple"),
    panel.grid.major = element_line(colour = "purple"),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "white"),
    axis.text = element_text(colour = "white"),
    axis.title = element_text(colour = "white"),
    plot.title = element_text(colour = "white", hjust = .5, face = "bold", size = 15),
    plot.subtitle = element_text(colour = "white", hjust = .5, face = "bold", size = 8)
  ) +
  # The subtitle string deliberately spans two lines (it contains a newline).
  labs(
    x = "Shots per 90",
    y = "Key Passes per 90",
    title = "Shots and Key Passes per 90",
    subtitle = "UEFA Euros
Data: StatsBombR Free Data (>360 minutes of data)| @lorcanmason"
  )