This challenge explores the final ranking of the 2016-2017 season of Serie A, the Italian soccer league.

Downloads

Challenges

  1. Group teams into performance classes using hierarchical clustering.
  2. Use ggplot2 to visualize the team ranking, highlighting the performance classes with different colors.
## Analysis
library(tidyverse)
# Load the league table from "rank.csv" (resolved against the working
# directory). The later steps read its `Rating` and `Team` columns.
rank <- read_csv(file = "rank.csv")
## Group teams into performance classes
# Order the table from the highest to the lowest rating, so row 1 is the
# top-rated team.
rank <- arrange(rank, desc(Rating))
# Pairwise distances between the ratings (Euclidean is the dist() default),
# followed by average-linkage agglomerative clustering.
D <- dist(rank$Rating)
cc <- hclust(D, method = "average")
# dist() is given a bare numeric vector, so the tree carries no leaf labels and
# plot() would fall back to row numbers; label the leaves with the team names.
plot(cc, labels = rank$Team)
# Outline the 4 performance classes on the dendrogram just drawn; the returned
# list holds the members of each outlined cluster.
clusters.list <- rect.hclust(cc, k = 4, border = "blue")

## Visualize the team ranking highlighting the performance classes with different colors
# Cut the dendrogram into 4 groups; cutree() returns one cluster id per row of
# `rank`, in row order, so it can be attached directly as a column.
clusters <- cutree(cc, k = 4)
rank <- rank %>%
  mutate(cluster = as.factor(clusters))

# Place every team name on a single vertical axis at the height of its rating,
# colored by performance class. The x position is a dummy constant, so all
# x-axis decorations are removed.
ggplot(data = rank) +
  geom_text(
    mapping = aes(x = 1, y = Rating, label = Team, color = cluster),
    show.legend = FALSE
  ) +
  ylab("Rating") +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )