This challenge explores the final ranking of the 2016-2017 season of Serie A, the Italian soccer league.
## Analysis
library(tidyverse)
# Load the ranking data from rank.csv.
rank <- read_csv(file = "rank.csv")
## Group teams into performance classes
# Order the teams from the highest to the lowest Rating.
rank <- rank %>%
  arrange(desc(Rating))

# Hierarchical clustering (average linkage) on the distances between ratings.
D <- dist(rank$Rating)
cc <- hclust(D, method = "average")

# Draw the dendrogram, then outline the k = 4 groups with blue rectangles.
# The result of rect.hclust() is stored so it is not auto-printed.
plot(cc)
clusters.list <- rect.hclust(cc, k = 4, border = "blue")
## Visualize the team ranking, highlighting the performance classes with different colors
# Cut the dendrogram into four groups and store each team's group as a factor
# column named `cluster`.
clusters <- cutree(cc, k = 4)
rank <- rank %>%
  mutate(cluster = as.factor(clusters))

# Write every team name at the height of its Rating, colored by cluster.
# All labels share the constant x position 1, so the x axis carries no
# information and its title, tick labels and tick marks are hidden.
ggplot(
  data = rank,
  mapping = aes(x = 1, y = Rating, color = cluster)
) +
  geom_text(mapping = aes(label = Team), show.legend = FALSE) +
  labs(y = "Rating") +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )