Jose A. Rodriguez of the University of Barcelona created a network of the individuals involved in the bombing of commuter trains in Madrid on March 11, 2004. Rodriguez used press accounts in the two major Spanish daily newspapers (El Pais and El Mundo) to reconstruct the terrorist network. The names included were of those people suspected of having participated and their relatives. Rodriguez specified 4 kinds of ties linking the individuals involved:
These four tie indicators were added together, providing a strength-of-connection index that ranges from 1 to 4.
library(tidyverse)
library(igraph)
library(ggraph)
# Read the list of people; the file is parsed into a single character
# column called `name`.
terrorists <- read_csv("names.csv")
## Parsed with column specification:
## cols(
## name = col_character()
## )
# Read the edge list; it is parsed into integer `from` / `to` endpoints and an
# integer tie `weight`.
ties <- read_csv("edges.csv")
## Parsed with column specification:
## cols(
## from = col_integer(),
## to = col_integer(),
## weight = col_integer()
## )
# Give every person a sequential integer id and make it the first column.
# seq_len() is used instead of 1:nrow() so an empty table yields zero ids
# rather than c(1, 0).
terrorists <- terrorists %>%
  mutate(id = seq_len(nrow(terrorists))) %>%
  select(id, everything())
# make graph
#g = graph_from_data_frame(ties, directed = FALSE, vertices = terrorists)
# Only the ids are supplied as vertex metadata, so every person is a vertex of
# the undirected graph even when no row of `ties` mentions them.
g <- graph_from_data_frame(
  ties,
  directed = FALSE,
  vertices = tibble(id = seq_len(nrow(terrorists)))
)
#g = delete_vertex_attr(g, "name")
# degree and strength
# Add two per-person columns computed on g: `degree` and `strength`.
terrorists <- mutate(
  terrorists,
  degree = degree(g),
  strength = strength(g)
)
# most connected terrorists
# Whole table ordered by degree, largest first.
terrorists %>% arrange(desc(degree))
## # A tibble: 70 x 4
## id name degree strength
## <int> <chr> <dbl> <dbl>
## 1 1 Jamal Zougam 29 43
## 2 3 Mohamed Chaoui 27 34
## 3 7 Imad Eddin Barakat 22 35
## 4 11 Amer Azizi 18 27
## 5 41 Said Berrak 17 19
## 6 18 Galeb Kalaje 16 21
## 7 24 Naima Oulad Akcha 16 16
## 8 19 Abderrahim Zbakh 15 15
## 9 31 Jamal Ahmidan 14 14
## 10 61 Mohamed El Egipcio 13 14
## # ... with 60 more rows
# most strongly connected terrorists
# Whole table ordered by strength, largest first.
terrorists %>% arrange(desc(strength))
## # A tibble: 70 x 4
## id name degree strength
## <int> <chr> <dbl> <dbl>
## 1 1 Jamal Zougam 29 43
## 2 7 Imad Eddin Barakat 22 35
## 3 3 Mohamed Chaoui 27 34
## 4 11 Amer Azizi 18 27
## 5 18 Galeb Kalaje 16 21
## 6 15 Mohamed Belfatmi 11 19
## 7 41 Said Berrak 17 19
## 8 16 Said Bahaji 11 17
## 9 24 Naima Oulad Akcha 16 16
## 10 19 Abderrahim Zbakh 15 15
## # ... with 60 more rows
# isolated terrorists
# Rows whose degree is zero, i.e. people without any tie in g.
terrorists %>% filter(degree == 0)
## # A tibble: 6 x 4
## id name degree strength
## <int> <chr> <dbl> <dbl>
## 1 17 Al? Amrous 0 0
## 2 26 Abdelhalak Bentasser 0 0
## 3 29 Faisal Alluch 0 0
## 4 46 Mohamad Bard Ddin Akkab 0 0
## 5 47 Abu Zubaidah 0 0
## 6 48 Sanel Sjekirika 0 0
# Scatter plot of the edge list: one point per row of `ties` at (from, to),
# coloured by the tie weight treated as a category.
# The legend title previously read "weigth"; the spelling is corrected.
ggplot(ties, aes(x = from, y = to, color = as.factor(weight))) +
  geom_point() +
  labs(color = "weight")
# add reciprocal edges
# Build a copy of every tie with its endpoints swapped and append it, so each
# tie shows up at both (from, to) and (to, from).
# `temp` holds the original `to` because mutate() evaluates its arguments in
# order; it is dropped again afterwards.
ties2 <- ties %>%
  mutate(temp = to, to = from, from = temp) %>%
  select(-temp)
ties2 <- rbind(ties, ties2)
# Same scatter plot as for `ties`, now on the doubled edge list.
# The legend title previously read "weigth"; the spelling is corrected.
ggplot(ties2, aes(x = from, y = to, color = as.factor(weight))) +
  geom_point() +
  labs(color = "weight")
# Variant that maps the numeric weight to point transparency instead.
ggplot(ties2, aes(x = from, y = to, alpha = weight)) +
  geom_point()
# remove isolated nodes
# ids of the people whose degree is zero.
isolated <- terrorists$id[terrorists$degree == 0]
# NOTE(review): the ids are passed as integers, so they are presumably taken
# as vertex positions in g; re-running this line on the already reduced graph
# would then remove different vertices. Confirm before re-executing.
g <- delete_vertices(g, isolated)
# Dense (non-sparse) adjacency matrix filled with the `weight` edge attribute.
A <- as_adjacency_matrix(g, attr = "weight", sparse = FALSE)
# Number of vertices remaining in g.
vcount(g)
## [1] 64
# strength
# Sanity check: number of vertices whose strength equals the matching row sum
# of the weighted adjacency matrix.
sum(rowSums(A) == strength(g))
## [1] 64
# degree
# Sanity check: number of vertices whose degree equals the count of positive
# entries in the matching row of the adjacency matrix.
sum(rowSums(A > 0) == degree(g))
## [1] 64
# similarity as (Pearson) correlation among columns
# cor() on a matrix correlates every pair of columns of A.
S <- cor(A)
# remove self similarity
# Subtracting the identity matrix lowers every diagonal entry by 1 and leaves
# the off-diagonal entries as they are.
S <- S - diag(nrow(S))
# some statistics
# Five-number summary plus mean over all entries of S, flattened to a vector.
summary(as.vector(S))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.27574 -0.07729 -0.03336 0.07074 0.17037 1.00000
# tidy similarity matrix
# Turn S into a weighted undirected graph so it can be unrolled into one row
# per pair with the columns from, to and weight.
h <- graph_from_adjacency_matrix(S, mode = "undirected", weighted = TRUE)
# igraph:: is spelled out because the tidyverse also provides an
# as_data_frame(), so the unqualified call only worked through load order.
# as_tibble() replaces the deprecated as.tibble().
# The endpoints are converted to integer so they can be joined against
# terrorists$id; rows are ordered from most to least similar.
sim <- igraph::as_data_frame(h, what = "edges") %>%
  as_tibble() %>%
  mutate(from = as.integer(from), to = as.integer(to)) %>%
  arrange(desc(weight))
# join sim with terrorists
# The two joins attach the per-person columns for each endpoint (suffix .x
# for `from`, .y for `to`). Only pairs where both people have degree >= 10
# are kept, and the two strength columns are dropped.
sim_joint <- sim %>%
  left_join(terrorists, by = c("from" = "id")) %>%
  left_join(terrorists, by = c("to" = "id")) %>%
  filter(degree.x >= 10, degree.y >= 10) %>%
  select(-strength.x, -strength.y)
# most similar pairs
head(sim_joint)
## # A tibble: 6 x 7
## from to weight name.x degree.x name.y
## <int> <int> <dbl> <chr> <dbl> <chr>
## 1 13 14 0.9061526 Mohamed Atta 10 Ramzi Binalshibh
## 2 4 5 0.8814815 Vinay Kholy 10 Suresh Kumar
## 3 12 58 0.8553506 Abu Musad Alsakaoui 10 Shakur
## 4 12 13 0.8471779 Abu Musad Alsakaoui 10 Mohamed Atta
## 5 12 15 0.8363355 Abu Musad Alsakaoui 10 Mohamed Belfatmi
## 6 4 28 0.8305289 Vinay Kholy 10 Basel Ghayoun
## # ... with 1 more variables: degree.y <dbl>
# most dissimilar pairs
# sim_joint is ordered by decreasing weight, so its last rows are the pairs
# with the lowest similarity.
sim_joint %>% tail()
## # A tibble: 6 x 7
## from to weight name.x degree.x name.y
## <int> <int> <dbl> <chr> <dbl> <chr>
## 1 16 63 -0.1881146 Said Bahaji 11 Semaan Gaby Eid
## 2 1 63 -0.1942068 Jamal Zougam 29 Semaan Gaby Eid
## 3 12 63 -0.1960476 Abu Musad Alsakaoui 10 Semaan Gaby Eid
## 4 40 63 -0.1993567 Abdeluahid Berrak 11 Semaan Gaby Eid
## 5 3 63 -0.2186105 Mohamed Chaoui 27 Semaan Gaby Eid
## 6 7 63 -0.2264384 Imad Eddin Barakat 22 Semaan Gaby Eid
## # ... with 1 more variables: degree.y <dbl>
# plot similarity graph
# Keep the pairs with similarity of at least 0.60 and build an undirected
# graph from them; the columns of `terrorists` become vertex attributes.
h2 <- sim_joint %>%
  filter(weight >= 0.60) %>%
  graph_from_data_frame(directed = FALSE, vertices = terrorists)
# Drop the vertices that have no edge left after thresholding.
h2 <- delete_vertices(h2, which(degree(h2) == 0))
# Basic drawing: edge transparency follows the similarity weight.
ggraph(h2) +
  geom_edge_link(mapping = aes(alpha = weight)) +
  geom_node_point() +
  theme_graph()
## Using `nicely` as default layout
# Similarity graph with node size mapped to the `strength` vertex attribute
# and edge transparency mapped to the similarity weight.
ggraph(h2) +
  geom_edge_link(mapping = aes(alpha = weight)) +
  geom_node_point(mapping = aes(size = strength)) +
  theme_graph()
## Using `nicely` as default layout
# Similarity graph with every node labelled by its `name` attribute.
# repel = TRUE asks for labels that are pushed apart; the literal TRUE is used
# instead of T, which is an ordinary variable and can be reassigned.
ggraph(h2) +
  geom_edge_link(aes(alpha = weight)) +
  geom_node_point() +
  geom_node_text(aes(label = name), repel = TRUE) +
  theme_graph()
## Using `nicely` as default layout
# Store the betweenness of every vertex of h2 as the vertex attribute `betw`.
# NOTE(review): h2 carries a `weight` edge attribute holding similarities;
# betweenness() may pick it up as path length by default -- confirm that this
# is the intended weighting.
V(h2)$betw <- betweenness(h2)
# Similarity graph with node size mapped to betweenness.
ggraph(h2) +
  geom_edge_link(mapping = aes(alpha = weight)) +
  geom_node_point(mapping = aes(size = betw)) +
  theme_graph()
## Using `nicely` as default layout
# distance matrix
# Dissimilarity is one minus the similarity stored in S.
D <- 1 - S
# distance object
# hclust() takes a `dist` object, so the square matrix is converted first.
d <- as.dist(D)
# average-linkage clustering method
cc <- hclust(d, method = "average")
# plot dendrogram
plot(cc)
# draw blue borders around clusters
# The tree is cut into k = 4 groups; the value returned by rect.hclust() is
# kept in clusters.list.
clusters.list <- rect.hclust(cc, k = 4, border = "blue")
# Convert the clustering result to a dendrogram object and draw it with
# ggraph in a circular layout. Text is drawn only for leaf nodes, with its
# coordinates scaled by 1.03 relative to the node position.
dendrogram <- as.dendrogram(cc)
ggraph(dendrogram, layout = "dendrogram", circular = TRUE) +
  geom_edge_diagonal() +
  geom_node_text(
    aes(filter = leaf, label = label, x = x * 1.03, y = y * 1.03),
    size = 3
  ) +
  theme_graph()