The Zachary network represents the pattern of friendships between members of a karate club at a North American university, as determined by direct observation of the club’s members by the experimenter over a period of about two years. The network is interesting because during the period of observation a dispute arose among the members of the club over whether to raise the club’s fees and as a result the club eventually split into two parts, of 18 and 16 members respectively, the latter departing to form their own club.

Dataset

The network is included in the package igraph. Load it with command make_graph(“Zachary”)

Data challenges

  1. Plot the network and try to visually identify communities
  2. Run at least 3 approximated community detection algorithms and compare the resulting groups
library(igraph)
g = make_graph("Zachary")
coords = layout_with_fr(g)
# plot the graph
plot(g, layout=coords, vertex.label=NA, vertex.size=6, vertex.color = "grey")

Spectral community detection

# greedy method (hiearchical, fast method)
c1 = cluster_leading_eigen(g)

# modularity measure
modularity(c1)
## [1] 0.3934089
# modularity matrix
B = modularity_matrix(g, membership(c1))
round(B[1,],2)
##  [1] -1.64  0.08 -0.03  0.38  0.69  0.59  0.59  0.59  0.49 -0.21  0.69
## [12]  0.90  0.79  0.49 -0.21 -0.21 -0.21  0.79 -0.21  0.69 -0.21  0.79
## [23] -0.21 -0.51 -0.31 -0.31 -0.21 -0.41 -0.31 -0.41 -0.41  0.38 -1.23
## [34] -1.74
round(apply(B, 1, sum), 6)
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# memberships of nodes
membership(c1)
##  [1] 1 3 3 3 1 1 1 3 2 2 1 1 3 3 2 2 1 3 2 3 2 3 2 4 4 4 2 4 4 2 2 4 2 2
# number of communities
length(c1)
## [1] 4
# size of communities
sizes(c1)
## Community sizes
##  1  2  3  4 
##  7 12  9  6
# crossing edges
crossing(c1, g)
##  [1]  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE
## [12]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45] FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE
## [56] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE
## [67] FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE
## [78] FALSE
# plot communities with shaded regions
plot(c1, g, layout=coords)

# plot communities without shaded regions
plot(g, vertex.color=membership(c1), layout=coords)

Greedy community detection

# greedy method (hiearchical, fast method)
c2 = cluster_fast_greedy(g)

# modularity measure
modularity(c2)
## [1] 0.3806706
modularity(c1) / modularity(c2)
## [1] 1.033463
# plot communities with shaded regions
plot(c2, g, layout=coords)

# plot dendogram
plot_dendrogram(c2)

Hieararchical clustering

# hierarchical clustering
A = get.adjacency(g, sparse=FALSE)

# cosine similarity
S = cor(A)

# distance matrix
D = 1-S

# distance object
d = as.dist(D)

# average-linkage clustering method
cc = hclust(d, method = "average")

# plot dendrogram
plot(cc)

# draw blue borders around clusters
clusters.list = rect.hclust(cc, k = 2, border="blue")

# cut dendrogram at 4 clusters
clusters = cutree(cc, k = 2)

# plot graph with clusters
plot(g, vertex.color=clusters, layout=coords)