David Lusseau, a researcher at the University of Aberdeen, observed the group of dolphins of Doubtful Sound. Every time a school of dolphins was encountered in the fjord between 1995 and 2001, each adult member of the school was photographed and identified from natural markings on the dorsal fin. This information was utilised to determine how often two individuals were seen together. Read the full story.
library(tidyverse)
library(tidygraph)
library(ggraph)
library(corrplot)
edges = read_csv("dolphin_edges.csv")
nodes = read_csv("dolphin_nodes.csv")
dolphin = tbl_graph(nodes = nodes, edges = edges, directed = FALSE)
dolphin
## # A tbl_graph: 62 nodes and 159 edges
## #
## # An undirected simple graph with 1 component
## #
## # Node Data: 62 × 3 (active)
## id name sex
## <dbl> <chr> <chr>
## 1 1 Beak M
## 2 2 Beescratch M
## 3 3 Bumper M
## 4 4 CCL F
## 5 5 Cross M
## 6 6 DN16 F
## 7 7 DN21 M
## 8 8 DN63 M
## 9 9 Double F
## 10 10 Feather M
## # ℹ 52 more rows
## #
## # Edge Data: 159 × 2
## from to
## <int> <int>
## 1 4 9
## 2 6 10
## 3 7 10
## # ℹ 156 more rows
# Are tied dolphins friends or lovers?
actors = as.list(dolphin)$nodes
ties = as.list(dolphin)$edges
ties %>%
left_join(actors, by = c("from" = "id")) %>%
rename(nameFrom = name, sexFrom = sex) %>%
left_join(actors, by = c("to" = "id")) %>%
rename(nameTo = name, sexTo = sex) %>%
mutate(homo = sexFrom == sexTo) %>%
count(homo) %>%
mutate(p = n / sum(n))
## # A tibble: 2 × 3
## homo n p
## <lgl> <int> <dbl>
## 1 FALSE 55 0.346
## 2 TRUE 104 0.654
# Compute degree, closeness, betweenness and PageRank on the network. Are top-ranked dolphins male or female?
dolphin =
dolphin %>%
activate(nodes) %>%
mutate(degree = centrality_degree(),
pagerank = centrality_pagerank(),
closeness = centrality_closeness(),
betweenness = centrality_betweenness())
actors = as.list(dolphin)$nodes
actors %>%
group_by(sex) %>%
summarise(avg_degree = mean(degree),
avg_pagerank = mean(pagerank),
avg_closeness = mean(closeness),
avg_betweenness = mean(betweenness))
## # A tibble: 3 × 5
## sex avg_degree avg_pagerank avg_closeness avg_betweenness
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 F 5.6 0.0169 0.00529 86.2
## 2 M 5.06 0.0162 0.00493 66.8
## 3 U 2.75 0.0104 0.00431 24.8
# t-test: determine whether there is a significant difference between the means of two groups.
# if p-value < 0.05 (common threshold), we conclude that the means are significantly different.
actors =
actors %>%
filter(sex %in% c("M", "F"))
t_degree = t.test(degree ~ sex, data = actors)
t_closeness = t.test(closeness ~ sex, data = actors)
t_betweenness <- t.test(betweenness ~ sex, data = actors)
t_pagerank <- t.test(pagerank ~ sex, data = actors)
# Collect and display the results
test_results <- data.frame(
Metric = c("Degree", "Closeness", "Betweenness", "PageRank"),
P_Value = c(t_degree$p.value, t_closeness$p.value, t_betweenness$p.value, t_pagerank$p.value),
Significant = c(t_degree$p.value < 0.05, t_closeness$p.value < 0.05, t_betweenness$p.value < 0.05, t_pagerank$p.value < 0.05)
)
test_results
## Metric P_Value Significant
## 1 Degree 0.5085044 FALSE
## 2 Closeness 0.1159761 FALSE
## 3 Betweenness 0.4650050 FALSE
## 4 PageRank 0.7369882 FALSE
# Study the correlation among the four centrality measures.
M = cbind(degree = actors$degree,
pagerank = actors$pagerank,
closeness = actors$closeness,
betweenness = actors$betweenness)
(corM = cor(M))
## degree pagerank closeness betweenness
## degree 1.0000000 0.9830948 0.7126718 0.5902140
## pagerank 0.9830948 1.0000000 0.6217912 0.6021920
## closeness 0.7126718 0.6217912 1.0000000 0.6657346
## betweenness 0.5902140 0.6021920 0.6657346 1.0000000
corrplot(corM, method = "ellipse")