David Lusseau, a researcher at the University of Aberdeen, observed the group of dolphins of Doubtful Sound. Every time a school of dolphins was encountered in the fjord between 1995 and 2001, each adult member of the school was photographed and identified from natural markings on the dorsal fin. This information was utilised to determine how often two individuals were seen together. Read the full story.

Dataset

Data challenges

  1. Are tied dolphins friends or lovers?
  2. Compute degree, closeness, betweenness and PageRank on the network. Are female dolphins significantly performing better than male ones on these metrics?
  3. Study the correlation among the four centrality measures and visualize it with the corrplot package
library(tidyverse)
library(tidygraph)
library(ggraph)
library(corrplot)

edges = read_csv("dolphin_edges.csv")
nodes = read_csv("dolphin_nodes.csv")
dolphin = tbl_graph(nodes = nodes, edges = edges, directed = FALSE)
dolphin
## # A tbl_graph: 62 nodes and 159 edges
## #
## # An undirected simple graph with 1 component
## #
## # Node Data: 62 × 3 (active)
##       id name       sex  
##    <dbl> <chr>      <chr>
##  1     1 Beak       M    
##  2     2 Beescratch M    
##  3     3 Bumper     M    
##  4     4 CCL        F    
##  5     5 Cross      M    
##  6     6 DN16       F    
##  7     7 DN21       M    
##  8     8 DN63       M    
##  9     9 Double     F    
## 10    10 Feather    M    
## # ℹ 52 more rows
## #
## # Edge Data: 159 × 2
##    from    to
##   <int> <int>
## 1     4     9
## 2     6    10
## 3     7    10
## # ℹ 156 more rows
# Are tied dolphins friends or lovers?

actors = as.list(dolphin)$nodes
ties = as.list(dolphin)$edges

ties %>% 
  left_join(actors, by = c("from" = "id")) %>% 
  rename(nameFrom = name, sexFrom = sex) %>% 
  left_join(actors, by = c("to" = "id")) %>% 
  rename(nameTo = name, sexTo = sex) %>% 
  mutate(homo = sexFrom == sexTo) %>% 
  count(homo) %>%   
  mutate(p = n / sum(n))
## # A tibble: 2 × 3
##   homo      n     p
##   <lgl> <int> <dbl>
## 1 FALSE    55 0.346
## 2 TRUE    104 0.654
# Compute degree, closeness, betweenness and PageRank on the network. Are top-ranked dolphins male or female? 

dolphin = 
  dolphin %>% 
  activate(nodes) %>% 
  mutate(degree = centrality_degree(), 
         pagerank = centrality_pagerank(), 
         closeness = centrality_closeness(),
         betweenness = centrality_betweenness())

actors = as.list(dolphin)$nodes

 
actors %>% 
  group_by(sex) %>% 
  summarise(avg_degree = mean(degree),
            avg_pagerank = mean(pagerank),
            avg_closeness = mean(closeness),
            avg_betweenness = mean(betweenness))
## # A tibble: 3 × 5
##   sex   avg_degree avg_pagerank avg_closeness avg_betweenness
##   <chr>      <dbl>        <dbl>         <dbl>           <dbl>
## 1 F           5.6        0.0169       0.00529            86.2
## 2 M           5.06       0.0162       0.00493            66.8
## 3 U           2.75       0.0104       0.00431            24.8
# t-test: determine whether there is a significant difference between the means of two groups.
# if p-value < 0.05  (common threshold), we conclude that the means are significantly different.
actors = 
  actors %>%
  filter(sex %in% c("M", "F")) 

t_degree = t.test(degree ~ sex, data = actors)
t_closeness = t.test(closeness ~ sex, data = actors)
t_betweenness <- t.test(betweenness ~ sex, data = actors)
t_pagerank <- t.test(pagerank ~ sex, data = actors)

# Collect and display the results
test_results <- data.frame(
  Metric = c("Degree", "Closeness", "Betweenness", "PageRank"),
  P_Value = c(t_degree$p.value, t_closeness$p.value, t_betweenness$p.value, t_pagerank$p.value),
  Significant = c(t_degree$p.value < 0.05, t_closeness$p.value < 0.05, t_betweenness$p.value < 0.05, t_pagerank$p.value < 0.05)
)

test_results
##        Metric   P_Value Significant
## 1      Degree 0.5085044       FALSE
## 2   Closeness 0.1159761       FALSE
## 3 Betweenness 0.4650050       FALSE
## 4    PageRank 0.7369882       FALSE
# Study the correlation among the four centrality measures.

M = cbind(degree = actors$degree,
          pagerank = actors$pagerank,
          closeness = actors$closeness,
          betweenness = actors$betweenness)


(corM = cor(M))
##                degree  pagerank closeness betweenness
## degree      1.0000000 0.9830948 0.7126718   0.5902140
## pagerank    0.9830948 1.0000000 0.6217912   0.6021920
## closeness   0.7126718 0.6217912 1.0000000   0.6657346
## betweenness 0.5902140 0.6021920 0.6657346   1.0000000
corrplot(corM, method = "ellipse")