Rare digital art, also known as crypto art, is limited-edition collectible art cryptographically registered with a token on a blockchain. Tokens represent transparent, auditable origin and provenance for a piece of digital art. Blockchain technology allows tokens to be held and securely traded from one collector to another.
Digital galleries like SuperRare exhibit rare digital artworks, such as static and animated images. When a digital asset made by an artist is added to a digital gallery, a token is generated by a smart contract and deposited in the artist’s wallet. The token is permanently linked to the artwork, and is a unique, one-of-a-kind asset that represents ownership and authenticity of the underlying artwork.
Once created, the artwork starts its life on the given blockchain, where a fan or collector can purchase it, and where it can be subsequently exchanged, traded or held by collectors like any other rare artifact. Typically, artworks can be sold using auctions: bidders make offers, and the creator has the ability to accept the bids. When an asset is sold, it is directly transferred to the buyer’s wallet, while the corresponding price in crypto currency is moved to the seller’s wallet. Thanks to the blockchain, each transaction is cryptographically secured and peer-to-peer, meaning neither the funds nor the asset are ever held by the gallery or any other third party.
The typical events on the SuperRare gallery the the following:
The SuperRare (SR) dataset contains transaction information on the digital art gallery SuperRare during one year (from April 2018 to April 2019). The main data frames are the following:
We challenge the dataset with the following queries:
library(tidyverse)
library(lubridate)
library(rlang)
In this prolog we retrieve the main events of the crypto art market: creations, bids, direct and auction sales.
# read data
ERC721Transfer <- read_csv("csv/ERC721Transfer.csv")
EventDetails <- read_csv("csv/EventDetails.csv")
SuperRareCreations = ERC721Transfer %>%
filter(from == "0000000000000000000000000000000000000000") %>% # mint transfers only
left_join(EventDetails, by = "id") %>% # join with event details to get timestamp
select(tokenId, artist = to, timestamp = blockTimestamp) %>% # select relevant variables
arrange(timestamp) # arrange by timestamp
# read data
SuperRareSold <- read_csv("csv/SuperRareSold.csv")
# direct sales
SuperRareDirectSales =
SuperRareSold %>%
mutate(amount = amount / 10^18) %>% # express amount in ETH
left_join(EventDetails, by = "id") %>% # join to get timestamp
select(tokenId, seller, buyer, amount, timestamp = blockTimestamp) %>% # select relevant variables
arrange(timestamp) # arrange by timestamp
# read data
SuperRareAcceptBid <- read_csv("csv/SuperRareAcceptBid.csv")
SuperRareAuctionSales =
SuperRareAcceptBid %>%
mutate(amount = amount / 10^18) %>% # express amount in ETH
rename(buyer = bidder) %>% # remname bidder as buyer
left_join(EventDetails, by = "id") %>% # join with event details to get timestamp
select(tokenId, seller, buyer, amount, timestamp = blockTimestamp) %>% # select relevant variables
arrange(timestamp) # arrange by timestamp
SuperRareAllSales =
SuperRareAuctionSales %>%
dplyr::union(SuperRareDirectSales) %>% # union
arrange(timestamp) # arrange by timestamp
SuperRareBid <- read_csv("csv/SuperRareBid.csv")
SuperRareTokenCreator <- read_csv("csv/SuperRareTokenCreator.csv")
# all bids with creator of artwork and timestamp
SuperRareAllBids =
SuperRareBid %>%
left_join(SuperRareTokenCreator, by = "tokenId") %>%
left_join(EventDetails, by = "id") %>%
mutate(amount = amount / 10^18) %>%
select(tokenId, bidder, artist = address, amount, timestamp = blockTimestamp)
# summary of prices
summary(SuperRareAllSales$amount)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000001 0.180000 0.350000 0.456933 0.500000 5.000000
# popular prices
count(SuperRareAllSales, amount, sort = TRUE)
## # A tibble: 138 x 2
## amount n
## <dbl> <int>
## 1 0.5 187
## 2 0.2 88
## 3 0.3 88
## 4 0.1 86
## 5 0.25 69
## 6 0.4 65
## 7 1 62
## 8 0.35 59
## 9 0.15 56
## 10 0.05 42
## # … with 128 more rows
# visualize popular prices
ggplot(SuperRareAllSales) +
geom_histogram(aes(x = amount), binwidth = 0.1) +
theme_classic()
# tibble sale users
userHash = sort(unique(c(SuperRareAllSales$seller, SuperRareAllSales$buyer)))
saleUsers = tibble(id = 1:length(userHash), hash = userHash)
# count sellers
countSellers =
group_by(SuperRareAllSales, seller) %>%
summarise(sellNumber = n(), sellAmount = sum(amount)) %>%
arrange(desc(sellNumber))
# count buyers
countBuyers =
group_by(SuperRareAllSales, buyer) %>%
summarise(buyNumber = n(), buyAmount = sum(amount)) %>%
arrange(desc(buyNumber))
# add sales and purchases to users
saleUsers =
saleUsers %>%
left_join(countSellers, by = c("hash" = "seller")) %>% # add sales (notice left join)
left_join(countBuyers, by = c("hash" = "buyer")) %>% # add purchases (notice left join)
mutate(sellNumber = ifelse(is.na(sellNumber), 0, sellNumber), # replace NA with 0
buyNumber = ifelse(is.na(buyNumber), 0, buyNumber),
sellAmount = ifelse(is.na(sellAmount), 0, sellAmount),
buyAmount = ifelse(is.na(buyAmount), 0, buyAmount))
# correlation
M = as.matrix(saleUsers[, c("sellNumber", "buyNumber", "sellAmount", "buyAmount")])
round(cor(M, method = "kendall"), 2)
## sellNumber buyNumber sellAmount buyAmount
## sellNumber 1.00 -0.19 0.88 -0.24
## buyNumber -0.19 1.00 -0.15 0.77
## sellAmount 0.88 -0.15 1.00 -0.20
## buyAmount -0.24 0.77 -0.20 1.00
# remove super buyers
superBuyers = countBuyers$buyer[1:2]
saleUsersNoOutliers = filter(saleUsers, !(hash %in% superBuyers))
qSell = quantile(saleUsersNoOutliers$sellNumber, 0.9)
qBuy = quantile(saleUsersNoOutliers$buyNumber, 0.9)
ggplot(saleUsersNoOutliers) +
geom_point(aes(x = sellNumber, y = buyNumber)) +
labs(x = "sell", y = "buy", title = "Sales versus purchases", subtitle = "Users are either sellers or buyers (with two exceptions)") +
scale_fill_gradient(low="black", high="red") +
geom_hline(aes(yintercept = qSell), size = 0.3, colour = "grey") +
geom_vline(aes(xintercept = qBuy), size = 0.3, colour = "grey") +
theme_classic()
# number of sales
nSales = nrow(SuperRareSold)
# number of primary sales
nPrimarySales = n_distinct(SuperRareSold$tokenId)
# number of secondary sales
nSecondarySales = nSales - n_distinct(SuperRareSold$tokenId)
nSecondarySales / nSales
## [1] 0.04113111
# re-sales
count(SuperRareSold, tokenId, sort = TRUE) %>%
filter(n > 1)
## # A tibble: 12 x 2
## tokenId n
## <dbl> <int>
## 1 1000 5
## 2 702 3
## 3 4 2
## 4 65 2
## 5 113 2
## 6 130 2
## 7 172 2
## 8 495 2
## 9 1039 2
## 10 1053 2
## 11 1460 2
## 12 1600 2
# build events (mint, sell, bid)
# mint
mintEvents = SuperRareCreations %>%
select(tokenId, timestamp) %>%
mutate(type = "mint")
# sell
sellEvents = SuperRareAllSales %>%
select(tokenId, timestamp) %>%
mutate(type = "sell")
# bid
bidEvents = SuperRareAllBids %>%
select(tokenId, timestamp) %>%
mutate(type = "bid")
allEvents = mintEvents %>%
dplyr::union(sellEvents) %>%
dplyr::union(bidEvents) %>%
arrange(timestamp)
# by type
count(allEvents, type, sort = TRUE)
## # A tibble: 3 x 2
## type n
## <chr> <int>
## 1 mint 2675
## 2 bid 1956
## 3 sell 1217
# by date
count(allEvents, date = date(timestamp), sort = TRUE)
## # A tibble: 344 x 2
## date n
## <date> <int>
## 1 2018-07-17 194
## 2 2019-01-03 148
## 3 2019-01-26 139
## 4 2018-07-16 123
## 5 2019-02-08 82
## 6 2019-01-14 76
## 7 2019-03-07 76
## 8 2019-03-06 71
## 9 2019-02-09 66
## 10 2019-03-08 66
## # … with 334 more rows
# by month
count(allEvents, year = year(timestamp), year = year(timestamp), month = month(timestamp), sort = TRUE)
## # A tibble: 13 x 3
## year month n
## <dbl> <dbl> <int>
## 1 2019 3 1158
## 2 2019 1 1118
## 3 2019 2 899
## 4 2018 11 605
## 5 2018 7 468
## 6 2018 12 427
## 7 2018 10 314
## 8 2019 4 197
## 9 2018 9 194
## 10 2018 8 166
## 11 2018 4 152
## 12 2018 5 79
## 13 2018 6 71
# by week day
count(allEvents, year = wday(timestamp, label = TRUE), sort = TRUE)
## # A tibble: 7 x 2
## year n
## <ord> <int>
## 1 Mar 939
## 2 Lun 902
## 3 Ven 895
## 4 Gio 869
## 5 Mer 820
## 6 Sab 727
## 7 Dom 696
# by week day (single events)
count(mintEvents, year = wday(timestamp, label = TRUE), sort = TRUE)
## # A tibble: 7 x 2
## year n
## <ord> <int>
## 1 Mar 528
## 2 Lun 497
## 3 Mer 367
## 4 Dom 357
## 5 Gio 341
## 6 Ven 307
## 7 Sab 289
count(bidEvents, year = wday(timestamp, label = TRUE), sort = TRUE)
## # A tibble: 7 x 2
## year n
## <ord> <int>
## 1 Ven 368
## 2 Gio 330
## 3 Mer 288
## 4 Sab 288
## 5 Mar 249
## 6 Lun 234
## 7 Dom 206
count(sellEvents, year = wday(timestamp, label = TRUE), sort = TRUE)
## # A tibble: 7 x 2
## year n
## <ord> <int>
## 1 Ven 222
## 2 Gio 200
## 3 Lun 175
## 4 Mer 166
## 5 Mar 164
## 6 Sab 153
## 7 Dom 137
# by date
daily = count(allEvents, date = date(timestamp), sort = TRUE)
ggplot(daily, aes(date, n)) +
geom_line() +
geom_smooth(se = FALSE, span = 0.20) +
labs(title = "Events per date") +
theme_bw()
dailyMint = count(mintEvents, date = date(timestamp), sort = TRUE)
ggplot(filter(dailyMint, !(date %in% c("2018-07-17", "2018-07-16"))), aes(date, n)) +
geom_line() +
geom_smooth(se = FALSE, span = 0.20) +
labs(title = "Mint events per date") +
theme_bw()
dailyBid = count(bidEvents, date = date(timestamp), sort = TRUE)
ggplot(dailyBid, aes(date, n)) +
geom_line() +
geom_smooth(se = FALSE, span = 0.20) +
labs(title = "Bid events per date") +
theme_bw()
dailySell = count(sellEvents, date = date(timestamp), sort = TRUE)
ggplot(dailySell, aes(date, n)) +
geom_line() +
geom_smooth(se = FALSE, span = 0.20) +
labs(title = "Sell events per date") +
theme_bw()
SuperRareUsers <- read_csv("csv/User.csv")
# sell
sellEndorse = SuperRareDirectSales %>%
select(tokenId, address = seller, timestamp) %>%
mutate(type = "sell")
# bid
bidEndorse = SuperRareAllBids %>%
select(tokenId, address = artist, timestamp) %>%
mutate(type = "bid")
allEndorse = dplyr::union(sellEndorse, bidEndorse) %>%
left_join(SuperRareUsers, by = c("address" = "ethaddress")) %>%
select(tokenId, address, username, timestamp) %>%
arrange(timestamp)
artistEndorse =
allEndorse %>%
group_by(username, address) %>%
summarise(endorsements = n())
arrange(artistEndorse, desc(endorsements))
## # A tibble: 124 x 3
## # Groups: username [124]
## username address endorsements
## <chr> <chr> <int>
## 1 XCOPY 39cc9c86e67baf2129b80fe3414c397492ea8026 148
## 2 Hackatao 21316e6a4f0af45e5f1503984e83b10c53b177d8 142
## 3 artonymousartifakt 77350e1152efd5f2d807a6124015c629a907155e 98
## 4 DrBeef_ 860c4604fe1125ea43f81e613e7afb2aa49546aa 90
## 5 opheliafu 5b0d92e138422428d8e5afab8ee8f99ba9a4e613 81
## 6 coldie d0c877b474cd51959931a7f70d7a6c60f50cdae7 70
## 7 Roses b3007ff3c3f40bdf0126fec7c8e43c3fc50ea813 66
## 8 triplecode 26c2c08cc58d7fa471ded745fcd0fdcedb282e17 64
## 9 Albert 7f67e80272d6349a70acd0a6d0dc169292431020 56
## 10 shortcut e13d4abee4b304b67c52a56871141cad1b833aa7 55
## # … with 114 more rows
TokenLike <- read_csv("csv/TokenLike.csv")
TokenView <- read_csv("csv/TokenView.csv")
ERC721Metadata <- read_csv("csv/ERC721Metadata.csv")
# add token name
TokenLike =
TokenLike %>%
left_join(ERC721Metadata, by = "tokenId") %>%
select(tokenId, name, address, timestamp = dateCreated, imageURI)
# add token name
TokenView =
TokenView %>%
left_join(ERC721Metadata, by = "tokenId") %>%
select(tokenId, name, fingerprint, timestamp = dateCreated, imageURI)
(countLike = count(TokenLike, tokenId, name, sort = TRUE))
## # A tibble: 1,645 x 3
## tokenId name n
## <dbl> <chr> <int>
## 1 1717 The dark side of the Bufficorn 18
## 2 1771 Happy Bufficorn 12
## 3 1134 Flood 11
## 4 1719 #Bufficorn ... to the Moon 11
## 5 1750 Bufficorn 2019 11
## 6 1692 A SuperRare Low Poli Bufficorn 10
## 7 1694 ETHDenver #Bufficorn by @Roses 10
## 8 2588 The ███████ Image 10
## 9 1012 Podmork HOD 131 Santa Muerte 9
## 10 1131 Cold Winter 9
## # … with 1,635 more rows
(countView = count(TokenView, tokenId, name, sort = TRUE))
## # A tibble: 2,659 x 3
## tokenId name n
## <dbl> <chr> <int>
## 1 135 Latent Space of Landscape Paintings #1 808
## 2 583 Skeletal garden 379
## 3 1717 The dark side of the Bufficorn 355
## 4 519 Hyperflower—Magnolia 326
## 5 89 AI Generated Landscape Painting #5 298
## 6 1750 Bufficorn 2019 285
## 7 1134 Flood 273
## 8 17 Girl Next door 265
## 9 59 Pakkuman 261
## 10 1774 The Bufficorn Gathering 254
## # … with 2,649 more rows
artistLike = countLike %>%
left_join(SuperRareTokenCreator, by = "tokenId") %>%
group_by(address) %>%
summarise(likes = sum(n)) %>%
left_join(SuperRareUsers, by = c("address" = "ethaddress")) %>%
select(username, address, likes) %>%
arrange(desc(likes))
artistView = countView %>%
left_join(SuperRareTokenCreator, by = "tokenId") %>%
group_by(address) %>%
summarise(views = sum(n)) %>%
left_join(SuperRareUsers, by = c("address" = "ethaddress")) %>%
select(username, address, views) %>%
arrange(desc(views))
artistLike
## # A tibble: 115 x 3
## username address likes
## <chr> <chr> <int>
## 1 Albert 7f67e80272d6349a70acd0a6d0dc169292431020 306
## 2 Roses b3007ff3c3f40bdf0126fec7c8e43c3fc50ea813 189
## 3 XCOPY 39cc9c86e67baf2129b80fe3414c397492ea8026 188
## 4 Hackatao 21316e6a4f0af45e5f1503984e83b10c53b177d8 171
## 5 ghostshoes 66f174184b24b3dafa63c4c716c4fb9d251d598e 166
## 6 shortcut e13d4abee4b304b67c52a56871141cad1b833aa7 158
## 7 snikt83 4af4aebe930e938fa11ad28cd2c88645cce739a1 143
## 8 MattiaC 576a655161b5502dcf40602be1f3519a89b71658 118
## 9 bardionson 01cb023186cab05220554ee75b4d69921dd051f1 115
## 10 opheliafu 5b0d92e138422428d8e5afab8ee8f99ba9a4e613 105
## # … with 105 more rows
artistView
## # A tibble: 120 x 3
## username address views
## <chr> <chr> <int>
## 1 Hackatao 21316e6a4f0af45e5f1503984e83b10c53b177d8 6648
## 2 Albert 7f67e80272d6349a70acd0a6d0dc169292431020 6600
## 3 DrBeef_ 860c4604fe1125ea43f81e613e7afb2aa49546aa 6130
## 4 XCOPY 39cc9c86e67baf2129b80fe3414c397492ea8026 4901
## 5 bardionson 01cb023186cab05220554ee75b4d69921dd051f1 3669
## 6 Roses b3007ff3c3f40bdf0126fec7c8e43c3fc50ea813 3337
## 7 artonymousartifakt 77350e1152efd5f2d807a6124015c629a907155e 3264
## 8 MattiaC 576a655161b5502dcf40602be1f3519a89b71658 3194
## 9 ghostshoes 66f174184b24b3dafa63c4c716c4fb9d251d598e 3123
## 10 opheliafu 5b0d92e138422428d8e5afab8ee8f99ba9a4e613 2596
## # … with 110 more rows
To answer this query we will use Kleinberg HITS on the sale network. We first build and visualize the directed weighted network where the links are from buyer to seller (following the flow of money) and the weight of the link is the sale price.
library(tidygraph)
library(ggraph)
# build nodes
x = sort(unique(c(SuperRareAllSales$seller, SuperRareAllSales$buyer)))
nodes = tibble(id = 1:length(x), address = x) %>%
left_join(SuperRareUsers, by = c("address" = "ethaddress")) %>%
select(id, username, address)
# build edges (edge direction along the flow of money: buyer --> seller)
edges =
mutate(SuperRareAllSales,
from = match(SuperRareAllSales$buyer, nodes$address),
to = match(SuperRareAllSales$seller, nodes$address)) %>%
select(from, to, buyer, seller, tokenId, amount, timestamp)
# build sale network
saleNet = tbl_graph(nodes = nodes, edges, directed = TRUE)
# visualize (undirected) network
layout_fr = create_layout(saleNet, layout = "fr")
ggraph(layout_fr) +
geom_edge_fan(alpha = 0.2, show.legend = FALSE) +
theme_graph()
We now compute some centrality measures on the sale network, namely:
and check whether they are pairwise correlated.
library(corrplot)
# compute all other ratings and add them to the nodes of the graph
saleNet = saleNet %>%
activate(nodes) %>%
mutate(authority = centrality_authority(weights = amount), # HITS authority
sellAmount = centrality_degree(weights = amount, mode = "in"), # overall amount of sold items
sellItems = centrality_degree(mode = "in"), # number of sold items
hub = centrality_hub(weights = amount), # HITS hub
buyAmount = centrality_degree(weights = amount, mode = "out"), # overall amount of bought items
buyItems = centrality_degree(mode = "out") # number of bought items
)
networkCentrality = as.list(saleNet)$nodes
M = cor(select(networkCentrality, -id, -username, -address), method = "kendall")
knitr::kable(round(M, 2))
authority | sellAmount | sellItems | hub | buyAmount | buyItems | |
---|---|---|---|---|---|---|
authority | 1.00 | 0.86 | 0.85 | -0.05 | -0.17 | -0.12 |
sellAmount | 0.86 | 1.00 | 0.88 | -0.08 | -0.20 | -0.15 |
sellItems | 0.85 | 0.88 | 1.00 | -0.10 | -0.24 | -0.19 |
hub | -0.05 | -0.08 | -0.10 | 1.00 | 0.74 | 0.73 |
buyAmount | -0.17 | -0.20 | -0.24 | 0.74 | 1.00 | 0.77 |
buyItems | -0.12 | -0.15 | -0.19 | 0.73 | 0.77 | 1.00 |
corrplot(M, order = "AOE")
Finally we list the top-10 artists and collectors using HITS centrality and check if there are top artists that are also top collectors:
networkCentrality %>%
select(username, authority, sellAmount, sellItems) %>%
arrange(desc(authority))
## # A tibble: 222 x 4
## username authority sellAmount sellItems
## <chr> <dbl> <dbl> <dbl>
## 1 Hackatao 1 45.8 51
## 2 XCOPY 0.903 45.4 57
## 3 opheliafu 0.567 21.8 38
## 4 coldie 0.487 15.6 23
## 5 bardionson 0.379 15.3 38
## 6 Albert 0.311 14.3 41
## 7 MattiaC 0.291 18.9 36
## 8 artonymousartifakt 0.289 15.2 56
## 9 Roses 0.273 17.1 38
## 10 HEX0x6C 0.258 15.9 30
## # … with 212 more rows
networkCentrality %>%
select(username, hub, buyAmount, buyItems) %>%
arrange(desc(hub))
## # A tibble: 222 x 4
## username hub buyAmount buyItems
## <chr> <dbl> <dbl> <dbl>
## 1 VK_Crypto 1.000 163. 260
## 2 sebdcl 0.430 114. 209
## 3 MomusCollection 0.140 25.7 58
## 4 Roses 0.0668 14.2 38
## 5 BoyPreviousDoor 0.0567 15.5 52
## 6 zaphodok 0.0556 16.5 39
## 7 ArtWhale 0.0521 15.1 38
## 8 EyeballKid 0.0508 8.67 37
## 9 Prometheus 0.0429 7.00 6
## 10 msaf3 0.0374 3.6 7
## # … with 212 more rows
n = 10
topArtists = networkCentrality %>%
arrange(desc(authority)) %>%
select(username) %>%
head(n)
topCollectors = networkCentrality %>%
arrange(desc(hub)) %>%
select(username) %>%
head(n)
dplyr::intersect(topArtists, topCollectors)
## # A tibble: 1 x 1
## username
## <chr>
## 1 Roses
We first program a funtion that computes all-time top actors using a timed version of the HITS method. We also adapt it to discover hot (short-time top) actors.
# TopHot - Identify top and hot actors
# INPUT
# endorsements: data frame with timestamped endorsements. Columns are: from, to, price, timestamp
# eps: hotness deflation value for unit of time (days)
# OUTPUT
# hotx: artist hotness vector
# hoty: collector hotness vector
# x: artist rating vector
# y: collector rating vector
TopHot = function(endorsements, eps = 1) {
# number of artists
n = max(c(endorsements$from, endorsements$to))
# number of events
m = nrow(endorsements)
# artist rating vector
x = rep(0, n)
# collector rating vector
y = rep(0, n)
# artist hotness vector
hotx = rep(0, n)
# collector hotness vector
hoty = rep(0, n)
# reward vectors
rew = rep(0, m)
rewx = rep(0, m)
rewy = rep(0, m)
# artist reward vector
xw = rep(0, m)
# collector reward vector
yw = rep(0, m)
# percentile function
ecdf_fun <- function(x, perc) ecdf(x)(perc)
# iterate through events
for (i in 1:m) {
# 1. get seller, buyer and price
seller = endorsements[i, "to"]
buyer = endorsements[i, "from"]
price = endorsements[i, "amount"]
# 2. compute price reward
if (i == 1) {
rew[i] = 0.5
} else {
rew[i] = ecdf_fun(endorsements[1:(i-1),]$amount, price)
}
# 3. compute rating rewards for seller and buyer
# only sellers with one sale
v = x[x > 0]
if (length(v) == 0) {
rewx[i] = 0.5
} else {
rewx[i] = ecdf_fun(v, x[seller])
}
# only buyers with one purchase
v = y[y > 0]
if (length(v) == 0) {
rewy[i] = 0.5
}
else {
rewy[i] = ecdf_fun(v, y[buyer])
}
# 4. compute artist reward using price reward and collector rating reward
xw[i] = rew[i] * rewy[i]
# 5. compute collector reward using price reward and artist reward
yw[i] = rew[i] * rewx[i]
# 6. update seller artist and buyer collector ratings with respective rewards
x[seller] = x[seller] + xw[i]
y[buyer] = y[buyer] + yw[i]
# 7. update hotness for artist and collector
hotx[seller] = hotx[seller] + xw[i]
hoty[buyer] = hoty[buyer] + yw[i]
# 9. deflate all values of hotness proportionally to elapsed time
if (i > 1) {
# current time in seconds
t2 = as.double(endorsements[i, "timestamp"])
# previous time in seconds
t1 = as.double(endorsements[i-1, "timestamp"])
# elapsed time since last event in days
timediff = (t2 - t1) / (60 * 60 * 24)
# decrease hotness
hotx = hotx - eps * timediff
hoty = hoty - eps * timediff
# force hotness to be non-negative
hotx[hotx < 0] = 0
hoty[hoty < 0] = 0
}
}
return(list(x = x, y = y, hotx = hotx, hoty = hoty))
}
library(DT)
endorsements =
as.list(saleNet)$edges %>%
select(from, to, amount, timestamp)
l = TopHot(as.data.frame(endorsements), eps = 0.15)
# add ratings to network
saleNet = saleNet %>%
activate(nodes) %>%
mutate(hotArtist = l$hotx, hotCollector = l$hoty, topArtist = l$x, topCollector = l$y)
networkCentrality = as.list(saleNet)$nodes %>%
select(username, topArtist, authority, sellAmount, sellItems, topCollector, hub, buyAmount, buyItems) %>%
rename(artist = topArtist, collector = topCollector, sellA = sellAmount, buyA = buyAmount, sellN = sellItems, buyN = buyItems)
dt = cbind(networkCentrality[,1], round(networkCentrality[, 2:ncol(networkCentrality)], 2)) %>% arrange(-artist)
datatable(dt, options = list(pageLength = 10))
M = cor(select(networkCentrality, -username), method = "kendall")
knitr::kable(round(M, 2))
artist | authority | sellA | sellN | collector | hub | buyA | buyN | |
---|---|---|---|---|---|---|---|---|
artist | 1.00 | 0.89 | 0.87 | 0.88 | -0.04 | -0.05 | -0.18 | -0.13 |
authority | 0.89 | 1.00 | 0.86 | 0.85 | -0.03 | -0.05 | -0.17 | -0.12 |
sellA | 0.87 | 0.86 | 1.00 | 0.88 | -0.06 | -0.08 | -0.20 | -0.15 |
sellN | 0.88 | 0.85 | 0.88 | 1.00 | -0.10 | -0.10 | -0.24 | -0.19 |
collector | -0.04 | -0.03 | -0.06 | -0.10 | 1.00 | 0.72 | 0.74 | 0.72 |
hub | -0.05 | -0.05 | -0.08 | -0.10 | 0.72 | 1.00 | 0.74 | 0.73 |
buyA | -0.18 | -0.17 | -0.20 | -0.24 | 0.74 | 0.74 | 1.00 | 0.77 |
buyN | -0.13 | -0.12 | -0.15 | -0.19 | 0.72 | 0.73 | 0.77 | 1.00 |
corrplot.mixed(M, lower="number", upper="ellipse", order = "AOE")
ggplot(filter(networkCentrality, artist > 0), aes(x = artist, y = authority)) +
geom_point() +
geom_smooth() +
theme_bw()
ggplot(filter(networkCentrality, artist > 0), aes(x = artist, y = sellN)) +
geom_point() +
geom_smooth() +
theme_bw()
ggplot(filter(networkCentrality, artist > 0), aes(x = artist, y = sellA)) +
geom_point() +
geom_smooth() +
theme_bw()
ggplot(filter(networkCentrality, collector > 0, collector < 50), aes(x = collector, y = hub)) +
geom_point() +
geom_smooth() +
theme_bw()
ggplot(filter(networkCentrality, collector > 0, collector < 50), aes(x = collector, y = buyN)) +
geom_point() +
geom_smooth() +
theme_bw()
ggplot(filter(networkCentrality, collector > 0, collector < 50), aes(x = collector, y = buyA)) +
geom_point() +
geom_smooth() +
theme_bw()
tophot = as.list(saleNet)$nodes %>%
select(username, topArtist, hotArtist, topCollector, hotCollector)
dt = cbind(tophot[,1], round(tophot[, 2:ncol(tophot)], 2)) %>% arrange(-topArtist)
datatable(dt, options = list(pageLength = 10))
n = 10
topArtists = tophot %>%
arrange(desc(topArtist)) %>%
select(username) %>%
head(n)
hotArtists = tophot %>%
arrange(desc(hotArtist)) %>%
select(username) %>%
head(n)
# top and hot artists
dplyr::intersect(topArtists, hotArtists)
## # A tibble: 5 x 1
## username
## <chr>
## 1 XCOPY
## 2 Hackatao
## 3 opheliafu
## 4 Roses
## 5 MattiaC
# hot artists not top
dplyr::setdiff(hotArtists, topArtists)
## # A tibble: 5 x 1
## username
## <chr>
## 1 ilan_katin
## 2 artplusbrad
## 3 MLIBTY
## 4 coldie
## 5 LoveArtHate
# top artists not hot
dplyr::setdiff(topArtists, hotArtists)
## # A tibble: 5 x 1
## username
## <chr>
## 1 artonymousartifakt
## 2 Albert
## 3 HEX0x6C
## 4 shortcut
## 5 bardionson
n = 8
topCollectors = tophot %>%
arrange(desc(topCollector)) %>%
select(username) %>%
head(n)
hotCollectors = tophot %>%
arrange(desc(hotCollector)) %>%
select(username) %>%
head(n)
# top and hot collectors
dplyr::intersect(topCollectors, hotCollectors)
## # A tibble: 5 x 1
## username
## <chr>
## 1 VK_Crypto
## 2 sebdcl
## 3 MomusCollection
## 4 BoyPreviousDoor
## 5 wyatt
# top collectors not hot
dplyr::setdiff(topCollectors, hotCollectors)
## # A tibble: 3 x 1
## username
## <chr>
## 1 zaphodok
## 2 Roses
## 3 MattiaC
# hot collectors not top
dplyr::setdiff(hotCollectors, topCollectors)
## # A tibble: 3 x 1
## username
## <chr>
## 1 Kinchasa
## 2 Cronus
## 3 ArtWhale
tophot =
arrange(tophot, username) %>%
mutate(id = row_number())
t1 =
tophot %>%
mutate(topArtist = 100 * topArtist / sum(topArtist)) %>%
arrange(desc(topArtist)) %>%
mutate(idTop = row_number()) %>%
select(username, topArtist) %>%
rename('artist' = username, 'topness' = topArtist)
t2 =
tophot %>%
mutate(hotArtist = 100 * hotArtist / sum(hotArtist)) %>%
arrange(desc(hotArtist)) %>%
mutate(idHot = row_number()) %>%
select(username, hotArtist) %>%
rename('artist' = username, 'hotness' = hotArtist)
knitr::kable(head(cbind(t1, t2), 10), digits = 2, format = "latex")
t3 =
tophot %>%
mutate(topCollector = 100 * topCollector / sum(topCollector)) %>%
arrange(desc(topCollector)) %>%
mutate(idTop = row_number()) %>%
select(username, topCollector) %>%
rename('collector' = username, 'topness' = topCollector)
t4 =
tophot %>%
mutate(hotCollector = 100 * hotCollector / sum(hotCollector)) %>%
arrange(desc(hotCollector)) %>%
mutate(idHot = row_number()) %>%
select(username, hotCollector) %>%
rename('collector' = username, 'hotness' = hotCollector)
knitr::kable(head(cbind(t3, t4), 8), digits = 2, format = "latex")
saleNet = saleNet %>%
activate(nodes) %>%
mutate(betweenness = centrality_betweenness())
brokers =
as.list(saleNet)$nodes %>%
arrange(desc(betweenness)) %>%
select(username, betweenness, sellItems, buyItems) %>%
mutate(items = sellItems + buyItems)
knitr::kable(head(brokers, 10), digits = 0)
username | betweenness | sellItems | buyItems | items |
---|---|---|---|---|
Hackatao | 3140 | 51 | 11 | 62 |
MattiaC | 2476 | 36 | 34 | 70 |
Roses | 2293 | 38 | 38 | 76 |
BoyPreviousDoor | 2159 | 4 | 52 | 56 |
artonymousartifakt | 1488 | 56 | 16 | 72 |
HEX0x6C | 1215 | 30 | 15 | 45 |
0xbull | 1089 | 34 | 3 | 37 |
ArtWhale | 854 | 5 | 38 | 43 |
EyeballKid | 770 | 7 | 37 | 44 |
pbock | 742 | 38 | 3 | 41 |
Given a rating method for artists, we evaluate its prediction accuracy as follows:
screenshot = function(sales, t, eps = 0.15) {
# retrieve sales up to time t
sales = filter(sales, timestamp < t)
# build nodes
x = sort(unique(c(sales$seller, sales$buyer)))
nodes = tibble(id = 1:length(x), address = x) %>%
left_join(SuperRareUsers, by = c("address" = "ethaddress")) %>%
select(id, username, address)
# build edges (edge direction along the flow of money: buyer --> seller)
edges =
mutate(sales,
from = match(sales$buyer, nodes$address),
to = match(sales$seller, nodes$address)) %>%
select(from, to, buyer, seller, tokenId, amount, timestamp)
# build sale network
saleGraph = tbl_graph(nodes = nodes, edges, directed = TRUE)
# compute all other ratings and add them to the nodes of the graph
saleGraph = saleGraph %>%
activate(nodes) %>%
mutate(authority = centrality_authority(weights = amount), # HITS authority
sellAmount = centrality_degree(weights = amount, mode = "in"), # overall amount of sold items
sellItems = centrality_degree(mode = "in"), # number of sold items
hub = centrality_hub(weights = amount), # HITS hub
buyAmount = centrality_degree(weights = amount, mode = "out"), # overall amount of bought items
buyItems = centrality_degree(mode = "out") # number of bought items
)
# compute topness and hotness
endorsements =
as.list(saleGraph)$edges %>%
select(from, to, amount, timestamp)
l = TopHot(as.data.frame(endorsements), eps)
# add metrics to the sale network
saleGraph = saleGraph %>%
activate(nodes) %>%
mutate(hotArtist = l$hotx, hotCollector = l$hoty, topArtist = l$x, topCollector = l$y)
# retrieve data frame with all metrics
centrality = as.list(saleGraph)$nodes %>%
select(username, topArtist, authority, sellAmount, sellItems, topCollector, hub, buyAmount, buyItems) %>%
rename(artist = topArtist, collector = topCollector, sellA = sellAmount, buyA = buyAmount, sellN = sellItems, buyN = buyItems)
return(centrality)
}
screencast = function(sales, rating, t1, t2, k = 10) {
# compute the rating for artists at time $t$
shot1 = screenshot(sales, t1)
# retrieve the ratings of top-k artists at time $t$ (this is our investment)
top = shot1 %>%
arrange(desc(!!sym(rating))) %>%
head(k)
# compute the sale increase for the top-k artists moving from time $t$ to time $t+1$ (this is our gain)
shot2 = screenshot(sales, t2)
df1 = top %>%
select(username:sellN) %>%
rename(sellA1 = sellA)
df2 = shot2 %>%
select(username, sellA) %>%
rename(sellA2 = sellA)
df3 = left_join(df1, df2, by = "username") %>%
mutate(gain = sellA2 - sellA1)
# evaluate the rating using the mean gain
if (rating == "sellA") rating = "sellA1"
x = as.vector(select(df3, !!sym(rating)))
x = x / sum(x)
y = df3$gain
gain = sum(x * y)
return(gain)
}
times = 1:10
gain1 = numeric(length(times))
gain2 = numeric(length(times))
rating1 = "artist"
rating2 = "sellA"
start = date("2018-06-01")
t1 = start
k = 10
days = 30
for(i in times) {
t2 = t1 + days
gain1[i] = screencast(SuperRareAllSales, rating1, t1, t2, k)
gain2[i] = screencast(SuperRareAllSales, rating2, t1, t2, k)
t1 = t2
}
gain1
## [1] 1.8916723 5.3698037 0.7981016 1.6808571 0.4536050 1.4420704 1.6977538
## [8] 5.0661565 4.9015678 4.3511979
gain2
## [1] 0.7870135 2.5249745 0.3773336 0.9130019 0.4481062 1.5074671 1.9415715
## [8] 5.0327734 3.9960586 4.3244226
mean(gain1)
## [1] 2.765279
mean(gain2)
## [1] 2.185272
t = rep(start, length(times))
s = seq(from = 0, by = days, length.out = length(times))
t = t + s
df = tibble(t, gain1, gain2)
ggplot(df, aes(x = t)) +
geom_line(aes(y = gain1), color = "red") +
geom_line(aes(y = gain2), color = "blue") +
labs(x = "time", y = "gain") +
theme_bw()
times = 1:10
gain1 = numeric(length(times))
gain2 = numeric(length(times))
rating1 = "artist"
rating2 = "sellN"
start = date("2018-06-01")
t1 = start
k = 10
days = 30
for(i in times) {
t2 = t1 + days
gain1[i] = screencast(SuperRareAllSales, rating1, t1, t2, k)
gain2[i] = screencast(SuperRareAllSales, rating2, t1, t2, k)
t1 = t2
}
gain1
## [1] 1.8916723 5.3698037 0.7981016 1.6808571 0.4536050 1.4420704 1.6977538
## [8] 5.0661565 4.9015678 4.3511979
gain2
## [1] 0.4302326 1.2968750 0.5665444 1.0702892 0.5082000 2.2345175 1.8580000
## [8] 4.5803861 4.2568478 3.9677223
mean(gain1)
## [1] 2.765279
mean(gain2)
## [1] 2.076961
t = rep(start, length(times))
s = seq(from = 0, by = days, length.out = length(times))
t = t + s
df = tibble(t, gain1, gain2)
ggplot(df, aes(x = t)) +
geom_line(aes(y = gain1), color = "red") +
geom_line(aes(y = gain2), color = "blue") +
labs(x = "time", y = "gain") +
theme_bw()
times = 1:10
gain1 = numeric(length(times))
gain2 = numeric(length(times))
rating1 = "artist"
rating2 = "authority"
start = date("2018-06-01")
t1 = start
k = 10
days = 30
for(i in times) {
t2 = t1 + days
gain1[i] = screencast(SuperRareAllSales, rating1, t1, t2, k)
gain2[i] = screencast(SuperRareAllSales, rating2, t1, t2, k)
t1 = t2
}
gain1
## [1] 1.8916723 5.3698037 0.7981016 1.6808571 0.4536050 1.4420704 1.6977538
## [8] 5.0661565 4.9015678 4.3511979
gain2
## [1] 1.9629709 5.5361812 0.2458114 0.5898137 0.3718380 0.8074512 2.2059794
## [8] 6.1925742 4.7229068 4.6559027
mean(gain1)
## [1] 2.765279
mean(gain2)
## [1] 2.729143
t = rep(start, length(times))
s = seq(from = 0, by = days, length.out = length(times))
t = t + s
df = tibble(t, gain1, gain2)
ggplot(df, aes(x = t)) +
geom_line(aes(y = gain1), color = "red") +
geom_line(aes(y = gain2), color = "blue") +
labs(x = "time", y = "gain") +
theme_bw()