Google BigQuery is a cloud-based big data analytics web service for processing very large read-only data sets. BigQuery was designed for analyzing data on the order of billions of rows, using a SQL-like syntax. It runs on the Google Cloud Storage infrastructure.
Here we show an example of using BigQuery from R using the bigrquery package. The example is adapted from the github of the package (read section Billing project to use the code below). We query the Ethereum dataset that contains data on the blockchain Ethereum. See the Kaggle page for more information about the dataset.
library(bigrquery)
library(DBI)
library(dplyr)
library(ggplot2)
library(readr)
# add your project id: free limits are 1 TB of queries / 10 GB of storage.
billing = "your-project-id"
# create connection
con = dbConnect(
bigrquery::bigquery(),
project = "bigquery-public-data",
dataset = "ethereum_blockchain",
billing = billing
)
# list tables of dataset
dbListTables(con)
# get data source
contracts = tbl(con, "contracts")
transactions = tbl(con, "transactions")
# data sources are remote data frames, not local ones
class(contracts)
# ERC721
topERC721_contracts =
contracts %>%
filter(is_erc721 == TRUE) %>%
inner_join(transactions, by = c("address" = "to_address")) %>%
group_by(address) %>%
summarise(tx_count = n(), tx_value = sum(value, na.rm = TRUE)) %>%
arrange(desc(tx_count)) %>%
head(10) %>%
collect() # collect the result (move it from cloud to your computer's memory)
# topERC721_contracts is a local data frame
class(topERC721_contracts)
topERC721_contracts
blocks <- tbl(con, "blocks")
miners = blocks %>%
group_by(miner) %>%
summarize(block_count = n(), tx_count = sum(transaction_count, na.rm = TRUE)) %>%
arrange(desc(block_count)) %>%
head(1000) %>%
collect()
summary(miners$block_count)
ggplot(filter(miners, block_count <= quantile(block_count, 0.75))) +
geom_histogram(aes(x = block_count)) +
theme_bw()
The SuperRare contracts have addresses 0x41a322b28d0ff354040e2cbc676f0320d8c8850d
(old contract) and 0xb932a70a57673d89f4acffbe830e8ed7f75fb9e0
(new contract). The relevant actors are:
Notice that address 0x0000000000000000000000000000000000000000
is used for token minting.
token_transfers = tbl(con, "token_transfers")
SuperRareContract = c("0x41a322b28d0ff354040e2cbc676f0320d8c8850d", "0xb932a70a57673d89f4acffbe830e8ed7f75fb9e0")
SuperRare = token_transfers %>%
filter(token_address %in% SuperRareContract) %>%
collect()
senders =
SuperRare %>%
filter(from_address != "0x0000000000000000000000000000000000000000") %>%
group_by(from_address) %>%
summarize(count = n()) %>%
arrange(desc(count))
receivers =
SuperRare %>%
filter(from_address != "0x0000000000000000000000000000000000000000") %>%
group_by(to_address) %>%
summarize(count = n()) %>%
arrange(desc(count))
senders
receivers
The value of an ERC721 token transfer is the id of the token.
tokens =
SuperRare %>%
filter(from_address != "0x0000000000000000000000000000000000000000") %>%
count(value) %>%
rename(token = value, count = n) %>%
arrange(desc(count))
tokens
# primary market
primary = nrow(filter(tokens, count == 1))
primary / nrow(tokens)
# secondary market
secondary = nrow(filter(tokens, count > 1))
secondary / nrow(tokens)