From 10092cbc5de158478b2a11ab316866d9d38c7b72 Mon Sep 17 00:00:00 2001 From: Conor Flynn Date: Tue, 18 Apr 2023 18:05:29 -0400 Subject: [PATCH 1/2] Add in finalized example files --- R-Code-Samples/DataEnginePrimaryFunctions.Rmd | 249 +++++++--- .../ExampleUserClusteringStarter.Rmd | 73 +-- R-Code-Samples/GetTransactions.Rmd | 449 ++++++++++++++++++ 3 files changed, 660 insertions(+), 111 deletions(-) create mode 100644 R-Code-Samples/GetTransactions.Rmd diff --git a/R-Code-Samples/DataEnginePrimaryFunctions.Rmd b/R-Code-Samples/DataEnginePrimaryFunctions.Rmd index fded60e7..ecabf2d4 100644 --- a/R-Code-Samples/DataEnginePrimaryFunctions.Rmd +++ b/R-Code-Samples/DataEnginePrimaryFunctions.Rmd @@ -21,14 +21,28 @@ local({r <- getOption("repos") # Set code chunk defaults knitr::opts_chunk$set(echo = TRUE) +library(ggplot2) +library(knitr) library(plyr) library(dplyr) -library(httr) library(jsonlite) +library(RColorBrewer) +library(tidyverse) +library(beeswarm) +library(ggbeeswarm) +library(xts) +library(plotly) library(lubridate) -library(stringr) -# library(tidyr) -library(knitr) +library(survival) +library(survminer) +library(ranger) +library(ggfortify) +library(factoextra) +library(cluster) +library(fclust) +library(ppclust) +library(e1071) +library(randomNames) # Load required packages; install if necessary # CAUTION: DO NOT interrupt R as it installs packages!! @@ -36,85 +50,100 @@ library(knitr) ``` We provide a function to request and parse data from our DeFi data engine living on the IDEA Cluster. This initializes a data stream from the Amber Data API, opens a socket, requests data, listens on the socket, and then parses the received data. The finished dataframe is as close as possible to the schema of the cold-storage data we currently use. -```{R} - -request <- function(protocol, properties = "", headers = "", startdate = "", enddate = "") { - #Create socket and get destination which tells the engine where to put the data - socket <- socketConnection("defi-de.idea.rpi.edu", 61200, blocking=TRUE) - #socket <- socketConnection("localhost", 61200, blocking=TRUE) - destination <- readLines(socket, 1) - - formatted_properties = "" - if(properties != "") - formatted_properties = paste("properties", "&&&", properties, "&&&") - - formatted_headers = "" - if(headers != "") - formatted_headers = paste("headers", "&&&", headers, "&&&") - - formatted_startdate = "" - formatted_enddate = "" - if(startdate != "" && enddate != "") { - formatted_startdate = paste("start_date", "&&&", startdate, "&&&") - formatted_enddate = paste("end_date", "&&&", enddate, "&&&") - } - - #Build the request delimited by &&& - #Similar to a GET request in the way we handle parameters - request.raw <- paste( - "SRC", "&&&", "RQST", "&&&", - "type", "&&&", protocol, "&&&", - formatted_properties, - formatted_headers, - formatted_startdate, - formatted_enddate, - "destination", "&&&", destination, "&&&", - "\n", sep="") - - # remove all spaces from request - request.data <- str_replace_all(request.raw, " ", "") - - #Write this request back to the socket to tell engine what we want - writeLines(request.data, socket) - - # define data frame - df = data.frame() - temp.df = data.frame() - - #Now the engine will begin feeding us data - #We grab the first to initialize the data var and then we continue listening\ - counter <- 0 - response <- "" - while (TRUE) { - temp <- readLines(socket, 1) +```{r} +request_deprecated <- function(protocol, properties = "", headers = "", startdate = "", enddate = "") { + suppressWarnings({ + #Create socket and get destination which tells the engine where to put the data + socket <- socketConnection("defi-de.idea.rpi.edu", 61200, blocking=TRUE) + # socket <- socketConnection("localhost", 61200, blocking=TRUE) + destination <- readLines(socket, 1) + + formatted_properties = "" + if(properties != "") + formatted_properties = paste("properties", "&&&", properties, "&&&") - # if line is heartbeat then acknowledge and continue - if (grepl("<<>>", temp, fixed=TRUE)) - { - print(paste("Heartbeat read")) - next + formatted_headers = "" + if(headers != "") + formatted_headers = paste("headers", "&&&", headers, "&&&") + + formatted_startdate = "" + formatted_enddate = "" + if(startdate != "" && enddate != "") { + formatted_startdate = paste("start_date", "&&&", startdate, "&&&") + formatted_enddate = paste("end_date", "&&&", enddate, "&&&") } - # if line is response then process and terminate - else if (grepl("<<>>", temp, fixed=TRUE)) - { + #Build the request delimited by &&& + #Similar to a GET request in the way we handle parameters + request.raw <- paste( + "SRC", "&&&", "RQST", "&&&", + "type", "&&&", protocol, "&&&", + formatted_properties, + formatted_headers, + formatted_startdate, + formatted_enddate, + "destination", "&&&", destination, "&&&", + "\n", sep="") + + # remove all spaces from request + request.data <- str_replace_all(request.raw, " ", "") + + #Write this request back to the socket to tell engine what we want + writeLines(request.data, socket) + + # define data frame + df = data.frame() + # temp.df = data.frame() + + #Now the engine will begin feeding us data + #We grab the first to initialize the data var and then we continue listening\ + counter <- 0 + response <- "" + json <- "{\"data\":[" + while (TRUE) { temp <- readLines(socket, 1) - response <- fromJSON(temp) - break + + if(temp == '') + { + print("Read empty string. Check engine logs or refresh configuration.") + next + } + + # if line is heartbeat then acknowledge and continue + if (grepl("<<>>", temp, fixed=TRUE)) + { + print(paste("Heartbeat read for", protocol, sep=" ")) + next + } + + # if line is response then process and terminate + else if (grepl("<<>>", temp, fixed=TRUE)) + { + temp <- readLines(socket, 1) + response <- fromJSON(temp) + break + } + + # increment processed line counter + counter <- counter + 1 + if(counter %% 1000 == 0) + print(paste("Processed", counter, "lines for", protocol)) + + # add data point line to data frame + # temp.df <- as.data.frame(fromJSON(temp)) + # df <- rbind.fill(df, temp.df) + json <- paste(json, temp, ",", sep="") } - # increment processed line counter - counter <- counter + 1 - if(counter %% 1000 == 0) - print(paste("Processed", counter, "lines")) + json <- substr(json, 0, str_length(json) - 1) + json <- paste(json, "]}") - # add data point line to data frame - temp.df <- as.data.frame(fromJSON(temp)) - df <- rbind.fill(df, temp.df) - } - - output <- list("response"=response, "df"=df) - return(output) + df <- as.data.frame(fromJSON(json)) + + output <- list("response"=response, "df"=df) + close(socket) + return(output) + }) } ``` @@ -125,11 +154,73 @@ With this function, we can now get our data. # aave.df <- aave$df # aave.response <- aave$response -aave.liquidations <- request("amberdata-aave-protocol", "action,LiquidationCall", "x-api-key,UAK7ed69235426c360be22bfc2bde1809b6", "2022-07-01", "2023-09-01") -aave.liquidations.df <- aave.liquidations$df -aave.liquidations.response <- aave.liquidations$response +# aave.liquidations <- request("amberdata-aave-protocol", "action,LiquidationCall", "x-api-key,UAK7ed69235426c360be22bfc2bde1809b6", "2022-07-01", "2023-09-01") +# aave.liquidations.df <- aave.liquidations$df +# aave.liquidations.response <- aave.liquidations$response # sushiswap <- request("amberdata-sushiswap-protocol", "", "x-api-key,UAK7ed69235426c360be22bfc2bde1809b6", "2022-08-01", "2022-08-03") # sushiswap.df <- sushiswap$df # sushiswap.response <- sushiswap$response + +start_date <- "2022-01-01" +end_date <- "2022-01-10" + +graph.borrows.df <- request("graph-aave-borrows", "", "", start_date, end_date)$df + +graph.collaterals.df <- request("graph-aave-collaterals", "", "", start_date, end_date)$df + +graph.deposits.df <- request("graph-aave-deposits", "", "", start_date, end_date)$df + +graph.flashloans.df <- request("graph-aave-flash-loans", "", "", start_date, end_date)$df + +graph.liquidations.df <- request("graph-aave-liquidations", "", "", start_date, end_date)$df + +graph.pricehist.df <- request("graph-aave-price-history-items", "", "", start_date, end_date)$df + +graph.redeems.df <- request("graph-aave-redeems", "", "", start_date, end_date)$df + +graph.repays.df <- request("graph-aave-repays", "", "", start_date, end_date)$df + +graph.reservehist.df <- request("graph-aave-reserve-params-hist-items", "", "", start_date, end_date)$df + +graph.reserves.df <- request("graph-aave-reserves", "", "")$df + +graph.swaps.df <- request("graph-aave-swaps", "", "", start_date, end_date)$df + +graph.userreserves.df <- request("graph-aave-user-reserves", "", "", start_date, end_date)$df + +graph.users.df <- request("graph-aave-users", "", "")$df +graph.users.df <- addAliases(graph.users.df, graph.users.df$id) +``` + +```{r} +amberdata.addresses.test <- request("amberdata-blockchain-addresses", "hash,0xfaf3af4f551f76af4cde17c3d3708c4f3a69d21e", "x-api-key,UAK7ed69235426c360be22bfc2bde1809b6") +``` + +```{r} +stablecoins <- request("llama-stablecoins", "", "")$df +``` + +```{r} +graph.users.sample <- graph.users.df %>% + select(-alias) +aliases = NULL +set.seed(69420) + +while(length(aliases[,1]) < length(graph.users.sample$id)){ + alias <- randomNames(1000, name.order = "first.last", name.sep = " ", sample.with.replacement = FALSE) + aliases <- aliases %>% + bind_rows(data.frame(alias)) %>% + distinct() +} + +aliases <- aliases %>% + head(length(graph.users.sample$id)) + +userAliases <- bind_cols(graph.users.sample, aliases) %>% + mutate(version = "V2", + deployment = "Mainnet") + +graph.users.sample <- userAliases + ``` \ No newline at end of file diff --git a/R-Code-Samples/ExampleUserClusteringStarter.Rmd b/R-Code-Samples/ExampleUserClusteringStarter.Rmd index bac82c6f..d6fa207f 100644 --- a/R-Code-Samples/ExampleUserClusteringStarter.Rmd +++ b/R-Code-Samples/ExampleUserClusteringStarter.Rmd @@ -100,15 +100,28 @@ request <- function(protocol, properties = "", headers = "", startdate = "", end response <- "" while (TRUE) { temp <- readLines(socket, 1) - counter <- counter + 1 - if(counter %% 1000 == 0) - print(paste("Processed", counter, "lines")) - if (grepl("<<>>", temp, fixed=TRUE)) + + # if line is heartbeat then acknowledge and continue + if (grepl("<<>>", temp, fixed=TRUE)) + { + print(paste("Heartbeat read")) + next + } + + # if line is response then process and terminate + else if (grepl("<<>>", temp, fixed=TRUE)) { temp <- readLines(socket, 1) response <- fromJSON(temp) break } + + # increment processed line counter + counter <- counter + 1 + if(counter %% 1000 == 0) + print(paste("Processed", counter, "lines")) + + # add data point line to data frame temp.df <- as.data.frame(fromJSON(temp)) df <- rbind.fill(df, temp.df) } @@ -118,25 +131,23 @@ request <- function(protocol, properties = "", headers = "", startdate = "", end } ``` +# Load the transaction data from IDEA: ```{r} -#We make a sample call to the function which will return all transactions 2022 August 1st to August 3rd -# aave <- request("amberdata-aave-protocol", "", "x-api-key,UAK7ed69235426c360be22bfc2bde1809b6", "2022-08-01", "2022-08-02") -# aave.df <- aave$df -# aave.response <- aave$response - -sushiswap <- request("amberdata-sushiswap-protocol", "", "x-api-key,UAK7ed69235426c360be22bfc2bde1809b6", "2022-08-01", "2022-08-03") -sushiswap.df <- sushiswap$df -sushiswap.response <- sushiswap$response -``` +# These are the generic names for the files with the associated information for all AAVE deployments: -# Set df to the loaded Aave set: -```{r, warning=FALSE} -df <- aave.df -``` +# Load the mainnet data: +# retrieve stable coins from defillama +stablecoins <- request("llama-stablecoins", "", "")$df +mainnetTransactions <- transactions$df +mainnetReserveInfo <- data$reserves$df %>% + mutate(reserveType = case_when(symbol %in% stablecoins$symbol ~ "Stable", + TRUE ~ "Non-Stable")) -# Set user ailas': -```{r} +mainnetTransactions <- mainnetTransactions %>% + mutate(timestamp=as.numeric(timestamp)) %>% + mutate(datetime = as_datetime(timestamp)) +df <- mainnetTransactions ``` # Clustering Users: @@ -149,31 +160,31 @@ not_all_na <- function(x) any(!is.na(x)) `%notin%` <- Negate(`%in%`) borrows <- df %>% - filter(action == "Borrow") %>% + filter(type == "borrow") %>% select(where(not_all_na)) repays <- df %>% - filter(action == "Repay") %>% + filter(type == "repay") %>% select(where(not_all_na)) deposits <- df %>% - filter(action == "Deposit") %>% + filter(type == "deposit") %>% select(where(not_all_na)) redeems <- df %>% - filter(action == "Redeem") %>% + filter(type == "redeem") %>% select(where(not_all_na)) liquidations <- df %>% - filter(action == "LiquidationCall") %>% + filter(type == "liquidation") %>% select(where(not_all_na)) swaps <- df %>% - filter(action == "SwapBorrowRateMode") %>% + filter(type == "swap") %>% select(where(not_all_na)) collaterals <- df %>% - filter(action == "UseReserveAsCollateral") %>% + filter(type == "collateral") %>% select(where(not_all_na)) liquidationsPerformed <- liquidations %>% @@ -182,12 +193,10 @@ liquidationsPerformed <- liquidations %>% rename(user = liquidator, userAlias = liquidatorAlias) reserveTypes <- mainnetReserveInfo %>% - select(reserve = symbol, stable) %>% - mutate(reserveType = case_when(stable ~ "Stable", - TRUE ~ "Non-Stable")) %>% - select(-stable) + select(reserve=symbol, reserveType) df2 <- left_join(df, reserveTypes, by="reserve") %>% + mutate(timestamp=as.numeric(timestamp)) %>% distinct() numLiqPerUser <- liquidations %>% @@ -213,7 +222,7 @@ aggregateLiquidations <- df2 %>% mutate(reserve = principalReserve) %>% left_join(reserveTypes, by = "reserve") %>% dplyr::rename(principalType = reserveType) %>% - mutate(totalCollateralUSD = sum(amountUSDCollateral), totalPrincipalUSD = sum(amountUSDPrincipal))%>% + mutate(totalCollateralUSD = sum(collateralAmountUSD), totalPrincipalUSD = sum(principalAmountUSD))%>% dplyr::mutate(numLiquidations = n()) %>% dplyr::summarise(userAlias, numLiquidations, liquidationDuration, liquidationStart, liquidationEnd, liquidationStartDatetime, liquidationEndDatetime, collateralReserves = str_flatten(str_sort(unique(collateralReserve)), collapse = ","), @@ -275,7 +284,7 @@ userLiquidatedCounts <- aggregateLiquidations %>% userLiquidationCounts <- liquidationsPerformed %>% group_by(user) %>% - dplyr::summarise(liquidationsPerformed = n(), liquidationsPerformedValue = sum(amountUSDCollateral)) + dplyr::summarise(liquidationsPerformed = n(), liquidationsPerformedValue = sum(collateralAmountUSD)) userSwapCounts <- swaps %>% group_by(user) %>% diff --git a/R-Code-Samples/GetTransactions.Rmd b/R-Code-Samples/GetTransactions.Rmd new file mode 100644 index 00000000..67233480 --- /dev/null +++ b/R-Code-Samples/GetTransactions.Rmd @@ -0,0 +1,449 @@ +--- +title: "GetTransactions" +author: "Conor Flynn" +date: "2023-04-17" +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +## Define necessary libraries +```{r} +library(knitr) +library(plyr) +library(dplyr) +library(jsonlite) +library(stringr) +library(tidyr) +``` + +## Define request function +```{r} +request <- function(protocol, properties = "", headers = "", startdate = "", enddate = "") { + suppressWarnings({ + #Create socket and get destination which tells the engine where to put the data + socket <- socketConnection("defi-de.idea.rpi.edu", 61200, blocking=TRUE) + destination <- readLines(socket, 1) + + formatted_properties = "" + if(properties != "") + formatted_properties = paste("properties", "&&&", properties, "&&&") + + formatted_headers = "" + if(headers != "") + formatted_headers = paste("headers", "&&&", headers, "&&&") + + formatted_startdate = "" + formatted_enddate = "" + if(startdate != "" && enddate != "") { + formatted_startdate = paste("start_date", "&&&", startdate, "&&&") + formatted_enddate = paste("end_date", "&&&", enddate, "&&&") + } + + #Build the request delimited by &&& + #Similar to a GET request in the way we handle parameters + request.raw <- paste( + "SRC", "&&&", "RQST", "&&&", + "type", "&&&", protocol, "&&&", + formatted_properties, + formatted_headers, + formatted_startdate, + formatted_enddate, + "destination", "&&&", destination, "&&&", + "\n", sep="") + + # remove all spaces from request + request.data <- str_replace_all(request.raw, " ", "") + + #Write this request back to the socket to tell engine what we want + writeLines(request.data, socket) + + # define data frame + df = data.frame() + temp.df = data.frame() + + #Now the engine will begin feeding us data + #We grab the first to initialize the data var and then we continue listening\ + counter <- 0 + response <- "" + while (TRUE) { + temp <- readLines(socket, 1) + + if(temp == '') + { + print("Read empty string. Check engine logs or refresh configuration.") + next + } + + # if line is heartbeat then acknowledge and continue + if (grepl("<<>>", temp, fixed=TRUE)) + { + print(paste("Heartbeat read for", protocol, sep=" ")) + next + } + + # if line is response then process and terminate + else if (grepl("<<>>", temp, fixed=TRUE)) + { + temp <- readLines(socket, 1) + response <- fromJSON(temp) + break + } + + # increment processed line counter + counter <- counter + 1 + if(counter %% 1000 == 0) + print(paste("Processed", counter, "lines for", protocol)) + + # add data point line to data frame + temp.df <- as.data.frame(fromJSON(temp)) + df <- rbind.fill(df, temp.df) + } + + output <- list("response"=response, "df"=df) + close(socket) + return(output) + }) +} +``` + +## Define get_users function +```{r} +get_users <- function() { + # retrieve users and then parse with aliases + graph.users <- request("graph-aave-users", "", "") + if(graph.users$response$code != 200) + return(graph.users) + + # define data frame + graph.users.df <- graph.users$df + + aliases = NULL + set.seed(69420) + + while(length(aliases[,1]) < length(graph.users.df$id)){ + alias <- randomNames(1000, name.order = "first.last", name.sep = " ", sample.with.replacement = FALSE) + aliases <- aliases %>% + bind_rows(data.frame(alias)) %>% + distinct() + } + + aliases <- aliases %>% + head(length(graph.users.df$id)) + + userAliases <- bind_cols(graph.users, aliases) %>% + mutate(version = "V2", + deployment = "Mainnet") + + userAliases +} +``` + +## Define get_data function +```{r} +get_data <- function(start_date, end_date, users=NULL) { + ### retrieve all data from defi engine: + + # submit a request + graph.borrows <- request("graph-aave-borrows", "", "", start_date, end_date) + # for each request validate the code was successful + # if not the return the response with an empty dataframe + if(graph.borrows$response$code != 200) + return(graph.borrows) + + # repeat for all following requests + graph.collaterals <- request("graph-aave-collaterals", "", "", start_date, end_date) + if(graph.collaterals$response$code != 200) + return(graph.collaterals) + + graph.deposits <- request("graph-aave-deposits", "", "", start_date, end_date) + if(graph.deposits$response$code != 200) + return(graph.deposits) + + graph.flashloans <- request("graph-aave-flash-loans", "", "", start_date, end_date) + if(graph.flashloans$response$code != 200) + return(graph.flashloans) + + graph.liquidations <- request("graph-aave-liquidations", "", "", start_date, end_date) + if(graph.liquidations$response$code != 200) + return(graph.liquidations) + + graph.pricehist <- request("graph-aave-price-history-items", "", "", start_date, end_date) + if(graph.pricehist$response$code != 200) + return(graph.pricehist) + + graph.redeems <- request("graph-aave-redeems", "", "", start_date, end_date) + if(graph.redeems$response$code != 200) + return(graph.redeems) + + graph.repays <- request("graph-aave-repays", "", "", start_date, end_date) + if(graph.repays$response$code != 200) + return(graph.repays) + + graph.reservehist <- request("graph-aave-reserve-params-hist-items", "", "", start_date, end_date) + if(graph.reservehist$response$code != 200) + return(graph.reservehist) + + graph.reserves <- request("graph-aave-reserves", "", "") + if(graph.reserves$response$code != 200) + return(graph.reserves) + + graph.swaps <- request("graph-aave-swaps", "", "", start_date, end_date) + if(graph.swaps$response$code != 200) + return(graph.swaps) + + graph.userreserves <- request("graph-aave-user-reserves", "", "", start_date, end_date) + if(graph.userreserves$response$code != 200) + return(graph.userreserves) + + # retrieve users and then parse with aliases + graph.users <- users + if(is.null(users)) { + graph.users <- request("graph-aave-users", "", "") + if(graph.users$response$code != 200) + return(graph.users) + + # define data frame + graph.users.df <- graph.users$df + + aliases = NULL + set.seed(69420) + + while(length(aliases[,1]) < length(graph.users.df$id)){ + alias <- randomNames(1000, name.order = "first.last", name.sep = " ", sample.with.replacement = FALSE) + aliases <- aliases %>% + bind_rows(data.frame(alias)) %>% + distinct() + } + + aliases <- aliases %>% + head(length(graph.users.df$id)) + + userAliases <- bind_cols(graph.users, aliases) %>% + mutate(version = "V2", + deployment = "Mainnet") + + graph.users <- userAliases + } + + # format into output list + output <- list(borrows=graph.borrows, collaterals=graph.collaterals, deposits=graph.deposits, + flashloans=graph.flashloans, liquidations=graph.liquidations, pricehist=graph.pricehist, + redeems=graph.redeems, repays=graph.repays, reservehist=graph.reservehist, reserves=graph.reserves, + swaps=graph.swaps, userreserves=graph.userreserves, users=graph.users) +} +``` + +## Define get_transactions function +```{r} +# get transactions function includes calls to both AmberData, GraphQL, and DeFiLlama +# Input: +# start_date: +# end_date: +get_transactions <- function(data) { + ### parse all data into proper raw naming conventions + + # load raw transaction tables: + rawBorrows <- data$borrows$df + rawCollaterals <- data$collaterals$df + rawDeposits <- data$deposits$df + rawLiquidations <- data$liquidations$df + rawRedeems <- data$redeems$df + rawRepays <- data$repays$df + rawSwaps <- data$swaps$df + rawFlashLoans <- data$flashloans$df + + # load raw reserve information: + rawReserveInfo <- data$reserves$df + rawReserveParamsHistory <- data$reservehist$df + + # select aliases + aliases <- data$users %>% + select(id, alias) + + # select reserve info + reserveInfo <- rawReserveInfo %>% + select(id, + symbol, + decimals) + + # select reserve parameter history + reserveParamsHistory <- rawReserveParamsHistory %>% + mutate(txID = str_sub(id, start = 1, end = 66)) %>% + left_join(reserveInfo, by = c("reserve" = "id")) + + # mutate and parse borrows df + borrows <- rawBorrows %>% + mutate(type = "borrow", + id = str_extract(id, "0x\\w+")) %>% + left_join(reserveParamsHistory, by = c("id" = "txID", "timestamp", "reserve")) %>% + mutate(amount = as.numeric(amount) / (10^as.numeric(decimals))) %>% + mutate(amountUSD = as.numeric(amount) * as.numeric(priceInUsd)) %>% + mutate(priceInEth = as.numeric(priceInEth) / (10^18)) %>% + mutate(amountETH = as.numeric(amount) * as.numeric(priceInEth)) %>% + mutate(reserve = symbol) %>% + mutate(user = user) %>% + mutate(onBehalfOf = caller) %>% + mutate(borrowRate = as.numeric(borrowRate) / (10^25)) %>% + mutate(pool = pool) %>% + left_join(aliases, by = c("user" = "id")) %>% + rename(userAlias = alias) %>% + left_join(aliases, by = c("onBehalfOf" = "id")) %>% + rename(onBehalfOfAlias = alias) %>% + select(id, type, timestamp, user, userAlias, onBehalfOf, onBehalfOfAlias, pool, + reserve, amount, amountUSD, amountETH, borrowRate, borrowRateMode) %>% + drop_na() %>% + distinct() + + # mutate and parse collaterals df + collaterals <- rawCollaterals %>% + mutate(type = "collateral", + id = str_extract(id, "0x\\w+")) %>% + mutate(user = user, pool = pool) %>% + left_join(reserveParamsHistory, by = c("id" = "txID", "timestamp", "reserve")) %>% + mutate(reserve = symbol) %>% + left_join(aliases, by = c("user" = "id")) %>% + rename(userAlias = alias) %>% + select(id, timestamp, user, userAlias, pool, reserve, fromState, toState, type) %>% + drop_na() %>% + distinct() + + # mutate and parse deposits df + deposits <- rawDeposits %>% + mutate(type = "deposit", + id = str_extract(id, "0x\\w+")) %>% + mutate(user = user, pool = pool) %>% + left_join(reserveParamsHistory, by = c("id" = "txID", "timestamp", "reserve")) %>% + mutate(reserve = symbol) %>% + mutate(onBehalfOf = caller) %>% + mutate(amount = as.numeric(amount) / (10^as.numeric(decimals))) %>% + mutate(amountUSD = as.numeric(amount) * as.numeric(priceInUsd)) %>% + mutate(priceInEth = as.numeric(priceInEth) / (10^18)) %>% + mutate(amountETH = as.numeric(amount) * as.numeric(priceInEth)) %>% + left_join(aliases, by = c("user" = "id")) %>% + rename(userAlias = alias) %>% + left_join(aliases, by = c("onBehalfOf" = "id")) %>% + rename(onBehalfOfAlias = alias) %>% + select(id, timestamp, type, amount, amountUSD, amountETH, reserve, user, userAlias, + onBehalfOf, onBehalfOfAlias, pool) %>% + drop_na() %>% + distinct() + + # mutate and parse liquidations df + liquidations <- rawLiquidations %>% + mutate(type = "liquidation", + id = str_extract(id, "0x\\w+"), + user = user, pool = pool) %>% + left_join(reserveParamsHistory, by = c("id" = "txID", "principalReserve" = "reserve", "timestamp")) %>% + mutate(principalAmount = as.numeric(principalAmount) / 10^as.numeric(decimals), + principalAmountUSD = as.numeric(priceInUsd) * as.numeric(principalAmount), + principalAmountETH = as.numeric(priceInEth) * as.numeric(principalAmount) / 10^18) %>% + select(id, timestamp, type, user, liquidator, pool, + principalAmount, principalReserve = symbol, principalAmountUSD, principalAmountETH, + collateralAmount, collateralReserve) %>% + left_join(reserveParamsHistory, by = c("id" = "txID", "collateralReserve" = "reserve", "timestamp")) %>% + mutate(collateralAmount = as.numeric(collateralAmount) / 10^as.numeric(decimals), + collateralAmountUSD = as.numeric(priceInUsd) * as.numeric(collateralAmount), + collateralAmountETH = as.numeric(priceInEth) * as.numeric(collateralAmount) / 10^18) %>% + select(id, timestamp, type, user, liquidator, pool, + principalAmount, principalReserve, principalAmountUSD, principalAmountETH, + collateralAmount, collateralReserve = symbol, collateralAmountUSD, collateralAmountETH) %>% + left_join(aliases, by = c("user" = "id")) %>% + rename(userAlias = alias) %>% + left_join(aliases, by = c("liquidator" = "id")) %>% + rename(liquidatorAlias = alias) %>% + distinct() + + # mutate and parse redeems df + redeems <- rawRedeems %>% + mutate(type = "redeem", + id = str_extract(id, "0x\\w+")) %>% + mutate(user = user, pool = pool) %>% + left_join(reserveParamsHistory, by = c("id" = "txID", "timestamp", "reserve")) %>% + mutate(reserve = symbol) %>% + mutate(onBehalfOf = to) %>% + mutate(amount = as.numeric(amount) / (10^as.numeric(decimals))) %>% + mutate(amountUSD = as.numeric(amount) * as.numeric(priceInUsd)) %>% + mutate(priceInEth = as.numeric(priceInEth) / (10^18)) %>% + mutate(amountETH = as.numeric(amount) * as.numeric(priceInEth)) %>% + left_join(aliases, by = c("user" = "id")) %>% + rename(userAlias = alias) %>% + left_join(aliases, by = c("onBehalfOf" = "id")) %>% + rename(onBehalfOfAlias = alias) %>% + select(id,timestamp, type, amount, amountUSD, amountETH, reserve, user, userAlias, + onBehalfOf, onBehalfOfAlias, priceInUsd, pool) %>% + drop_na() %>% + distinct() + + # mutate and parse repays df + repays <- rawRepays %>% + mutate(type = "repay", + id = str_extract(id, "0x\\w+"))%>% + mutate(user = user, pool = pool) %>% + left_join(reserveParamsHistory, by = c("id" = "txID", "timestamp", "reserve")) %>% + mutate(reserve = symbol) %>% + mutate(onBehalfOf = repayer) %>% + mutate(amount = as.numeric(amount) / (10^as.numeric(decimals))) %>% + mutate(amountUSD = as.numeric(amount) * as.numeric(priceInUsd)) %>% + mutate(priceInEth = as.numeric(priceInEth) / (10^18)) %>% + mutate(amountETH = as.numeric(amount) * as.numeric(priceInEth)) %>% + left_join(aliases, by = c("user" = "id")) %>% + rename(userAlias = alias) %>% + left_join(aliases, by = c("onBehalfOf" = "id")) %>% + rename(onBehalfOfAlias = alias) %>% + select(id, timestamp, type, amount, amountUSD, amountETH, reserve, user, userAlias, + onBehalfOf, onBehalfOfAlias, priceInUsd, pool) %>% + drop_na() %>% + distinct() + + # mutate and parse swaps df + swaps <- rawSwaps %>% + mutate(type = "swap", + id = str_extract(id, "0x\\w+")) %>% + mutate(user = user, pool = pool) %>% + mutate(stableBorrowRate = as.numeric(stableBorrowRate) / (10^25), variableBorrowRate = as.numeric(variableBorrowRate) / (10^25)) %>% + left_join(reserveParamsHistory, by = c("id" = "txID", "timestamp", "reserve")) %>% + mutate(reserve = symbol) %>% + left_join(aliases, by = c("user" = "id")) %>% + rename(userAlias = alias) %>% + select(id, timestamp, type, reserve, user, userAlias, pool, borrowRateModeTo, + borrowRateModeFrom, stableBorrowRate = stableBorrowRate.x, variableBorrowRate = variableBorrowRate.x) %>% + drop_na() %>% + distinct() + + # mutate and parse flashloans df + flashLoans <- rawFlashLoans %>% + mutate(type = "flashLoan", + id = str_extract(id, "0x\\w+")) %>% + left_join(reserveParamsHistory, by = c("id" = "txID", "timestamp", "reserve")) %>% + mutate(reserve = symbol, + amount = as.numeric(amount)) %>% + select(id, timestamp, type, reserve, target, pool, amount, totalFee, target) %>% + drop_na() %>% + distinct() + + # define binded clean transactions df + cleanedTransactions <- borrows %>% + bind_rows(collaterals) %>% + bind_rows(deposits) %>% + bind_rows(liquidations) %>% + bind_rows(redeems) %>% + bind_rows(repays) %>% + bind_rows(swaps) %>% + bind_rows(flashLoans) + + # return successful output with given successful response code + output <- list("response"=list("code"=200, "data"="", "response"=200, "message"="Successful Response"), "df"=cleanedTransactions) + output +} +``` + + +## Define sample call to get_transactions function +```{r} +users <- get_users() +data <- get_data("2022-01-01", "2022-01-10", users) +transactions <- get_transactions(data) +``` \ No newline at end of file From 66ca7841c26867312d4e2ad5844ef8cb7348117b Mon Sep 17 00:00:00 2001 From: Conor Flynn Date: Tue, 18 Apr 2023 21:11:11 -0400 Subject: [PATCH 2/2] update rmd files --- R-Code-Samples/DataEnginePrimaryFunctions.Rmd | 158 +++----------- .../ExampleUserClusteringStarter.Rmd | 199 ++++++++++-------- R-Code-Samples/GetTransactions.Rmd | 127 ++++++++--- 3 files changed, 246 insertions(+), 238 deletions(-) diff --git a/R-Code-Samples/DataEnginePrimaryFunctions.Rmd b/R-Code-Samples/DataEnginePrimaryFunctions.Rmd index ecabf2d4..8717c71e 100644 --- a/R-Code-Samples/DataEnginePrimaryFunctions.Rmd +++ b/R-Code-Samples/DataEnginePrimaryFunctions.Rmd @@ -1,8 +1,8 @@ --- -title: "DeFi Engine Use Example" -subtitle: "aave-protocol-dated function" +title: "Data Engine Primary Request Function" +subtitle: "request function definition" author: "Conor Flynn" -date: "03/27/2023" +date: "04/18/2023" output: pdf_document: default html_document: @@ -12,50 +12,43 @@ output: --- # Start by loading the proper libraries: ```{r setup, include=FALSE} -# Set the default CRAN repository -local({r <- getOption("repos") - r["CRAN"] <- "http://cran.r-project.org" - options(repos=r) -}) - -# Set code chunk defaults -knitr::opts_chunk$set(echo = TRUE) - -library(ggplot2) library(knitr) library(plyr) library(dplyr) library(jsonlite) -library(RColorBrewer) -library(tidyverse) -library(beeswarm) -library(ggbeeswarm) -library(xts) -library(plotly) -library(lubridate) -library(survival) -library(survminer) -library(ranger) -library(ggfortify) -library(factoextra) -library(cluster) -library(fclust) -library(ppclust) -library(e1071) -library(randomNames) - -# Load required packages; install if necessary -# CAUTION: DO NOT interrupt R as it installs packages!! - +library(stringr) +library(tidyr) ``` -We provide a function to request and parse data from our DeFi data engine living on the IDEA Cluster. This initializes a data stream from the Amber Data API, opens a socket, requests data, listens on the socket, and then parses the received data. The finished dataframe is as close as possible to the schema of the cold-storage data we currently use. +## Define request function: + +# DESCRIPTION: +# This function serves as the primary request point for making internal requests to the defi data engine. Note that should a request be +# made outside the function, please ensure the necessary format depicted within it is used otherwise an internal error may occur. +# Note that you MUST be connected to the RPI network (such that a connection can be made to defi-de.idea.rpi.edu) otherwise this +# function will not work. + +# INPUT: +# - protocol: The name of the protocol to receive data from. +# [optional] - properties: All properties used in the call (note typically REST API URL parameters). +# [optional] - headers: All headers used in the call (note typically REST API URL headers). +# [optional] - startdate: Starting date to retrieve data from. In format 'yyyy-MM-dd' i.e. 2023-04-01 +# [optional] - enddate: Ending date to retrieve data from (non-inclusive). In format 'yyyy-MM-dd' i.e. 2023-04-01 + +# OUTPUT: +# list containing two elements: +# - $response: Contains all response information as listed below: +# - $response: Value denoting status of call TO the engine. Code 200 denotes connection was received by engine properly. +# - $code: Code returned by engine based on internal schema. Full list of codes can be found here +# (https://github.rpi.edu/DataINCITE/IDEA-DeFi-CRAFT/wiki/Response-Codes) +# - $message: Message response accompanying code should the response be irregular. +# - $data: Data returned by call should any be requested. +# - $df: Data frame containing all data parsed from the call. ```{r} -request_deprecated <- function(protocol, properties = "", headers = "", startdate = "", enddate = "") { +request <- function(protocol, properties = "", headers = "", startdate = "", enddate = "") { suppressWarnings({ #Create socket and get destination which tells the engine where to put the data socket <- socketConnection("defi-de.idea.rpi.edu", 61200, blocking=TRUE) - # socket <- socketConnection("localhost", 61200, blocking=TRUE) destination <- readLines(socket, 1) formatted_properties = "" @@ -93,13 +86,12 @@ request_deprecated <- function(protocol, properties = "", headers = "", startdat # define data frame df = data.frame() - # temp.df = data.frame() + temp.df = data.frame() #Now the engine will begin feeding us data #We grab the first to initialize the data var and then we continue listening\ counter <- 0 response <- "" - json <- "{\"data\":[" while (TRUE) { temp <- readLines(socket, 1) @@ -130,97 +122,13 @@ request_deprecated <- function(protocol, properties = "", headers = "", startdat print(paste("Processed", counter, "lines for", protocol)) # add data point line to data frame - # temp.df <- as.data.frame(fromJSON(temp)) - # df <- rbind.fill(df, temp.df) - json <- paste(json, temp, ",", sep="") + temp.df <- as.data.frame(fromJSON(temp)) + df <- rbind.fill(df, temp.df) } - json <- substr(json, 0, str_length(json) - 1) - json <- paste(json, "]}") - - df <- as.data.frame(fromJSON(json)) - output <- list("response"=response, "df"=df) close(socket) return(output) }) } -``` - -With this function, we can now get our data. -```{R} -#We make a sample call to the function which will return all transactions 2022 August 1st to August 3rd -# aave <- request("amberdata-aave-protocol", "", "x-api-key,UAK7ed69235426c360be22bfc2bde1809b6", "2022-08-01", "2022-08-03") -# aave.df <- aave$df -# aave.response <- aave$response - -# aave.liquidations <- request("amberdata-aave-protocol", "action,LiquidationCall", "x-api-key,UAK7ed69235426c360be22bfc2bde1809b6", "2022-07-01", "2023-09-01") -# aave.liquidations.df <- aave.liquidations$df -# aave.liquidations.response <- aave.liquidations$response - -# sushiswap <- request("amberdata-sushiswap-protocol", "", "x-api-key,UAK7ed69235426c360be22bfc2bde1809b6", "2022-08-01", "2022-08-03") -# sushiswap.df <- sushiswap$df -# sushiswap.response <- sushiswap$response - -start_date <- "2022-01-01" -end_date <- "2022-01-10" - -graph.borrows.df <- request("graph-aave-borrows", "", "", start_date, end_date)$df - -graph.collaterals.df <- request("graph-aave-collaterals", "", "", start_date, end_date)$df - -graph.deposits.df <- request("graph-aave-deposits", "", "", start_date, end_date)$df - -graph.flashloans.df <- request("graph-aave-flash-loans", "", "", start_date, end_date)$df - -graph.liquidations.df <- request("graph-aave-liquidations", "", "", start_date, end_date)$df - -graph.pricehist.df <- request("graph-aave-price-history-items", "", "", start_date, end_date)$df - -graph.redeems.df <- request("graph-aave-redeems", "", "", start_date, end_date)$df - -graph.repays.df <- request("graph-aave-repays", "", "", start_date, end_date)$df - -graph.reservehist.df <- request("graph-aave-reserve-params-hist-items", "", "", start_date, end_date)$df - -graph.reserves.df <- request("graph-aave-reserves", "", "")$df - -graph.swaps.df <- request("graph-aave-swaps", "", "", start_date, end_date)$df - -graph.userreserves.df <- request("graph-aave-user-reserves", "", "", start_date, end_date)$df - -graph.users.df <- request("graph-aave-users", "", "")$df -graph.users.df <- addAliases(graph.users.df, graph.users.df$id) -``` - -```{r} -amberdata.addresses.test <- request("amberdata-blockchain-addresses", "hash,0xfaf3af4f551f76af4cde17c3d3708c4f3a69d21e", "x-api-key,UAK7ed69235426c360be22bfc2bde1809b6") -``` - -```{r} -stablecoins <- request("llama-stablecoins", "", "")$df -``` - -```{r} -graph.users.sample <- graph.users.df %>% - select(-alias) -aliases = NULL -set.seed(69420) - -while(length(aliases[,1]) < length(graph.users.sample$id)){ - alias <- randomNames(1000, name.order = "first.last", name.sep = " ", sample.with.replacement = FALSE) - aliases <- aliases %>% - bind_rows(data.frame(alias)) %>% - distinct() -} - -aliases <- aliases %>% - head(length(graph.users.sample$id)) - -userAliases <- bind_cols(graph.users.sample, aliases) %>% - mutate(version = "V2", - deployment = "Mainnet") - -graph.users.sample <- userAliases - ``` \ No newline at end of file diff --git a/R-Code-Samples/ExampleUserClusteringStarter.Rmd b/R-Code-Samples/ExampleUserClusteringStarter.Rmd index d6fa207f..0b767a6f 100644 --- a/R-Code-Samples/ExampleUserClusteringStarter.Rmd +++ b/R-Code-Samples/ExampleUserClusteringStarter.Rmd @@ -1,5 +1,5 @@ --- -title: 'DAR F22 - User Clustering Starter' +title: 'User Clustering Start Example' author: "Your Name Here" subtitle: "Making Users from Transaction Data and Clustering" output: @@ -10,22 +10,6 @@ output: --- ```{r setup, include=FALSE, warning=FALSE} -# Set the default CRAN repository -local({r <- getOption("repos") - r["CRAN"] <- "http://cran.r-project.org" - options(repos=r) -}) - -# Set code chunk defaults -knitr::opts_chunk$set(echo = TRUE) - -# Load required packages; install if necessary -# CAUTION: DO NOT interrupt R as it installs packages!! -if (!require("ggplot2")) { - install.packages("ggplot2") - -} - library(ggplot2) library(knitr) library(plyr) @@ -50,84 +34,120 @@ library(e1071) library(randomNames) ``` -We provide a function to request and parse data from our DeFi data engine living on the IDEA Cluster. This initializes a data stream from the Amber Data API, opens a socket, requests data, listens on the socket, and then parses the received data. The finished dataframe is as close as possible to the schema of the cold-storage data we currently use. -```{R} +## Define request function: + +# NOTE: +# This function is officially defined in DataEnginePrimaryFunctions.Rmd and does not need to be redefined +# for every use-case. Here it is defined for and because there is a request made. + +# DESCRIPTION: +# This function serves as the primary request point for making internal requests to the defi data engine. Note that should a request be +# made outside the function, please ensure the necessary format depicted within it is used otherwise an internal error may occur. +# Note that you MUST be connected to the RPI network (such that a connection can be made to defi-de.idea.rpi.edu) otherwise this +# function will not work. + +# INPUT: +# - protocol: The name of the protocol to receive data from. +# [optional] - properties: All properties used in the call (note typically REST API URL parameters). +# [optional] - headers: All headers used in the call (note typically REST API URL headers). +# [optional] - startdate: Starting date to retrieve data from. In format 'yyyy-MM-dd' i.e. 2023-04-01 +# [optional] - enddate: Ending date to retrieve data from (non-inclusive). In format 'yyyy-MM-dd' i.e. 2023-04-01 + +# OUTPUT: +# list containing two elements: +# - $response: Contains all response information as listed below: +# - $response: Value denoting status of call TO the engine. Code 200 denotes connection was received by engine properly. +# - $code: Code returned by engine based on internal schema. Full list of codes can be found here +# (https://github.rpi.edu/DataINCITE/IDEA-DeFi-CRAFT/wiki/Response-Codes) +# - $message: Message response accompanying code should the response be irregular. +# - $data: Data returned by call should any be requested. +# - $df: Data frame containing all data parsed from the call. +```{r} request <- function(protocol, properties = "", headers = "", startdate = "", enddate = "") { - #Create socket and get destination which tells the engine where to put the data - socket <- socketConnection("defi-de.idea.rpi.edu", 61200, blocking=TRUE) - destination <- readLines(socket, 1) - - formatted_properties = "" - if(properties != "") - formatted_properties = paste("properties", "&&&", properties, "&&&") - - formatted_headers = "" - if(headers != "") - formatted_headers = paste("headers", "&&&", headers, "&&&") - - formatted_startdate = "" - formatted_enddate = "" - if(startdate != "" && enddate != "") { - formatted_startdate = paste("start_date", "&&&", startdate, "&&&") - formatted_enddate = paste("end_date", "&&&", enddate, "&&&") - } - - #Build the request delimited by &&& - #Similar to a GET request in the way we handle parameters - request.raw <- paste( - "SRC", "&&&", "RQST", "&&&", - "type", "&&&", protocol, "&&&", - formatted_properties, - formatted_headers, - formatted_startdate, - formatted_enddate, - "destination", "&&&", destination, "&&&", - "\n", sep="") - - # remove all spaces from request - request.data <- str_replace_all(request.raw, " ", "") - - #Write this request back to the socket to tell engine what we want - writeLines(request.data, socket) - - # define data frame - df = data.frame() - temp.df = data.frame() - - #Now the engine will begin feeding us data - #We grab the first to initialize the data var and then we continue listening\ - counter <- 0 - response <- "" - while (TRUE) { - temp <- readLines(socket, 1) + suppressWarnings({ + #Create socket and get destination which tells the engine where to put the data + socket <- socketConnection("defi-de.idea.rpi.edu", 61200, blocking=TRUE) + destination <- readLines(socket, 1) - # if line is heartbeat then acknowledge and continue - if (grepl("<<>>", temp, fixed=TRUE)) - { - print(paste("Heartbeat read")) - next + formatted_properties = "" + if(properties != "") + formatted_properties = paste("properties", "&&&", properties, "&&&") + + formatted_headers = "" + if(headers != "") + formatted_headers = paste("headers", "&&&", headers, "&&&") + + formatted_startdate = "" + formatted_enddate = "" + if(startdate != "" && enddate != "") { + formatted_startdate = paste("start_date", "&&&", startdate, "&&&") + formatted_enddate = paste("end_date", "&&&", enddate, "&&&") } - # if line is response then process and terminate - else if (grepl("<<>>", temp, fixed=TRUE)) - { + #Build the request delimited by &&& + #Similar to a GET request in the way we handle parameters + request.raw <- paste( + "SRC", "&&&", "RQST", "&&&", + "type", "&&&", protocol, "&&&", + formatted_properties, + formatted_headers, + formatted_startdate, + formatted_enddate, + "destination", "&&&", destination, "&&&", + "\n", sep="") + + # remove all spaces from request + request.data <- str_replace_all(request.raw, " ", "") + + #Write this request back to the socket to tell engine what we want + writeLines(request.data, socket) + + # define data frame + df = data.frame() + temp.df = data.frame() + + #Now the engine will begin feeding us data + #We grab the first to initialize the data var and then we continue listening\ + counter <- 0 + response <- "" + while (TRUE) { temp <- readLines(socket, 1) - response <- fromJSON(temp) - break + + if(temp == '') + { + print("Read empty string. Check engine logs or refresh configuration.") + next + } + + # if line is heartbeat then acknowledge and continue + if (grepl("<<>>", temp, fixed=TRUE)) + { + print(paste("Heartbeat read for", protocol, sep=" ")) + next + } + + # if line is response then process and terminate + else if (grepl("<<>>", temp, fixed=TRUE)) + { + temp <- readLines(socket, 1) + response <- fromJSON(temp) + break + } + + # increment processed line counter + counter <- counter + 1 + if(counter %% 1000 == 0) + print(paste("Processed", counter, "lines for", protocol)) + + # add data point line to data frame + temp.df <- as.data.frame(fromJSON(temp)) + df <- rbind.fill(df, temp.df) } - # increment processed line counter - counter <- counter + 1 - if(counter %% 1000 == 0) - print(paste("Processed", counter, "lines")) - - # add data point line to data frame - temp.df <- as.data.frame(fromJSON(temp)) - df <- rbind.fill(df, temp.df) - } - - output <- list("response"=response, "df"=df) - return(output) + output <- list("response"=response, "df"=df) + close(socket) + return(output) + }) } ``` @@ -135,9 +155,10 @@ request <- function(protocol, properties = "", headers = "", startdate = "", end ```{r} # These are the generic names for the files with the associated information for all AAVE deployments: -# Load the mainnet data: # retrieve stable coins from defillama stablecoins <- request("llama-stablecoins", "", "")$df + +# Load the mainnet data: mainnetTransactions <- transactions$df mainnetReserveInfo <- data$reserves$df %>% mutate(reserveType = case_when(symbol %in% stablecoins$symbol ~ "Stable", diff --git a/R-Code-Samples/GetTransactions.Rmd b/R-Code-Samples/GetTransactions.Rmd index 67233480..83f20a14 100644 --- a/R-Code-Samples/GetTransactions.Rmd +++ b/R-Code-Samples/GetTransactions.Rmd @@ -17,9 +17,35 @@ library(dplyr) library(jsonlite) library(stringr) library(tidyr) +library(randomNames) ``` -## Define request function +## Define request function: + +# NOTE: +# This function is officially defined in DataEnginePrimaryFunctions.Rmd and does not need to be redefined +# for every use-case. Here it is defined for and because there are requests made. + +# DESCRIPTION: +# This function serves as the primary request point for making internal requests to the defi data engine. Note that should a request be +# made outside the function, please ensure the necessary format depicted within it is used otherwise an internal error may occur. +# Note that you MUST be connected to the RPI network (such that a connection can be made to defi-de.idea.rpi.edu) otherwise this +# function will not work. + +# INPUT: +# - protocol: The name of the protocol to receive data from. +# [optional] - properties: All properties used in the call (note typically REST API URL parameters). +# [optional] - headers: All headers used in the call (note typically REST API URL headers). +# [optional] - startdate: Starting date to retrieve data from. In format 'yyyy-MM-dd' i.e. 2023-04-01 +# [optional] - enddate: Ending date to retrieve data from (non-inclusive). In format 'yyyy-MM-dd' i.e. 2023-04-01 + +# OUTPUT: +# list containing two elements: +# - $response: Contains all response information as listed below: +# - $response: Value denoting status of call TO the engine. Code 200 denotes connection was received by engine properly. +# - $code: Code returned by engine based on internal schema. Full list of codes can be found here (https://github.rpi.edu/DataINCITE/IDEA-DeFi-CRAFT/wiki/Response-Codes) +# - $message: Message response accompanying code should the response be irregular. +# - $data: Data returned by call should any be requested. ```{r} request <- function(protocol, properties = "", headers = "", startdate = "", enddate = "") { suppressWarnings({ @@ -109,7 +135,26 @@ request <- function(protocol, properties = "", headers = "", startdate = "", end } ``` -## Define get_users function + +## Define get_users function: + +# DESCRIPTION: +# This function loads all user data from The Graph using the request() function defined above. +# Only one call needs to be made to this function so long as the data is loaded in the cache. +# Since it is a process-intensive call, it is recommended to only call this once and then +# pass the output as a parameter to the get_data() function. + +# INPUT: + +# OUTPUT: +# list containing two elements: +# - $response: Contains all response information as listed below: +# - $response: Value denoting status of call TO the engine. Code 200 denotes connection was received by engine properly. +# - $code: Code returned by engine based on internal schema. Full list of codes can be found here +# (https://github.rpi.edu/DataINCITE/IDEA-DeFi-CRAFT/wiki/Response-Codes) +# - $message: Message response accompanying code should the response be irregular. +# - $data: Data returned by call should any be requested. +# - $df: Data frame containing data returned from 'graph-aave-users' request. ```{r} get_users <- function() { # retrieve users and then parse with aliases @@ -137,52 +182,66 @@ get_users <- function() { mutate(version = "V2", deployment = "Mainnet") - userAliases + output <- list("response"=graph.users$response, "df"=userAliases) } ``` -## Define get_data function + +## Define get_data function: + +# DESCRIPTION: +# This function will request all necessary data to compute the transaction dataframe. +# This includes multiple calls to The Graph which can be seen below. + +# INPUT: +# - startdate: Starting date to retrieve data from. In format 'yyyy-MM-dd' i.e. 2023-04-01 +# - enddate: Ending date to retrieve data from (non-inclusive). In format 'yyyy-MM-dd' i.e. 2023-04-01 +# - users: Response from get_users() function that contains all user data. If not passed then it will +# request it in this function. + +# OUTPUT: +# Dataframe containing all returned data. ```{r} -get_data <- function(start_date, end_date, users=NULL) { +get_data <- function(startdate, enddate, users=NULL) { ### retrieve all data from defi engine: # submit a request - graph.borrows <- request("graph-aave-borrows", "", "", start_date, end_date) + graph.borrows <- request("graph-aave-borrows", "", "", startdate, enddate) # for each request validate the code was successful # if not the return the response with an empty dataframe if(graph.borrows$response$code != 200) return(graph.borrows) # repeat for all following requests - graph.collaterals <- request("graph-aave-collaterals", "", "", start_date, end_date) + graph.collaterals <- request("graph-aave-collaterals", "", "", startdate, enddate) if(graph.collaterals$response$code != 200) return(graph.collaterals) - graph.deposits <- request("graph-aave-deposits", "", "", start_date, end_date) + graph.deposits <- request("graph-aave-deposits", "", "", startdate, enddate) if(graph.deposits$response$code != 200) return(graph.deposits) - graph.flashloans <- request("graph-aave-flash-loans", "", "", start_date, end_date) + graph.flashloans <- request("graph-aave-flash-loans", "", "", startdate, enddate) if(graph.flashloans$response$code != 200) return(graph.flashloans) - graph.liquidations <- request("graph-aave-liquidations", "", "", start_date, end_date) + graph.liquidations <- request("graph-aave-liquidations", "", "", startdate, enddate) if(graph.liquidations$response$code != 200) return(graph.liquidations) - graph.pricehist <- request("graph-aave-price-history-items", "", "", start_date, end_date) + graph.pricehist <- request("graph-aave-price-history-items", "", "", startdate, enddate) if(graph.pricehist$response$code != 200) return(graph.pricehist) - graph.redeems <- request("graph-aave-redeems", "", "", start_date, end_date) + graph.redeems <- request("graph-aave-redeems", "", "", startdate, enddate) if(graph.redeems$response$code != 200) return(graph.redeems) - graph.repays <- request("graph-aave-repays", "", "", start_date, end_date) + graph.repays <- request("graph-aave-repays", "", "", startdate, enddate) if(graph.repays$response$code != 200) return(graph.repays) - graph.reservehist <- request("graph-aave-reserve-params-hist-items", "", "", start_date, end_date) + graph.reservehist <- request("graph-aave-reserve-params-hist-items", "", "", startdate, enddate) if(graph.reservehist$response$code != 200) return(graph.reservehist) @@ -190,17 +249,18 @@ get_data <- function(start_date, end_date, users=NULL) { if(graph.reserves$response$code != 200) return(graph.reserves) - graph.swaps <- request("graph-aave-swaps", "", "", start_date, end_date) + graph.swaps <- request("graph-aave-swaps", "", "", startdate, enddate) if(graph.swaps$response$code != 200) return(graph.swaps) - graph.userreserves <- request("graph-aave-user-reserves", "", "", start_date, end_date) + graph.userreserves <- request("graph-aave-user-reserves", "", "", startdate, enddate) if(graph.userreserves$response$code != 200) return(graph.userreserves) # retrieve users and then parse with aliases graph.users <- users if(is.null(users)) { + # retrieve users and then parse with aliases graph.users <- request("graph-aave-users", "", "") if(graph.users$response$code != 200) return(graph.users) @@ -225,7 +285,7 @@ get_data <- function(start_date, end_date, users=NULL) { mutate(version = "V2", deployment = "Mainnet") - graph.users <- userAliases + graph.users <- list("response"=graph.users$response, "df"=userAliases) } # format into output list @@ -236,12 +296,19 @@ get_data <- function(start_date, end_date, users=NULL) { } ``` -## Define get_transactions function +## Define get_transactions function: + +# DESCRIPTION: +# This function takes input returned from the get_data(startdate, enddate, users) function to parse a table with +# all properly formatted transaction data. + +# INPUT: +# - data: Response from the get_data() function containing all properly formatted data. + +# OUTPUT: +# Dataframe containing all formatted transaction data. ```{r} # get transactions function includes calls to both AmberData, GraphQL, and DeFiLlama -# Input: -# start_date: -# end_date: get_transactions <- function(data) { ### parse all data into proper raw naming conventions @@ -260,7 +327,7 @@ get_transactions <- function(data) { rawReserveParamsHistory <- data$reservehist$df # select aliases - aliases <- data$users %>% + aliases <- data$users$df %>% select(id, alias) # select reserve info @@ -441,9 +508,21 @@ get_transactions <- function(data) { ``` -## Define sample call to get_transactions function +## Sample Code: + +# First we make a request to get user data: ```{r} users <- get_users() -data <- get_data("2022-01-01", "2022-01-10", users) +``` + +# Next we write a call to get_data to get all needed data: +```{r} +startdate <- "2022-01-01" +enddate <- "2022-02-01" +data <- get_data(startdate, enddate, users) +``` + +# Finally make a call to get all transactional data: +```{r} transactions <- get_transactions(data) ``` \ No newline at end of file