-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' of https://github.rpi.edu/DataINCITE/IDEA-DeFi-CRAFT
- Loading branch information
Showing
3 changed files
with
2,105 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,92 +1,175 @@ | ||
| ```{R} | ||
| --- | ||
| title: "DeFi Engine Use Example" | ||
| subtitle: "aave-protocol-dated function" | ||
| author: "Kacy Adams" | ||
| date: "12/15/2022" | ||
| output: | ||
| pdf_document: default | ||
| html_document: | ||
| toc: true | ||
| number_sections: true | ||
| df_print: paged | ||
| --- | ||
| # Start by loading the proper libraries: | ||
| ```{r setup, include=FALSE} | ||
| # Set the default CRAN repository | ||
| local({r <- getOption("repos") | ||
| r["CRAN"] <- "http://cran.r-project.org" | ||
| options(repos=r) | ||
| }) | ||
| # Set code chunk defaults | ||
| knitr::opts_chunk$set(echo = TRUE) | ||
| library("httr") | ||
| library("jsonlite") | ||
| library("lubridate") | ||
| # Load required packages; install if necessary | ||
| # CAUTION: DO NOT interrupt R as it installs packages!! | ||
| if (!require("ggplot2")) { | ||
| install.packages("ggplot2") | ||
| library(ggplot2) | ||
| } | ||
| if (!require("httr")) { | ||
| install.packages("httr") | ||
| library(httr) | ||
| } | ||
| if (!require("jsonlite")) { | ||
| install.packages("jsonlite") | ||
| library(jsonlite) | ||
| } | ||
| if (!require("lubridate")) { | ||
| install.packages("lubridate") | ||
| library(lubridate) | ||
| } | ||
| if(!require("dplyr")){ | ||
| install.packages("dplyr") | ||
| library(dplyr) | ||
| } | ||
| if(!require("stringr")){ | ||
| install.packages("stringr") | ||
| library(stringr) | ||
| } | ||
| if(!require("tidyr")){ | ||
| install.packages("tidyr") | ||
| library(tidyr) | ||
| } | ||
| if(!require("knitr")){ | ||
| install.packages("knitr") | ||
| library(knitr) | ||
| } | ||
| ``` | ||
|
|
||
|
|
||
| We provide a function to request and parse data from our DeFi data engine living on the IDEA Cluster. This initializes a data stream from the Amber Data API, opens a socket, requests data, listens on the socket, and then parses the received data. The finished dataframe is as close as possible to the schema of the cold-storage data we currently use. | ||
| ```{R} | ||
| getJson <- function(startdate, enddate) { | ||
| httr::GET(url = "http://defi-de.idea.rpi.edu:8080/defi/v1/rest/initialize?source=amber_data&auth_data=key,UAK7ed69235426c360be22bfc2bde1809b6") | ||
| engine_key <- "b6c810a7f35f4fa0d28258278325b4b5ab82ba79868ab33d01d5c878e13872ec129a91a3fbf702e59c2404f0fb4a53420a3ffb50130c35b4d06b32d81e56c1f4" | ||
| #Initialize data stream with Amber Data API key | ||
| #It is ok to do this multiple times, will always return the same key | ||
| out <- httr::GET(url = "http://defi-de.idea.rpi.edu:8080/defi/v1/rest/initialize?source=amber_data&auth_data=key,UAK7ed69235426c360be22bfc2bde1809b6") | ||
| out <- content(out, "parsed") | ||
| engine_key <- out$data | ||
| #Create socket and get socket_key which tells the engine where to put the data | ||
| socket <- socketConnection("defi-de.idea.rpi.edu", 61200, blocking=TRUE) | ||
| ss <- readLines(socket, 1) | ||
| ss | ||
| socket_key <- readLines(socket, 1) | ||
| #Build the request delimited by &&& | ||
| #Similar to a GET request in the way we handle parameters | ||
| reqString <- paste( | ||
| "SRC&&&RQST&&&destination&&&", ss, | ||
| "SRC&&&RQST&&&destination&&&", socket_key, | ||
| "&&&key&&&", engine_key, "&&&", | ||
| "start_date&&&", startdate, "&&&", | ||
| "end_date&&&", enddate, "&&&", | ||
| "query&&&aave-protocol-dated&&&", | ||
| "request&&&aave-protocol-dated\n", sep="") | ||
| #Write this request back to the socket to tell engine what we want | ||
| writeLines(reqString, socket) | ||
| #Now the engine will begin feeding us data | ||
| #We grab the first line to initialize the data var and then we continue listening | ||
| data <- readLines(socket, 1) | ||
| while (TRUE) { | ||
| temp <- readLines(socket, 1) | ||
| data <- paste(data, temp, "") | ||
| if (grepl("<<<end>>>", temp, fixed=TRUE)) {break} | ||
| } | ||
| data | ||
| data <- str_replace(data, " <<<end>>> ", "") | ||
| data <- str_replace_all(data, " ", "") | ||
| data <- str_replace_all(data, "\\}\\{", "\\},\\{") | ||
| data <- paste("[", data, "]", sep="") | ||
| output <- fromJSON(data) | ||
| output <- output[,-(colnames(output) == "_id")] | ||
| colnames(output)[colnames(output) == "action"] = "type" | ||
| output$type <- as.factor(output$type) | ||
| data <- str_replace(data, " <<<end>>> ", "") #We begin parsing by removing the end character | ||
| data <- str_replace_all(data, " ", "") #We remove all spaces | ||
| data <- str_replace_all(data, "\\}\\{", "\\},\\{") #We add commas in between every object | ||
| data <- paste("[", data, "]", sep="") #and format the string as a json list | ||
| output <- fromJSON(data) #and then it is finally ready to be parsed as a json | ||
| #We then need to match this with our current cold-storage data schema | ||
| output <- output[,-(colnames(output) == "_id")] #We remove the Mongodb data engine ID | ||
| colnames(output)[colnames(output) == "action"] = "type" #Rename the 'action' column to 'type' | ||
| output$type <- as.factor(output$type) #and make this column a factor for a faster df | ||
| #We calculate the date and time via the UNIX epoch timestamp provided in the data | ||
| output <- output %>% mutate(datetime = as_datetime(as.numeric(substr(timestamp, 1, nchar(timestamp)-3)))) | ||
| #We rename to match the desired schema | ||
| colnames(output)[colnames(output) == "assetId"] = "reserveId" | ||
| colnames(output)[colnames(output) == "assetSymbol"] = "reserve" | ||
| #We convert these from chars to numbers (doubles) for obvious reasons | ||
| output$amount <- as.double(output$amount) | ||
| output$borrowRate <- as.double(output$borrowRate) | ||
| output$principalAmount <- as.double(output$principalAmount) | ||
| #We rename to match desired schema | ||
| colnames(output)[colnames(output) == "user"] = "userId" | ||
| #We unite these columns as they are only present in specific actions | ||
| #and our old data refers to them purely as 'onBehalfOfId' | ||
| output <- output %>% unite(col = "onBehalfOfId", onBehalfOf,repayer,initiator, na.rm = TRUE, sep = "") | ||
| #We rename to match desired schema | ||
| colnames(output)[colnames(output) == "collateralAssetId"] = "collateralReserveId" | ||
| colnames(output)[colnames(output) == "collateralAssetSymbol"] = "collateralReserve" | ||
| colnames(output)[colnames(output) == "principalAssetId"] = "principalReserveId" | ||
| colnames(output)[colnames(output) == "principalAssetSymbol"] = "principalReserve" | ||
| output$principalAmount <- as.double(output$principalAmount) | ||
| #We remove column 15 as it is a mongoDB timestamp from the engine that is no longer needed | ||
| output <- output[,-15] | ||
| output | ||
| return(output) | ||
| } | ||
| ``` | ||
|
|
||
| With this function, we can now get our data. | ||
| ```{R} | ||
| temp <- getJson("2022-08-01", "2022-09-01") | ||
| temp | ||
| #We make a sample call to the function which will return all transactions from August 1st to August 3rd, 2022 | ||
| df <- getJson("2022-08-01", "2022-08-04") | ||
| kable(head(df), "simple") | ||
| ``` | ||
|
|
||
| Now that we have our dataframe, let's show that it is usable. We'll plot the daily count of transactions over the dataframe. | ||
| ```{r} | ||
| library(dplyr) | ||
| weeklySummaries <- temp %>% | ||
| mutate(week = floor_date(datetime, unit = "day")) %>% # Add a new column that rounds the date of each transaction down to the nearest week | ||
| group_by(week) %>% # Group the transactions together by the week they were performed. | ||
| dailySummaries <- df %>% | ||
| mutate(day = floor_date(datetime, unit = "day")) %>% # Add a new column that rounds the date of each transaction down to the nearest day | ||
| group_by(day) %>% # Group the transactions together by the day they were performed. | ||
| summarise(transactionCount = n()) # Count the number of transactions in each group. | ||
| ``` | ||
|
|
||
| With these weekly summaries computed, we can simply plot the week on the x-axis and the transaction count on the y-axis to visualize this new representation: | ||
|
|
||
| With these daily summaries computed, we can simply plot the day on the x-axis and the transaction count on the y-axis to visualize this. | ||
| ```{r} | ||
| weeklyTransactionsPlot <- ggplot(data = weeklySummaries, aes(x = week, y = transactionCount)) + geom_line() | ||
| weeklyTransactionsPlot | ||
| dailyTransactionsPlot <- ggplot(data = dailySummaries, aes(x = day, y = transactionCount)) + geom_line() | ||
| dailyTransactionsPlot | ||
| ``` | ||
|
|
||
| We see that we received transactions for each of the three days requested. Remember that the engine parameter 'end_date' is exclusive and as such we receive zero transactions for August 4th. | ||
|
|
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.