-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' of https://github.rpi.edu/DataINCITE/IDEA-DeFi-CRAFT
- Loading branch information
Showing
3 changed files
with
2,105 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,92 +1,175 @@ | ||
| ```{R} | ||
| --- | ||
| title: "DeFi Engine Use Example" | ||
| subtitle: "aave-protocol-dated function" | ||
| author: "Kacy Adams" | ||
| date: "12/15/2022" | ||
| output: | ||
| pdf_document: default | ||
| html_document: | ||
| toc: true | ||
| number_sections: true | ||
| df_print: paged | ||
| --- | ||
| # Start by loading the proper libraries: | ||
| ```{r setup, include=FALSE} | ||
| # Set the default CRAN repository | ||
| local({r <- getOption("repos") | ||
| r["CRAN"] <- "http://cran.r-project.org" | ||
| options(repos=r) | ||
| }) | ||
| # Set code chunk defaults | ||
| knitr::opts_chunk$set(echo = TRUE) | ||
| library("httr") | ||
| library("jsonlite") | ||
| library("lubridate") | ||
| # Load required packages; install if necessary | ||
| # CAUTION: DO NOT interrupt R as it installs packages!! | ||
| if (!require("ggplot2")) { | ||
| install.packages("ggplot2") | ||
| library(ggplot2) | ||
| } | ||
| if (!require("httr")) { | ||
| install.packages("httr") | ||
| library(httr) | ||
| } | ||
| if (!require("jsonlite")) { | ||
| install.packages("jsonlite") | ||
| library(jsonlite) | ||
| } | ||
| if (!require("lubridate")) { | ||
| install.packages("lubridate") | ||
| library(lubridate) | ||
| } | ||
| if(!require("dplyr")){ | ||
| install.packages("dplyr") | ||
| library(dplyr) | ||
| } | ||
| if(!require("stringr")){ | ||
| install.packages("stringr") | ||
| library(stringr) | ||
| } | ||
| if(!require("tidyr")){ | ||
| install.packages("tidyr") | ||
| library(tidyr) | ||
| } | ||
| if(!require("knitr")){ | ||
| install.packages("knitr") | ||
| library(knitr) | ||
| } | ||
| ``` | ||
|
|
||
|
|
||
| We provide a function to request and parse data from our DeFi data engine living on the IDEA Cluster. This initializes a data stream from the Amber Data API, opens a socket, requests data, listens on the socket, and then parses the received data. The finished dataframe is as close as possible to the schema of the cold-storage data we currently use. | ||
| ```{R} | ||
| getJson <- function(startdate, enddate) { | ||
| httr::GET(url = "http://defi-de.idea.rpi.edu:8080/defi/v1/rest/initialize?source=amber_data&auth_data=key,UAK7ed69235426c360be22bfc2bde1809b6") | ||
| engine_key <- "b6c810a7f35f4fa0d28258278325b4b5ab82ba79868ab33d01d5c878e13872ec129a91a3fbf702e59c2404f0fb4a53420a3ffb50130c35b4d06b32d81e56c1f4" | ||
| #Initialize data stream with Amber Data API key | ||
| #It is ok to do this multiple times, will always return the same key | ||
| out <- httr::GET(url = "http://defi-de.idea.rpi.edu:8080/defi/v1/rest/initialize?source=amber_data&auth_data=key,UAK7ed69235426c360be22bfc2bde1809b6") | ||
| out <- content(out, "parsed") | ||
| engine_key <- out$data | ||
| #Create socket and get socket_key which tells the engine where to put the data | ||
| socket <- socketConnection("defi-de.idea.rpi.edu", 61200, blocking=TRUE) | ||
| ss <- readLines(socket, 1) | ||
| ss | ||
| socket_key <- readLines(socket, 1) | ||
| #Build the request delimited by &&& | ||
| #Similar to a GET request in the way we handle parameters | ||
| reqString <- paste( | ||
| "SRC&&&RQST&&&destination&&&", ss, | ||
| "SRC&&&RQST&&&destination&&&", socket_key, | ||
| "&&&key&&&", engine_key, "&&&", | ||
| "start_date&&&", startdate, "&&&", | ||
| "end_date&&&", enddate, "&&&", | ||
| "query&&&aave-protocol-dated&&&", | ||
| "request&&&aave-protocol-dated\n", sep="") | ||
| #Write this request back to the socket to tell engine what we want | ||
| writeLines(reqString, socket) | ||
| #Now the engine will begin feeding us data | ||
| #We grab the first line to initialize the data var and then we continue listening | ||
| data <- readLines(socket, 1) | ||
| while (TRUE) { | ||
| temp <- readLines(socket, 1) | ||
| data <- paste(data, temp, "") | ||
| if (grepl("<<<end>>>", temp, fixed=TRUE)) {break} | ||
| } | ||
| data | ||
| data <- str_replace(data, " <<<end>>> ", "") | ||
| data <- str_replace_all(data, " ", "") | ||
| data <- str_replace_all(data, "\\}\\{", "\\},\\{") | ||
| data <- paste("[", data, "]", sep="") | ||
| output <- fromJSON(data) | ||
| output <- output[,-(colnames(output) == "_id")] | ||
| colnames(output)[colnames(output) == "action"] = "type" | ||
| output$type <- as.factor(output$type) | ||
| data <- str_replace(data, " <<<end>>> ", "") #We begin parsing by removing the end character | ||
| data <- str_replace_all(data, " ", "") #We remove all spaces | ||
| data <- str_replace_all(data, "\\}\\{", "\\},\\{") #We add commas in between every object | ||
| data <- paste("[", data, "]", sep="") #and format the string as a json list | ||
| output <- fromJSON(data) #and then it is finally ready to be parsed as a json | ||
| #We then need to match this with our current cold-storage data schema | ||
| output <- output[,-(colnames(output) == "_id")] #We remove the Mongodb data engine ID | ||
| colnames(output)[colnames(output) == "action"] = "type" #Rename the 'action' column to 'type' | ||
| output$type <- as.factor(output$type) #and make this column a factor for a faster df | ||
| #We calculate the date and time via the UNIX epoch timestamp provided in the data | ||
| output <- output %>% mutate(datetime = as_datetime(as.numeric(substr(timestamp, 1, nchar(timestamp)-3)))) | ||
| #We rename to match the desired schema | ||
| colnames(output)[colnames(output) == "assetId"] = "reserveId" | ||
| colnames(output)[colnames(output) == "assetSymbol"] = "reserve" | ||
| #We convert these from chars to numbers (doubles) for obvious reasons | ||
| output$amount <- as.double(output$amount) | ||
| output$borrowRate <- as.double(output$borrowRate) | ||
| output$principalAmount <- as.double(output$principalAmount) | ||
| #We rename to match desired schema | ||
| colnames(output)[colnames(output) == "user"] = "userId" | ||
| #We unite these columns as they are only present in specific actions | ||
| #and our old data refers to them purely as 'onBehalfOfId' | ||
| output <- output %>% unite(col = "onBehalfOfId", onBehalfOf,repayer,initiator, na.rm = TRUE, sep = "") | ||
| #We rename to match desired schema | ||
| colnames(output)[colnames(output) == "collateralAssetId"] = "collateralReserveId" | ||
| colnames(output)[colnames(output) == "collateralAssetSymbol"] = "collateralReserve" | ||
| colnames(output)[colnames(output) == "principalAssetId"] = "principalReserveId" | ||
| colnames(output)[colnames(output) == "principalAssetSymbol"] = "principalReserve" | ||
| output$principalAmount <- as.double(output$principalAmount) | ||
| #We remove column 15 as it is a mongoDB timestamp from the engine that is no longer needed | ||
| output <- output[,-15] | ||
| output | ||
| return(output) | ||
| } | ||
| ``` | ||
|
|
||
| With this function, we can now get our data. | ||
| ```{R} | ||
| temp <- getJson("2022-08-01", "2022-09-01") | ||
| temp | ||
| #We make a sample call to the function which will return all transactions from August 1st to August 3rd, 2022 | ||
| df <- getJson("2022-08-01", "2022-08-04") | ||
| kable(head(df), "simple") | ||
| ``` | ||
|
|
||
| Now that we have our dataframe, let's show that it is usable. We'll plot the daily count of transactions over the dataframe. | ||
| ```{r} | ||
| library(dplyr) | ||
| weeklySummaries <- temp %>% | ||
| mutate(week = floor_date(datetime, unit = "day")) %>% # Add a new column that rounds the date of each transaction down to the nearest week | ||
| group_by(week) %>% # Group the transactions together by the week they were performed. | ||
| dailySummaries <- df %>% | ||
| mutate(day = floor_date(datetime, unit = "day")) %>% # Add a new column that rounds the date of each transaction down to the nearest day | ||
| group_by(day) %>% # Group the transactions together by the day they were performed. | ||
| summarise(transactionCount = n()) # Count the number of transactions in each group. | ||
| ``` | ||
|
|
||
| With these weekly summaries computed, we can simply plot the week on the x-axis and the transaction count on the y-axis to visualize this new representation: | ||
|
|
||
| With these daily summaries computed, we can simply plot the day on the x-axis and the transaction count on the y-axis to visualize this. | ||
| ```{r} | ||
| weeklyTransactionsPlot <- ggplot(data = weeklySummaries, aes(x = week, y = transactionCount)) + geom_line() | ||
| weeklyTransactionsPlot | ||
| dailyTransactionsPlot <- ggplot(data = dailySummaries, aes(x = day, y = transactionCount)) + geom_line() | ||
| dailyTransactionsPlot | ||
| ``` | ||
|
|
||
| We see that we received transactions for each of the three days requested. Remember that the engine parameter 'end_date' is exclusive and as such we receive zero transactions for August 4th. | ||
|
|
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.