survivalDataSummaries.Rmd

```{r}
library(dplyr)
library(xtable)
library(readr)
library(dotenv)


# Load environment variables from ./.env when that file exists. The file only
# controls which version of the transaction data is read in (via DEVMODE), so
# readers without access to the full transaction data (i.e. anyone who is not
# one of the paper's authors) can run this document without it.
# load_dot_env() stops with an error when the file is missing, hence the guard.
if (file.exists("./.env")) {
  load_dot_env(file = "./.env")
}

# Pin commonly used verbs to their dplyr versions so that a same-named function
# from another attached package cannot mask them.
select <- dplyr::select
rename <- dplyr::rename
summarize <- dplyr::summarize
group_by <- dplyr::group_by
mutate <- dplyr::mutate
```


```{r}
# Load survival data.
# NOTE(review): dataLoader.R is expected to define `allSurvivalData` (used
# below) -- confirm against that script.
source("./dataLoader.R")

# One row per (Index Event, Outcome Event) pair with the record count, the
# number of observed outcome events, the mean time to event and the censoring
# rate as a percentage.
survivalDataSummaries <- allSurvivalData %>%
  # 86400 seconds per day; assumes timeDiff is stored in seconds -- TODO confirm.
  mutate(timeDiff = timeDiff / 86400) %>%
  group_by(`Index Event`, `Outcome Event`) %>%
  summarize(
    count = n(),
    # Assumes status is coded 1 = outcome observed, 0 = censored -- TODO confirm.
    numberOfOutcomeEvents = sum(status),
    `Mean Time to Event` = mean(timeDiff),
    `Censoring Rate` = mean(1 - status) * 100
  ) %>%
  # summarize() only removes the last grouping level; drop the remaining
  # `Index Event` grouping so later verbs on this table are not silently grouped.
  ungroup()


print(xtable(survivalDataSummaries), include.rownames = FALSE)

# Total number of survival records across all event pairs.
totalRecords <- sum(survivalDataSummaries$count)
```

```{r}
# Create a table summarizing the raw transaction data used in our creation of
# the survival data.

# Sys.getenv() returns a string ("" when DEVMODE is unset). Passing that string
# straight to if() aborts with "argument is not interpretable as logical" for
# "" and for any other value as.logical() cannot parse, so convert explicitly
# and treat anything that is not TRUE as "use the sample data".
useFullData <- isTRUE(as.logical(Sys.getenv("DEVMODE", unset = "FALSE")))

if (useFullData) {
  # For generating the summaries in the paper, we will use the full transaction
  # data as opposed to the sample data.
  rawTransactionData <- readRDS("/data/IDEA_DeFi_Research/Data/Lending_Protocols/Aave/V2/Mainnet/transactions.rds")
} else {
  rawTransactionData <- read_csv("./Data/Raw_Transaction_Data_Sample/transactionsSample.csv")
}

# For liquidations, report principalAmountUSD as the transaction amount; every
# other transaction type keeps its own amountUSD.
rawTransactionData <- rawTransactionData %>%
  dplyr::mutate(
    amountUSD = case_when(
      type == "liquidation" ~ principalAmountUSD,
      TRUE ~ amountUSD
    )
  )

# Per-type count, mean, median and standard deviation of the USD amount for the
# five transaction types listed below, up to and including cutoffDate.
# NOTE(review): cutoffDate is not defined in this file -- presumably it comes
# from dataLoader.R; confirm.
# NOTE(review): mean/median/sd propagate NA; confirm amountUSD has no missing
# values or add na.rm = TRUE.
transactionSummary <- rawTransactionData %>%
  filter(timestamp <= cutoffDate) %>%
  filter(type %in% c("borrow", "liquidation", "deposit", "withdraw", "repay")) %>%
  group_by(type) %>%
  dplyr::summarize(
    count = n(),
    meanAmountUSD = mean(amountUSD),
    medianAmountUSD = median(amountUSD),
    sd = sd(amountUSD)
  )

print(xtable(transactionSummary), include.rownames = FALSE)

```
	```{r}
	library(dplyr)
	library(xtable)
	library(readr)
	library(dotenv)


	# NOTE(review): this chunk appears to duplicate the setup chunk earlier in
	# this file -- confirm whether the second copy is intentional.
	#
	# Load environment variables from ./.env when that file exists. The file only
	# controls which version of the transaction data is read in (via DEVMODE), so
	# readers without access to the full transaction data (i.e. anyone who is not
	# one of the paper's authors) can run this document without it.
	# load_dot_env() stops with an error when the file is missing, hence the guard.
	if (file.exists("./.env")) {
	  load_dot_env(file = "./.env")
	}

	# Pin commonly used verbs to their dplyr versions so that a same-named function
	# from another attached package cannot mask them.
	select <- dplyr::select
	rename <- dplyr::rename
	summarize <- dplyr::summarize
	group_by <- dplyr::group_by
	mutate <- dplyr::mutate
	```


	```{r}
	# NOTE(review): this chunk appears to duplicate the survival-summary chunk
	# earlier in this file -- confirm whether the second copy is intentional.
	#
	# Load survival data.
	# NOTE(review): dataLoader.R is expected to define `allSurvivalData` (used
	# below) -- confirm against that script.
	source("./dataLoader.R")

	# One row per (Index Event, Outcome Event) pair with the record count, the
	# number of observed outcome events, the mean time to event and the censoring
	# rate as a percentage.
	survivalDataSummaries <- allSurvivalData %>%
	  # 86400 seconds per day; assumes timeDiff is stored in seconds -- TODO confirm.
	  mutate(timeDiff = timeDiff / 86400) %>%
	  group_by(`Index Event`, `Outcome Event`) %>%
	  summarize(
	    count = n(),
	    # Assumes status is coded 1 = outcome observed, 0 = censored -- TODO confirm.
	    numberOfOutcomeEvents = sum(status),
	    `Mean Time to Event` = mean(timeDiff),
	    `Censoring Rate` = mean(1 - status) * 100
	  ) %>%
	  # summarize() only removes the last grouping level; drop the remaining
	  # `Index Event` grouping so later verbs on this table are not silently grouped.
	  ungroup()


	print(xtable(survivalDataSummaries), include.rownames = FALSE)

	# Total number of survival records across all event pairs.
	totalRecords <- sum(survivalDataSummaries$count)
	```

	```{r}
	# NOTE(review): this chunk appears to duplicate the transaction-summary chunk
	# earlier in this file -- confirm whether the second copy is intentional.
	#
	# Create a table summarizing the raw transaction data used in our creation of
	# the survival data.

	# Sys.getenv() returns a string ("" when DEVMODE is unset). Passing that string
	# straight to if() aborts with "argument is not interpretable as logical" for
	# "" and for any other value as.logical() cannot parse, so convert explicitly
	# and treat anything that is not TRUE as "use the sample data".
	useFullData <- isTRUE(as.logical(Sys.getenv("DEVMODE", unset = "FALSE")))

	if (useFullData) {
	  # For generating the summaries in the paper, we will use the full transaction
	  # data as opposed to the sample data.
	  rawTransactionData <- readRDS("/data/IDEA_DeFi_Research/Data/Lending_Protocols/Aave/V2/Mainnet/transactions.rds")
	} else {
	  rawTransactionData <- read_csv("./Data/Raw_Transaction_Data_Sample/transactionsSample.csv")
	}

	# For liquidations, report principalAmountUSD as the transaction amount; every
	# other transaction type keeps its own amountUSD.
	rawTransactionData <- rawTransactionData %>%
	  dplyr::mutate(
	    amountUSD = case_when(
	      type == "liquidation" ~ principalAmountUSD,
	      TRUE ~ amountUSD
	    )
	  )

	# Per-type count, mean, median and standard deviation of the USD amount for the
	# five transaction types listed below, up to and including cutoffDate.
	# NOTE(review): cutoffDate is not defined in this file -- presumably it comes
	# from dataLoader.R; confirm.
	# NOTE(review): mean/median/sd propagate NA; confirm amountUSD has no missing
	# values or add na.rm = TRUE.
	transactionSummary <- rawTransactionData %>%
	  filter(timestamp <= cutoffDate) %>%
	  filter(type %in% c("borrow", "liquidation", "deposit", "withdraw", "repay")) %>%
	  group_by(type) %>%
	  dplyr::summarize(
	    count = n(),
	    meanAmountUSD = mean(amountUSD),
	    medianAmountUSD = median(amountUSD),
	    sd = sd(amountUSD)
	  )

	print(xtable(transactionSummary), include.rownames = FALSE)

	```