# German credit data: package setup ----
#
# Make sure every package this script relies on is installed, then attach it.
# `requireNamespace()` is used purely as an availability probe; `library()`
# then fails loudly if a package still cannot be loaded after the install
# attempt (unlike `require()`, which only returns FALSE with a warning).
#
# The attach order is kept exactly as before, because a package attached
# later masks same-named functions from packages attached earlier.
required_packages <- c(
  "tidyverse",
  "knitr",
  "digest",
  "ggplot2",
  "MASS",         # used for lda
  "pROC",         # used to plot ROC curves
  "xgboost",      # used for xgboost models
  "e1071",        # used for svm
  "naivebayes",   # used for Naive Bayes
  "randomForest"  # used for Random Forest
)

for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    # xgboost is installed together with all of its optional dependencies;
    # every other package uses the install.packages() default (NA).
    install.packages(pkg, dependencies = if (pkg == "xgboost") TRUE else NA)
  }
  library(pkg, character.only = TRUE)
}


# Dataset ----

# Fix the RNG seed so the train/test split below is reproducible.
set.seed(100)

# Load the data
creditdata <- read.csv(
  "~/LLM_Fairness_Fall_2023/StudentNotebooks/FinalReport/statlog_german_credit_data.csv",
  header = TRUE
)

# Print the first few rows
head(creditdata)

# Split the dataset into a training set (90% of the rows) and a test set
# (the remaining rows).
# sample.int() draws the same indices as sample(1:n, size) for n >= 1, but
# avoids 1:n expanding to c(1, 0) when the data frame is empty. floor()
# makes the truncation of the non-integer sample size explicit.
n_rows <- nrow(creditdata)
train_indices <- sample.int(n_rows, floor(0.9 * n_rows))
train <- creditdata[train_indices, ]
# Select the test rows positively: a negative index built from an empty
# train_indices would select no rows at all instead of every row.
test <- creditdata[setdiff(seq_len(n_rows), train_indices), ]

# Display the dimensions of the training and test sets
cat("Training set size:", nrow(train), "\n")
cat("Test set size:", nrow(test), "\n")
	# Package setup (repeated) ----
	#
	# NOTE(review): this tab-indented section repeats the package setup found
	# earlier in this file. It is kept so behaviour is unchanged, but it looks
	# like an accidental duplicate -- confirm and remove.
	#
	# Make sure every package this script relies on is installed, then attach
	# it. `requireNamespace()` is used purely as an availability probe;
	# `library()` then fails loudly if a package still cannot be loaded after
	# the install attempt (unlike `require()`, which only returns FALSE with a
	# warning). The attach order is kept exactly as before.
	required_packages <- c(
	  "tidyverse",
	  "knitr",
	  "digest",
	  "ggplot2",
	  "MASS",         # used for lda
	  "pROC",         # used to plot ROC curves
	  "xgboost",      # used for xgboost models
	  "e1071",        # used for svm
	  "naivebayes",   # used for Naive Bayes
	  "randomForest"  # used for Random Forest
	)

	for (pkg in required_packages) {
	  if (!requireNamespace(pkg, quietly = TRUE)) {
	    # xgboost is installed together with all of its optional dependencies;
	    # every other package uses the install.packages() default (NA).
	    install.packages(pkg, dependencies = if (pkg == "xgboost") TRUE else NA)
	  }
	  library(pkg, character.only = TRUE)
	}


	# Dataset (repeated) ----
	#
	# NOTE(review): this tab-indented section repeats the data loading and
	# splitting found earlier in this file, with the same seed and the same
	# input path. It is kept so behaviour is unchanged, but it looks like an
	# accidental duplicate -- confirm and remove.

	# Fix the RNG seed so the train/test split below is reproducible.
	set.seed(100)

	# Load the data
	creditdata <- read.csv(
	  "~/LLM_Fairness_Fall_2023/StudentNotebooks/FinalReport/statlog_german_credit_data.csv",
	  header = TRUE
	)

	# Print the first few rows
	head(creditdata)

	# Split the dataset into a training set (90% of the rows) and a test set
	# (the remaining rows).
	# sample.int() draws the same indices as sample(1:n, size) for n >= 1, but
	# avoids 1:n expanding to c(1, 0) when the data frame is empty. floor()
	# makes the truncation of the non-integer sample size explicit.
	n_rows <- nrow(creditdata)
	train_indices <- sample.int(n_rows, floor(0.9 * n_rows))
	train <- creditdata[train_indices, ]
	# Select the test rows positively: a negative index built from an empty
	# train_indices would select no rows at all instead of every row.
	test <- creditdata[setdiff(seq_len(n_rows), train_indices), ]

	# Display the dimensions of the training and test sets
	cat("Training set size:", nrow(train), "\n")
	cat("Test set size:", nrow(test), "\n")