# The aim of this session is to compare different models

# and to select the best one able to predict variable y

# We first load the dataset.
# NOTE: the original file used typographic quotes (“ ”), which are a
# syntax error in R — they must be plain ASCII double quotes.

menti <- read.csv("menti_200_full.csv")

# Before calibrating the models it is always good practice to
# output summary statistics for every variable in the dataset.

library(stargazer)

# type = "text" prints a plain-text table to the console
# (ASCII quotes are required; curly quotes are a syntax error in R).
stargazer(menti, type = "text")

# You can also output several plots to check
# if there are any outliers, or the correlation and
# dependence between variables.

# We then need to split the data between train and test:
# train data will be used to calibrate the models.

# Fix the RNG seed so the train/test split is reproducible.
set.seed(42)

# I am using 75% of the sample as train.
# Derive the sizes from the data instead of hard-coding 400/300,
# so the script still works if the dataset changes size.
n_obs <- nrow(menti)
idx_train <- sample(seq_len(n_obs), size = floor(0.75 * n_obs))

menti_train <- menti[idx_train, ]

# The remaining 25% will be used
# to compare out-of-sample performance.
menti_test <- menti[-idx_train, ]

# We can start by estimating an OLS model
# regressing y on all other columns of the training data.

ols_mod1 <- lm(y ~ ., data = menti_train)

stargazer(ols_mod1, type = "text")

# We can construct our out-of-sample performance measures
# with the predict function.

ols_pred1 <- predict(ols_mod1, newdata = menti_test)

# Which of these two measures penalizes large errors more
# than small ones? (RMSE, because errors are squared.)

# RMSE — note the minus sign must be the ASCII '-' character;
# the en-dash '–' in the original is a syntax error in R.
sqrt(mean((ols_pred1 - menti_test$y)^2))

# MAE
mean(abs(ols_pred1 - menti_test$y))

# How would you construct in R a performance measure
# which uses absolute errors but penalises negative
# errors twice as much as positive ones?
# (Hint: weight the absolute error by 2 when pred - y < 0.)

# Another useful model is the Elastic-Net,
# from package glmnet (via the glmnetUtils formula interface).

library(glmnetUtils)

# E-Net has two params: alpha and lambda.
# alpha is set by the user and it is between 0 and 1:
#   alpha = 1 (default) gives the LASSO
#   alpha = 0 gives the RIDGE
# Any value in between is a combination of the two.

# Which model are we estimating here?
# alpha is left at its default of 1, so this is a LASSO.
enet_mod2 <- glmnet(y ~ ., data = menti_train)

# stargazer does not support glmnet objects, so calling
# stargazer(enet_mod2) would error; print the glmnet
# path summary instead.
print(enet_mod2)

# We can still use predict to compute out-of-sample performance.
# IMPORTANT: without 's =' (the lambda at which to predict),
# predict() on a glmnet object returns a matrix of predictions —
# one column per lambda on the regularization path — and the
# RMSE/MAE below would silently recycle against a matrix.
# Here we pick the least-regularized fit on the path.
enet_pred2 <- predict(enet_mod2, newdata = menti_test,
                      s = min(enet_mod2$lambda))

# Which model between OLS and E-Net performs better?

# RMSE
sqrt(mean((enet_pred2 - menti_test$y)^2))

# MAE
mean(abs(enet_pred2 - menti_test$y))

# You can use function cv.glmnet to optimize lambda by
# cross-validation, but you still need to provide alpha.
# Column 1 is assumed to be y (consistent with the loop below) —
# TODO confirm against the CSV header.
cv_enet_mod1 <- cv.glmnet(x = as.matrix(menti_train[, -1]),
                          y = as.matrix(menti_train[, 1]),
                          alpha = 0.5)

# Therefore you need to use a loop over different alphas
# to pick the best one.

# BUG FIX: the original computed RMSE/MAE on the model object
# itself (cv_enet_mod1 - menti_test$y), which is meaningless.
# We must first predict on the test set, at the CV-selected lambda.
cv_enet_pred1 <- predict(cv_enet_mod1,
                         newx = as.matrix(menti_test[, -1]),
                         s = "lambda.min")

# RMSE
sqrt(mean((cv_enet_pred1 - menti_test$y)^2))

# MAE
mean(abs(cv_enet_pred1 - menti_test$y))

# cva.glmnet will apply cv.glmnet over a grid of alphas
# so that you can do the comparison afterwards.

cva_enet_mod1 <- cva.glmnet(x = as.matrix(menti_train[, -1]),
                            y = as.matrix(menti_train[, 1]))

# For each alpha on the grid, predict on the test set and
# report the out-of-sample RMSE so the best alpha can be chosen.
for (alpha in cva_enet_mod1$alpha) {
  cva_enet_pred1 <- predict(cva_enet_mod1,
                            newx = as.matrix(menti_test[, -1]),
                            alpha = alpha)
  # "\n" replaces the original literal line-broken string in
  # curly quotes, which was a syntax error in R.
  cat(alpha, sqrt(mean((cva_enet_pred1 - menti_test$y)^2)), "\n")
}