7.2 Model evaluation (not part of the official work)
This section is just further exploration; I barely know what I am doing here. –Hung Nguyen
The exploration focuses on model3,
the model with all of the variables.
# Residuals-vs-fitted diagnostic for model3.
# The plot data is built with named columns so aes() can map them by name
# (ggplot2 discourages `$` inside aes(), and the original data frame did not
# contain the x variable at all).
P1 <- ggplot(
  data = data.frame(
    fitted = model3$fitted.values,
    resid = model3$residuals
  ),
  mapping = aes(x = fitted, y = resid)
) +
  geom_point(alpha = 0.1) +
  ggtitle("Residual Plot") +
  xlab("Predicted Values") +
  ylab("Residuals") +
  # Red reference line drawn at the mean of the residuals.
  geom_hline(yintercept = mean(model3$residuals), color = "red") +
  # Locally weighted least squares regression (LOESS). The formula is spelled
  # out (it is the default) so the smoother does not emit a message on render.
  geom_smooth(method = "loess", formula = y ~ x) +
  # Covariance between residuals and fitted values, printed on the plot.
  # The stray trailing comma of the original call (an empty argument) is gone.
  annotate(
    "text",
    x = 0.9, y = 0.5,
    label = paste(
      "Cov:",
      round(cov(model3$residuals, model3$fitted.values), digits = 2)
    ),
    size = 4, color = "purple"
  )
# Histogram of the model3 residuals.
# The residuals are stored in a named column and mapped by name rather than
# through `model3$residuals` inside aes().
P2 <- ggplot(
  data = data.frame(resid = model3$residuals),
  mapping = aes(x = resid)
) +
  # bins = 30 is the value ggplot2 falls back to; stating it keeps the same
  # picture while silencing the "Pick better value" message.
  geom_histogram(bins = 30) +
  ggtitle("Histogram of Residuals") +
  xlab("Residual")
# Normal QQ plot of the model3 residuals.
# The residuals are stored in a named column and mapped by name rather than
# through `model3$residuals` inside aes().
P3 <- ggplot(
  data = data.frame(resid = model3$residuals),
  mapping = aes(sample = resid)
) +
  stat_qq(alpha = 0.1) +
  # Red reference line through the QQ points.
  stat_qq_line(color = "red") +
  xlab("Normal Quantiles") +
  ylab("Residual Quantiles") +
  ggtitle("QQ Plot")
# Diagnostics panel: P1 and P2 side by side on the first row, P3 next to a
# block of commentary text on the second row.
grid.arrange(
  arrangeGrob(P1, P2, ncol = 2),
  arrangeGrob(
    P3,
    textGrob(
      # The commentary is assembled from one piece per displayed line; the
      # resulting string is identical to the original single literal.
      label = paste0(
        "Zero mean assumption is true, which is no suprise. \n",
        "There is Heteroskedasticity in the residual. Residual \n",
        "distribution have a slight left tail, but not too extreme.\n",
        "The blue curve in the first plot raise doubt about \n",
        " the Zero Conditional Mean Assumption."
      ),
      just = "left",
      x = unit(0.1, "npc"),
      y = unit(0.5, "npc"),
      gp = gpar(fontsize = 10)
    ),
    ncol = 2
  )
)
Next, we examine the relationship between the residuals and each of the variables.
# Build one residual-vs-predictor scatter plot.
#
# Arguments:
#   resid      Numeric vector of residuals (plotted on the y axis).
#   predictor  Numeric vector of predictor values, same length as `resid`
#              (plotted on the x axis).
#   x_label    Text used as the x-axis label.
#   note_x     x position (in data units) of the covariance note.
#   note_y     y position (in data units) of the covariance note.
#
# Returns a ggplot object containing the points, a LOESS smoother and a purple
# text note with the covariance of `resid` and `predictor`, rounded to two
# digits.
plot_residual_vs <- function(resid, predictor, x_label, note_x, note_y) {
  # A misspelled or missing model-frame column arrives here as NULL; stop
  # early instead of failing later inside cov() or the plot build.
  stopifnot(length(predictor) == length(resid))

  # Named columns let aes() map by name instead of using `$` on outside
  # objects.
  plot_data <- data.frame(predictor = predictor, resid = resid)
  cov_note <- paste("Cov:", round(cov(resid, predictor), digits = 2))

  ggplot(plot_data, aes(x = predictor, y = resid)) +
    geom_point(alpha = 0.1) +
    # The formula is the default one; stating it avoids a message per plot.
    geom_smooth(method = "loess", formula = y ~ x) +
    labs(x = x_label, y = "Residuals") +
    annotate(
      "text",
      x = note_x, y = note_y,
      label = cov_note,
      size = 4, color = "purple"
    )
}

# One plot per predictor in model3's model frame. The note positions are the
# hand-picked coordinates used for each variable.
v1 <- plot_residual_vs(
  model3$residuals, model3$model$median_rent,
  "Median Rent", 2125, -0.5
)
v2 <- plot_residual_vs(
  model3$residuals, model3$model$median_age,
  "Median Age", 65, -0.5
)
v3 <- plot_residual_vs(
  model3$residuals, model3$model$median_income,
  "Median Income", 62500, -0.5
)
v4 <- plot_residual_vs(
  model3$residuals, model3$model$perc_hs,
  "People without High School(%)", 60, 0.30
)
v5 <- plot_residual_vs(
  model3$residuals, model3$model$perc_rent,
  "People renting(%)", 30, -0.5
)
v6 <- plot_residual_vs(
  model3$residuals, model3$model$perc_white,
  "White population(%)", 10, -0.5
)
v7 <- plot_residual_vs(
  model3$residuals, model3$model$perc_below_pov,
  "Population below poverty line(%)", 50, 0.3
)
v8 <- plot_residual_vs(
  model3$residuals, model3$model$perc_doc,
  "Population with doctorate(%)", 15, -0.35
)

# Arrange the eight plots in two columns under a shared title.
grid.arrange(
  arrangeGrob(v1, v2, v3, v4, v5, v6, v7, v8, ncol = 2),
  top = "Residual Plot over Variables"
)
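The covariances being zero is simply a property of the OLS estimator: in a model with an intercept, the residuals are by construction uncorrelated in-sample with every regressor (and with the fitted values), so these zeros do not by themselves validate the model.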