Update FinalProj.Rmd

741000d0 · irisqlin · cbc4b4b7 · 741000d0
Commit 741000d0 authored 3 years ago by irisqlin
--- a/FinalProj.Rmd
+++ b/FinalProj.Rmd
 ---
 title: "finalproj403"
-author: "irisqlin"
+author: "Iris Lin and Hannah Zhou"
 date: "11/13/2021"
 output: html_document
 ---
@@ -184,15 +184,18 @@ data$N.safe <- as.numeric(data$N.safe)
 ```{r}
 # response data exploration

-# hist(as.numeric(data$X30.5drinks))
 hist(as.numeric(data$X30.cig))
 hist(as.numeric(data$X30drink))
 hist(as.numeric(data$X30marijuana))

 summary(data)

-# TODO: as numeric all the predictor variables
-round(cor(data[, -c(1, 2, 3, 4, 6)], 3))
+cor_data <- data[, -c(1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 14)]  # columns dropped by position before correlating
+res <- cor(cor_data)
+round(res, 2)  # correlation matrix rounded to 2 decimals
+round(cor(data[, -c(1, 2, 3, 4, 6, 7, 8, 14)]), 3)  # digits = 3 belongs to round(); cor(x, 3) would treat 3 as `y`
+
+(diag(var(data[,  -c(1, 2, 3, 4, 6, 7, 8, 14)])))  # per-column variances (diagonal of the covariance matrix)

 ```

@@ -200,7 +203,7 @@ round(cor(data[, -c(1, 2, 3, 4, 6)], 3))

 Using the Wald test, the likelihood ratio test, and the drop-in-deviance test, we prefer drinks_mod_2.
 ```{r}
-# making models
+# making models: 1 = X30drink at or above its mean (treated as more likely to abuse substance), 0 = below the mean (unlikely to abuse substance)
 drinks_threshold <- mean(data$X30drink)
 data$drinks_var <- ifelse(data$X30drink >= drinks_threshold, 1, 0)

@@ -215,6 +218,7 @@ summary(drinks_mod_1)
 # dropped p > 0.1, 
 drinks_mod_2 <- glm(drinks_var ~ truth + decision + excite + safe + best.school + Wpdrink, data = data, family = "binomial")
 summary(drinks_mod_2)
+exp(coef(drinks_mod_2))  # exponentiated coefficients (odds ratios for the binomial fit)

 #likelihood ratio test to test whether the observed difference in model fits is statistically significant
 # source: https://www.listendata.com/2016/07/insignificant-levels-of-categorical-variable.html
@@ -231,11 +235,10 @@ anova(drinks_mod_2, drinks_mod_1, test="Chisq")
 # source: https://bookdown.org/roback/bookdown-BeyondMLR/ch-poissonreg.html#cs-philippines
 # source: https://bookdown.org/roback/bookdown-BeyondMLR/ch-logreg.html
 ```
-
 ## Cig model

 ```{r}
-# making models
+# making models: 1 = X30.cig at or above its mean (treated as abusing substance), 0 = below the mean (not abusing substance)
 cig_threshold <- mean(data$X30.cig)
 data$cig_var <- ifelse(data$X30.cig >= cig_threshold, 1, 0)

@@ -256,19 +259,25 @@ summary(cig_mod_3)

 #likelihood ratio test to test whether the observed difference in model fits is statistically significant
 # source: https://www.listendata.com/2016/07/insignificant-levels-of-categorical-variable.html
-
 anova(cig_total_mod, cig_mod_1, test="LRT")
+# not significant: prefer the smaller model, mod 1
 anova(cig_mod_1, cig_mod_2, test = "LRT")
+# significant: may prefer the larger model, mod 1
 anova(cig_mod_2, cig_mod_3, test="LRT")
-#Both of these are not significant, which means dropping the variables we did was not significant. 
+# not significant: prefer the smaller model, mod 3
+anova(cig_mod_1, cig_mod_3, test="LRT")
+# significant: prefer the larger model, mod 1

-# TODO: idk what this does
-anova(drinks_total_mod, drinks_mod_1, test="Chisq")
+# Drop in deviance test (NOTE(review): for glm fits test="Chisq" is the same test as "LRT" above - confirm the conclusions agree)
+anova(cig_mod_1, cig_total_mod, test="Chisq")
+# no significant difference
+anova(cig_mod_1, cig_mod_2, test = "Chisq")
+# significant, may prefer the smaller model, mod 2 (NOTE(review): the LRT comment for the same pair prefers mod 1 - reconcile)
+anova(cig_mod_3, cig_mod_2, test="Chisq")
+# not significant: essentially no difference in residual deviance values
+anova(cig_mod_3, cig_mod_1, test="Chisq")
+# The difference in deviance is statistically significant; the drop in deviance test prefers the smaller model, mod 3 (NOTE(review): the LRT comment for the same pair prefers mod 1 - reconcile)

-#drop in deviance test compares residual deivances from two models
-# source: https://bookdown.org/roback/bookdown-BeyondMLR/ch-poissonreg.html#cs-philippines
-# source: https://bookdown.org/roback/bookdown-BeyondMLR/ch-logreg.html
-anova(drinks_mod_1, drinks_mod_2, test = "Chisq")
 ```