Skip to content
Snippets Groups Projects
Commit 03a4b8c8 authored by zhannah's avatar zhannah
Browse files

Upload New File

parent eda8ea55
No related branches found
No related tags found
No related merge requests found
---
title: "finalproj403"
author: "irisqlin"
date: "11/13/2021"
output: html_document
---
```{r setup, include=FALSE}
# Set the knitr default chunk option `echo` to TRUE for all following chunks.
knitr::opts_chunk$set(echo = TRUE)
```
## R Markdown
```{r}
library(stringr)

# Load the raw survey export and list its column names.
data <- read.csv("IYSdata.csv")
names(data)

# response data cleaning: the three "used in the past 30 days" columns.
# (These columns are turned into the binary model outcomes further down.)

# Convert a vector of "number of days" answer labels to numeric values.
#   x: answer labels such as "1-2 days" or "3-5 days".
# The unit/suffix text is stripped first, then every day range is replaced by
# its midpoint. Whatever remains is passed to as.numeric(), so a label that is
# still not a plain number becomes NA (with the usual coercion warning).
# Returns a numeric vector the same length as `x`.
days_to_numeric <- function(x) {
  # Removed one after another, in this order, exactly like the original code.
  for (suffix in c(" days", " day", " or more")) {
    x <- str_remove_all(x, suffix)
  }
  midpoints <- c(
    "1-2" = 1.5, "3-5" = 4, "6-9" = 7.5, "10-19" = 14.5, "20-29" = 24.5
  )
  is_range <- x %in% names(midpoints)  # %in% treats NA as "no match"
  x[is_range] <- midpoints[x[is_range]]
  as.numeric(x)
}

for (col in c("X30.cig", "X30drink", "X30marijuana")) {
  data[[col]] <- days_to_numeric(data[[col]])
  # Drop incomplete rows only AFTER the conversion: as.numeric() can create
  # new NAs, and leaving them in would make the mean below come out as NA.
  data <- na.omit(data)
  print(mean(data[[col]]))
}
```
```{r}
# predictor data cleaning: recode survey answer labels to numeric scores.
# (These columns are the right-hand-side terms of the models further down.)

# Recode one column of `df` from answer labels to numbers.
#   df:     data frame holding the column
#   column: name of the column to recode (character scalar)
#   scores: named numeric vector; names are answer labels, values are scores
# Labels that are not listed in `scores` are handed to as.numeric() unchanged,
# so anything that is not already a number becomes NA (with a warning).
# Rows containing an NA in any column are dropped after the conversion, so
# NAs produced by the coercion itself are removed as well.
# Returns the recoded data frame without incomplete rows.
recode_scores <- function(df, column, scores) {
  answers <- as.character(df[[column]])
  known <- answers %in% names(scores)  # %in% treats NA as "no match"
  answers[known] <- scores[answers[known]]
  df[[column]] <- as.numeric(answers)
  na.omit(df)
}

moved_scores <- c(
  "None" = 0, "Once" = 1, "Twice" = 2, "Three times" = 3,
  "Four times or more" = 4
)
agree_scores <- c(
  "Strongly agree" = 4, "Agree" = 3, "Disagree" = 2, "Strongly disagree" = 1
)
grade_scores <- c(
  "Excellent" = 5, "Above average" = 4, "Average" = 3, "Below average" = 2,
  "Failing" = 1
)
# "Dont know" is scored 2.5, between "A little wrong" and "Wrong".
wrong_scores <- c(
  "Very wrong" = 4, "Wrong" = 3, "Dont know" = 2.5, "A little wrong" = 2,
  "Not wrong at all" = 1
)

# Column -> scale, in the order the columns are processed.
recoding_plan <- list(
  times.moved = moved_scores,
  pride = agree_scores,
  truth = agree_scores,
  responsibility = agree_scores,
  friends = agree_scores,
  fix.problems = agree_scores,
  decision = agree_scores,
  excite = agree_scores,
  hard.work = agree_scores,
  safe = agree_scores,
  best.school = agree_scores,
  talk.adult = agree_scores,
  grades = grade_scores,
  Wpdrink = wrong_scores,
  N.safe = agree_scores
)

for (column in names(recoding_plan)) {
  data <- recode_scores(data, column, recoding_plan[[column]])
  # Mean of the recoded column over the rows that are still complete.
  print(mean(data[[column]]))
}
```
```{r}
# response data exploration
# hist(as.numeric(data$X30.5drinks))
# Distribution of each 30-day substance-use variable.
hist(as.numeric(data$X30.cig))
hist(as.numeric(data$X30drink))
hist(as.numeric(data$X30marijuana))
summary(data)
# TODO: as numeric all the predictor variables
# Pairwise correlations, rounded to 3 decimals. The rounding digits belong to
# round(), not cor(): passing 3 as cor()'s second argument makes it the `y`
# vector instead.
# NOTE(review): columns 1, 2, 3, 4 and 6 are excluded by position, presumably
# because they are not numeric - confirm against names(data).
round(cor(data[, -c(1, 2, 3, 4, 6)]), 3)
```
## Drinks Model(s)
```{r}
# making models
# Binary outcome: 1 when X30drink is at or above the sample mean, 0 otherwise.
drinks_threshold <- mean(data$X30drink)
data$drinks_var <- ifelse(data$X30drink >= drinks_threshold, 1, 0)

# total model: logistic regression on every recoded survey predictor
drinks_total_mod <- glm(
  drinks_var ~ pride + truth + responsibility + friends + fix.problems +
    decision + excite + hard.work + safe + best.school + talk.adult +
    grades + Wpdrink + N.safe,
  data = data, family = "binomial"
)
# Summarise the model fitted just above (there is no `fiveDrinks_mod` object).
summary(drinks_total_mod)

# dropped p > 0.1
drinks_mod_1 <- glm(
  drinks_var ~ truth + decision + excite + safe + best.school + grades +
    Wpdrink,
  data = data, family = "binomial"
)
summary(drinks_mod_1)

# dropped p > 0.1
drinks_mod_2 <- glm(
  drinks_var ~ truth + decision + excite + safe + best.school + Wpdrink,
  data = data, family = "binomial"
)
summary(drinks_mod_2)

# Likelihood ratio tests: is the difference in fit between a larger model and
# the reduced model nested inside it statistically significant?
# source: https://www.listendata.com/2016/07/insignificant-levels-of-categorical-variable.html
anova(drinks_total_mod, drinks_mod_1, test = "LRT")
anova(drinks_mod_1, drinks_mod_2, test = "LRT")
# Authors' reading of the output: neither test is significant, i.e. dropping
# those variables did not significantly change the fit.

# Drop-in-deviance test: compares the residual deviances of the two models
# against a chi-squared distribution. For glm fits, test = "Chisq" is the same
# test as test = "LRT", so these two calls repeat the comparisons above.
# source: https://bookdown.org/roback/bookdown-BeyondMLR/ch-poissonreg.html#cs-philippines
# source: https://bookdown.org/roback/bookdown-BeyondMLR/ch-logreg.html
anova(drinks_total_mod, drinks_mod_1, test = "Chisq")
anova(drinks_mod_1, drinks_mod_2, test = "Chisq")
```
## Cig model
```{r}
# making models
# Binary outcome: 1 when X30.cig is at or above the sample mean, 0 otherwise.
cig_threshold <- mean(data$X30.cig)
data$cig_var <- ifelse(data$X30.cig >= cig_threshold, 1, 0)

# total model: logistic regression on every recoded survey predictor
cig_total_mod <- glm(
  cig_var ~ pride + truth + responsibility + friends + fix.problems +
    decision + excite + hard.work + safe + best.school + talk.adult +
    grades + Wpdrink + N.safe,
  data = data, family = "binomial"
)
summary(cig_total_mod)

# dropped p > 0.1
cig_mod_1 <- glm(
  cig_var ~ friends + fix.problems + decision + excite + hard.work + safe +
    best.school + grades + Wpdrink,
  data = data, family = "binomial"
)
summary(cig_mod_1)

# dropped p > 0.1
# NOTE(review): `truth` is in this model but not in cig_mod_1, so cig_mod_2 is
# not nested inside cig_mod_1 and the cig_mod_1 vs cig_mod_2 comparisons below
# are not valid nested-model tests - confirm the intended formula.
cig_mod_2 <- glm(
  cig_var ~ truth + decision + excite + best.school + Wpdrink,
  data = data, family = "binomial"
)
summary(cig_mod_2)

cig_mod_3 <- glm(
  cig_var ~ decision + excite + best.school + Wpdrink,
  data = data, family = "binomial"
)
summary(cig_mod_3)

# Likelihood ratio tests: is the difference in fit between a larger model and
# the reduced model statistically significant?
# source: https://www.listendata.com/2016/07/insignificant-levels-of-categorical-variable.html
anova(cig_total_mod, cig_mod_1, test = "LRT")
anova(cig_mod_1, cig_mod_2, test = "LRT")
anova(cig_mod_2, cig_mod_3, test = "LRT")
# Authors' reading of the output: the tests are not significant, i.e. dropping
# those variables did not significantly change the fit.

# Drop-in-deviance test: compares the residual deviances of the two models
# against a chi-squared distribution. For glm fits, test = "Chisq" is the same
# test as test = "LRT", so these two calls repeat the comparisons above.
# They compare the cigarette models of this section (the drinks models belong
# to the previous section).
# source: https://bookdown.org/roback/bookdown-BeyondMLR/ch-poissonreg.html#cs-philippines
# source: https://bookdown.org/roback/bookdown-BeyondMLR/ch-logreg.html
anova(cig_total_mod, cig_mod_1, test = "Chisq")
anova(cig_mod_1, cig_mod_2, test = "Chisq")
```
```{r}
# Extra code
# Leftover models for a "five drinks" outcome. No visible chunk creates
# `fiveDrinks_var`, so fitting unconditionally would stop knitting with an
# "object not found" error. The models are therefore fitted only when the
# variable is available, either as a column of `data` or as a workspace object.
if ("fiveDrinks_var" %in% names(data) || exists("fiveDrinks_var")) {
  fiveDrinks_mod_3 <- glm(
    fiveDrinks_var ~ Wpdrink + friends,
    data = data, family = "binomial"
  )
  #summary(fiveDrinks_mod_3)
  fiveDrinks_mod_4 <- glm(
    fiveDrinks_var ~ Wpdrink,
    data = data, family = "binomial"
  )
  #summary(fiveDrinks_mod_4)
}
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment