Skip to content
Snippets Groups Projects
Commit 5bbb589b authored by irisqlin's avatar irisqlin
Browse files

Update finalproject

parent 67ad6ed1
No related branches found
No related tags found
No related merge requests found
......@@ -18,39 +18,45 @@ names(data)
# predictor data cleaning
data <- na.omit(data)

# Each 30-day frequency column is cleaned in the same three steps:
#   1. strip the " days" / " day" / " or more" suffixes,
#   2. replace each range label with the midpoint of the range,
#   3. convert to numeric only when averaging.
# The suffixes must be stripped while the column is still character: wrapping
# str_remove_all() in as.numeric() at step 1 turns every label that is not a
# bare number (e.g. "3 to 5") into NA, so the recodes in step 2 never match
# and the next na.omit() silently drops those rows.

# X30.5drinks
# NOTE(review): this column matches "3 to 5"-style labels and has no "1-2" or
# "20-29" recode, while the other three columns use "3-5"-style labels.
# Confirm both spellings against the raw data.
data$X30.5drinks <- str_remove_all(data$X30.5drinks, " days")
data$X30.5drinks <- str_remove_all(data$X30.5drinks, " day")
data$X30.5drinks <- str_remove_all(data$X30.5drinks, " or more")
data$X30.5drinks[data$X30.5drinks == "3 to 5"] <- 4
data$X30.5drinks[data$X30.5drinks == "6 to 9"] <- 7.5
data$X30.5drinks[data$X30.5drinks == "10 to 19"] <- 14.5
# The column is still character after the recodes, so convert before averaging.
mean(as.numeric(data$X30.5drinks))

# X30.cig
data$X30.cig <- str_remove_all(data$X30.cig, " days")
data$X30.cig <- str_remove_all(data$X30.cig, " day")
data$X30.cig <- str_remove_all(data$X30.cig, " or more")
data$X30.cig[data$X30.cig == "1-2"] <- 1.5
data$X30.cig[data$X30.cig == "3-5"] <- 4
data$X30.cig[data$X30.cig == "6-9"] <- 7.5
data$X30.cig[data$X30.cig == "10-19"] <- 14.5
data$X30.cig[data$X30.cig == "20-29"] <- 24.5
mean(as.numeric(data$X30.cig))

# X30drink
data$X30drink <- str_remove_all(data$X30drink, " days")
data$X30drink <- str_remove_all(data$X30drink, " day")
data$X30drink <- str_remove_all(data$X30drink, " or more")
data$X30drink[data$X30drink == "1-2"] <- 1.5
data$X30drink[data$X30drink == "3-5"] <- 4
data$X30drink[data$X30drink == "6-9"] <- 7.5
data$X30drink[data$X30drink == "10-19"] <- 14.5
data$X30drink[data$X30drink == "20-29"] <- 24.5
data <- na.omit(data)
mean(as.numeric(data$X30drink))

# X30marijuana
data$X30marijuana <- str_remove_all(data$X30marijuana, " days")
data$X30marijuana <- str_remove_all(data$X30marijuana, " day")
data$X30marijuana <- str_remove_all(data$X30marijuana, " or more")
data$X30marijuana[data$X30marijuana == "1-2"] <- 1.5
data$X30marijuana[data$X30marijuana == "3-5"] <- 4
data$X30marijuana[data$X30marijuana == "6-9"] <- 7.5
data$X30marijuana[data$X30marijuana == "10-19"] <- 14.5
data$X30marijuana[data$X30marijuana == "20-29"] <- 24.5
data <- na.omit(data)
mean(as.numeric(data$X30marijuana))
# response data cleaning
data$times.moved[data$times.moved == "None"] <- 0
......@@ -138,17 +144,19 @@ data$talk.adult[data$talk.adult == "Strongly disagree"] <- 1
data <- na.omit(data)
mean(as.numeric(data$talk.adult))
# grades: map the grade labels to a 0-4 score (higher = better grades).
# Only this column's own labels are recoded. The agree/disagree labels belong
# to the attitude items (e.g. talk.adult) and would share codes 1-4 with the
# grade labels if they were applied here as well.
data$grades[data$grades == "Excellent"] <- 4
data$grades[data$grades == "Above average"] <- 3
data$grades[data$grades == "Average"] <- 2
data$grades[data$grades == "Below average"] <- 1
data$grades[data$grades == "Failing"] <- 0
data <- na.omit(data)
# The recoded column is converted to numeric only for the summary.
mean(as.numeric(data$grades))
# Wpdrink: map the "how wrong" labels to a 1-4 score (higher = more wrong),
# with "Dont know" placed between "A little wrong" and "Wrong" at 2.5.
# Only this column's own labels are recoded. The agree/disagree labels belong
# to the attitude items (e.g. talk.adult) and would share codes 1-4 with the
# labels below if they were applied here as well.
# NOTE(review): "Dont know" is matched without an apostrophe; confirm this is
# the spelling used in the raw data.
data$Wpdrink[data$Wpdrink == "Very wrong"] <- 4
data$Wpdrink[data$Wpdrink == "Wrong"] <- 3
data$Wpdrink[data$Wpdrink == "Dont know"] <- 2.5
data$Wpdrink[data$Wpdrink == "A little wrong"] <- 2
data$Wpdrink[data$Wpdrink == "Not wrong at all"] <- 1
data <- na.omit(data)
# The recoded column is converted to numeric only for the summary.
mean(as.numeric(data$Wpdrink))
......@@ -160,3 +168,15 @@ data <- na.omit(data)
mean(as.numeric(data$N.safe))
```
```{r}
# new response binary variable
# NOTE(review): the assignment below has no right-hand side before the chunk
# closes, so this chunk cannot be parsed as written. The definition of
# `substance` still has to be supplied.
data$substance <-
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment