---
title: "Regression"
author: "Tera Letzring"
date: "October 2017"
output: html_document
---

```{r setup, include=FALSE}
# Echo the code of every chunk in the rendered document.
knitr::opts_chunk$set(echo = TRUE)
# QuantPsyc provides lm.beta(), used below for standardized coefficients.
library(QuantPsyc)
```

```{r read-in-data}
# Read the data from the folder that contains this file. When the document is
# knitted, chunks are evaluated with the document's folder as the working
# directory, so no setwd() call is needed. When running lines interactively,
# first point the session at this folder (in RStudio: Session > Set Working
# Directory > To Source File Location).
mydata <- read.table("USairpollution.csv", header = TRUE, sep = ",")

# The data frame is deliberately not attach()ed: every model below receives
# it through the `data` argument, which avoids masking other objects.
head(mydata)
```

```{r regression-models}
# Basic linear model predicting SO2 from manu, with summary output.
Model1 <- lm(SO2 ~ manu, data = mydata)
summary(Model1)

# Same model with na.action = na.exclude. Cases with missing data are left out
# of the fit (as they already are under the factory-default na.omit setting),
# but residuals() and fitted() are padded with NA so they stay aligned with
# the rows of mydata.
Model2 <- lm(SO2 ~ manu, data = mydata, na.action = na.exclude)
summary(Model2)

# Standardized coefficients, using lm.beta() from the QuantPsyc package.
lm.beta(Model1)
```

```{r confidence-intervals}
# Confidence intervals for the model coefficients. confint() comes from the
# stats package that ships with R, not from QuantPsyc.
confint(Model1)
```

```{r check-assumptions}
# Diagnostic plots to check for adherence to assumptions.
plot(Model1)

# Histogram to check for normality of the distribution of residuals, with a
# standard normal curve overlaid (run all lines at the same time).
# rstudent() from the stats package returns the studentized residuals, so the
# chunk does not depend on MASS happening to be attached by another package.
sresid <- rstudent(Model1)
hist(sresid, freq = FALSE, main = "Distribution of Studentized Residuals")
xfit <- seq(min(sresid), max(sresid), length.out = 40)
yfit <- dnorm(xfit)
lines(xfit, yfit)
```