---
title: "Scatterplots, Covariance, and Correlation"
author: "Tera Letzring"
date: "September 2017"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(car)
library(plyr)
library(psych)
```

```{r read in data}
# Read the data. The file name is a relative path, so the CSV must be in the
# working directory; when knitting, that is the folder containing this .Rmd
# file by default, so no setwd() call is needed.
mydata <- read.table("USairpollution.csv", header = TRUE, sep = ",")

# Columns are accessed through `mydata` (data = mydata / with(mydata, ...))
# rather than attach(), so nothing is added to the search path.
str(mydata)
```

```{r scatterplots}
# Basic scatterplot
with(mydata, plot(temp, precip, xlab = "Temperature", ylab = "Precipitation"))

# Basic scatterplot with a regression fit line. To get this to work, you have
# to run both lines of code at the same time (abline() draws on the plot that
# is currently open).
with(mydata, plot(temp, precip, xlab = "Temperature", ylab = "Precipitation"))
abline(lm(precip ~ temp, data = mydata))

### With enhanced features using the scatterplot function from the car package.
# legend.title  to set the title of the grouping variable
# smoother      indicates whether to include a smooth line
#               (default is TRUE = show line, use FALSE to not show)
# reg.line      indicates whether to include a regression line
#               (default is TRUE = show line, use FALSE to not show)
# boxplots      to add boxplots outside the plot, for the x variable ("x"),
#               the y variable ("y"), or both variables ("xy")
# lwd           width of the linear regression line (default is 1)
# jitter        makes all data points visible; a list of jitter factors for
#               x and/or y, where a factor of 1 is the default amount
# xlim, ylim    to change the limits of the axes
# legend.coords to specify where the legend for the grouping variable should
#               be - options are "bottomright", "bottom", "bottomleft",
#               "left", "topleft", "top", "topright", "right", "center".
#               Default is above the figure on the left
# by.groups     TRUE to get separate regression lines per grouping variable,
#               FALSE to get one line
# grid          use FALSE to not show the background grid
#
# NOTE(review): these argument names follow the car 2.x interface; car >= 3.0
# appears to have renamed several of them (e.g. smoother -> smooth,
# reg.line -> regLine, legend.* -> legend = list(...)) - confirm against the
# installed version of car.
scatterplot(
  precip ~ temp | popSB,
  data = mydata,
  xlab = "Temperature",
  ylab = "Precipitation",
  legend.title = "Population",
  labels = row.names(mydata),
  smoother = FALSE,
  boxplots = "xy",
  # Named jitter factors: an unnamed list("xy") has no x/y elements, so it
  # would not jitter anything.
  jitter = list(x = 1, y = 1),
  xlim = c(40, 80),
  lwd = 3,
  legend.coords = "topleft",
  by.groups = FALSE
)
```

```{r scatterplot matrix}
# Basic scatterplot matrix
scatterplotMatrix(
  ~ SO2 + manu + precip + temp,
  data = mydata,
  labels = row.names(mydata)
)

# Basic scatterplot matrix with grouping variable
scatterplotMatrix(
  ~ SO2 + manu + precip + temp | popSB,
  data = mydata,
  labels = row.names(mydata)
)

### Scatterplot matrix with options.
# diagonal: density, boxplot, histogram, qqplot, none (density is the default)
# nclass = number of levels for histogram
# See the enhanced-features scatterplot above for descriptions of the other
# options
scatterplotMatrix(
  ~ SO2 + manu + precip + temp | popSB,
  data = mydata,
  labels = row.names(mydata),
  var.labels = c(
    "Sulfur Dioxide", "Manufacturing", "Precipitation", "Temperature"
  ),
  diagonal = c("histogram"),
  nclass = 6,
  smoother = FALSE,
  by.groups = TRUE,
  lwd = 2
)
```

```{r covariance and correlation}
# For two variables: the covariance, then the correlation test
# (cor.test reports the correlation with its 95% CI and p-value)
with(mydata, cov(temp, precip))
with(mydata, cor.test(temp, precip))

### Calculate covariances and correlations for all variables in a dataframe or
### matrix. All variables must be numeric. The use option is for telling R how
### to handle missing data, other options are all.obs (for no missing data)
### and complete.obs (listwise deletion). The method option is for telling R
### the type of correlation to compute, other options are spearman and kendall.

# Create a dataset with only numeric variables by dropping column 8
# (presumably the non-numeric grouping variable - verify against the CSV)
mydata.num <- mydata[-8]
cov(mydata.num, use = "pairwise.complete.obs")
cor(mydata.num, use = "pairwise.complete.obs", method = "pearson")

### Compute correlations between certain variables (not a whole matrix)
# These variables will be the rows; the numbers indicate variables 1 and 2
# from the data set
x <- mydata[1:2]
# These variables will be the columns; the numbers indicate variables 3
# through 7
y <- mydata[3:7]
cov(x, y)
cor(x, y)

### Calculate correlation and covariance for different levels of a categorical
### variable using a function from the plyr package
# ddply = input is a dataframe and output is a dataframe
# popSB is the variable we are splitting by
# temp and precip are the variables being correlated
ddply(mydata, .(popSB), summarise, "covariance" = cov(temp, precip))
ddply(mydata, .(popSB), summarise, "correlation" = cor(temp, precip))
```

```{r compare correlations}
### Compare independent correlations using a function in the psych package
# NULL is needed because there is not a third correlation
# n2= is optional
# (r1, r2, NULL, N)
paired.r(.52, .12, NULL, 60, n2 = 50)

### Compare dependent correlations
# (r31, r32, r12, N)
paired.r(.20, .45, .15, 103)
```

```{r convert correlations}
# r to Fisher Z
fisherz(.52)

# Fisher Z to r
fisherz2r(.576)

# Convert r to the effect size d
r2d(.52)
```