---
title: "Descriptive statistics"
author: "Tera Letzring"
date: "September 2017"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(psych)
```

```{r read-data}
# Read the data from the current working directory. When the document is
# knitted, chunks are evaluated in the folder that contains this file by
# default, so no setwd() call is needed.
#
# stringsAsFactors = TRUE is set explicitly: since R 4.0.0 text columns are
# read as character by default, and summary() below only prints frequency
# counts for factor columns.
SDdata <- read.table(
  "schooldays.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)
head(SDdata)
```

```{r descriptive-statistics}
# Columns are referenced explicitly (SDdata$... or with(SDdata, ...)) instead
# of attach()-ing the data set, so a same-named object in the workspace can
# never silently mask a column.

# Basic descriptives for the number of days absent
table(SDdata$absent)     # frequency distribution
mean(SDdata$absent)
median(SDdata$absent)
sd(SDdata$absent)        # standard deviation
var(SDdata$absent)       # variance
range(SDdata$absent)     # returns the minimum and maximum scores
quantile(SDdata$absent)  # quartiles: 0%, 25%, 50%, 75%, 100%
IQR(SDdata$absent)       # interquartile range

# Mean, median, 1st and 3rd quartiles, min, and max for numeric variables;
# frequency counts for factor variables.
summary(SDdata)

# describe() is from the psych package, so psych must be loaded to use it.
# Results include item name, item number, number of valid cases, mean, sd,
# median, trimmed mean, mad (median absolute deviation), min, max, range,
# skew, kurtosis, and standard error. Statistics are also provided for
# non-numeric variables, but an asterisk is placed by the variable name.
describe(SDdata)

# Mode: build a frequency table of the observed values, then keep the
# value(s) with the highest count. The result is a character vector (the
# table's names) and contains more than one value when there are ties.
temp <- table(as.vector(SDdata$absent))
names(temp)[temp == max(temp)]

# Describe data by one grouping variable (describeBy() is in the psych
# package).
describeBy(SDdata$absent, SDdata$gender, mat = TRUE)

# Describe data by two grouping variables.
describeBy(SDdata$absent, list(SDdata$gender, SDdata$learner), mat = TRUE)

# Mean days absent for every combination of the levels of learner, race, and
# school. The result is a 3-dimensional array; it is computed once, printed,
# and then reused for the export below.
new.means <- with(
  SDdata,
  tapply(
    absent,
    list(learner = learner, race = race, school = school),
    mean,
    na.rm = TRUE
  )
)
new.means

# Output to a .csv file. write.csv() converts the 3-dimensional array to a
# data frame first, so the file has one row per learner level and one column
# per race-by-school combination.
write.csv(new.means, "new.means.csv", row.names = TRUE)
```