---
title: "Lab 2 - Manipulating data"
author: "Tera"
date: "August 2017"
output: html_document
---

```{r setup, include=FALSE}
# If echo = TRUE the source code is included in the output file; if
# echo = FALSE only the results appear in the output file.
library(knitr)
knitr::opts_chunk$set(echo = TRUE)
```

```{r reading in other data files and some basic functions}
# The data files are read with relative paths. knitr evaluates chunks in the
# folder that contains this document by default, so no setwd() call is needed
# (the old `x <- getwd(); setwd(x)` pair set the directory to itself).
IntCon <- read.table("IntCon.csv", header = TRUE, sep = ",")
head(IntCon)

schooldays1 <- read.table("schooldays1.csv", sep = ",", header = TRUE)
schooldays2 <- read.table("schooldays2.csv", sep = ",", header = TRUE)
head(schooldays1)
head(schooldays2)
```

```{r calculate scale scores}
library(multicon)

# Create the key for scoring. Items with negative signs will be reverse scored.
ICkeys.list <- list(IC = c(-1, 2, -3, 4, 5, -6, 7, -8, -9))

# Score the measure and store the result in an object called ICout.
# Rows with less than 80% of the data will be given NA.
# Use [, -1] to ignore column 1, because it isn't part of the measure.
ICout <- scoreTest(IntCon[, -1], ICkeys.list, rel = TRUE)

# Descriptives for the scale score
describe(ICout$scores)

# Combine the scores with the original dataset. cbind() merges two data frames
# by columns. The rows must line up correctly for this to work, and both data
# sets must have the same number of rows.
IntCon <- cbind(IntCon, ICout$scores)
head(IntCon)

# Output reliability information
ICout$rel

# Output to file
write.csv(IntCon, "IntCon.scores.csv", row.names = FALSE)
```

```{r sort data}
# View() opens a GUI data viewer, so it is only called in interactive
# sessions; it is skipped when the document is knitted non-interactively.

# Sort by a single variable in increasing order
absent.sorted1inc <- schooldays1[order(schooldays1$absent), ]
if (interactive()) View(absent.sorted1inc)

# Sort by a single variable in decreasing order
absent.sorted1dec <- schooldays1[order(schooldays1$absent, decreasing = TRUE), ]
if (interactive()) View(absent.sorted1dec)

# Sort by 2 variables
absent.sorted2 <- schooldays1[order(schooldays1$learner, schooldays1$absent), ]
if (interactive()) View(absent.sorted2)
```

```{r subset data frames by variables and values}
names(schooldays1)

# Keep variables 1 through 3
Absence1 <- schooldays1[1:3]
names(Absence1)

# Keep variables 1 and 6
Absence2 <- schooldays1[c(1, 6)]
names(Absence2)

# Keep the variables listed by name
Absence3 <- schooldays1[c("SID", "race", "absent")]
names(Absence3)

# Discard variable 2
Absence4 <- schooldays1[-2]
names(Absence4)

# Discard variables 2, 3, and 4
Absence5 <- schooldays1[c(-2, -3, -4)]
names(Absence5)
```

```{r subsetting data frames with dplyr package and the filter function}
### %>% is called a pipe operator
# Load the dplyr library to use the filter function
library(dplyr)

# Inside filter(), columns are referred to by their bare names: the condition
# is evaluated against the data frame that was piped in, so there is no need
# to (and it is safer not to) write schooldays1$... again.
# View() is only called in interactive sessions (it opens a GUI viewer).

# Keep only females
Female <- schooldays1 %>%
  filter(gender == "female")
if (interactive()) View(Female)

# Keep only aboriginal females
Aborg.Female <- schooldays1 %>%
  filter(gender == "female" & race == "aboriginal")
if (interactive()) View(Aborg.Female)

# Keep students in levels F0 and F1. The symbol | means or.
F0F1 <- schooldays1 %>%
  filter(school == "F0" | school == "F1")
if (interactive()) View(F0F1)

# Keep students who were absent less than 10 days
days10 <- schooldays1 %>%
  filter(absent < 10)
if (interactive()) View(days10)
```

```{r merging data sets}
# Bind by rows. The data frames must have the same variables, but the
# variables do not have to be in the same order.
schooldays.all <- rbind(schooldays1, schooldays2)
tail(schooldays.all)

# Merge two data frames by identifying one or more common variables. The data
# frames may have differing numbers of rows, but the final data set will only
# contain the rows that are in common.
all.merge <- merge(schooldays.all, IntCon, by = "SID")
head(all.merge)
tail(all.merge)

# Merge but keep only some variables
some.merge <- merge(schooldays.all, IntCon[c("SID", "IC")], by = "SID")
head(some.merge)
```