# Run this to make sure you have the latest version of R and all the packages we'll use: #if( !( "installr" %in% installed.packages()[,"Package"] ) ){ install.packages("installr") } #library(installr); updateR() pkgs <- c( "lme4", "languageR", "devtools", "vioplot", "lmerTest", "car", "multcomp", "e1071" ) pkgs <- pkgs[ !(pkgs %in% installed.packages()[,"Package"]) ] if( length(pkgs)>0 ){ install.packages( pkgs[ !(pkgs %in% installed.packages()[,"Package"]) ] ) } if( !( "yarrr" %in% installed.packages()[,"Package"] ) ){ library(devtools); install_github("ndphillips/yarrr") } ################################ ### THE GENERAL LINEAR MODEL ### ################################ library(lme4) ### Simple example from Chick data # Let's just look at diets 1 and 2 cw <- data.frame( ChickWeight[ ChickWeight$Diet %in% c(1,2), ] ) # I'm not sure why they have this var as an ordered factor; make it unordered class(cw$Chick) <- "factor" # Fix the "weight" column name, I find it annoying that it's lowercase and the rest are capital colnames(cw)[ colnames(cw)=="weight" ] <- "Weight" # Make a linear model summary( mod <- lmer( Weight ~ Time + (1|Chick), cw ) )$coefficients # Plot the scatter plot( Weight ~ Time, cw, xaxs="i", yaxs="i", pch=16, ylim=c(0,350) ) # Plot the regression line abline( fixef(mod)[1], fixef(mod)[2], lwd=2, col="red" ) # Show the regression formula text( 8, 300, bquote( hat(italic(y))==.( round( fixef(mod)[1], 2 ) )~+~.( round( fixef(mod)[2], 2 ) )*italic(x)[time] ), col="red", cex=2 ) ### Simple example with a categorical predictor # Let's just look at diets 1 and 2 cw <- data.frame( ChickWeight[ ChickWeight$Diet %in% c(1,2), ] ) # I'm not sure why they have this var as an ordered factor; make it unordered class(cw$Chick) <- "factor" # Fix the "weight" column name, I find it annoying that it's lowercase and the rest are capital colnames(cw)[ colnames(cw)=="weight" ] <- "Weight" # Make a linear model summary( mod <- lmer( Weight ~ Diet + (1|Chick), cw ) )$coefficients # Plot the scatter (note that here I'm suppressing the normal x-axis and making a new one) plot( as.numeric( cw$Diet), cw$Weight, type="p", xaxt="n", col=rainbow(2)[cw$Diet], xlab=NA, pch=16, ylim=c(0,450) ) axis( 1, at=1:2, label=c(" Diet 1 (coded as \"0\")", "Diet 2 (coded as \"1\") ") ) # Plot the regression line for the intercept (Month 5) abline( fixef(mod)["(Intercept)"], 0, lwd=2, col=rainbow(2)[1] ) # Plot the regression line for the other months for( diet in 2:2 ){ abline( fixef(mod)["(Intercept)"] + fixef(mod)[paste0("Diet",diet)], 0, lwd=2, col=rainbow(2)[ diet ] ) } # Show the regression formula text( 1.5, 200, bquote( hat(italic(y))==.( round( fixef(mod)[1], 2 ) )~+~.( round( fixef(mod)[2], 2 ) )*italic(x)[diet2] ), col="purple", cex=2 ) ### Simple example with a polytomous categorical predictor # Let's look at all the diets cw <- data.frame( ChickWeight ) # I'm not sure why they have this var as an ordered factor; make it unordered class(cw$Chick) <- "factor" # Fix the "weight" column name, I find it annoying that it's lowercase and the rest are capital colnames(cw)[ colnames(cw)=="weight" ] <- "Weight" # Show the contrast matrix in the command window contrasts( cw$Diet ) # Make a linear model summary( mod <- lmer( Weight ~ Diet + (1|Chick), cw ) )$coefficients # Plot the scatter (note that here I'm suppressing the normal x-axis and making a new one) plot( jitter( as.numeric( cw$Diet), .25 ), cw$Weight, type="p", xaxt="n", col=rainbow(4)[cw$Diet], xlab=NA, pch=16, ylim=c(0,450), cex=.5 ) axis( 1, at=1:4, label=paste0( "Diet", 1:4 ) ) # Plot the regression line for the intercept (Diet 1) abline( fixef(mod)["(Intercept)"], 0, lwd=2, col=rainbow(4)[1] ) # Plot the regression line for the other diets for( diet in 2:4 ){ abline( fixef(mod)["(Intercept)"] + fixef(mod)[paste0("Diet",diet)], 0, lwd=2, col=rainbow(4)[ diet ] ) } # Show the regression coefficients text( 2.5, 400, paste0( names(fixef(mod)), ": ", round( fixef(mod), 2 ), collapse="\n" ), adj=1, cex=1.25 ) ### Example with a categorical*continuous interaction # Let's just look at diets 1 and 2 cw <- data.frame( ChickWeight[ ChickWeight$Diet %in% c(1,2), ] ) # I'm not sure why they have this var as an ordered factor; make it unordered class(cw$Chick) <- "factor" # Fix the "weight" column name, I find it annoying that it's lowercase and the rest are capital colnames(cw)[ colnames(cw)=="weight" ] <- "Weight" # Make a linear model summary( mod <- lmer( Weight ~ Time*Diet + (1|Chick), cw ) )$coefficients # Plot the scatter plot( Weight~Time, cw, col=c("red","blue")[cw$Diet], pch=16 ) # Plot the regression line for the baseline condition abline( fixef(mod)["(Intercept)"], fixef(mod)["Time"], lwd=3, col="red" ) # Plot the regression line for the nonbaseline condition abline( fixef(mod)["(Intercept)"] + fixef(mod)["Diet2"], fixef(mod)["Time"] + fixef(mod)["Time:Diet2"], lwd=3, col="blue" ) # add a legend legend( "left", legend=paste0( "Diet", 1:2 ), col=c("red","blue"), pch=16, lty=1, lwd=3, inset=.1) # Show the regression formula text( 8, 300, bquote( hat(italic(y))==.( round( fixef(mod)["(Intercept)"], 2 ) )~+~.( round( fixef(mod)["Diet2"], 2 ) )*italic(x)[diet2]~+~.( round( fixef(mod)["Time"], 2 ) )*italic(x)[time]~+~.( round( fixef(mod)["Time:Diet2"], 2 ) )*italic(x)[diet2%*%time] ), col="purple", cex=1 ) ### Example with a categorical*continuous interaction, centered continuous IV # Let's just look at diets 1 and 2 cw <- data.frame( ChickWeight[ ChickWeight$Diet %in% c(1,2), ] ) # I'm not sure why they have this var as an ordered factor; make it unordered class(cw$Chick) <- "factor" # Fix the "weight" column name, I find it annoying that it's lowercase and the rest are capital colnames(cw)[ colnames(cw)=="weight" ] <- "Weight" # center the continuous IV cw$cTime <- cw$Time - mean(cw$Time) # Make a linear model summary( mod <- lmer( Weight ~ cTime*Diet + (1|Chick), cw ) )$coefficients # Plot the scatter plot( Weight~cTime, cw, col=c("red","blue")[cw$Diet], pch=16 ) # Plot the regression line for the baseline condition abline( fixef(mod)["(Intercept)"], fixef(mod)["cTime"], lwd=3, col="red" ) # Plot the regression line for the nonbaseline condition abline( fixef(mod)["(Intercept)"] + fixef(mod)["Diet2"], fixef(mod)["cTime"] + fixef(mod)["cTime:Diet2"], lwd=3, col="blue" ) # add a legend legend( "left", legend=paste0( "Diet", 1:2 ), col=c("red","blue"), pch=16, lty=1, lwd=3, inset=.1) # Show the regression formula text( 8-10.6359, 300, bquote( hat(italic(y))==.( round( fixef(mod)["(Intercept)"], 2 ) )~+~.( round( fixef(mod)["Diet2"], 2 ) )*italic(x)[diet2]~+~.( round( fixef(mod)["cTime"], 2 ) )*italic(x)[time]~+~.( round( fixef(mod)["cTime:Diet2"], 2 ) )*italic(x)[diet2%*%time] ), col="purple", cex=1 ) ######################################## ### INTERACTIONS AND CONTRAST CODING ### ######################################## ### Crossed interaction # Let's look at all the diets cw <- data.frame( ChickWeight ) # I'm not sure why they have this var as an ordered factor; make it unordered class(cw$Chick) <- "factor" # Fix the "weight" column name, I find it annoying that it's lowercase and the rest are capital colnames(cw)[ colnames(cw)=="weight" ] <- "Weight" # center the continuous IV cw$cTime <- cw$Time - mean(cw$Time) # peek at a model with a crossed interaction summary( mod <- lmer( Weight ~ cTime*Diet + (1|Chick), cw ) )$coefficients # set up the space for plotting it plot( Weight ~ cTime, cw, type="n" ) # Plot the regression line for the intercept (Diet 1) abline( fixef(mod)["(Intercept)"], fixef(mod)["cTime"], lwd=2, col=rainbow(4)[1] ) # Plot the regression line for the other diets for( diet in 2:4 ){ abline( fixef(mod)["(Intercept)"] + fixef(mod)[paste0("Diet",diet)], fixef(mod)["cTime"] + fixef(mod)[paste0("cTime:Diet",diet)], lwd=2, col=rainbow(4)[ diet ] ) } # add a legend legend( "topright", legend=paste0( "Diet", 1:4 ), col=rainbow(4), lty=1, lwd=3, inset=.1) # Show the regression coefficients text( 8-10.6359, 250, paste0( names(fixef(mod)), ": ", round( fixef(mod), 2), ifelse( summary(mod)$coefficients[,"t value"]>=2,"*",""), collapse="\n" ), adj=1, cex=1.25 ) ### Nested interaction # Let's look at all the diets cw <- data.frame( ChickWeight ) # I'm not sure why they have this var as an ordered factor; make it unordered class(cw$Chick) <- "factor" # Fix the "weight" column name, I find it annoying that it's lowercase and the rest are capital colnames(cw)[ colnames(cw)=="weight" ] <- "Weight" # center the continuous IV cw$cTime <- cw$Time - mean(cw$Time) # peek at a model with a nested interaction summary( mod <- lmer( Weight ~ Diet/cTime + (1|Chick), cw ) )$coefficients # set up the space for plotting it plot( Weight ~ cTime, cw, type="n" ) # Plot the regression line for the intercept (Diet 1) abline( fixef(mod)["(Intercept)"], fixef(mod)["Diet1:cTime"], lwd=2, col=rainbow(4)[1] ) # Plot the regression line for the other diets for( diet in 2:4 ){ abline( fixef(mod)["(Intercept)"] + fixef(mod)[paste0("Diet",diet)], fixef(mod)[paste0("Diet",diet,":cTime")], lwd=2, col=rainbow(4)[ diet ] ) } # add a legend legend( "topright", legend=paste0( "Diet", 1:4 ), col=rainbow(4), lty=1, lwd=3, inset=.1) # Show the regression coefficients text( 8-10.6359, 250, paste0( names(fixef(mod)), ": ", round( fixef(mod), 2), ifelse( summary(mod)$coefficients[,"t value"]>=2,"*",""), collapse="\n" ), adj=1, cex=1.25 ) ################################ ### EXAMPLE TO WORK TOGETHER ### ################################ # Load the data load( url( "http://www.polyu.edu.hk/cbs/sjpolit/UCL_Rworkshop/File_S2.RData" ) ) # Load some custom functions (including the CIpirateplot function we'll use for plotting) source( url( "http://www.polyu.edu.hk/cbs/sjpolit/Steve_functions.txt" ) ) # Pull out just the critical (non-filler) conditions from the critical region crit <- RTdata[ RTdata$RegionNum==8 & RTdata$Quantifier!="filler", ] # Clean up factors (remove empty levels) crit$Quantifier <- factor( crit$Quantifier ) crit$Boundedness <- factor( crit$Boundedness ) crit$Item <- factor( crit$Item ) crit$RegionNum <- factor( crit$RegionNum ) # make a plot CIpirateplot( log(RT) ~ Boundedness+Quantifier, crit, bar.o=.1, ci.o=.25, make.legend=T ) ### We'll make some models together here... ### Illustrating how nested interactions can be useful # Pull out all regions of just the critical (non-filler) conditions crit_all <- RTdata[ RTdata$Quantifier!="filler", ] # Clean up factors (remove empty levels) crit_all$Quantifier <- factor( crit_all$Quantifier ) crit_all$Boundedness <- factor( crit_all$Boundedness ) crit_all$Item <- factor( crit_all$Item ) # Treat REgionNum as a factor crit_all$RegionNum <- factor( crit_all$RegionNum ) # crossed interaction summary( lmer( RT ~ RegionNum*Quantifier*Boundedness + (1|Subject) + (1|Item), crit_all ) )$coefficients # nested interaction summary( lmer( RT ~ RegionNum/(Quantifier/Boundedness) + (1|Subject) + (1|Item), crit_all ) )$coefficients ########################### ### DUMMY VS SUM CODING ### ########################### # Make an interaction model, with dummy coding summary( dummy_model <- lmer( RT ~ Quantifier*Boundedness + (1|Subject) + (1|Item), crit ) ) # Show what the condition means are ( means <- tapply( crit$RT, crit[,c("Boundedness","Quantifier")], mean ) ) # show the main effects, so we can see that these do not correspond to the dummy-coded terms summary( dummy_model )$coefficients ( quanteffect <- colMeans(means)[2] - colMeans(means)[1] ) ( boundeffect <- rowMeans(means)[2] - rowMeans(means)[1] ) ### Now we'll make a model with deviation coding # Deviation-code Quantifier contrasts(crit$Quantifier) <- rbind(-1, 1) colnames(contrasts(crit$Quantifier)) <- levels(crit$Quantifier)[2] contrasts( crit$Quantifier ) # And deviation-code Boundedness contrasts(crit$Boundedness) <- rbind(-1, 1) colnames(contrasts(crit$Boundedness)) <- levels(crit$Boundedness)[2] contrasts(crit$Boundedness) # Then redo our model, this time on deviation-coded data summary( deviation_model <- lmer( RT ~ Quantifier*Boundedness + (1|Subject) + (1|Item), crit ) ) # Use model comparison to prove that the models have the same fit anova( dummy_model, deviation_model ) # Peek at the main effects again to see that these correspond to the sum-coded terms summary( deviation_model )$coefficients quanteffect boundeffect ################################# ### INTERACTION CODING TRICKS ### ################################# # Load the data load( url( "http://www.polyu.edu.hk/cbs/sjpolit/UCL_Rworkshop/individualdifferences.RData" ) ) ### We'll try coding a few complicated interactions ############################## ### CUSTOM CONTRAST CODING ### ############################## ### This is a total hack, because I still don't really get non-orthogonal contrast coding, ### so for a 1*n design I instead just construct a dummy-coded model with no intercept ### (so that each coefficient is a condition mean) and then use glht contrasts to ### compare conditions # Make a dummy-coded model WITH NO INTERCEPT from the chick data model <- lm( weight ~ 0+Diet, ChickWeight ) # Set up a contrast contrast <- rbind( c( 0, -.5, 1, -.5 ), # Diet 3 vs. mean( Diet2, Diet4 ) c(1,0,0,-1), # Diet 1 vs. Diet 4 c(0,1,0,-1) ) # use glht to do the comparison library(multcomp) summary( glht( model, linfct=contrast ) ) # Look at the condition means to see that this test corresponds to our complex comparison ( means <- tapply( ChickWeight$weight, ChickWeight$Diet, mean ) ) means[3] - mean( means[c(2,4)] ) means[1] - means[4] ### Caveats!!! ### This method requires doing multiple contrasts; if your 'contrast' matrix has only one row ### it crashes (this wasn't the case in the past). So if you only have one contrast you care ### about, you need to add in some junk contrast in addition. ### It corrects the p-values for multiple comparisons. So if you want raw, uncorrected p-values, ### you can't get them from here. The estimates, SDs, and t-values, however, are unchanged, ### so you can get p-values from them in most of the other ways we will discuss. ### Now an example with orthogonal contrasts, done within the model instead of via glht # Set up a contrast matrix. First column compares avg(1,2) vs. avg(3,4). Second column compares # 1 vs 2, and third column compares 3 vs 4 m <- cbind( rbind(1,1,-1,-1), rbind(1,-1,0,0), rbind(0,0,1,-1) ) # Show that the contrasts are orthogonal (the sum of the row products is 0) apply(m,1,prod) # Show the model, feeding our contrast matrix in instead of the normal contrasts coefficients( lm( weight~Diet, ChickWeight, contrasts=list(Diet=m) ) ) # Show the means means <- tapply( ChickWeight$weight, ChickWeight$Diet, mean ) ) # Notice that the first coefficient corresponds to half the difference between (1,2) and (3,4) (mean(means[1:2]) - mean(means[3:4]) )/2 # and the second coefficient corresponds to half the difference between 1 and 2 (means[1] - means[2] )/2 # and the third, h alf the difference between 3 and 4 (means[3] - means[4] )/2 ### This can get much more complicated. For example, for a 3-level factor you can set up an interesting ### orthogonal contrast matrix: ### 1 0 ### -.5 1 ### -.5 -1 ### The first column compares level 1 vs. the mean of levels 2 and 3. The second compares level 2 to level ### 3. However, the coefficients get rather complicated since they're not all +-1. ############################################### ### DIRECT COMPARISON OF MODEL COEFFICIENTS ### ############################################### # The indices of the two coefficients to compare. 'ref' is the baseline/control/one that will be subtracted refcoef <- 2 comparecoef <- 4 # Gets the variance-covariance matrix varcov <- vcov(model) # numerator of the t-test (i.e., the difference) t_numerator <- fixef(model)[comparecoef] - fixef(model)[refcoef] # denominator of the t-test (i.e., the pooled variance) t_denominator <- sqrt(varcov[comparecoef,comparecoef] + varcov[refcoef,refcoef] - 2*varcov[comparecoef,refcoef]) # t ( t <- t_numerator / t_denominator ) # A p-value (approximated from an estimate of the dfs; we'll discuss p-values more next session ( p <- 2 * ( 1 - pt( abs(t), length(fixef(model))-2 ) ) )