library(tidyverse)
library(readxl)
library(lme4)
# NOTE(review): the summary() outputs below show 'lmerModLmerTest' and
# Satterthwaite p-values, which require library(lmerTest) -- confirm it is
# attached in the session that produced this output.

# Load the Bressoux pupil-level dataset (609 pupils, 38 variables).
data <- read_excel("~/git/r-seminar/data/Bressoux Data AnPsycho.xls")

# Quick look at the structure: achievement scores at grade 3 (FRAN3/MATH3)
# and grade 4 (FRAN4/MATH4), plus school (ECOLE2) and classroom (CLASSE2)
# identifiers used as grouping levels below.
glimpse(data)
## Rows: 609
## Columns: 38
## $ NUMELEVE  <dbl> 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 716, 717, 718, 719, 720, 72…
## $ FRAN4     <dbl> -1.03844718, 0.05081523, -1.32558075, 0.24899764, -1.32558075, 1.14062612, -0.91424034, -0.254804…
## $ CPBIS     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ CE1BIS    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CE2BIS    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CM1BIS    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CM2BIS    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ MATH4     <dbl> -0.84660133, -0.90465749, -0.90465749, 0.06410083, -0.62877082, 0.69927881, -1.20330629, -1.81509…
## $ ECOLE2    <dbl> 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5…
## $ CLASSE2   <dbl> 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8…
## $ COURS2    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ RDBLT2    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ FRAN3     <dbl> 1.13978958, -0.25171360, 0.97295339, 1.13978958, -2.00298384, 1.59399505, -1.01594421, 1.38446085…
## $ MATH3     <dbl> 0.36853256, -0.95115243, 0.67302730, 1.59547256, -1.10303698, 0.84497577, -1.10303698, -0.5383836…
## $ MOIS      <dbl> 10, 11, 10, 4, 11, 5, 12, 2, 8, 12, 12, 4, 12, 11, 7, 2, 8, 5, 11, 3, 8, 9, 11, 10, 8, 2, 12, 9, …
## $ ANNEE     <dbl> 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 9…
## $ FRAT      <dbl> 3, 2, 2, 1, 2, 2, 2, 2, 3, 1, 2, 3, 2, 2, 2, 3, 1, 1, 2, 3, 1, 1, 1, 4, 1, 1, 1, 2, 2, 3, 1, 1, 1…
## $ PREEL     <dbl> 3, 3, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ CSPP      <dbl> 6, 4, 1, 7, 4, 4, 6, 2, 2, 3, 5, 4, 3, 5, 6, 6, 3, 4, 2, 4, 2, 6, 3, 4, 5, 4, 3, 4, 3, 5, 4, 6, 5…
## $ CSPM      <dbl> 4, 7, 7, 6, 7, 4, 7, 7, 4, 2, 5, 5, 4, 4, 6, 6, 4, 4, 4, 8, 5, 5, 8, 5, 8, 7, 3, 4, 8, 6, 4, 3, 8…
## $ FILLE     <dbl> 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0…
## $ ANCENS2   <dbl> 21, 21, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 28, 28, 28, 28, 28, 28, 28, 28, 1…
## $ NBEL2     <dbl> 15, 15, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 22, 22, 22, 22, 22, 22, 22, 22, 2…
## $ NBCOURS2  <dbl> 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ arti      <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ sup       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0…
## $ inter     <dbl> 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0…
## $ empl      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1…
## $ ouvr      <dbl> 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ autr      <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cmult     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ cmultnomb <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ mfran3    <dbl> -0.009005236, -0.009005236, -0.009005236, -0.009005236, -0.009005236, -0.009005236, -0.009005236,…
## $ mmath3    <dbl> 0.04217022, 0.04217022, 0.04217022, 0.04217022, 0.04217022, 0.04217022, 0.04217022, 0.04217022, 0…
## $ msup      <dbl> 0.08080808, 0.08080808, 0.08080808, 0.08080808, 0.08080808, 0.08080808, 0.08080808, 0.08080808, 0…
## $ mouvr     <dbl> 0.2727273, 0.2727273, 0.2727273, 0.2727273, 0.2727273, 0.2727273, 0.2727273, 0.2727273, 0.2727273…
## $ stdfran3  <dbl> 0.9908583, 0.9908583, 0.9908583, 0.9908583, 0.9908583, 0.9908583, 0.9908583, 0.9908583, 0.9908583…
## $ stdmath3  <dbl> 1.003498, 1.003498, 1.003498, 1.003498, 1.003498, 1.003498, 1.003498, 1.00349…

Now it’s your turn

  1. Create a new variable in the dataset, called improvement_math, which is the difference between MATH4 and MATH3; and improvement_french which is the difference between FRAN4 and FRAN3.
# Gain scores: grade-4 score minus grade-3 score, per subject.
data <- mutate(data,
               improvement_math = MATH4 - MATH3,
               improvement_french = FRAN4 - FRAN3)
  2. Run a multilevel model (separately for math and French), in which the improvement in math and French is predicted by at least 2 variables of your choice in the dataset, and that takes the school level into account. Run a random slope model, a random intercept model, and a random slope and intercept model.
# Random intercept model (math): each school (ECOLE2) gets its own baseline
# improvement; siblings (FRAT) x gender (FILLE) as fixed effects.
randintmodel <- lmer(improvement_math ~ FRAT * FILLE + (1 | ECOLE2), data = data)

# Profile confidence intervals: all fixed-effect CIs straddle zero.
confint(randintmodel)
## Computing profile confidence intervals ...
##                   2.5 %    97.5 %
## .sig01       0.05678531 0.2578345
## .sigma       0.74594285 0.8357127
## (Intercept) -0.23360387 0.1389840
## FRAT        -0.06964690 0.1199475
## FILLE       -0.27800769 0.2004732
## FRAT:FILLE  -0.13623633 0.1243906
# Random slope model (math): school-specific slopes for FRAT, FILLE and
# their interaction, but no random intercept (the 0 + ... term).
# NOTE(review): the singular fit means the random-effects covariance is
# degenerate (e.g. the -0.87 correlation below) -- interpret with caution.
randslopemodel <- lmer(improvement_math ~ FRAT * FILLE + (0 + FRAT * FILLE | ECOLE2), data = data)
## boundary (singular) fit: see ?isSingular
summary(randslopemodel)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: improvement_math ~ FRAT * FILLE + (0 + FRAT * FILLE | ECOLE2)
##    Data: data
## 
## REML criterion at convergence: 1463.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.5806 -0.6509  0.0144  0.6231  2.9874 
## 
## Random effects:
##  Groups   Name       Variance  Std.Dev. Corr       
##  ECOLE2   FRAT       0.0064235 0.08015             
##           FILLE      0.0188100 0.13715   0.24      
##           FRAT:FILLE 0.0001447 0.01203   0.26 -0.87
##  Residual            0.6168941 0.78543             
## Number of obs: 609, groups:  ECOLE2, 16
## 
## Fixed effects:
##               Estimate Std. Error         df t value Pr(>|t|)
## (Intercept)  -0.048480   0.086606 581.204796  -0.560    0.576
## FRAT          0.027640   0.053458  67.171533   0.517    0.607
## FILLE        -0.050445   0.129298  44.764266  -0.390    0.698
## FRAT:FILLE   -0.001028   0.067833 267.329947  -0.015    0.988
## 
## Correlation of Fixed Effects:
##            (Intr) FRAT   FILLE 
## FRAT       -0.786              
## FILLE      -0.664  0.546       
## FRAT:FILLE  0.610 -0.641 -0.824
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
# Random intercept & slope model (math): school-specific intercepts plus
# slopes for FRAT, FILLE and their interaction.
# NOTE(review): singular fit again (correlations of 1.00 below) -- the
# random-effects structure is likely too rich for 16 schools.
randintslopemodel <- lmer(improvement_math ~ FRAT * FILLE + (FRAT * FILLE | ECOLE2), data = data)
## boundary (singular) fit: see ?isSingular
summary(randintslopemodel)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: improvement_math ~ FRAT * FILLE + (FRAT * FILLE | ECOLE2)
##    Data: data
## 
## REML criterion at convergence: 1463
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.5995 -0.6537  0.0349  0.6154  2.9962 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr             
##  ECOLE2   (Intercept) 0.008807 0.09385                   
##           FRAT        0.009791 0.09895  -0.58            
##           FILLE       0.004390 0.06626   0.83 -0.03      
##           FRAT:FILLE  0.001287 0.03587   1.00 -0.52  0.87
##  Residual             0.614540 0.78393                   
## Number of obs: 609, groups:  ECOLE2, 16
## 
## Fixed effects:
##               Estimate Std. Error         df t value Pr(>|t|)
## (Intercept)  -0.040259   0.091026  39.806128  -0.442    0.661
## FRAT          0.021253   0.056561  13.754566   0.376    0.713
## FILLE        -0.063532   0.125206 155.115752  -0.507    0.613
## FRAT:FILLE    0.006797   0.068990 107.748543   0.099    0.922
## 
## Correlation of Fixed Effects:
##            (Intr) FRAT   FILLE 
## FRAT       -0.807              
## FILLE      -0.627  0.517       
## FRAT:FILLE  0.626 -0.657 -0.814
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
# Random intercept model (French): same specification as for math.
# Note this reassigns randintmodel, overwriting the math fit above.
randintmodel <- lmer(improvement_french ~ FRAT * FILLE + (1 | ECOLE2), data = data)

# Again, every fixed-effect CI includes zero.
confint(randintmodel)
## Computing profile confidence intervals ...
##                   2.5 %     97.5 %
## .sig01       0.05201971 0.27449039
## .sigma       0.71702100 0.80413848
## (Intercept) -0.22447345 0.14253539
## FRAT        -0.13170548 0.05174996
## FILLE       -0.16515852 0.29629057
## FRAT:FILLE  -0.08529853 0.16596707
# Random slope model (French): school-specific slopes only (no intercept).
# NOTE(review): +/-1.00 random-effect correlations below indicate a
# degenerate (singular) fit; the model is over-parameterized.
randslopemodel <- lmer(improvement_french ~ FRAT * FILLE + (0 + FRAT * FILLE | ECOLE2), data = data)
## boundary (singular) fit: see ?isSingular
summary(randslopemodel)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: improvement_french ~ FRAT * FILLE + (0 + FRAT * FILLE | ECOLE2)
##    Data: data
## 
## REML criterion at convergence: 1402
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2966 -0.6298  0.0095  0.6142  3.1726 
## 
## Random effects:
##  Groups   Name       Variance  Std.Dev.  Corr       
##  ECOLE2   FRAT       9.758e-04 0.0312373            
##           FILLE      4.788e-02 0.2188170  1.00      
##           FRAT:FILLE 2.836e-07 0.0005326 -1.00 -1.00
##  Residual            5.724e-01 0.7565455            
## Number of obs: 602, groups:  ECOLE2, 16
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)  -0.02134    0.08324 576.65377  -0.256    0.798
## FRAT         -0.03795    0.04718 243.53189  -0.805    0.422
## FILLE         0.02810    0.13201  43.89253   0.213    0.832
## FRAT:FILLE    0.03113    0.06430 469.06788   0.484    0.628
## 
## Correlation of Fixed Effects:
##            (Intr) FRAT   FILLE 
## FRAT       -0.846              
## FILLE      -0.628  0.609       
## FRAT:FILLE  0.617 -0.705 -0.759
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
# Random intercept & slope model (French).
# NOTE(review): besides the singular fit, this model also reports a
# convergence warning (negative eigenvalue) -- results are unreliable;
# a simpler random-effects structure would be preferable.
randintslopemodel <- lmer(improvement_french ~ FRAT * FILLE + (FRAT * FILLE | ECOLE2), data = data)
## boundary (singular) fit: see ?isSingular
## Warning: Model failed to converge with 1 negative eigenvalue: -1.4e+00
summary(randintslopemodel)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: improvement_french ~ FRAT * FILLE + (FRAT * FILLE | ECOLE2)
##    Data: data
## 
## REML criterion at convergence: 1400.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2724 -0.6230  0.0062  0.5812  3.1828 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr             
##  ECOLE2   (Intercept) 0.030009 0.17323                   
##           FRAT        0.002267 0.04762  -1.00            
##           FILLE       0.008040 0.08967   0.20 -0.20      
##           FRAT:FILLE  0.007076 0.08412   1.00 -1.00  0.23
##  Residual             0.569065 0.75436                   
## Number of obs: 602, groups:  ECOLE2, 16
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)
## (Intercept) -0.04296    0.09508 12.10320  -0.452    0.659
## FRAT        -0.03279    0.04800 45.77754  -0.683    0.498
## FILLE        0.04875    0.12133 14.63906   0.402    0.694
## FRAT:FILLE   0.02246    0.06842 24.04909   0.328    0.746
## 
## Correlation of Fixed Effects:
##            (Intr) FRAT   FILLE 
## FRAT       -0.853              
## FILLE      -0.586  0.564       
## FRAT:FILLE  0.672 -0.744 -0.770
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
  3. (extra) Collapse the data across classrooms. Can you look at the effect of the number of students per classroom on the improvements in math and French, taking into account the school level as well?
# Collapse to one row per classroom (CLASSE2). NBEL2 (class size) and
# ECOLE2 (school id) are constant within a classroom, so mean() simply
# carries them through. (Original had a stray trailing comma in group_by.)
grouped <- group_by(data, CLASSE2)
collapsed <- summarise(grouped,
                       mean_improvement_math = mean(improvement_math, na.rm = TRUE),
                       mean_improvement_french = mean(improvement_french, na.rm = TRUE),
                       NBEL2 = mean(NBEL2),
                       ECOLE2 = mean(ECOLE2))
collapsed
## # A tibble: 32 x 5
##    CLASSE2 mean_improvement_math mean_improvement_french NBEL2 ECOLE2
##      <dbl>                 <dbl>                   <dbl> <dbl>  <dbl>
##  1       1                0.0992                  -0.131    27      1
##  2       2                0.350                   -0.126    15      2
##  3       3               -0.724                   -0.506    25      3
##  4       4               -0.294                   -0.508    26      3
##  5       5                0.0660                   0.421    26      3
##  6       6                0.560                    0.303    24      3
##  7       7               -0.303                   -0.100    22      4
##  8       8               -0.764                    0.936    23      5
##  9       9               -0.0604                  -0.133    26      5
## 10      10               -0.519                    0.153    25      5
## # … with 22 more rows
# Random intercept model (math): class size (NBEL2) as predictor, with
# school-level intercepts. (Original comment said "intercept & slope", but
# (1 | ECOLE2) contains no random slope.)
randintmodel_size_math <- lmer(mean_improvement_math ~ NBEL2 + (1 | ECOLE2), data = collapsed)
## boundary (singular) fit: see ?isSingular
confint(randintmodel_size_math)
## Computing profile confidence intervals ...
## Warning in nextpar(mat, cc, i, delta, lowcut, upcut): Last two rows have identical or NA .zeta values: using minstep
## Warning in FUN(X[[i]], ...): non-monotonic profile for .sig01
## Warning in confint.thpr(pp, level = level, zeta = zeta): bad spline fit for .sig01: falling back to linear
## interpolation
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
##                   2.5 %     97.5 %
## .sig01       0.00000000 0.26159820
## .sigma       0.32853078 0.53801996
## (Intercept) -0.74659262 0.59586205
## NBEL2       -0.02933113 0.02995233
# Random intercept model (French): class size (NBEL2) as predictor, with
# school-level intercepts. (Original comment said "intercept & slope", but
# (1 | ECOLE2) contains no random slope.)
randintmodel_size_french <- lmer(mean_improvement_french ~ NBEL2 + (1 | ECOLE2), data = collapsed)
## boundary (singular) fit: see ?isSingular
confint(randintmodel_size_french)
## Computing profile confidence intervals ...
## Warning in FUN(X[[i]], ...): non-monotonic profile for .sig01
## Warning in confint.thpr(pp, level = level, zeta = zeta): bad spline fit for .sig01: falling back to linear
## interpolation
##                   2.5 %     97.5 %
## .sig01       0.00000000 0.22222182
## .sigma       0.29533576 0.48365798
## (Intercept) -0.78892056 0.41789139
## NBEL2       -0.01919235 0.03410106

There seems to be no effect of the number of students per classroom on the mean improvement in either subject.