library(tidyverse)
library(readxl)
library(lme4)
library(lmerTest) # provides the Satterthwaite t-tests shown in the summary() output below
data = read_excel('~/git/r-seminar/data/Bressoux Data AnPsycho.xls')
glimpse(data)
## Rows: 609
## Columns: 38
## $ NUMELEVE <dbl> 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 716, 717, 718, 719, 720, 72…
## $ FRAN4 <dbl> -1.03844718, 0.05081523, -1.32558075, 0.24899764, -1.32558075, 1.14062612, -0.91424034, -0.254804…
## $ CPBIS <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ CE1BIS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CE2BIS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CM1BIS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CM2BIS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ MATH4 <dbl> -0.84660133, -0.90465749, -0.90465749, 0.06410083, -0.62877082, 0.69927881, -1.20330629, -1.81509…
## $ ECOLE2 <dbl> 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5…
## $ CLASSE2 <dbl> 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8…
## $ COURS2 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ RDBLT2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ FRAN3 <dbl> 1.13978958, -0.25171360, 0.97295339, 1.13978958, -2.00298384, 1.59399505, -1.01594421, 1.38446085…
## $ MATH3 <dbl> 0.36853256, -0.95115243, 0.67302730, 1.59547256, -1.10303698, 0.84497577, -1.10303698, -0.5383836…
## $ MOIS <dbl> 10, 11, 10, 4, 11, 5, 12, 2, 8, 12, 12, 4, 12, 11, 7, 2, 8, 5, 11, 3, 8, 9, 11, 10, 8, 2, 12, 9, …
## $ ANNEE <dbl> 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 9…
## $ FRAT <dbl> 3, 2, 2, 1, 2, 2, 2, 2, 3, 1, 2, 3, 2, 2, 2, 3, 1, 1, 2, 3, 1, 1, 1, 4, 1, 1, 1, 2, 2, 3, 1, 1, 1…
## $ PREEL <dbl> 3, 3, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ CSPP <dbl> 6, 4, 1, 7, 4, 4, 6, 2, 2, 3, 5, 4, 3, 5, 6, 6, 3, 4, 2, 4, 2, 6, 3, 4, 5, 4, 3, 4, 3, 5, 4, 6, 5…
## $ CSPM <dbl> 4, 7, 7, 6, 7, 4, 7, 7, 4, 2, 5, 5, 4, 4, 6, 6, 4, 4, 4, 8, 5, 5, 8, 5, 8, 7, 3, 4, 8, 6, 4, 3, 8…
## $ FILLE <dbl> 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0…
## $ ANCENS2 <dbl> 21, 21, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 28, 28, 28, 28, 28, 28, 28, 28, 1…
## $ NBEL2 <dbl> 15, 15, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 22, 22, 22, 22, 22, 22, 22, 22, 2…
## $ NBCOURS2 <dbl> 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ arti <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ sup <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0…
## $ inter <dbl> 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0…
## $ empl <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1…
## $ ouvr <dbl> 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ autr <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cmult <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ cmultnomb <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ mfran3 <dbl> -0.009005236, -0.009005236, -0.009005236, -0.009005236, -0.009005236, -0.009005236, -0.009005236,…
## $ mmath3 <dbl> 0.04217022, 0.04217022, 0.04217022, 0.04217022, 0.04217022, 0.04217022, 0.04217022, 0.04217022, 0…
## $ msup <dbl> 0.08080808, 0.08080808, 0.08080808, 0.08080808, 0.08080808, 0.08080808, 0.08080808, 0.08080808, 0…
## $ mouvr <dbl> 0.2727273, 0.2727273, 0.2727273, 0.2727273, 0.2727273, 0.2727273, 0.2727273, 0.2727273, 0.2727273…
## $ stdfran3 <dbl> 0.9908583, 0.9908583, 0.9908583, 0.9908583, 0.9908583, 0.9908583, 0.9908583, 0.9908583, 0.9908583…
## $ stdmath3 <dbl> 1.003498, 1.003498, 1.003498, 1.003498, 1.003498, 1.003498, 1.003498, 1.003498, 1.003498, 1.00349…
We create two new variables: improvement_math, which is the difference between MATH4 and MATH3; and improvement_french, which is the difference between FRAN4 and FRAN3.

data = mutate(data,
              improvement_math = MATH4 - MATH3,
              improvement_french = FRAN4 - FRAN3)
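As a quick sanity check (a sketch added here, not part of the original analysis), we can summarise the two new columns; missing FRAN3/FRAN4 scores propagate into improvement_french:

# Sketch: inspect the new improvement scores (assumes the mutate() above has run)
summarise(data,
          across(c(improvement_math, improvement_french),
                 list(mean = ~ mean(.x, na.rm = TRUE),
                      sd = ~ sd(.x, na.rm = TRUE),
                      n_missing = ~ sum(is.na(.x)))))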
# Run a random intercept model
randintmodel = lmer(improvement_math ~ FRAT*FILLE + (1 | ECOLE2), data = data)
confint(randintmodel)
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.05678531 0.2578345
## .sigma 0.74594285 0.8357127
## (Intercept) -0.23360387 0.1389840
## FRAT -0.06964690 0.1199475
## FILLE -0.27800769 0.2004732
## FRAT:FILLE -0.13623633 0.1243906
# Run a random slope model
randslopemodel = lmer(improvement_math ~ FRAT*FILLE + (0 + FRAT*FILLE | ECOLE2), data = data)
## boundary (singular) fit: see ?isSingular
summary(randslopemodel)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: improvement_math ~ FRAT * FILLE + (0 + FRAT * FILLE | ECOLE2)
## Data: data
##
## REML criterion at convergence: 1463.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5806 -0.6509 0.0144 0.6231 2.9874
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ECOLE2 FRAT 0.0064235 0.08015
## FILLE 0.0188100 0.13715 0.24
## FRAT:FILLE 0.0001447 0.01203 0.26 -0.87
## Residual 0.6168941 0.78543
## Number of obs: 609, groups: ECOLE2, 16
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.048480 0.086606 581.204796 -0.560 0.576
## FRAT 0.027640 0.053458 67.171533 0.517 0.607
## FILLE -0.050445 0.129298 44.764266 -0.390 0.698
## FRAT:FILLE -0.001028 0.067833 267.329947 -0.015 0.988
##
## Correlation of Fixed Effects:
## (Intr) FRAT FILLE
## FRAT -0.786
## FILLE -0.664 0.546
## FRAT:FILLE 0.610 -0.641 -0.824
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
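The singular fit warning means the estimated random-effects covariance matrix sits on the boundary of its parameter space; note the FRAT:FILLE variance estimate close to zero above. One way to diagnose this, shown here only as a sketch, is lme4's rePCA(), which indicates how many random-effect dimensions the data actually support:

# Sketch: PCA of the estimated random-effects covariance; components with
# near-zero standard deviation suggest an over-specified random-effects structure
summary(rePCA(randslopemodel))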
# Run a random intercept & slope model
randintslopemodel = lmer(improvement_math ~ FRAT*FILLE + (FRAT*FILLE | ECOLE2), data = data)
## boundary (singular) fit: see ?isSingular
summary(randintslopemodel)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: improvement_math ~ FRAT * FILLE + (FRAT * FILLE | ECOLE2)
## Data: data
##
## REML criterion at convergence: 1463
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5995 -0.6537 0.0349 0.6154 2.9962
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ECOLE2 (Intercept) 0.008807 0.09385
## FRAT 0.009791 0.09895 -0.58
## FILLE 0.004390 0.06626 0.83 -0.03
## FRAT:FILLE 0.001287 0.03587 1.00 -0.52 0.87
## Residual 0.614540 0.78393
## Number of obs: 609, groups: ECOLE2, 16
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.040259 0.091026 39.806128 -0.442 0.661
## FRAT 0.021253 0.056561 13.754566 0.376 0.713
## FILLE -0.063532 0.125206 155.115752 -0.507 0.613
## FRAT:FILLE 0.006797 0.068990 107.748543 0.099 0.922
##
## Correlation of Fixed Effects:
## (Intr) FRAT FILLE
## FRAT -0.807
## FILLE -0.627 0.517
## FRAT:FILLE 0.626 -0.657 -0.814
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
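Given the singular fits of the more complex models, it is worth asking whether the extra random effects improve the fit at all. A likelihood-ratio comparison is one option (a sketch; anova() refits the models with ML, and tests of variance parameters on the boundary are conservative):

# Sketch: compare the three random-effects specifications for the math outcome
anova(randintmodel, randslopemodel, randintslopemodel)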
# Run a random intercept model
randintmodel = lmer(improvement_french ~ FRAT*FILLE + (1 | ECOLE2), data = data)
confint(randintmodel)
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## .sig01 0.05201971 0.27449039
## .sigma 0.71702100 0.80413848
## (Intercept) -0.22447345 0.14253539
## FRAT -0.13170548 0.05174996
## FILLE -0.16515852 0.29629057
## FRAT:FILLE -0.08529853 0.16596707
# Run a random slope model
randslopemodel = lmer(improvement_french ~ FRAT*FILLE + (0 + FRAT*FILLE | ECOLE2), data = data)
## boundary (singular) fit: see ?isSingular
summary(randslopemodel)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: improvement_french ~ FRAT * FILLE + (0 + FRAT * FILLE | ECOLE2)
## Data: data
##
## REML criterion at convergence: 1402
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2966 -0.6298 0.0095 0.6142 3.1726
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ECOLE2 FRAT 9.758e-04 0.0312373
## FILLE 4.788e-02 0.2188170 1.00
## FRAT:FILLE 2.836e-07 0.0005326 -1.00 -1.00
## Residual 5.724e-01 0.7565455
## Number of obs: 602, groups: ECOLE2, 16
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.02134 0.08324 576.65377 -0.256 0.798
## FRAT -0.03795 0.04718 243.53189 -0.805 0.422
## FILLE 0.02810 0.13201 43.89253 0.213 0.832
## FRAT:FILLE 0.03113 0.06430 469.06788 0.484 0.628
##
## Correlation of Fixed Effects:
## (Intr) FRAT FILLE
## FRAT -0.846
## FILLE -0.628 0.609
## FRAT:FILLE 0.617 -0.705 -0.759
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
# Run a random intercept & slope model
randintslopemodel = lmer(improvement_french ~ FRAT*FILLE + (FRAT*FILLE | ECOLE2), data = data)
## boundary (singular) fit: see ?isSingular
## Warning: Model failed to converge with 1 negative eigenvalue: -1.4e+00
summary(randintslopemodel)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: improvement_french ~ FRAT * FILLE + (FRAT * FILLE | ECOLE2)
## Data: data
##
## REML criterion at convergence: 1400.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2724 -0.6230 0.0062 0.5812 3.1828
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ECOLE2 (Intercept) 0.030009 0.17323
## FRAT 0.002267 0.04762 -1.00
## FILLE 0.008040 0.08967 0.20 -0.20
## FRAT:FILLE 0.007076 0.08412 1.00 -1.00 0.23
## Residual 0.569065 0.75436
## Number of obs: 602, groups: ECOLE2, 16
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.04296 0.09508 12.10320 -0.452 0.659
## FRAT -0.03279 0.04800 45.77754 -0.683 0.498
## FILLE 0.04875 0.12133 14.63906 0.402 0.694
## FRAT:FILLE 0.02246 0.06842 24.04909 0.328 0.746
##
## Correlation of Fixed Effects:
## (Intr) FRAT FILLE
## FRAT -0.853
## FILLE -0.586 0.564
## FRAT:FILLE 0.672 -0.744 -0.770
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
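The ±1.00 random-effect correlations and the negative-eigenvalue warning suggest this structure is too rich for 16 schools. A common simplification, sketched below with an illustrative object name of our own, is to drop the random-effect correlations (written out explicitly; because FRAT and FILLE are numeric this matches the double-bar || syntax):

# Sketch: uncorrelated random intercept and slopes, to avoid the degenerate
# +/-1.00 correlation estimates
randslopemodel_zerocorr = lmer(improvement_french ~ FRAT*FILLE +
  (1 | ECOLE2) + (0 + FRAT | ECOLE2) + (0 + FILLE | ECOLE2) +
  (0 + FRAT:FILLE | ECOLE2),
  data = data)
summary(randslopemodel_zerocorr)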
# Aggregate to the class level
grouped = group_by(data, CLASSE2)
collapsed = summarise(grouped,
mean_improvement_math = mean(improvement_math, na.rm=TRUE),
mean_improvement_french = mean(improvement_french, na.rm=TRUE),
NBEL2 = mean(NBEL2),
ECOLE2 = mean(ECOLE2))
collapsed
## # A tibble: 32 x 5
## CLASSE2 mean_improvement_math mean_improvement_french NBEL2 ECOLE2
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.0992 -0.131 27 1
## 2 2 0.350 -0.126 15 2
## 3 3 -0.724 -0.506 25 3
## 4 4 -0.294 -0.508 26 3
## 5 5 0.0660 0.421 26 3
## 6 6 0.560 0.303 24 3
## 7 7 -0.303 -0.100 22 4
## 8 8 -0.764 0.936 23 5
## 9 9 -0.0604 -0.133 26 5
## 10 10 -0.519 0.153 25 5
## # … with 22 more rows
# Run a random intercept model on the class-level data
randintslopemodel = lmer(mean_improvement_math ~ NBEL2 + (1 | ECOLE2), data = collapsed)
## boundary (singular) fit: see ?isSingular
confint(randintslopemodel)
## Computing profile confidence intervals ...
## Warning in nextpar(mat, cc, i, delta, lowcut, upcut): Last two rows have identical or NA .zeta values: using minstep
## Warning in FUN(X[[i]], ...): non-monotonic profile for .sig01
## Warning in confint.thpr(pp, level = level, zeta = zeta): bad spline fit for .sig01: falling back to linear
## interpolation
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## 2.5 % 97.5 %
## .sig01 0.00000000 0.26159820
## .sigma 0.32853078 0.53801996
## (Intercept) -0.74659262 0.59586205
## NBEL2 -0.02933113 0.02995233
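With only 32 classes in 16 schools, the school-level variance is estimated at essentially zero here, hence the singular fit. As a sketch, an ordinary regression offers a useful cross-check:

# Sketch: OLS fallback when the school random-intercept variance is ~0
summary(lm(mean_improvement_math ~ NBEL2, data = collapsed))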
# Run a random intercept model on the class-level data
randintslopemodel = lmer(mean_improvement_french ~ NBEL2 + (1 | ECOLE2), data = collapsed)
## boundary (singular) fit: see ?isSingular
confint(randintslopemodel)
## Computing profile confidence intervals ...
## Warning in FUN(X[[i]], ...): non-monotonic profile for .sig01
## Warning in confint.thpr(pp, level = level, zeta = zeta): bad spline fit for .sig01: falling back to linear
## interpolation
## 2.5 % 97.5 %
## .sig01 0.00000000 0.22222182
## .sigma 0.29533576 0.48365798
## (Intercept) -0.78892056 0.41789139
## NBEL2 -0.01919235 0.03410106
There seems to be no effect of the number of students per class (NBEL2) on mean improvement in either math or French: both confidence intervals for NBEL2 comfortably include zero.
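To make this visible, a class-level scatterplot is a quick check (a sketch; the ggplot code below is ours, not part of the original output):

# Sketch: class size vs. mean improvement in French at the class level
ggplot(collapsed, aes(x = NBEL2, y = mean_improvement_french)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Number of pupils in the class (NBEL2)",
       y = "Mean improvement in French")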