rm(list = ls())
library(tidyverse)
library(dfoptim)
library(rtdists)
library(rstan)
library(bayesplot)
source('rwald code.r') # from https://osf.io/3sp9t/
We can write down Equation 1 of the Tillman paper to simulate the process described by the race diffusion model:
rdm_path <- function(drift1, drift2, threshold, ndt, sp1=0, sp2=0, noise_constant=1, dt=0.001, max_rt=10) {
max_tsteps <- max_rt/dt
# initialize the diffusion process
tstep <- 0
x1 <- c(sp1*threshold) # vector of accumulated evidence at t=tstep
x2 <- c(sp2*threshold) # vector of accumulated evidence at t=tstep
time <- c(ndt)
# start accumulating
while (x1[tstep+1] < threshold & x2[tstep+1] < threshold & tstep < max_tsteps) {
x1 <- c(x1, x1[tstep+1] + rnorm(mean=drift1*dt, sd=noise_constant*sqrt(dt), n=1))
x2 <- c(x2, x2[tstep+1] + rnorm(mean=drift2*dt, sd=noise_constant*sqrt(dt), n=1))
time <- c(time, dt*tstep + ndt)
tstep <- tstep + 1
}
return (data.frame(time=rep(time, 2), dv=c(x1, x2), accumulator=c(rep(1, length(x1)), rep(2, length(x2)))))
}
And visualize it:
gen_drift1 = 1.7
gen_drift2 = 2
gen_threshold = 1
gen_ndt = .23
sim_path <- rdm_path(gen_drift1, gen_drift2, gen_threshold, gen_ndt)
ggplot(data = sim_path, aes(x = time, y = dv, color=factor(accumulator)))+
geom_line(size = .5) +
geom_hline(yintercept=gen_threshold, size=1.5)
To have a look at the whole distribution, though, we want to simulate more trials:
random_rdm <- function(n_trials, drift1, drift2, threshold, ndt, sp1=0, sp2=0, noise_constant=1, dt=0.001, max_rt=10) {
choice <- rep(NA, n_trials)
rt <- rep(NA, n_trials)
max_tsteps <- max_rt/dt
# initialize the diffusion process
tstep <- 0
x1 <- rep(sp1*threshold, n_trials) # vector of accumulated evidence at t=tstep
x2 <- rep(sp2*threshold, n_trials) # vector of accumulated evidence at t=tstep
ongoing <- rep(TRUE, n_trials) # have the accumulators reached the bound?
# start accumulating
while (sum(ongoing) > 0 & tstep < max_tsteps) {
x1[ongoing] <- x1[ongoing] + rnorm(mean=drift1*dt, sd=noise_constant*sqrt(dt), n=sum(ongoing))
x2[ongoing] <- x2[ongoing] + rnorm(mean=drift2*dt, sd=noise_constant*sqrt(dt), n=sum(ongoing))
tstep <- tstep + 1
# ended trials
ended1 <- (x1 >= threshold)
ended2 <- (x2 >= threshold)
# store results and filter out ended trials
if(sum(ended1) > 0) {
choice[ended1 & ongoing] <- 1
rt[ended1 & ongoing] <- dt*tstep + ndt
ongoing[ended1] <- FALSE
}
if(sum(ended2) > 0) {
choice[ended2 & ongoing] <- 2
rt[ended2 & ongoing] <- dt*tstep + ndt
ongoing[ended2] <- FALSE
}
}
return (data.frame(trial=seq(1, n_trials), choice=choice, rt=rt))
}
And have a look at the average performance and shape of the RT distributions:
sim_data <- random_rdm(n_trials=1000, drift1=.7, drift2=1.2, threshold=1.5, ndt=.23)
summary(sim_data)
## trial choice rt
## Min. : 1.0 Min. :1.000 Min. :0.3150
## 1st Qu.: 250.8 1st Qu.:1.000 1st Qu.:0.7518
## Median : 500.5 Median :2.000 Median :1.0115
## Mean : 500.5 Mean :1.643 Mean :1.1430
## 3rd Qu.: 750.2 3rd Qu.:2.000 3rd Qu.:1.3980
## Max. :1000.0 Max. :2.000 Max. :4.7470
ggplot(data = sim_data, mapping = aes(x = rt, fill = factor(choice))) +
geom_histogram(binwidth=.05, alpha = .3, position="identity")
The same results can be obtained with the R code attached to the original paper, which can be downloaded and loaded in R:
sim_data <- rWaldRace(n=1000, v=c(.7, 1.2), B=1.5, A=0, t0=.23, gf = 0)
summary(sim_data)
## RT R
## Min. :0.3985 Min. :1.00
## 1st Qu.:0.7441 1st Qu.:1.00
## Median :0.9866 Median :2.00
## Mean :1.1444 Mean :1.64
## 3rd Qu.:1.3671 3rd Qu.:2.00
## Max. :4.8739 Max. :2.00
In the same file, we can also find the likelihood function.
log_likelihood_rdm <- function(par, data, ll_threshold=1e-10) {
# par order: drift1, drift2, threshold, ndt
density <- rep(NA, dim(data)[1])
data$RT <- (data$RT - par[4]) # shift the distribution by the NDT
# dWald: density for single accumulator
# pWald: cumulative density for single accumulator
density[data$R==1] <- dWald(data$RT[data$R==1], v=par[1], B=par[3], A=0)*(1 - pWald(data$RT[data$R==1], v=par[2], B=par[3], A=0))
density[data$R==2] <- dWald(data$RT[data$R==2], v=par[2], B=par[3], A=0)*(1 - pWald(data$RT[data$R==2], v=par[1], B=par[3], A=0))
density[density <= ll_threshold] = ll_threshold # put a threhsold on very low likelihoods for computability
return(sum(log(density)))
}
starting_values = c(1, 1, 1, .1) # set some starting values
print(log_likelihood_rdm(starting_values, data=sim_data))
## [1] -2033.958
fit1 <- nmkb(par = starting_values,
fn = function (x) log_likelihood_rdm(x, data=sim_data),
lower = c(0, 0, 0, 0),
upper = c(10, 10, 10, 5),
control = list(maximize = TRUE))
print(fit1$par) # print estimated parameters
## [1] 0.6788203 1.1721012 1.4287021 0.2572723
We can also recover the generating parameters of the simulated data with stan, to assess th model’s identifialbility.
First, we need to prepare our data for stan:
sim_data_for_stan = list(
N = dim(sim_data)[1],
choices = sim_data$R,
rt = sim_data$RT
)
And then we can fit the model:
fit1 <- stan(
file = "stan_models/RDM.stan", # Stan program
data = sim_data_for_stan, # named list of data
chains = 2, # number of Markov chains
warmup = 1000, # number of warmup iterations per chain
iter = 3000, # total number of iterations per chain
cores = 2 # number of cores (could use one per chain)
)
Compare the generating parameters with the recovered ones and check for convergence looking at the Rhat measures:
print(fit1, pars = c("transf_drift1", "transf_drift2", "transf_threshold", "transf_ndt"))
## Inference for Stan model: RDM.
## 2 chains, each with iter=3000; warmup=1000; thin=1;
## post-warmup draws per chain=2000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## transf_drift1 0.68 0 0.07 0.55 0.64 0.68 0.73 0.81 1133 1
## transf_drift2 1.17 0 0.06 1.05 1.13 1.17 1.22 1.29 1010 1
## transf_threshold 1.44 0 0.07 1.31 1.39 1.43 1.48 1.58 924 1
## transf_ndt 0.25 0 0.02 0.21 0.24 0.25 0.27 0.29 1040 1
##
## Samples were drawn using NUTS(diag_e) at Wed Sep 8 23:24:18 2021.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
And (visually) assess the model’s convergence as well as some more sampling diagnostics:
traceplot(fit1, pars = c("transf_drift1", "transf_drift2", "transf_threshold", "transf_ndt"), inc_warmup = FALSE, nrow = 2)
sampler_params <- get_sampler_params(fit1, inc_warmup = TRUE)
summary(do.call(rbind, sampler_params), digits = 2)
## accept_stat__ stepsize__ treedepth__ n_leapfrog__ divergent__ energy__
## Min. :0.00 Min. : 0.0028 Min. :0.0 Min. : 1 Min. :0.0000 Min. :1255
## 1st Qu.:0.88 1st Qu.: 0.2186 1st Qu.:3.0 1st Qu.: 7 1st Qu.:0.0000 1st Qu.:1257
## Median :0.96 Median : 0.2438 Median :3.0 Median : 15 Median :0.0000 Median :1258
## Mean :0.89 Mean : 0.2587 Mean :3.2 Mean : 14 Mean :0.0073 Mean :1260
## 3rd Qu.:0.99 3rd Qu.: 0.2438 3rd Qu.:4.0 3rd Qu.: 15 3rd Qu.:0.0000 3rd Qu.:1260
## Max. :1.00 Max. :14.3855 Max. :6.0 Max. :127 Max. :1.0000 Max. :2823
More plotting:
posterior <- as.matrix(fit1)
plot_title <- ggtitle("Posterior distributions",
"with medians and 95% intervals")
mcmc_areas(posterior,
pars = c("transf_drift1", "transf_drift2", "transf_threshold", "transf_ndt"),
prob = 0.95) + plot_title