rm(list=ls())
library(tidyverse)
library(ggplot2)
library(sirt)
<- read_csv('data/decisions-1.csv')
decisions <- read_csv('data/persons-1.csv')
persons glimpse(decisions)
glimpse(persons)
30 Comparative Judgement Practical
Create a new Comparative Judgement task on www.nomoremarking.com and upload 8 pdfs with the numbers 1 to 8. Judge the numbers 80 times and download the decisions file.
30.1 Fitting the Bradley-Terry Model
# get the person ids and their first names
<- persons %>%
persons select(Code, `First Name`) %>%
rename(id = Code, name = `First Name`)
# go through the decisions and add a column with the name of the person who was chosen or not chosen in each case
<- decisions %>% left_join(persons, by = c('chosen' = 'id')) %>%
decisions rename(chosenName = name) %>%
left_join(persons, by = c('notChosen' = 'id')) %>%
rename(notChosenName = name)
# mutate chosenName and notChosenName to be factors
<- decisions %>%
decisions mutate(chosenName = factor(chosenName), notChosenName = factor(notChosenName))
# create a name for each judge
<- decisions %>% mutate(
decisions judge = paste0('judge_', judgeName)
)
# get a vector of judges
<- decisions %>% pull(judge)
judges
# format for the btm model
<- decisions %>%
decisions select(chosenName, notChosenName) %>%
mutate(result = 1)
# the package doesn't like tibbles?
<- as.data.frame(decisions)
decisions <- btm(decisions, judge=judges, fix.eta = 0, ignore.ties = TRUE, eps = 0.3) mod1
30.2 Explore the model results
See https://alexanderrobitzsch.github.io/sirt/reference/btm.html
30.2.1 Reliability
# The mle reliability, also known as separation reliability
print(mod1$mle.rel)
[1] 0.8900993
30.2.2 Person parameters
# The person parameters
::kable(mod1$effects, digits = 2) knitr
individual | id | Ntot | N1 | ND | N0 | raw | score | propscore | theta | se.theta | outfit | infit | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
8 | 8 | 8 | 24 | 24 | 0 | 0 | 24 | 23.70 | 0.99 | 6.42 | 1.86 | 0.01 | 0.05 |
7 | 7 | 7 | 21 | 17 | 0 | 4 | 17 | 16.81 | 0.80 | 3.56 | 1.38 | 0.04 | 0.19 |
6 | 6 | 6 | 20 | 16 | 0 | 4 | 16 | 15.82 | 0.79 | 2.65 | 1.15 | 0.05 | 0.19 |
5 | 5 | 5 | 20 | 12 | 0 | 8 | 12 | 11.94 | 0.60 | 0.81 | 1.04 | 0.06 | 0.13 |
4 | 4 | 4 | 20 | 7 | 0 | 13 | 7 | 7.09 | 0.35 | -0.91 | 1.17 | 0.05 | 0.16 |
3 | 3 | 3 | 19 | 4 | 0 | 15 | 4 | 4.17 | 0.22 | -2.01 | 1.16 | 0.05 | 0.15 |
2 | 2 | 2 | 21 | 3 | 0 | 18 | 3 | 3.21 | 0.15 | -4.10 | 1.21 | 0.04 | 0.10 |
1 | 1 | 1 | 21 | 0 | 0 | 21 | 0 | 0.30 | 0.01 | -6.41 | 1.92 | 0.02 | 0.09 |
30.2.3 Judge fit
# The judge fit
# Fit statistics (outfit and infit) for judges if judge is provided. In addition, average agreement of the rating with the mode of the ratings is calculated for each judge (at least three ratings per dyad has to be available for computing the agreement)
::kable(mod1$fit_judges, digits = 2) knitr
judge | outfit | infit | agree | |
---|---|---|---|---|
judge_1 | judge_1 | 0.06 | 0.23 | 1 |
judge_2 | judge_2 | 0.02 | 0.09 | 1 |
judge_3 | judge_3 | 0.05 | 0.14 | 1 |
judge_4 | judge_4 | 0.04 | 0.11 | 1 |
judge_5 | judge_5 | 0.02 | 0.13 | 1 |
judge_6 | judge_6 | 0.08 | 0.27 | 1 |
judge_7 | judge_7 | 0.03 | 0.08 | 1 |
judge_8 | judge_8 | 0.03 | 0.08 | 1 |
judge_9 | judge_9 | 0.03 | 0.07 | 1 |
30.2.4 Visualise the person parameters (model effects) along with the standard error
# plot the model effects with the standard errors
# add a column to the person parameters with the upper and lower limits using 1.96 * standard error
<- mod1$effects %>%
effects mutate(
lower = theta - 1.96 * se.theta,
upper = theta + 1.96 * se.theta
)
<- ggplot(effects, aes(x = individual, y = theta)) +
p geom_point() +
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2) +
theme_bw() +
labs(x = 'Person parameter', y = 'Person')
print(p)
30.2.5 Inspect the model probabilities
30.2.6 What is p1? p0? pD?
head(mod1$probs)
p1 p0 pD
[1,] 0.9963473 0.0036526689 6.100379e-45
[2,] 0.9998848 0.0001151765 1.085183e-45
[3,] 0.7509258 0.2490741530 4.373299e-44
[4,] 0.9906881 0.0093119089 9.712569e-45
[5,] 0.9099456 0.0900544355 2.894720e-44
[6,] 0.9959516 0.0040483851 6.421054e-45
30.3 Extension exercises
- Try some less accurate judging. Note the impact on the model parameters.
- Can you recreate the model probabilities?