13  Differential item functioning (DIF) using proportion correct

13.1 Load in the dataset

# NOTE: the original `rm(list = ls())` was removed. Wiping the global
# environment does not produce a clean session (attached packages and options
# persist) and silently destroys the caller's objects when this script is
# sourced. Restart R if a fresh session is needed.
library(tidyverse)

# Read the raw responses, drop rows with no recorded gender, and keep only
# `gender` plus the score columns from `Q1_score` through `Q20_score`.
responses <- read_csv("data/responses.csv") %>%
  drop_na(gender) %>%
  select(gender, Q1_score:Q20_score)

13.2 Pivot the data so we have three columns: gender, question and score

# Reshape from one column per question to one row per (respondent, question):
# the former column names go into `question`, the cell values into `score`.
long_responses <- pivot_longer(
  responses,
  cols = Q1_score:Q20_score,
  names_to = "question",
  values_to = "score"
)

13.3 Summarise the data by gender and question

# Mean score for every gender x question combination, ignoring missing
# scores. `.groups = "drop"` returns an ungrouped tibble; without it the
# result stays grouped by `gender` (summarise() only peels off the last
# grouping level) and dplyr prints a message about the regrouping.
response_summary <- long_responses %>%
  group_by(gender, question) %>%
  summarise(mean_score = mean(score, na.rm = TRUE), .groups = "drop")

# Question names ordered from lowest to highest mean score within the
# reference group (gender == "F").
# NOTE(review): assumes the gender column codes one group as "F"; if it does
# not, this vector is empty -- confirm against the data.
question_difficulty <- response_summary %>%
  filter(gender == "F") %>%
  arrange(mean_score) %>%
  pull(question)

13.4 Create a factor for the response summary using the question difficulty order

# Add `item`: the question name as an ordered factor whose levels follow
# `question_difficulty`, so anything keyed on `item` uses that ordering
# rather than alphabetical order.
ordered_responses <- mutate(
  response_summary,
  item = factor(question, levels = question_difficulty, ordered = TRUE)
)

13.5 Create a factor for gender

# Convert `gender` to a factor. The summary this is derived from was built
# with group_by(gender, question) + summarise(), which can leave `gender` as
# an active grouping variable; ungroup first so the grouping column is not
# rewritten inside a grouped mutate() and no stale grouping reaches the plot.
ordered_responses <- ordered_responses %>%
  ungroup() %>%
  mutate(gender = factor(gender))

13.6 Plot the ordered responses as a line graph, grouped by gender with item on the x-axis, mean_score on the y-axis

# Line plot of mean score per item, one line per gender. The aesthetic for
# line pattern is `linetype`; the previous `line_style` is not a ggplot2
# aesthetic, so the lines were distinguished by colour only.
p <- ggplot(
  ordered_responses,
  aes(
    x = item,
    y = mean_score,
    group = gender,
    colour = gender,
    linetype = gender
  )
)
p <- p + geom_line()
p <- p + theme_light()
# Rotate the x-axis labels so the question names do not overlap.
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1))
p