I'm looking to adapt the codes shared in Andrew Heiss' amazingly detailed page on conjoint experiment, more specifically computing conditional marginal means for categorical variable using the dataset candidate.
I would like to generate the same graphs, but instead of having two dichotomous variables (military - has served/not served; and political party - Democrat/Republican), I was wondering how I could use the same code if either or both my variables aren't dichotomous, in such a way that the graph output would make sense.
For example, let's say that there are four parties (Party A, Party B, Party C and Party D) and three military status (Not served/Served a couple of years/Served for more than two years), how can I adapt the group_diffs_terms matrix to compute all potential differences, for example:
Not served - Party A minus Party B Not served - Party A minus Party C Not served - Party A minus Party D Not served - Party B minus Party C Not served - Party B minus Party D Not served - Party C minus Party D
and again for all military status, ideally all combined in the same graph. Sorry if it's an easy question, I am very very new to R.
Thank you!
##############################################################################
##############################################################################
### Load packages
library(tidyverse) # ggplot, dplyr, and friends
library(haven) # Read Stata files
library(broom) # Convert model objects to tidy data frames
library(cregg) # Automatically calculate frequentist conjoint AMCEs and MMs
library(survey) # Panel-ish regression models
library(scales) # Nicer labeling functions
library(marginaleffects) # Calculate marginal effects
library(broom.helpers) # Add empty reference categories to tidy model data frames
library(ggforce) # For facet_col()
library(brms) # The best formula-based interface to Stan
library(tidybayes) # Manipulate Stan results in tidy ways
library(ggdist) # Fancy distribution plots
library(patchwork) # Combine ggplot plots
# Custom ggplot theme to make pretty plots
# Get the font at https://fonts.google.com/specimen/Jost
theme_nice <- function() {
theme_minimal(base_family = "Jost") +
theme(panel.grid.minor = element_blank(),
plot.title = element_text(family = "Jost", face = "bold"),
axis.title = element_text(family = "Jost Medium"),
axis.title.x = element_text(hjust = 0),
axis.title.y = element_text(hjust = 1),
strip.text = element_text(family = "Jost", face = "bold",
size = rel(0.75), hjust = 0),
strip.background = element_rect(fill = "grey90", color = NA))
}
# Set default theme and font stuff
theme_set(theme_nice())
update_geom_defaults("text", list(family = "Jost", fontface = "plain"))
update_geom_defaults("label", list(family = "Jost", fontface = "plain"))
# Party colors from the Urban Institute's data visualization style guide, for fun
# http://urbaninstitute.github.io/graphics-styleguide/
parties <- c("#1696d2", "#db2b27")
# Functions for formatting things as percentage points
label_pp <- label_number(accuracy = 1, scale = 100,
suffix = " pp.", style_negative = "minus")
label_amce <- label_number(accuracy = 0.1, scale = 100, suffix = " pp.",
style_negative = "minus", style_positive = "plus")
# Data from https://doi.org/10.7910/DVN/THJYQR
# It's public domain
candidate <- read_stata("XXXXXXX/candidate.dta") %>%
as_factor() # Convert all the Stata categories to factors
# Make a little lookup table for nicer feature labels
variable_lookup <- tribble(
~variable, ~variable_nice,
"atmilitary", "Military",
"atreligion", "Religion",
"ated", "Education",
"atprof", "Profession",
"atmale", "Gender",
"atinc", "Income",
"atrace", "Race",
"atage", "Age"
) %>%
mutate(variable_nice = fct_inorder(variable_nice))
##############################################################################
##############################################################################
## Subgroup differences in AMCEs and Marginal Means
# No respondent-level characteristics in their data, so we'll invent a pretend column
## Code for generating fake respondent_party column
# Wrap this in withr::with_seed() so that the randomness is reproducible but the
# overall document seed doesn't get set or messed up
withr::with_seed(1234, {
respondents_party_military <- candidate %>%
group_by(resID, atmilitary) %>%
# Find the proportion of times each respondent selected the candidate when
# military service history was "Served"
summarize(prob_select = mean(selected)) %>%
filter(atmilitary == "Served") %>%
select(-atmilitary) %>%
ungroup() %>%
# If a respondent selected the candidate more than 60% of the time when
# seeing that they had served in the military, there's a 90% chance they're
# a Republican. If they didn't select the military candidate 60% of the
# time, there's a 75% chance they're a Democrat.
mutate(respondent_party = case_when(
prob_select >= 0.6 ~
sample(
c("Democrat", "Republican"), n(),
replace = TRUE, prob = c(0.1, 0.9)
),
prob_select < 0.6 ~
sample(
c("Democrat", "Republican"), n(),
replace = TRUE, prob = c(0.75, 0.25)
)
)) %>%
mutate(respondent_party = factor(respondent_party))
})
candidate_fake <- candidate %>%
left_join(respondents_party_military, by = join_by(resID))
model_military_party <- lm(
selected ~ atmilitary * respondent_party,
data = candidate_fake
)
tidy(model_military_party)
##############################################################################
## Conditional Marginal Means
# To be thorough and allow for any type of regression family
# (logistic, multinomial) with any other types of covariates,
# manually with our own regression model fed through
# marginaleffects::marginal_means()
# By specifying cross = TRUE, get all combinations of
# military service and party
# (without it, we'd get separate marginal means for just
# the two levels of military and the two levels of party)
party_mms_mfx <- marginal_means(
model_military_party,
newdata = c("atmilitary", "respondent_party"),
cross = TRUE,
wts = "cells"
)
party_mms_mfx %>% as_tibble()
# To find the differences in those marginal means, Want the differences between rows 2 and 1
# (Republican - Democrat for "Not served") and between rows 4 and 3
# (Republican - Democrat for "Served")
group_diffs_terms <- matrix(
c(-1, 1, 0, 0,
0, 0, -1, 1),
ncol = 2
) %>%
magrittr::set_colnames(levels(candidate_fake$atmilitary))
group_diffs_terms
party_mms_mfx_diff <- marginal_means(
model_military_party,
newdata = c("atmilitary", "respondent_party"),
cross = TRUE,
wts = "cells",
hypothesis = group_diffs_terms
) %>%
as_tibble()
party_mms_mfx_diff
# Can plot these conditional marginal means along with the party-based differences
mm_party_plot1 <- party_mms_mfx %>%
as_tibble() %>%
mutate(estimate_nice = case_when(
estimate != 0 ~ label_percent()(estimate),
estimate == 0 ~ NA
)) %>%
ggplot(aes(x = estimate, y = atmilitary, color = respondent_party)) +
geom_vline(xintercept = 0.5) +
geom_pointrange(aes(xmin = conf.low, xmax = conf.high)) +
geom_label(
aes(label = estimate_nice),
position = position_dodge(width = -1.2),
size = 3.5, show.legend = FALSE
) +
scale_x_continuous(labels = label_percent()) +
scale_color_manual(values = parties) +
labs(
x = "Marginal means",
y = NULL,
color = NULL,
title = "Marginal means by respondent party"
) +
facet_wrap(vars("Military")) +
theme(
legend.position = "bottom",
legend.justification = "left",
legend.margin = margin(l = -7, t = -5)
)
mm_party_plot2 <- party_mms_mfx_diff %>%
mutate(estimate_nice = label_amce(estimate)) %>%
ggplot(aes(x = estimate, y = term)) +
geom_vline(xintercept = 0) +
geom_pointrange(aes(
xmin = conf.low, xmax = conf.high,
color = "Republican marginal mean − Democrat marginal mean"
)) +
geom_label(
aes(label = estimate_nice), size = 3.5,
nudge_y = 0.3, color = "#85144b"
) +
scale_x_continuous(labels = label_pp) +
scale_color_manual(values = "#85144b") +
labs(
x = "Difference in marginal means",
y = NULL,
color = NULL,
title = "Difference in marginal means by respondent party",
subtitle = "Positive differences = Republicans prefer the level"
) +
facet_wrap(vars("Military")) +
theme(
legend.position = "bottom",
legend.justification = "left",
legend.margin = margin(l = -7, t = -5)
)
mm_party_plot1 | mm_party_plot2