How to make table combining multiple columns in R

405 Views Asked by At

Using the example data below, my goal is to create a table (publication-ready would be great, but fine if not) where I calculate what percent of each group within each column (city, race, and gender) attend, fail, or both.

So what percent of city=6 attend, what percent of city=6 fail, what percent of city=6 both. And then repeat for each group within city, each group within race, and each group within gender, with the end result being each of those as rows in the output table and attend, fail, and both being the 3 columns in the output table.

What I attempted was to calculate percentages for each group within each column separately, and then stack them all using kableExtra. The kableExtra attempt was so messy and was so incorrect that I didn't even save it, but this was how I started my calculations:

race_percentages <- d %>%
  group_by(race) %>%
  summarize(
    percent_attend = mean(attend) * 100,
    percent_fail = mean(fail) * 100,
    percent_both = mean(both) * 100)

gender_percentages <- d %>%
  group_by(gender) %>%
  summarize(
    percent_attend = mean(attend) * 100,
    percent_fail = mean(fail) * 100,
    percent_both = mean(both) * 100)

city_percentages <- d %>%
  group_by(city) %>%
  summarize(
    percent_attend = mean(attend) * 100,
    percent_fail = mean(fail) * 100,
    percent_both = mean(both) * 100)

So if there is some way to take those resulting data frames and stack them on top of each other, that would get close to what I'm hoping for as the end result.

A shortened example of how I hope the final table will be organized:

Group Attend Fail Both
Race1 X% X% X%
Race2 X% X% X%
Male X% X% X%
Female X% X% X%
City6 X% X% X%
City9 X% X% X%
City12 X% X% X%

Data:

d<-structure(list(city = structure(c(9, 6, 9, 12, 12, 6, 6, 12, 
12, 6, 6, 9, 12, 12, 6, 6, 9, 6, 9, 6, 6, 12, 12, 12, 6, 12, 
9, 6, 12, 6), format.stata = "%9.0g"), race = structure(c(3, 
3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 
2, 2, 2, 2, 2, 3, 3, 2), format.stata = "%9.0g", labels = c(White = 1, 
Black = 2, Hispanic = 3, Other = 4), class = c("haven_labelled", 
"vctrs_vctr", "double")), gender = structure(c(0, 1, 0, 1, 0, 
0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 
1, 0, 0, 0), label = "gender of subject", format.stata = "%12.0g", labels = c(female = 0, 
male = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), attend = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), format.stata = "%9.0g"), 
    fail = structure(c(0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 
    1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1), format.stata = "%9.0g"), 
    both = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA, 
-30L), class = c("tbl_df", "tbl", "data.frame"))
3

There are 3 best solutions below

0
stefan_aus_hannover On

To match your desired table and use the code you already have you would simply need to add column name renames and a bind_rows

library(dplyr)
library(haven)

race_percentages <- d %>%
  group_by(race) %>%
  summarize(
    percent_attend = mean(attend) * 100,
    percent_fail = mean(fail) * 100,
    percent_both = mean(both) * 100)

gender_percentages <- d %>%
  group_by(gender) %>%
  summarize(
    percent_attend = mean(attend) * 100,
    percent_fail = mean(fail) * 100,
    percent_both = mean(both) * 100)

city_percentages <- d %>%
  group_by(city) %>%
  summarize(
    percent_attend = mean(attend) * 100,
    percent_fail = mean(fail) * 100,
    percent_both = mean(both) * 100)

colnames(city_percentages)[1] <- "Group"
colnames(race_percentages)[1] <- "Group"
colnames(gender_percentages)[1] <- "Group"

percentages <- bind_rows(race_percentages,gender_percentages,city_percentages)

Output

  Group         percent_attend percent_fail percent_both
  <dbl+lbl>              <dbl>        <dbl>        <dbl>
1  2 [Black]                 0         43.8            0
2  3 [Hispanic]              0         21.4            0
3  0 [female]                0         18.8            0
4  1 [White]                 0         50              0
5  6                         0         38.5            0
6  9                         0          0              0
7 12                         0         45.5            0

You would need to mutate the Group column further to match desired values

2
r2evans On

How about this?

library(dplyr)
library(tidyr) # pivot_longer
# I'm discarding the haven attributes for now ...
mutate(d, across(c(race, gender), unclass)) |>
  pivot_longer(cols = c(city, race, gender)) |>
  summarize(.by = c(name, value), across(c(attend, fail, both), ~ 100 * mean(.x))) |>
  arrange(name, value)
# # A tibble: 7 × 5
#   name   value attend  fail  both
#   <chr>  <dbl>  <dbl> <dbl> <dbl>
# 1 city       6      0  38.5     0
# 2 city       9      0   0       0
# 3 city      12      0  45.5     0
# 4 gender     0      0  18.8     0
# 5 gender     1      0  50       0
# 6 race       2      0  43.8     0
# 7 race       3      0  21.4     0

To get the literal output you said with a single Group column, we can either paste them together and remove the original columns, or we can use tidyr::unite:

mutate(d, across(c(race, gender), unclass)) |>
  pivot_longer(cols = c(city, race, gender)) |>
  summarize(.by = c(name, value), across(c(attend, fail, both), ~ 100 * mean(.x))) |>
  arrange(name, value) |>
  unite("Group", name, value, sep = "")
# # A tibble: 7 × 4
#   Group   attend  fail  both
#   <chr>    <dbl> <dbl> <dbl>
# 1 city6        0  38.5     0
# 2 city9        0   0       0
# 3 city12       0  45.5     0
# 4 gender0      0  18.8     0
# 5 gender1      0  50       0
# 6 race2        0  43.8     0
# 7 race3        0  21.4     0

and if you'd prefer to have the actual haven labels instead of city, etc, then perhaps this:

d |>
  mutate(
    city = as.character(city),
    across(c(race, gender), ~ as.character(names(attr(.x, "labels"))[ match(.x, attr(.x, "labels")) ]))
  ) |>
  pivot_longer(cols = c(city, race, gender)) |>
  summarize(.by = c(name, value), across(c(attend, fail, both), ~ 100 * mean(.x))) |>
  arrange(name, value)
# # A tibble: 7 × 5
#   name   value    attend  fail  both
#   <chr>  <chr>     <dbl> <dbl> <dbl>
# 1 city   12            0  45.5     0
# 2 city   6             0  38.5     0
# 3 city   9             0   0       0
# 4 gender female        0  18.8     0
# 5 gender male          0  50       0
# 6 race   Black         0  43.8     0
# 7 race   Hispanic      0  21.4     0
4
user12256545 On

you can use knitr kable to convert to markdown, and from there to latex or html, depending where you want to publish.

btw you dont have to repeat yourself when using pivot and group_by:

library(tidyverse)
library(haven)
library(knitr)

d %>%
  pivot_longer(city:gender) %>% 
  unite(Group,name,value) %>% 
  group_by(Group) %>% 
  summarize(
    percent_attend = mean(attend) * 100,
    percent_fail = mean(fail) * 100,
    percent_both = mean(both) * 100) %>%
  kable() # to markdown table

|Group    | percent_attend| percent_fail| percent_both|
|:--------|--------------:|------------:|------------:|
|city_12  |              0|     45.45455|            0|
|city_6   |              0|     38.46154|            0|
|city_9   |              0|      0.00000|            0|
|gender_0 |              0|     18.75000|            0|
|gender_1 |              0|     50.00000|            0|
|race_2   |              0|     43.75000|            0|
|race_3   |              0|     21.42857|            0|

EDIT

since you asked for publication ready::

library(tidyverse)
library(haven)
library(knitr)
library(kableExtra)
library(sjlabelled)


d %>%
  mutate(
    city = as_character(city),
    race = as_character(race),
    gender = as_character(gender),
    )%>% 
  pivot_longer(city:gender) %>% 
  unite(Group,name,value) %>% 
  group_by(Group) %>% 
  summarize(
    attended = mean(attend) * 100,
    failed = mean(fail) * 100,
    both = mean(both) * 100) %>%
  mutate(Group = sub(".*_","",Group)) %>% 
  kable(digits=2, ) %>%
  add_header_above(c(' ', "Percentage"=3)) %>%
  group_rows("City", 1, 3) %>% 
  group_rows("Gender", 4, 5) %>%
  group_rows("Race", 6, 7) %>% 
  kable_classic() 

enter image description here