sum across columns based on range given in another text column in r

69 Views Asked by At

i want to create new column for row total based on column range given in text column (col_range). I tried the following and it throws me an error. Does anybody know how to do this.

d = data.frame(index_id = c("a1232","c198", "s345","2ert", "e234","e567"),
               yr_ref = c(2023,2024,2025,2024,2027,NA),
               temp2023 = c(2000,5000,2300,2000,1000,200),
               temp2024 = c(3000,3000,3000,0,0,200),
               temp2025 = c(2000,3000,0,800,8000,200),
               temp2026 = c(300,200,1000,0,0,200),
               temp2027= c(1300,1200,100,10,10,200),
               col_range = c("temp2023:temp2023","temp2023:temp2024","temp2023:temp2025","temp2023:temp2024","temp2023:temp2027",NA))

d= d %>% 
  mutate(temp_total= ifelse(!is.na(yr_ref),
                               sum(c_across(col_range)),NA))

#### ERROR
Error in `mutate()`:
ℹ In argument: `temp_total = ifelse(!is.na(yr_ref), sum(c_across(col_range)), NA)`.
Caused by error in `sum()`:
! invalid 'type' (character) of argument
Run `rlang::last_trace()` to see where the error occurred.```
3

There are 3 best solutions below

0
Hoel On BEST ANSWER

Inspired by @Isaac

library(tidyverse)
    
d %>% 
  rowwise() %>% 
  mutate(total = ifelse(!is.na(yr_ref), 
                        map_dbl(col_range, ~ rowSums(across(str_split_1(.x, pattern = ":")[1]:str_split_1(.x, pattern = ":")[2]))), NA))  

# A tibble: 6 x 9
# Rowwise: 
  index_id yr_ref temp2023 temp2024 temp2025 temp2026 temp2027 col_range         total
  <chr>     <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl> <chr>             <dbl>
1 a1232      2023     2000     3000     2000      300     1300 temp2023:temp2023  2000
2 c198       2024     5000     3000     3000      200     1200 temp2023:temp2024  8000
3 s345       2025     2300     3000        0     1000      100 temp2023:temp2025  5300
4 2ert       2024     2000        0      800        0       10 temp2023:temp2024  2000
5 e234       2027     1000        0     8000        0       10 temp2023:temp2027  9010
6 e567         NA      200      200      200      200      200 NA                   NA
1
Isaac On

This is one option how to deal with error:

d %>% 
  rowwise() %>% 
  mutate(temp_total = ifelse(!is.na(yr_ref), sum(c_across(matches("temp\\d{4}"))), NA)) %>%
  ungroup()

# A tibble: 6 × 9
  index_id yr_ref temp2023 temp2024 temp2025 temp2026 temp2027 col_range         temp_total
  <chr>     <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl> <chr>                  <dbl>
1 a1232      2023     2000     3000     2000      300     1300 temp2023:temp2023       8600
2 c198       2024     5000     3000     3000      200     1200 temp2023:temp2024      12400
3 s345       2025     2300     3000        0     1000      100 temp2023:temp2025       6400
4 2ert       2024     2000        0      800        0       10 temp2023:temp2024       2810
5 e234       2027     1000        0     8000        0       10 temp2023:temp2027       9010
6 e567         NA      200      200      200      200      200 NA                        NA
0
jpsmith On

You can do this in base R by creating using strsplit to divvy up the col_range column and creat indexes (ix), then sapply to index on the columns:

# Create indices
col_ranges <- strsplit(as.character(d$col_range), ":")
ix <- lapply(col_ranges, \(x) which(names(d) %in% x))

d$sums <- sapply(seq_len(nrow(d)), \(xx)
       if(length(ix[[xx]]) == 0 ){NA}
       else{
       ifelse(length(ix[[xx]]) == 1,
              d[xx,ix[[xx]]],
              sum(d[xx, ix[[xx]][1]:ix[[xx]][2]])
       )})

  index_id yr_ref temp2023 temp2024 temp2025 temp2026 temp2027         col_range sums
1    a1232   2023     2000     3000     2000      300     1300 temp2023:temp2023 2000
2     c198   2024     5000     3000     3000      200     1200 temp2023:temp2024 8000
3     s345   2025     2300     3000        0     1000      100 temp2023:temp2025 5300
4     2ert   2024     2000        0      800        0       10 temp2023:temp2024 2000
5     e234   2027     1000        0     8000        0       10 temp2023:temp2027 9010
6     e567     NA      200      200      200      200      200              <NA>   NA