Assignment in mutate a value from dynamic column name in R

79 Views Asked by At

Here is my dataframe:

dt <- data.frame(
  session_id = c("17472631", "17472632", "17472633", "17472635", "17472636", "17472638"),
  Attractiveness_Real = c(-2, -2, 1, -1, 3, 1),
  Calmness_Real = c(-3, 2, 3, 2, 3, -2),
  Generosity_Real = rep(c(1, 3, 2), each = 2L),
  Arrogance_Real = c(-2, 1, -2, -3, 3, 2),
  Ambition_Real = c(-1, 1, -2, 2, 3, 1),
  Charisma_Real = c(-1, -3, -3, 2, 2, 2),
  Intelligence_Real = c(3, 2, 3, 2, 2, 2),
  Joyfulness_Real = c(3, -3, 1, 3, 2, -2),
  Friendliness_Real = c(-3, -1, -3, -2, -1, -3),
  Laziness_Real = c(-1, -3, NA, -2, NA, -3),
  Intelligence_Retreived = c(2, 1, NA, NA, NA, 3),
  Laziness_Retreived = rep(c(-3, NA), c(1L, 5L)),
  Attractiveness_Retreived = c(-2, NA, NA, NA, NA, 3),
  Generosity_Retreived = c(-2, 2, 3, 2, NA, 3),
  Friendliness_Retreived = c(NA, 2, -2, NA, -2, 2),
  Arrogance_Retreived = c(NA, NA, NA, -1, NA, 1),
  Calmness_Retreived = c(-2, NA, -1, NA, 1, 3),
  Charisma_Retreived = c(1, NA, NA, 3, -1, 3),
  Joyfulness_Retreived = c(NA, NA, NA, 3, NA, 3),
  Ambition_Retreived = c(NA, 1, 2, NA, 2, 2),
  RealAttribute_1 = c(
    "Attractiveness", "Charisma", "Friendliness", "Joyfulness", "Joyfulness",
    "Friendliness"
  ),
  RealAttribute_2 = c("Calmness", "Joyfulness", "Generosity", "Charisma", "Laziness", "Arrogance"),
  RealAttribute_3 = c("Generosity", "Attractiveness", "Arrogance", "Calmness", "Charisma", "Charisma"),
  RealAttribute_4 = c("Arrogance", "Generosity", "Charisma", "Friendliness", "Friendliness", "Ambition"),
  RealAttribute_5 = c("Ambition", "Calmness", "Ambition", "Laziness", "Arrogance", "Intelligence"),
  RealAttribute_6 = c("Charisma", "Ambition", "Calmness", "Ambition", "Generosity", "Generosity"),
  RealAttribute_7 = c(
    "Intelligence", "Friendliness", "Intelligence", "Attractiveness",
    "Attractiveness", "Laziness"
  ),
  RealAttribute_8 = c("Joyfulness", "Arrogance", "Joyfulness", "Arrogance", "Ambition", "Attractiveness"),
  RealAttribute_9 = c(
    "Friendliness", "Laziness", "Attractiveness", "Generosity", "Intelligence",
    "Joyfulness"
  ),
  RealAttribute_10 = c("Laziness", "Intelligence", "Laziness", "Intelligence", "Calmness", "Calmness"),
  RetrievedAttribute_1 = c(
    "Intelligence", "Intelligence", "Generosity", "Joyfulness", "Laziness",
    "Intelligence"
  ),
  RetrievedAttribute_2 = c("Laziness", "Joyfulness", "Ambition", "Arrogance", "Intelligence", "Ambition"),
  RetrievedAttribute_3 = c("Attractiveness", "Laziness", "Arrogance", "Charisma", "Calmness", "Generosity"),
  RetrievedAttribute_4 = rep(c("Generosity", "Arrogance", "Friendliness"), 2),
  RetrievedAttribute_5 = c("Friendliness", "Calmness", "Arrogance", NA, "Laziness", "Laziness"),
  RetrievedAttribute_6 = c("Arrogance", "Arrogance", "Calmness", NA, "Arrogance", "Charisma"),
  RetrievedAttribute_7 = c("Calmness", "Calmness", NA, NA, "Intelligence", "Attractiveness"),
  RetrievedAttribute_8 = c("Charisma", "Ambition", NA, NA, "Ambition", "Arrogance"),
  RetrievedAttribute_9 = c("Arrogance", "Friendliness", NA, NA, "Friendliness", "Joyfulness"),
  RetrievedAttribute_10 = c("Friendliness", "Generosity", NA, NA, "Charisma", "Calmness"),
  RankedAttribute_1 = c(
    "Intelligence", "Intelligence", "Arrogance", "Generosity", "Laziness",
    "Intelligence"
  ),
  RankedAttribute_2 = c("Laziness", "Generosity", "Friendliness", "Intelligence", "Charisma", "Calmness"),
  RankedAttribute_3 = c("Charisma", "Laziness", NA, "Gloomy/Joyful", "Intelligence", "Laziness"),
  RankedAttribute_4 = c("Friendliness", "Calmness", NA, "Charisma", "Attractiveness", "Charisma"),
  RankedAttribute_5 = c("Ambition", "Friendliness", NA, NA, "Friendliness", "Attractiveness"),
  RankedAttribute_6 = c("Arrogance", "Attractiveness", NA, NA, NA, "Generosity"),
  RankedAttribute_7 = c(NA, "Arrogance", NA, NA, NA, NA),
  RankedAttribute_8 = rep(NA_character_, 6L),
  RankedAttribute_9 = rep(NA_character_, 6L),
  RankedAttribute_10 = rep(NA_character_, 6L),
  target = c("Andy", "Daniel", "Brandon", "Jack", "Daniel", "Tyler"),
  rating_target = c("8", "5", "5", "10", "10", "11 - <i>extremely like</i>")
)
str(finalDataNumeric)
'data.frame':   6 obs. of  53 variables: 
 session_id              : chr  "17472631" "17472632" "17472633" "17472635" ...
 Attractiveness_Real     : num  -2 -2 1 -1 3 1
 Calmness_Real           : num  -3 2 3 2 3 -2
 Generosity_Real         : num  1 1 3 3 2 2
 Arrogance_Real          : num  -2 1 -2 -3 3 2
 Ambition_Real           : num  -1 1 -2 2 3 1
 Charisma_Real           : num  -1 -3 -3 2 2 2
 Intelligence_Real       : num  3 2 3 2 2 2
 Joyfulness_Real         : num  3 -3 1 3 2 -2
 Friendliness_Real       : num  -3 -1 -3 -2 -1 -3
 Laziness_Real           : num  -1 -3 NA -2 NA -3
 Intelligence_Retreived  : num  2 1 NA NA NA 3
 Laziness_Retreived      : num  -3 NA NA NA NA NA
 Attractiveness_Retreived: num  -2 NA NA NA NA 3
 Generosity_Retreived    : num  -2 2 3 2 NA 3
 Friendliness_Retreived  : num  NA 2 -2 NA -2 2
 Arrogance_Retreived     : num  NA NA NA -1 NA 1
 Calmness_Retreived      : num  -2 NA -1 NA 1 3
 Charisma_Retreived      : num  1 NA NA 3 -1 3
 Joyfulness_Retreived    : num  NA NA NA 3 NA 3
 Ambition_Retreived      : num  NA 1 2 NA 2 2
 RealAttribute_1         : chr  "Attractiveness" "Charisma" "Friendliness" "Joyfulness" ...
 RealAttribute_2         : chr  "Calmness" "Joyfulness" "Generosity" "Charisma" ...
 RealAttribute_3         : chr  "Generosity" "Attractiveness" "Arrogance" "Calmness" ...
 RealAttribute_4         : chr  "Arrogance" "Generosity" "Charisma" "Friendliness" ...
 RealAttribute_5         : chr  "Ambition" "Calmness" "Ambition" "Laziness" ...
 RealAttribute_6         : chr  "Charisma" "Ambition" "Calmness" "Ambition" ...
 RealAttribute_7         : chr  "Intelligence" "Friendliness" "Intelligence" "Attractiveness" ...
 RealAttribute_8         : chr  "Joyfulness" "Arrogance" "Joyfulness" "Arrogance" ...
 RealAttribute_9         : chr  "Friendliness" "Laziness" "Attractiveness" "Generosity" ...
 RealAttribute_10        : chr  "Laziness" "Intelligence" "Laziness" "Intelligence" ...
 RetrievedAttribute_1    : chr  "Intelligence" "Intelligence" "Generosity" "Joyfulness" ...
 RetrievedAttribute_2    : chr  "Laziness" "Joyfulness" "Ambition" "Arrogance" ...
 RetrievedAttribute_3    : chr  "Attractiveness" "Laziness" "Arrogance" "Charisma" ...
 RetrievedAttribute_4    : chr  "Generosity" "Arrogance" "Friendliness" "Generosity" ...
 RetrievedAttribute_5    : chr  "Friendliness" "Calmness" "Arrogance" NA ...
 RetrievedAttribute_6    : chr  "Arrogance" "Arrogance" "Calmness" NA ...
 RetrievedAttribute_7    : chr  "Calmness" "Calmness" NA NA ...
 RetrievedAttribute_8    : chr  "Charisma" "Ambition" NA NA ...
 RetrievedAttribute_9    : chr  "Arrogance" "Friendliness" NA NA ...
 RetrievedAttribute_10   : chr  "Friendliness" "Generosity" NA NA ...
 RankedAttribute_1       : chr  "Intelligence" "Intelligence" "Arrogance" "Generosity" ...
 RankedAttribute_2       : chr  "Laziness" "Generosity" "Friendliness" "Intelligence" ...
 RankedAttribute_3       : chr  "Charisma" "Laziness" NA "Joyfulness" ...
 RankedAttribute_4       : chr  "Friendliness" "Calmness" NA "Charisma" ...
 RankedAttribute_5       : chr  "Ambition" "Friendliness" NA NA ...
 RankedAttribute_6       : chr  "Arrogance" "Attractiveness" NA NA ...
 RankedAttribute_7       : chr  NA "Arrogance" NA NA ...
 RankedAttribute_8       : chr  NA NA NA NA ...
 RankedAttribute_9       : chr  NA NA NA NA ...
 RankedAttribute_10      : chr  NA NA NA NA ...
 target                  : chr  "Andy" "Daniel" "Brandon" "Jack" ...
 rating_target           : chr  "8" "5" "5" "10" ...

I want to recode each of the columns that contain "Attribute*" ("RealAttribute_*", "*RetrievedAttribute_", "RankedAttribute_") so they would be assigned with the value from the column that starts with the string they contain and ends with "_Real".

So for example if "RetrievedAttribute_1" contain "Intelligence" in a certain row, they would get the value under "Intelligence_Real" in that row.

Another example, taking only "RealAttribute_" columns, if the first four rows of the data looks like this:

 Attractiveness_Real     : num  -2 -2 1 -1 3 1
 Calmness_Real           : num  -3 2 3 2 3 -2
 Generosity_Real         : num  1 1 3 3 2 2
 Arrogance_Real          : num  -2 1 -2 -3 3 2
 Ambition_Real           : num  -1 1 -2 2 3 1
 Charisma_Real           : num  -1 -3 -3 2 2 2
 Intelligence_Real       : num  3 2 3 2 2 2
 Joyfulness_Real         : num  3 -3 1 3 2 -2
 Friendliness_Real       : num  -3 -1 -3 -2 -1 -3
 Laziness_Real           : num  -1 -3 NA -2 NA -3
 RealAttribute_1         : chr  "Attractiveness" "Charisma" "Friendliness" "Joyfulness" 
 RealAttribute_2         : chr  "Calmness" "Joyfulness" "Generosity" "Charisma" 
 RealAttribute_3         : chr  "Generosity" "Attractiveness" "Arrogance" "Calmness" 
 RealAttribute_4         : chr  "Arrogance" "Generosity" "Charisma" "Friendliness" ...
 RealAttribute_5         : chr  "Ambition" "Calmness" "Ambition" "Laziness" 
 RealAttribute_6         : chr  "Charisma" "Ambition" "Calmness" "Ambition" 
 RealAttribute_7         : chr  "Intelligence" "Friendliness" "Intelligence" "Attractiveness" 
 RealAttribute_8         : chr  "Joyfulness" "Arrogance" "Joyfulness" "Arrogance" 
 RealAttribute_9         : chr  "Friendliness" "Laziness" "Attractiveness" "Generosity"
 RealAttribute_10        : chr  "Laziness" "Intelligence" "Laziness" "Intelligence" 

The end result should look like this:

 Attractiveness_Real     : num  -2 -2 1 -1 3 1
 Calmness_Real           : num  -3 2 3 2 3 -2
 Generosity_Real         : num  1 1 3 3 2 2
 Arrogance_Real          : num  -2 1 -2 -3 3 2
 Ambition_Real           : num  -1 1 -2 2 3 1
 Charisma_Real           : num  -1 -3 -3 2 2 2
 Intelligence_Real       : num  3 2 3 2 2 2
 Joyfulness_Real         : num  3 -3 1 3 2 -2
 Friendliness_Real       : num  -3 -1 -3 -2 -1 -3
 Laziness_Real           : num  -1 -3 NA -2 NA -3
 RealAttribute_1         : num  -2 -3 -3 3 
 RealAttribute_2         : num  -3 -3 3 2 
 RealAttribute_3         : num  1 -2 -2 2 
 RealAttribute_4         : num  -2 1 -3 -2 
 RealAttribute_5         : num  -1 2 -2 -2 
 RealAttribute_6         : num  -1 1 3 2 
 RealAttribute_7         : num 3 -1 3 -1 
 RealAttribute_8         : num  3 1 1 -3 
 RealAttribute_9         : num  -3 -3 1 3
 RealAttribute_10        : num  -1 2 NA 2

I tryed first:

dt <- dt %>% mutate_at(vars(contains("Attribute_")), funs(. = !!paste0(., "_Real")))

And got:

Error in `call2()`:

! `.fn` must be a string, a symbol, a call, or a function

Run `rlang::last_trace()` to see where the error occurred.

Also tried

dt <- dt %>% mutate_at(vars(contains("Attribute_")), function(){. = !!paste0(., "_Real")})
dt <- dt %>% mutate_at(vars(contains("Attribute_")), funs(. = value[paste0(., "_Real")]))
dt <- dt %>% mutate_at(vars(contains("Attribute_")), funs(. = get(paste0(., "_Real"))))

All returned errors

And even:

columns_to_recode <- grep("Attribute_", colnames(dt), value = TRUE)
for (col in columns_to_recode) {
  att<-dt[[col]]
  col_name <- paste0(att, "_Real")
  dt[[col]] <- dt[[col_name]]
}

And got

Error in .subset2(x, i, exact = exact) : 
  recursive indexing failed at level 2

Thanks in advance!

2

There are 2 best solutions below

0
On BEST ANSWER

The data is quite untidy at the moment- what I mean by that is, Attribute, Ordering, and Ranking should each be their own column, rather than being encoded in column names. Pulling them out into their own columns will make the data easier to use, and ultimately make any analysis you do down the road easier also.

You have two main data types in the dataset: the objective values ("X_Real", as you said), and then the ordering data ("RankedAttribute_X"). One way of tidying the data would be to convert "X_Real"-type column names into having "RankedAttribute_X"-style column names too, and then doing a pivot_longer with pattern matching. The other way (the one I went with) is to create two separate dataframes for the two different data types, and then join them.

# first, we create the dataframe with the real observations
real_df <- dt |> 
  select(session_id, matches("Real$")) |> # session_id and columns which end in Real
  pivot_longer(-session_id, names_to = c("Attribute", ".value"), names_sep = "_") # this uses regex. It's kinda hard to explain, but if you look at the output hopefully it will give you an idea of what is going on. Feel free to ask me any questions you have!

# next, the ranking dataframe
rank_df <- dt |> 
  select(session_id, matches("^Real|^Retreived|^Ranked")) |> # ^ means the start of a line in regex, so ^Real matches columns which start with Real
  pivot_longer(-session_id, names_to = c("type", ".value", "num"), names_pattern = "(Real|Retreived|Ranked)(.*)_([0-9]+)")

# finally, we join the real observations to the rankings
left_join(rank_df, real_df, by = join_by(session_id, Attribute))

Output:

# A tibble: 120 × 5
   session_id type  num   Attribute       Real
   <chr>      <chr> <chr> <chr>          <dbl>
 1 17472631   Real  1     Attractiveness    -2
 2 17472631   Real  2     Calmness          -3
 3 17472631   Real  3     Generosity         1
 4 17472631   Real  4     Arrogance         -2
 5 17472631   Real  5     Ambition          -1
 6 17472631   Real  6     Charisma          -1
 7 17472631   Real  7     Intelligence       3
 8 17472631   Real  8     Joyfulness         3
 9 17472631   Real  9     Friendliness      -3
10 17472631   Real  10    Laziness          -1
# ℹ 110 more rows

You could pivot the data wider again, but I'd say it's debatable if this is better or worse:

# add the line below onto the end, after the join
pivot_wider(id_cols = c(session_id, type), names_from = num, values_from = Real)
0
On

Please try the below code which I tested


# get the columns that end with Real
data1 <- dt %>% select(ends_with('Real'))

# get the columns that start with Real
data2 <- dt %>% select(starts_with('Real'))

# combine the column names with values
data11 <- data1 %>% mutate(across(everything(), ~ paste(cur_column(), '##', .x)))

# replace the values of those columns that start with Real with the values of those that end with Real where the text matches
data12 <- purrr::map2_dfc(data2, data11, ~ ifelse(str_detect(trimws(str_to_lower(.x)), trimws(tolower(str_extract(.y,'.*(?=\\_)'))) ), str_extract(.y,'(?<=\\##\\s).*'), .x))


# repeat the same with Retreived

data1_re <- dt %>% select(ends_with('Retreived'))
data2_re <- dt %>% select(starts_with('Retrieved'))

data11_re <- data1_re %>% mutate(across(everything(), ~ paste(cur_column(), '##', .x)))

data12_re <- purrr::map2_dfc(data2_re, data11_re, ~ ifelse(str_detect(trimws(str_to_lower(.x)), trimws(tolower(str_extract(.y,'.*(?=\\_)'))) ), str_extract(.y,'(?<=\\##\\s).*'), .x))

# combine all the columns to create a final dataframe
final <- cbind(dt[1], dt[, endsWith(names(dt),'Real')],  dt[, endsWith(names(dt),'Retreived')], data12, data12_re, dt[, startsWith(names(dt),'Ranked')], dt[, c(52,53)])

## OUTPUT

# A tibble: 6 × 10
  RealAttribute_1 RealAttribute_2 RealAttribute_3 RealAttribute_4 RealAttribute_5 RealAttribute_6 RealAttribute_7 RealAttribute_8
  <chr>           <chr>           <chr>           <chr>           <chr>           <chr>           <chr>           <chr>          
1 -2              -3              1               -2              -1              -1              3               3              
2 Charisma        Joyfulness      Attractiveness  Generosity      Calmness        Ambition        Friendliness    Arrogance      
3 Friendliness    Generosity      Arrogance       Charisma        -2              Calmness        3               1              
4 Joyfulness      Charisma        Calmness        Friendliness    Laziness        Ambition        Attractiveness  Arrogance      
5 Joyfulness      Laziness        Charisma        Friendliness    Arrogance       Generosity      Attractiveness  Ambition       
6 Friendliness    Arrogance       Charisma        Ambition        Intelligence    Generosity      Laziness        Attractiveness 
# ℹ 2 more variables: RealAttribute_9 <chr>, RealAttribute_10 <chr>