R programming -

139 Views Asked by At

I'm looking for a way to transform this data

df <- tribble(
  ~lp, ~`2017`, ~`2018`, ~`2019`, ~`2020`, ~Bank,
  1, 3.6, 11.3, 9.7, 12.5, "SANPL",
  2, 5.5, 3.6, 6.6, 4.6, "MBANK",
  3, 6.5, 14.4, 11.6, 13.7, "INGBSK",
  4, 7.1, 11.1, 15.7, 12.7, "PKOBP",
  5, 7.7, 9.7, 13, 12.1, "PEKAO",
  6, 14, 7.2, 6.4, 5, "MILLENIUM",
  7, 7.8, 16, 3.8, 13.1, "ALIOR",
  8, 3.8, 9.8, 7.2, 8.7, "HANDLOWY"
)

into this

end result

speciffically by apply functions. Is there a way just to transform this data and not use any method to amend it?

3

There are 3 best solutions below

2
On

You could do:


df  <- read.table(text = "lp 2017 2018 2019 2020 Bank
1 1 3.6 11.3 9.7 12.5 SANPL
2 2 5.5 3.6 6.6 4.6 MBANK
3 3 6.5 14.4 11.6 13.7 INGBSK
4 4 7.1 11.1 15.7 12.7 PKOBP
5 5 7.7 9.7 13 12.1 PEKAO
6 6 14 7.2 6.4 5 MILLENIUM
7 7 7.8 16 3.8 13.1 ALIOR
8 8 3.8 9.8 7.2 8.7 HANDLOWY", h = T)

lapply(
    split(df, df$Bank),
    function(x) unname(as.vector(x[names(x)!="Bank"]))
)
# $ALIOR

# 7 7 7.8 16 3.8 13.1

# $PKOBP

# 4 4 7.1 11.1 15.7 12.7

# $SANPL

# 1 1 3.6 11.3 9.7 12.5


#  <truncated>
0
On

1) Using the input shown reproducibly in the Note at the end find the column indexes of the columns whose names contain a digit (years), convert those columns to a matrix and use split. Finally reorder the components according to the original order of Bank via subscripting.

years <- grep("\\d", names(DF))
split(as.matrix(df[years]), df$Bank)[df$Bank]

2) A different approach is to select out the year columns as above, transpose that and convert it to a data frame. At this point each row is a column so just convert the data frame to a list and add the names using setNames.

setNames(as.list(as.data.frame(t(df[years]))), df$Bank)

3) or using tidyverse

library(dplyr)
library(tibble)

DF %>%
  as_tibble %>%
  column_to_rownames("Bank") %>%
  select(-lp) %>%
  t %>%
  as.data.frame %>%
  as.list

Note

Lines <- "     lp `2017` `2018` `2019` `2020` Bank        
1     1    3.6   11.3    9.7   12.5 SANPL      
2     2    5.5    3.6    6.6    4.6 MBANK      
3     3    6.5   14.4   11.6   13.7 INGBSK    
4     4    7.1   11.1   15.7   12.7 PKOBP     
5     5    7.7    9.7   13     12.1 PEKAO     
6     6   14      7.2    6.4    5   MILLENIUM  
7     7    7.8   16      3.8   13.1 ALIOR     
8     8    3.8    9.8    7.2    8.7 HANDLOWY "
DF <- read.table(text = Lines, check.names = FALSE)
0
On
bank_data_list <- split(df[, -1], df$Bank)

# Display each bank's data
for (bank_name in names(bank_data_list)) {
  cat("$", bank_name, "\n")
  print(bank_data_list[[bank_name]])
  cat("\n")
}

will display output as below

$ ALIOR 
# A tibble: 1 × 5
  `2017` `2018` `2019` `2020` Bank 
   <dbl>  <dbl>  <dbl>  <dbl> <chr>
1    7.8     16    3.8   13.1 ALIOR

$ HANDLOWY 
# A tibble: 1 × 5
  `2017` `2018` `2019` `2020` Bank    
   <dbl>  <dbl>  <dbl>  <dbl> <chr>   
1    3.8    9.8    7.2    8.7 HANDLOWY