R Reshape Wide-to-Long without specifying stubs

58 Views Asked by At
HAVE = fread("
STUDENT CLASS GROUP S1TIME S2TIME S3TIME H2TAB H4TAB H5TAB
1 0 2 NA NA 6 NA 7 5
2 0 7 4 NA 4 5 NA 2
3 1 5 NA 4 5 6 NA NA
")

WANT = data.frame(
  STUDENT = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3),
  INPUT = c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5),
  CLASS = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1),
  GROUP = c(2, 2, 2, 2, 2, 7, 7, 7, 7, 7, 5, 5, 5, 5, 5),
  S.TIME = c(NA, NA, 6, NA, NA, 4, NA, 4, NA, NA, NA, 4, 5, NA, NA),
  H.TAB = c(NA, NA, NA, 7, 5, NA, 5, NA, NA, 2, NA, 6, NA, NA, NA)
)

Is it possible to transform HAVE into WANT without specifying "TIME, TAB" variables? I wish to extract time-varying stubs and reshape full data

2

There are 2 best solutions below

2
Dave2e On

Here is a solution to pivot longer, clean up the columns and then pivot_wider into the desired form.

library(tidyr)
library(magrittr)
    
temp <- HAVE %>% pivot_longer( cols=matches(".[[:digit:]]"), names_to = c("Name") , values_to = "value")

temp$INPUT <- gsub("\\D", "", temp$Name )
temp$Name <- gsub("\\d", ".", temp$Name )

answer <- temp %>% pivot_wider(names_from = "Name", values_from = "value")

I am sure there is a way to perform this in one line, but this was easier to debug.

   STUDENT CLASS GROUP INPUT S.TIME H.TAB
     <int> <int> <int> <chr>  <int> <int>
 1       1     0     2 1         NA    NA
 2       1     0     2 2         NA    NA
 3       1     0     2 3          6    NA
 4       1     0     2 4         NA     7
 5       1     0     2 5         NA     5
 6       2     0     7 1          4    NA
 7       2     0     7 2         NA     5
 8       2     0     7 3          4    NA
 9       2     0     7 4         NA    NA
10       2     0     7 5         NA     2
11       3     1     5 1         NA    NA
12       3     1     5 2          4     6
13       3     1     5 3          5    NA
14       3     1     5 4         NA    NA
15       3     1     5 5         NA    NA
0
B. Christian Kamgang On

Using data.table

library(data.table)

melt(setDT(HAVE), measure=measure(INPUT, value.name, pattern=".(\\d+)(TAB|TIME)"))

#     STUDENT CLASS GROUP  INPUT  TIME   TAB
#       <int> <int> <int> <char> <int> <int>
#  1:       1     0     2      1    NA    NA
#  2:       2     0     7      1     4    NA
#  3:       3     1     5      1    NA    NA
#  4:       1     0     2      2    NA    NA
#  5:       2     0     7      2    NA     5
#  6:       3     1     5      2     4     6
#  7:       1     0     2      3     6    NA
#  8:       2     0     7      3     4    NA
#  9:       3     1     5      3     5    NA
# 10:       1     0     2      4    NA     7
# 11:       2     0     7      4    NA    NA
# 12:       3     1     5      4    NA    NA
# 13:       1     0     2      5    NA     5
# 14:       2     0     7      5    NA     2
# 15:       3     1     5      5    NA    NA

You can rename the last two columns using setnames(c("TIME", "TAB"), c("S.TIME", "H.TAB"))