Create table, by selecting from data frames when a match occurs

140 Views Asked by At

I have three data frames dbase, data and dcode.

I want to copy the rows from dbase to data when dbase$idbase = data$iddata and add based on dcode the names of dcode when dbase$nr matches the number in dcode.

data <- 
  structure(list(iddata = 11103:11121), .Names = "iddata", class = "data.frame", row.names = c(NA, -19L))

dbase <-
 structure(list(idbase = 11102:11116, nr = c(115L, 116L, 117L, 
 118L, 37L, 37L, 37L, 41L, 41L, 51L, 51L, 59L, 74L, 77L, 85L)), .Names = c("idbase", 
 "nr"), class = "data.frame", row.names = c(NA, -15L))


dcode <- 
 structure(list(X32 = c(35L, 36L, 37L, 49L, 50L, 51L, 90L, 99L, 
 100L, 101L, 103L), X23 = c(26L, 27L, 28L, 29L, 30L, 31L, 38L, 
 39L, 40L, 41L, 42L), X9 = c(10L, 11L, 12L, 13L, 15L, 16L, 17L, 
 25L, 33L, 34L, 120L)), .Names = c("X32", "X23", "X9"), class = "data.frame", row.names = c(NA, -11L))

My aim is this table

iddata  idbase  nr  foc
11102   11102   115 0
11103   11103   116 0
11104   11104   117 0
11105   11105   118 0
11106   11106   37  X32
11107   11107   37  X32
11108   11108   37  X32
11109   11109   41  X23
11110   11110   41  X23
11111   11111   51  X32
11112   11112   51  X32
11113   11113   59  0
11114   11114   74  0
11115   11115   77  0
11116   11116   85  0
11117   0       0
11118   0       0
11119   0       0
11120   0       0
11121   0       0
2

There are 2 best solutions below

6
On BEST ANSWER

Could try some merge manipulations such as

temp <- merge(data, dbase, by.x = "iddata", by.y = "idbase", all = TRUE)
temp2 <- data.frame("nr" = unlist(dcode))
temp2$foc <- rep(names(dcode), each = nrow(dcode))
temp <- merge(temp, temp2, by = "nr", all.x = TRUE)
temp[order(temp$iddata), ]
#     nr iddata  foc
# 12 115  11102 <NA>
# 13 116  11103 <NA>
# 14 117  11104 <NA>
# 15 118  11105 <NA>
# 1   37  11106  X32
# 2   37  11107  X32
# 3   37  11108  X32
# 5   41  11109  X23
# 4   41  11110  X23
# 6   51  11111  X32
# 7   51  11112  X32
# 8   59  11113 <NA>
# 9   74  11114 <NA>
# 10  77  11115 <NA>
# 11  85  11116 <NA>
# 19  NA  11117 <NA>
# 16  NA  11118 <NA>
# 17  NA  11119 <NA>
# 18  NA  11120 <NA>
# 20  NA  11121 <NA>
4
On

Here's how I'd tackle it with dplyr and tidyr. First tidy dcode so that variables are in the columns, and use the same variable name in data and dbase:

library("dplyr")
library("tidyr")

data <- data %>% rename(id = iddata)
dbase <- dbase %>% rename(id = idbase)
dcode_tidy <- dcode %>% 
  gather(foc, nr)

Then you only need two simple joins to get the result you want:

data %>%
  full_join(dbase) %>%
  left_join(dcode_tidy)
#>     nr    id  foc
#> 1  116 11103 <NA>
#> 2  117 11104 <NA>
#> 3  118 11105 <NA>
#> 4   37 11106  X32
#> 5   37 11107  X32
#> 6   37 11108  X32
#> 7   41 11109  X23
#> 8   41 11110  X23
#> 9   51 11111  X32
#> 10  51 11112  X32
#> ...

(You'll need the dev version of dplyr to get the full join)

As with most data manipulation challenges, the answer is easy once you have the data in the right form.