R merge multiple files into 1 dataframe and keep all values

2.9k Views Asked by At

I have 6 multiple files, each having only 1 column of names. I want to read all these files and combine them in 1 dataframe so that it looks like this:

file1  file2  file3  file4  file5  file6
adam   adam   adam   adam   adam   adam
Roy    NA     Roy    Roy    NA     NA
NA     Sam    Sam    NA     NA     NA

The colnames of resulting dataframe should represent the actual file names. Let's say that the file I read were named file1.txt, file2.txt and so on..

Any help will be much appreciated.

What I've been trying so far:

multmerge = function(mypath){
+ filenames=list.files(path=mypath, full.names=TRUE, pattern = "\\.txt$")
+ datalist = lapply(filenames, function(x){read.csv(file=x,header=F)})
+ Reduce(function(x,y) {merge(x,y, all.x=T)}, datalist)}
> mymergeddata = multmerge("/Path/To/The/Folder/Having/All/Files")
> dim(mymergeddata)
[1] 11508     1

As seen here it is combining all values (names) in one column..

4

There are 4 best solutions below

2
On BEST ANSWER

Here's one way to do that with rbindlist and dcast from data.table

library(data.table)
file_list <- list.files("c:/temp/files/",full.names = TRUE)
import_files <- lapply(file_list,read.csv,stringsAsFactors =FALSE)
rbinded_files <- na.omit(rbindlist(import_files,idcol="file"))
dcast(rbinded_files,file1 ~file,fun=max, na.rm=TRUE)

  file1    1    2    3    4    5    6
1  adam adam adam adam adam adam adam
2   Roy  Roy <NA>  Roy  Roy <NA> <NA>
3   Sam <NA>  Sam  Sam <NA> <NA> <NA>

You can remove the first column if you want.

2
On

Another solution with purrr::map

files <- list.files("/path/",full.names = TRUE)

combinedDF <- files %>% map(read.csv, stringsAsFactors=FALSE, col.names=files) %>% 
  reduce(cbind)
0
On

This is not probably the most efficient, but the function should do the trick and I'd be happy to hear about ways to improve the function as I'm a stack newbie:

library(data.table); library(tidyverse)

multmerge <- function(dir) {
    # Load files and bind columns
    full_dir_filenames <- list.files(path = dir, full.names = TRUE, pattern = "\\.txt$")
    datalist <- lapply(full_dir_filenames, read_csv, col_names = FALSE) %>% 
        lapply(t) %>% 
        lapply(as.tibble)
    df <- bind_cols(datalist)

    # Append the column names
    file_names <- list.files(path = dir, full.names = FALSE, pattern = "\\.txt$")
    col_names <- tstrsplit(file_names, split = "[.]")[[1]]
    colnames(df) <- col_names

    df
}

multmerge()
1
On

Here is another version of your function that gives the output you asked for...

multmerge <- function(mypath){
  filenames <- list.files(path=mypath, full.names=TRUE, pattern = "\\.txt$")
  datalist <- lapply(filenames, function(x){
            df <- read.csv(file=x,header=F,stringsAsFactors = FALSE)
            df[,2] <- gsub("\\.txt","",basename(x))
            return(df)})
  namesdf <- do.call(rbind,datalist)
  output <- as.data.frame.matrix(table(namesdf$name,namesdf$file))
  for(j in 1:nrow(output)){
    output[j,] <- ifelse(as.numeric(output[j,])==0,NA,row.names(output)[j])
  }
  return(output)
}