I have 96 different Excel files in a folder that all contain one single column, which looks approximately like this, whereby the uppermost line is the name of the Excel file:

A1.xlsx
Distance
245
534
142
6342
634
4343
2323
1123
666

A2.xlsx
Distance
2453
2452
4434
3456
6754

However, it would like to have one single .xlsx file, where all the separate files are copied into a new column side by side containing the name of the source file as a header for the column. Furthermore, the current header should be removed. So, it should look like this then:

A1     A2     ...    Cn
245    2453   ...    ...
534    2452   ...
142    4434   ...
6342   3456   ...
634    6754   ...
4343          ...
2323          ...
1123
666

So far, I the following code:

fileList <-file.path(dir(path = ".", pattern = "^[a-zA-Z].+$", full.names = TRUE), "*.xlsx")
load_if_exists <-function(filename, ...) {
  tryCatch(
    suppressWarnings(read.table(filename, ...)),
    error = function(x) tibble(NA)
  )
}

fileList <- list.files(".", "*.xlsx", full.names = TRUE)

listData <- lapply(fileList, read.table)

names(listData) <- gsub(".xlsx","",basename(fileList))

bind_rows(listData, .id = "FileName") %>%
  group_by(FileName) %>%
  mutate(rowNum = row_number()) %>%
  dcast(rowNum~FileName, value.var = "V1") %>%
  select(-rowNum) %>%
  write.xlsx(file="Result.xlsx")

With this code, a new file is generated, but the data is not available. Furthermore, there was also no error code.

Could please anybody help me solving this problem?

Thank you very much!

2

There are 2 best solutions below

7
On BEST ANSWER

You can do this in with just a few steps - note, to combine them side-by-side you need to ensure they all have the same number of rows. So (1) import the data, (2) find the max number of rows in all the imported excel files, and (3) assign NA (or "" if you prefer) to the data frames that are shorter than that (4) combine them all (here, using do.call):

flpth <- "your_filepath_here"

fileList <- list.files(flpth, ".xlsx", full.names = TRUE)

listData <- lapply(fileList, readxl::read_excel)

maxLength <- max(unlist(lapply(listData, nrow)))

lengthList <- lapply(listData, \(x){ 
  if(nrow(x) < maxLength) x[(nrow(x)+1):maxLength,] <- NA
  x})

allData <- do.call(data.frame, lengthList)

writexl::write_xlsx(allData, path = paste0(flpth, "new_file.xlsx"))

Output:

    A1.xlsx  A2.xlsx
1  Distance Distance
2       245     2453
3       534     2452
4       142     4434
5      6342     3456
6       634     6754
7      4343     <NA>
8      2323     <NA>
9      1123     <NA>
10      666     <NA>

enter image description here

2
On

Assuming you are working in a project and the folder you are looking for files in is named "excelfiles".

library(tidyverse)
library(lubridate)
library(readxl)


# List data available for processing ----

files <- list.files("excelfiles/", full.names = TRUE,
                    recursive = TRUE, pattern = "*.xlsx", ignore.case = TRUE)

filesdb <- tibble(filename = files)

head(filesdb)

# extract information about each table from the filenames using the "word" function
filesdb <- filesdb |> 
  mutate(ID = word(word(basename(filename), 1, 1, sep = "/"), 1, 1, sep = "\\."))

# make a vector (tempfiles) to use in a function below containing the filenames & ID
tempfiles <- filesdb$filename
IDS <- filesdb$ID
tempfiles <- set_names(tempfiles, IDS)

head(tempfiles)

# write a little function to extract data from each table
import_files <- function(file) {
  read_xlsx(file, col_names = TRUE, skip = 1)
} 


# Extract the data ----

# This next bit will take a while, depending on the number of files.
# If you'd like to try a few first to make certain it's working, test it on a smaller subset; 
# e.g.,  tempfiles[1:5]
tempdf <- map_df(tempfiles, import_files, .id = "IDS") %>% 
  group_by(IDS) %>% 
  mutate(row = row_number()) %>% 
  pivot_wider(names_from = IDS, values_from = Distance, values_fill = NA) %>% 
  select(-row)

Output

tempdf
# A tibble: 9 × 2
     A1    A2
  <dbl> <dbl>
1   245  2453
2   534  2452
3   142  4434
4  6342  3456
5   634  6754
6  4343    NA
7  2323    NA
8  1123    NA
9   666    NA