How can I merge multiple Excel files with multiple sheets?

45 Views Asked by At

Can anyone please help me to write code if sheet names are matching in several files then data will merge and create one data frame. I want to merge multiple file in one dictionary.

I am getting this error:

> AttributeError  
Traceback (most recent call last)  
Cell In[78], line 8  
      6 path = os.path.join(root, fname)  
      7 print(path)  
----> 8 for sheet_name in path.sheet_names():   
      9     print(sheet_name)  
AttributeError: 'str' object has no attribute 'sheet_names'

Code

import os
from pathlib import Path
# Load the Excel file
#xls_file = pd.ExcelFile(excel_filepath)
# Use additional arguments to read the sheets
df_dict_flex = {}
for root, dirs,files in os.walk('path'):
    for fname in files:
        xls_file = pd.ExcelFile(os.path.join(root, fname))
        print(xls_file)
        # Checking sheet names in every excel file
        for sheet_name in xls_file.sheet_names:
                if sheet_name != 'Portfolios' and sheet_name != 'SP Search Term Report':
                    df_dict_flex[sheet_name] = file.parse(sheet_name,)
df_dict_flex
        
1

There are 1 best solutions below

2
Manoj Bhosle On
import os
import pandas as pd

# Define the path where the Excel files are located
path = 'your/excel/files/path'

# Initialize a dictionary to store DataFrames
df_dict_flex = {}

# Walk through the directory containing the Excel files
for root, dirs, files in os.walk(path):
    for fname in files:
        # Construct the full file path
        file_path = os.path.join(root, fname)
        # Ensure that you're only processing Excel files
        if file_path.endswith('.xlsx') or file_path.endswith('.xls'):
            # Load the Excel file
            xls_file = pd.ExcelFile(file_path)
            print(xls_file)  # This should print the ExcelFile object, not a string

            # Extract sheet names in every Excel file
            for sheet_name in xls_file.sheet_names:
                print(sheet_name)  # This should print each sheet name

                # Check if the sheet name matches the given condition
                if sheet_name == 'Portfolios' or sheet_name == 'SP Search Term Req':
                    # Parse the specific sheet into a DataFrame
                    df = xls_file.parse(sheet_name)
                    # Store the DataFrame in the dictionary
                    df_dict_flex[sheet_name] = df

# The df_dict_flex dictionary now contains DataFrames for each sheet that matches the conditions
#Make sure to replace 'your/excel/files/path' with the actual path to your Excel files. may this helps to you