python script that would move zip folders to new directory based on their names

454 Views Asked by At

I am trying to write a script in Python that moves zip files to a new folder based on their names, but I am struggling with it, as I cannot figure out how to make Python read each zip file's name and move the file to the relevant folder. Any help would be appreciated.

zip file names are 12345788_CATPICC1_2022_01_10_08_21_31.zip 90234578_CATPICC1_2022_01_10_08_21_31.zip 96352536_CATPICC2_2022_01_10_08_21_31.zip 78541296_CATPICC2_2022_01_10_08_21_31.zip

Folders where above zip files need to go: Markky wool (CATPICC1) Markky wool (CATPICC2)

when moving zip file python needs to read CATPICC1 from 12345788_CATPICC1_2022_01_10_08_21_31.zip and move it to Markky wool (CATPICC1) and if zip file name is 78541296_CATPICC2_2022_01_10_08_21_31.zip then move it to Markky wool (CATPICC2)

i have thousands of files like these and i want to move each of them to a folder with matching name e.g., 12345788_CATPICC1_2022_01_10_08_21_31.zip to Markky wool (CATPICC1)

3

There are 3 best solutions below

7
TheDataScienceNinja On

Try this, accounting for your own OS and filepaths:

from pathlib import Path
import os
import shutil

path = Path.cwd()  # insert your path

# Destination folder for each tag found in the second "_"-separated field
# of the file name, e.g. "12345788_CATPICC1_2022_01_10_08_21_31.zip".
destinations = {
    "CATPICC1": path / "CAT1",
    "CATPICC2": path / "CAT2",
}

# Snapshot the recursive listing before moving anything, and keep the full
# paths: a file found in a subdirectory has to be moved from where it
# actually lives, not from a hard-coded "./files/" folder.
zip_paths = list(path.rglob('*.zip'))  # loop recursively over all subdirectories
files = [zip_path.name for zip_path in zip_paths]

print(files)

for zip_path in zip_paths:
    fields = zip_path.name.split('_')
    if len(fields) < 2:
        # No tag field in this name (e.g. "archive.zip"); leave the file alone.
        continue
    x = fields[1]
    print(x)

    target_dir = destinations.get(x)
    if target_dir is None or zip_path.parent == target_dir:
        # Unknown tag, or the file already sits in its destination folder
        # (which happens when the script is run a second time).
        continue
    target_dir.mkdir(parents=True, exist_ok=True)
    shutil.move(str(zip_path), str(target_dir / zip_path.name))
4
picobit On

What have you tried already? Which parts are working and which parts aren't?

This problem can be broken down into a few parts:

  1. Collect all file names
  2. Match substrings in file names
  3. Move files

Here's the pseudocode for what you're trying to do:

# There are fancier ways to accomplish this, but I 
# think this is the easiest to understand.
names_list = get_file_names("in_this_folder")
for file_name in names_list:
    if file_name contains "string1":
        move(file_name,"to_this_folder")
    elif file_name contains "string2":
        move(file_name,"to_other_folder")
    else:
        do nothing?
        

To collect filenames in a directory, you might do something like this

import os
from pathlib import Path
import shutil # Notice! We're importing this for later in the answer.

# Folder layout, all anchored at the absolute form of the current directory.
basepath = Path().resolve()
zip_folder = basepath / "zip/to-be-processed"
destination_folder_1 = basepath / "zip/done/01"
destination_folder_2 = basepath / "zip/done/02"

# Make sure both destination folders exist (no error if they already do).
for _destination in (destination_folder_1, destination_folder_2):
    _destination.mkdir(parents=True, exist_ok=True)


def get_file_names(path: Path, extension: str) -> list:
    """Return the names of the entries in ``path`` that have ``extension``.

    Args:
        path: Directory to list (anything ``os.listdir`` accepts).
        extension: File extension to look for, with or without the leading
            dot (``"zip"`` and ``".zip"`` are equivalent). The comparison is
            case-insensitive. An empty string matches every entry.

    Returns:
        Bare entry names (not full paths), in the order ``os.listdir``
        reports them.
    """
    suffix = extension.lower()
    # Anchor the match on the dot so that "zip" selects "a.zip" but not a
    # name that merely happens to end in those letters, such as "gzip".
    if suffix and not suffix.startswith("."):
        suffix = "." + suffix
    return [name for name in os.listdir(path) if name.lower().endswith(suffix)]

if __name__ == "__main__":
    # Collect the names of the zip archives waiting in zip_folder; the
    # result is a plain list of names such as ["file1.zip", "file2.zip"].
    file_names = get_file_names(path=zip_folder, extension="zip")

    # Now move on to processing the list...

Now you have a list of all zip file names in the directory. Next, you'll find the substrings that you're interested in. You can use the .find() string method for this. If the string that you're looking for exists in the string that you're searching, then find() will return the position. If the string isn't found, then it returns -1.

Note that it's sometimes a good idea to use .lower() or .upper() on both your candidate string and the substring that you're looking for (so you can find ".zip" and ".ZIP" files, for example).

>>> foo = "12345788_CATPICC1_2022_01_10_08_21_31.zip"
>>> bar = "12345788_CAICC1_2022_01_10_08_21_31.zip"                                                                     
>>> foo.lower().find("CATPICC1".lower())                                                                                  
9                                                                                                                       
>>> bar.lower().find("CATPICC1".lower())                                                                                  
-1

Example of use:

# Report, for each file name, whether it contains the substring "special".
file_names = ["file_1_special.zip","file_2.zip"]
for name in file_names:
    position = name.find("special")  # find() gives -1 when there is no match
    if position == -1:
        # Do something else.
        print("Where are you????")
    else:
        # Do something
        print("Found the file.")

Then use shutil.move() to move a file.

# Name of the file to relocate, as a plain string.
file_name = "moveme.zip"
# Build the full source path, then move it into the second destination folder.
source_path = zip_folder / file_name
shutil.move(source_path, destination_folder_2)

Note that I'm making assumptions about your directory structure and where the files live in relation to your script. You will have to modify accordingly. Please look up pathlib.Path.resolve() to understand what I did.

7
cards On

I used os.listdir to list the files and shutil.move to move them. Since a lot of files will be moved, you can comment out the prints inside the loop to save a bit of time. The program moves all files/dirs ending with a certain extension to a given (and existing) target directory.

import os 
import shutil


def mv_by_extension(wdir, dst, ext='.zip'):
    """Move every entry of ``wdir`` whose name ends with ``ext`` into ``dst``.

    Args:
        wdir: Directory whose entries are scanned (not recursive).
        dst: Target directory; a relative path is made absolute first. It is
            created if it does not exist yet.
        ext: Name suffix to match, case-sensitive (default ``'.zip'``).
    """
    # dst as abs-path
    if not os.path.isabs(dst):
        dst = os.path.abspath(dst)

    # shutil.move() renames src *to* dst when dst is not an existing
    # directory, so without this every matching entry would be moved onto
    # one and the same path instead of into a folder.
    os.makedirs(dst, exist_ok=True)

    print(f'Moving "{ext}"-file to target directory "{dst}"')
    # filter by extension and move file
    for fzip in os.listdir(wdir):
        if not fzip.endswith(ext):
            continue
        file_zip = os.path.abspath(os.path.join(wdir, fzip))
        print(f'Moving file "{file_zip}"...')
        shutil.move(src=file_zip, dst=dst)
        print('... done!')

    print('Finished.')


# Placeholders: fill in both paths before running.
wdir = # directory containing the zip-files
destination_dir = # path of the destination directory

# Move every entry of wdir whose name ends with '.zip' into destination_dir.
mv_by_extension(wdir, destination_dir, ext='.zip')

Optimized solution - asynchronous execution with ThreadPoolExecutor

import os 
import shutil
from concurrent.futures import ThreadPoolExecutor

# Placeholders: fill in wdir and dst before running.
ext = '.zip'  # only entries whose name ends with this suffix are selected
wdir = # abs-path of the directory that is listed for matching entries
dst = #  abs-path of destination dir

# get files/dir by extension
# os.listdir() yields bare names, so each one is joined with wdir first;
# abspath() on the bare name alone would resolve it against the current
# working directory instead of wdir.
zips = (os.path.abspath(os.path.join(wdir, name)) for name in os.listdir(wdir) if name.endswith(ext))

# Worker submitted to the thread pool, one call per path.
def amove(path, dst=dst):
    """Move ``path`` to ``dst``.

    The default for ``dst`` is the value the module-level ``dst`` had when
    this function was defined.
    """
    shutil.move(path, dst)


print(f'Moving "{ext}" to target directory "{dst}" asynchronously.')
with ThreadPoolExecutor() as exe:
    # Executor.map() only re-raises a worker's exception when the matching
    # result is retrieved, so drain the iterator; otherwise a failed move
    # would go unnoticed and '...done.' would be printed regardless.
    for _ in exe.map(amove, zips):
        pass

print('...done.')

EDIT: filter files by rule (see comments). Used a regex to get more control over the search pattern.

import re

def filter_files(wdir, regex='', ext='zip'):
    """Lazily yield absolute paths of the entries in ``wdir`` matching a rule.

    Args:
        wdir: Directory to list (not recursive).
        regex: Regular expression for the part of the name that precedes the
            extension; it is searched for, not anchored at the start.
        ext: Extension without the leading dot; it is inserted into the
            pattern as-is.

    Returns:
        A generator of absolute paths.
    """
    # (?:...) is a non-capturing group. Written as "(:?...)" it is a
    # capturing group that also accepts a stray ":" before the extension.
    pattern = re.compile(rf'{regex}\.(?:{ext})$')
    # os.listdir() returns bare names: join them with wdir before abspath(),
    # which would otherwise resolve them against the current working
    # directory.
    return (
        os.path.abspath(os.path.join(wdir, name))
        for name in os.listdir(wdir)
        if pattern.search(name)
    )

All together

import os 
import shutil
import re
from concurrent.futures import ThreadPoolExecutor

def amove(path, dst=dst): # asynchro-moving
    shutil.move(src=path, dst=dst)

def filter_files(wdir, regex='', ext='zip'):
    pattern = re.compile(rf'{regex}\.(:?{ext})$')
    return (os.path.abspath(path) for path in os.listdir(wdir) if pattern.search(path))

# main
wdir = # abs-path of exts dir
dst1 = #  abs-path of destination dir for CATPICC1-like

# filter rules
regex1 = r'[0-9a-zA-Z]+_CATPICC1.+'
ext = 'zip' # without dot!

# get zip-files
zips1 = filter_files(wdir, regex1, ext)

# start asynchro-operation
print(f'Moving "{ext}" to target directory "{dst1}" asynchronously.')
with ThreadPoolExecutor() as exe:
    exe.map(amove, zips1)
print('...done.')

Do the same for the other case (or handle both at once by iterating with a for-loop): use the regex regex2 = r'[0-9a-zA-Z]+_CATPICC2.+' and don't forget to set dst2 as its destination.