Divide the Shapefile into multiple parts using PyShp

857 Views Asked by At

Trying to divide the Shapefile into multiple parts based on the size. If the filesize of the Shapefile's zip file is more than 10 MB will divide it into chunks.

Earlier we used Geopandas for this task and it was working fine but was consuming more RAM to do this task. So giving a try with the PyShp library.

The main problem is it is generating the divided files but no records were inserting into the Shapefile and DBF file is also missing in that.

Am I missing something in the following code, please let me know

import os
import math
import csv
import zipfile
import shutil
from shutil import copyfile
import shapefile

path = '<shapefile_data_path>'
storage_path = '<path_to_extract_zip_file>'
current_dir = '<path_for_divided_shapefiles>'
ALLOWED_SIZE = 10
procs = []

# Here filepath means Shapefile's zip file path
def function_name(filepath):
    file_name = file_path.split('/')[-1]
    name = file_name.split('.zip')[0]
    storage_file = os.path.join(storage_path, file_name)
    storage_file = storage_file.replace('\\', '/')
    src = path +'/'+file_path
    shutil.copy(src,storage_file)
    statinfo = os.stat(storage_file)
    if (statinfo.st_size >> 20) > ALLOWED_SIZE:
        storage_path_1 = storage_path + '/' + name
        zip_ref = zipfile.ZipFile(storage_file)
        zip_ref.extractall(storage_path_1)
        zip_ref.close()
        prj_file_path = ''
        for _file1 in os.listdir(storage_path_1):
            print _file1
            if _file1.endswith('.prj'):
              prj_file_path = os.path.join(storage_path_1, _file1)

        for _file1 in os.listdir(storage_path_1):
            if _file1.endswith('.shp'):
                file_size = statinfo.st_size >> 20
                parts = int(math.ceil(float(file_size) / float(ALLOWED_SIZE)))
                # data = gpd.read_file(storage_path_1 + '/' + _file1)
                data = shapefile.Reader(storage_path_1 + '/' + _file1)
                records = data.records()
                num_lines = len(data)
                increment = int(num_lines / parts)
                start_index = 0
                part = 1
                file_name_new = file_name.split('.zip')[0]

            while start_index < num_lines:
                part_name = '{1}_part{0}'.format(part, file_name_new)
                outpath = os.path.join(current_dir, part_name)
                os.mkdir(outpath)
                outfile = os.path.join(outpath, part_name)
                end_index = start_index + increment
                if end_index > num_lines:
                  end_index = num_lines

                chunk = records[start_index:end_index]
                with open(outfile,'w') as f:
                    f.write(str(chunk))

                copyfile(prj_file_path, os.path.join(outpath, file_name_new+'.prj'))
                shutil.make_archive(outpath, 'zip', outpath)
                shutil.rmtree(outpath)
                start_index = end_index
                part += 1

While writing to the new shapefile, facing difficulties in the generated zip file. DBF file is missing and SHP file doesn't have proper records

Any help is much appreciated.

0

There are 0 best solutions below