Trying to divide the Shapefile into multiple parts based on the size. If the filesize of the Shapefile's zip file is more than 10 MB will divide it into chunks.
Earlier we used Geopandas for this task and it was working fine but was consuming more RAM to do this task. So giving a try with the PyShp library.
The main problem is it is generating the divided files but no records were inserting into the Shapefile and DBF file is also missing in that.
Am I missing something in the following code, please let me know
import os
import math
import csv
import zipfile
import shutil
from shutil import copyfile
import shapefile
path = '<shapefile_data_path>'
storage_path = '<path_to_extract_zip_file>'
current_dir = '<path_for_divided_shapefiles>'
ALLOWED_SIZE = 10
procs = []
# Here filepath means Shapefile's zip file path
def function_name(filepath):
file_name = file_path.split('/')[-1]
name = file_name.split('.zip')[0]
storage_file = os.path.join(storage_path, file_name)
storage_file = storage_file.replace('\\', '/')
src = path +'/'+file_path
shutil.copy(src,storage_file)
statinfo = os.stat(storage_file)
if (statinfo.st_size >> 20) > ALLOWED_SIZE:
storage_path_1 = storage_path + '/' + name
zip_ref = zipfile.ZipFile(storage_file)
zip_ref.extractall(storage_path_1)
zip_ref.close()
prj_file_path = ''
for _file1 in os.listdir(storage_path_1):
print _file1
if _file1.endswith('.prj'):
prj_file_path = os.path.join(storage_path_1, _file1)
for _file1 in os.listdir(storage_path_1):
if _file1.endswith('.shp'):
file_size = statinfo.st_size >> 20
parts = int(math.ceil(float(file_size) / float(ALLOWED_SIZE)))
# data = gpd.read_file(storage_path_1 + '/' + _file1)
data = shapefile.Reader(storage_path_1 + '/' + _file1)
records = data.records()
num_lines = len(data)
increment = int(num_lines / parts)
start_index = 0
part = 1
file_name_new = file_name.split('.zip')[0]
while start_index < num_lines:
part_name = '{1}_part{0}'.format(part, file_name_new)
outpath = os.path.join(current_dir, part_name)
os.mkdir(outpath)
outfile = os.path.join(outpath, part_name)
end_index = start_index + increment
if end_index > num_lines:
end_index = num_lines
chunk = records[start_index:end_index]
with open(outfile,'w') as f:
f.write(str(chunk))
copyfile(prj_file_path, os.path.join(outpath, file_name_new+'.prj'))
shutil.make_archive(outpath, 'zip', outpath)
shutil.rmtree(outpath)
start_index = end_index
part += 1
While writing to the new shapefile, facing difficulties in the generated zip file. DBF file is missing and SHP file doesn't have proper records
Any help is much appreciated.