DBF to CSV using Azure Data Factory


Hi, can anyone help me convert DBF files (placed in a storage account) to CSV with an Azure Batch account, using an Azure Data Factory custom activity? Kindly provide the Python code.

I created a Batch account and tried executing a Python file placed in the storage account through an Azure Data Factory custom activity, using the code below. The activity shows as executed successfully, but the CSV file is not found in the storage account.

import csv
import sys
from dbfread import DBF, FieldParser
from azure.storage.blob import BlobServiceClient

# Replace these with your actual Azure Storage account information
account_name = "my_account_name"
account_key = "my_account_key"
container_name = "dbf"
blob_name = "test.dbf"

# Connection string for Azure Blob Storage
connection_string = f"DefaultEndpointsProtocol=https;AccountName={account_name};AccountKey={account_key};EndpointSuffix=core.windows.net"
blob_service_client = BlobServiceClient.from_connection_string(connection_string)

# Download the DBF file from Azure Blob Storage
blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
downloaded_file = "downloaded_file.dbf"

try:
    with open(downloaded_file, "wb") as download_file:
        download_file.write(blob_client.download_blob().readall())
except Exception as e:
    print(f"Error downloading DBF file: {e}")
    sys.exit()

# Convert DBF to CSV
class CustomFieldParser(FieldParser):
    def _parse_memo_index(self, data):
        try:
            return int(data)
        except ValueError:
            return data.decode('utf-8', errors='replace')

try:
    dbf_file = DBF(downloaded_file, parserclass=CustomFieldParser)
except Exception as e:
    print(f"Error reading DBF file: {e}")
    sys.exit()

csv_file = "converted_file.csv"

try:
    with open(csv_file, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)

        # Write header
        writer.writerow(dbf_file.field_names)

        # Write rows
        for record in dbf_file:
            writer.writerow(list(record.values()))

    print(f"Conversion completed. CSV file saved as {csv_file}")

except Exception as e:
    print(f"Error writing CSV file: {e}")

1 Answer

Answered by Bhavani:

The code you provided converts the DBF file into a CSV file on the local path of the compute node. You did not specify a destination for the CSV file, so it is written to the same local working directory where the DBF file was downloaded. That is why the pipeline succeeds but the CSV file does not appear in the storage account. Upload the CSV to the blob storage account using the code below:

# Convert to CSV
csv_data = []
csv_data.append(dbf_file.field_names)  # Add header

for record in dbf_file:
    csv_data.append(list(record.values()))

# Convert CSV data to string
csv_string = '\n'.join([','.join(map(str, row)) for row in csv_data])

# Upload CSV data to blob
cblob_name = "converted_file.csv"  # name of the destination blob
try:
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=cblob_name)
    blob_client.upload_blob(csv_string, overwrite=True)

    print(f"Conversion completed. CSV file saved as {cblob_name} in container {container_name}")

except Exception as e:
    print(f"Error uploading CSV file to blob storage: {e}")

This will convert the DBF file into a CSV file and store it in the storage account.
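If the activity reports success but you still do not see the file, a quick way to confirm the upload is to list the container's blobs from the script itself; the printed output lands in the Batch task's stdout.txt. A minimal check, assuming the same connection_string and container_name as above:

from azure.storage.blob import BlobServiceClient

# Assumes the same connection_string and container_name as defined above.
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
container_client = blob_service_client.get_container_client(container_name)

# Print every blob name; the output appears in the Batch task's stdout.txt.
for blob in container_client.list_blobs():
    print(blob.name)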


Here is the complete code for your reference:

import csv
import sys
from dbfread import DBF, FieldParser
from azure.storage.blob import BlobServiceClient

# Replace these with your actual Azure Storage account information
account_name = "bhanustoree"
account_key = "HraF6Fn5pn5DaMN1opUzX7oq2t5b0SSHcaaR2+SS5aabGsWCC5DsBTE7tfIvHGRau4ptpqB9oQTi+ASty3JSYA==;EndpointSuffix=core.windows.net"
container_name = "files"
blob_name = 'marks.dbf'
cblob_name = "converted_file.csv"  # Name of the blob where you want to save the CSV file
# Connection string for Azure Blob Storage
connection_string = f"DefaultEndpointsProtocol=https;AccountName={account_name};AccountKey={account_key};EndpointSuffix=core.windows.net"
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
# Download the DBF file from Azure Blob Storage
blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
downloaded_file = "downloaded1_file.dbf"

try:
    with open(downloaded_file, "wb") as download_file:
        download_file.write(blob_client.download_blob().readall())
except Exception as e:
    print(f"Error downloading DBF file: {e}")
    sys.exit()

# Convert DBF to CSV
class CustomFieldParser(FieldParser):
    def _parse_memo_index(self, data):
        try:
            return int(data)
        except ValueError:
            return data.decode('utf-8', errors='replace')

try:
    dbf_file = DBF(downloaded_file, parserclass=CustomFieldParser)
except Exception as e:
    print(f"Error reading DBF file: {e}")
    sys.exit()

# Convert to CSV
csv_data = []
csv_data.append(dbf_file.field_names)  # Add header

for record in dbf_file:
    csv_data.append(list(record.values()))

# Convert CSV data to string
csv_string = '\n'.join([','.join(map(str, row)) for row in csv_data])

# Upload CSV data to blob
try:
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=cblob_name)
    blob_client.upload_blob(csv_string, overwrite=True)

    print(f"Conversion completed. CSV file saved as {cblob_name} in container {container_name}")

except Exception as e:
    print(f"Error uploading CSV file to blob storage: {e}")