I'm using Python's AsyncSSH library to download files from an SFTP server and upload them to Google Cloud Storage using Cloud Functions. The code works fine when the number of files and directories is small. However, when I try to download a large amount of data (1GB or more), the code fails to download all the files and directories.
Here is the code I am using:
import os
import asyncio
import asyncssh
import stat
from flask import jsonify, make_response
import functions_framework
from google.cloud import storage
async def download_files(server_url, username, private_key_path, remote_path, bucket_name):
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    downloaded_files = []
    async with asyncssh.connect(
        server_url, username=username, client_keys=[private_key_path], known_hosts=None
    ) as conn:
        async with conn.start_sftp_client() as sftp:
            await recursive_download(sftp, remote_path, bucket, downloaded_files)
    return downloaded_files
async def recursive_download(sftp, remote_path, bucket, downloaded_files):
    for filename in await sftp.listdir(remote_path):
        remote_file = remote_path + "/" + filename
        attrs = await sftp.stat(remote_file)
        if stat.S_ISREG(attrs.permissions):  # Check if it's a regular file
            # Open the remote file
            async with sftp.open(remote_file) as remote_file_obj:
                # Read the file data into memory
                file_data = await remote_file_obj.read()
                # Create a blob in GCS
                blob = bucket.blob(remote_file)
                # Upload the file data to GCS
                blob.upload_from_string(file_data)
                print(f"Downloaded {remote_file}")
                downloaded_files.append(remote_file)
        elif stat.S_ISDIR(attrs.permissions):  # Check if it's a directory
            await recursive_download(sftp, remote_file, bucket, downloaded_files)
The download_files function is called in a Flask route:
@functions_framework.http
def main(request):
    try:
        server_url = "my_server_url"
        username = "my_username"
        private_key_path = "my_private_key_path"
        remote_path = "my_remote_path"
        bucket_name = "my_bucket_name"
        downloaded_files = asyncio.run(
            download_files(server_url, username, private_key_path, remote_path, bucket_name)
        )
        return make_response(jsonify({"message": f"Files downloaded successfully. Total files: {len(downloaded_files)}. Files: {downloaded_files}"}), 200)
    except Exception as e:
        return make_response(jsonify({"error": str(e)}), 500)
I'm not sure why the code fails when the amount of data is large. Any insights on improving the code would be greatly appreciated.
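To help narrow things down, the same coroutine can also be run outside of Cloud Functions as a plain script, which would show whether the problem is in the download logic itself or in the Cloud Functions environment (request timeout, memory limits). This is just a minimal driver sketch using the same placeholder values as in main(), assuming it lives in the same module as download_files:

# Standalone driver (not the Cloud Function): runs the same download_files
# coroutine directly, with the same placeholder values as in main().
import asyncio

if __name__ == "__main__":
    files = asyncio.run(
        download_files(
            "my_server_url",
            "my_username",
            "my_private_key_path",
            "my_remote_path",
            "my_bucket_name",
        )
    )
    print(f"Total files: {len(files)}")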
The reason I am using asyncssh rather than paramiko or pysftp is that I had trouble authenticating to the server with them; for some reason asyncssh works.