Concurrent Futures vs Asyncio Difference

68 Views Asked by At

I am trying to optimize some Python code by making it asynchronous. To do this, I tried the asyncio and concurrent.futures libraries.

Here is my code:

async def get_rds_instances(session, region, engine_types):
    """Fetch the ``describe_db_instances`` response for a single region.

    Args:
        session: Object providing ``client('rds', region_name=...)``.
        region: Region name handed to the client constructor.
        engine_types: Not used in this excerpt.

    Returns:
        A list containing the response, or an empty list when the call
        raised (the exception is printed, not propagated).
    """
    report_rds = []
    mandatory_tags = {'Use-Case'}  # not used in this excerpt

    client = session.client('rds', region_name=region)
    try:
        await asyncio.sleep(1)
        # Not awaited: this call runs to completion before the coroutine
        # reaches another suspension point.
        response = client.describe_db_instances()
        report_rds.append(response)
    except (ClientError, Exception) as e:
        print(e)
    return report_rds

async def main():
    """Gather the per-region RDS reports concurrently with asyncio."""
    # ... some arguments definition
    # (elided in the post; presumably defines profile_name and engine_types)
    session = get_rds_session(profile_name)
    regions = session.get_available_regions('rds')
    reports = []  # stays defined for the processing step if gather() raises
    try:
        reports = await asyncio.gather(
            *[
                get_rds_instances(session=session, region=region, engine_types=engine_types)
                for region in regions
            ]
        )
    except Exception as e:
        print(f"An error occurred: {str(e)}")

    # ... process report


if __name__ == "__main__":
    asyncio.run(main())

Without asyncio this code completed in ~22 seconds. With asyncio it completed in ~20 seconds.

Here is where things get interesting. I then used concurrent.futures:

def get_rds_instances(session, region, engine_types):
    """Fetch the ``describe_db_instances`` response for a single region.

    Args:
        session: Object providing ``client('rds', region_name=...)``.
        region: Region name handed to the client constructor.
        engine_types: Not used in this excerpt.

    Returns:
        A list containing the response, or an empty list when the call
        raised (the exception is printed, not propagated).
    """
    report_rds = []
    mandatory_tags = {'Use-Case'}  # not used in this excerpt

    client = session.client('rds', region_name=region)
    try:
        response = client.describe_db_instances()
        report_rds.append(response)
    except (ClientError, Exception) as e:
        print(e)
    return report_rds


def main():
    """Gather the per-region RDS reports using a thread pool."""
    # ... some arguments definition
    # (elided in the post; presumably defines profile_name and engine_types)
    session = get_rds_session(profile_name)
    regions = session.get_available_regions('rds')
    reports = []  # stays defined for the processing step if the pool fails
    try:
        args = ((session, region, engine_types) for region in regions)
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # map() is lazy; list() collects every result inside the try
            # so any worker exception is raised (and caught) here.
            reports = list(executor.map(lambda p: get_rds_instances(*p), args))
    except Exception as e:
        print(f"An error occurred: {str(e)}")

    # ... process report


if __name__ == "__main__":
    main()

And this code completed in ~3 seconds.

So my question is: is this difference normal, or am I missing something or doing something wrong with asyncio?

Edit:

describe_db_instances

Thanks!

1

There is 1 best solution below

1
Umut TEKİN On

After talking to my colleagues, I learned that describe_db_instances is a blocking function, but running it in a separate thread for each region did help, as @user4815162342 suggested here. So here are my code changes:

def get_session(profile):
    """Return a boto3 session created for the given profile name."""
    return boto3.Session(profile_name=profile)


def get_clients(session):
    """Return one 'rds' client per region the session reports for 'rds'."""
    rds_clients = []
    for region_name in session.get_available_regions('rds'):
        rds_clients.append(session.client('rds', region_name=region_name))
    return rds_clients


async def get_instances(client, engine_types):
    """Run ``client.describe_db_instances`` in the default executor.

    Args:
        client: Object exposing a no-argument ``describe_db_instances``
            callable whose result supports ``['DBInstances']``.
        engine_types: Not used in this excerpt.

    Returns:
        A list whose single element is the response's ``'DBInstances'``
        value, or an empty list when the call raised (the exception is
        printed, not propagated).
    """
    output = []
    # Inside a coroutine the running loop is the one to use.
    loop = asyncio.get_running_loop()

    try:
        # Passing None selects the loop's default executor, so the call
        # runs off the event-loop thread while this coroutine awaits it.
        response = await loop.run_in_executor(None, client.describe_db_instances)
        output.append(response['DBInstances'])
    except (ClientError, Exception) as e:
        print(e)
    return output


async def main():
    """Create one client per region and gather their instance reports."""
    # ... some arguments definition
    # (elided in the post; presumably defines profile_name and engine)
    session = get_session(profile_name)
    clients = get_clients(session)

    reports = await asyncio.gather(*[get_instances(client, engine) for client in clients])

    # ... process report


if __name__ == "__main__":
    asyncio.run(main())