I am using Motor. PyMongo was my initial choice, but I switched to Motor because it is the asynchronous MongoDB driver for Python.
My aim here is to query MongoDB with a large number of concurrent calls and minimal waiting time.
There are about 1,000 symbols, and for each symbol I have to query its latest candlestick data from MongoDB from time to time in order to perform certain calculations. I need to query the latest 5K documents for each symbol, so the collection contains roughly 1000 * 5000 = 5,000,000 documents.
With Motor and asyncio, I use the following method to fetch documents asynchronously, but the code takes a really long time to run and I can't figure out why. I am using an 8-core CPU on a virtual machine.
Any help with this problem?
async def getCandleList(symbol):  # each symbol contains about 5K latest candles in the collection
    """Fetch every candle document for ``symbol``, newest first.

    Args:
        symbol: Value matched against the ``symbol`` field of each document.

    Returns:
        A list of all matching documents, sorted by ``timeStamp`` descending.
    """
    dbName = 'candle_db'  # used as the collection name on the module-level ``db`` handle
    # Build the filter as a real dict and call the driver directly instead of
    # eval()-ing a formatted string: a symbol containing a quote character can
    # no longer break the query or inject arbitrary code.
    cursor = db[dbName].find({'symbol': symbol}).sort('timeStamp', -1)
    # length=None drains the whole cursor into a single in-memory list.
    return await cursor.to_list(length=None)
async def taskForEachSymbol(symbol):
    """Repeatedly fetch the candles for ``symbol`` and hand them to ``generateSignal``.

    The loop has no exit condition, so this coroutine only ends when one of
    the awaited calls raises or the surrounding task is cancelled.
    """
    while True:
        # generateSignal: a function that generates certain signals in real time
        await generateSignal(await getCandleList(symbol))
def getAllTasks():
    """Schedule one ``taskForEachSymbol`` task per entry of ``symbolList``.

    Returns:
        The list of created asyncio tasks, in ``symbolList`` order.
    """
    # symbolList contains around 1k symbols
    return [asyncio.create_task(taskForEachSymbol(sym)) for sym in symbolList]
async def mainTask():
    """Start every per-symbol task and wait on all of them together."""
    pending = getAllTasks()
    # return_exceptions=False: an exception raised by any task propagates
    # to the caller instead of being collected into the result list.
    await asyncio.gather(*pending, return_exceptions=False)
def main():
    """Run ``mainTask`` to completion on the module-level ``mainLoop``.

    This is deliberately a plain function, not a coroutine: it is invoked as
    ``main()`` without ``await`` and drives the event loop itself through
    ``run_until_complete``. Declared ``async``, the call would only create a
    coroutine object and the body would never execute.
    """
    mainLoop.run_until_complete(mainTask())
    print('completed! ... ')
if __name__ == '__main__':
    # Create a dedicated event loop and install it as the current loop.
    mainLoop = asyncio.new_event_loop()
    asyncio.set_event_loop(mainLoop)

    # Bind the Motor client to that loop; ``db`` is the handle the query
    # helpers read at module level.
    client = motor.motor_asyncio.AsyncIOMotorClient(io_loop=mainLoop)
    db = client['candles']

    main()