I have a Snowpipe, and I do not have control over the incoming files. Each file contains a single, relatively small JSON record.
The stage had about 38,000 files in it when I restarted the pipe due to an error.
I am currently processing only 6-8 files per minute.
EDIT: Correction — I have processed about 750 files in 55 minutes, which is still very slow.
How can I speed this up without changing the one-record-per-file constraint?
{
  "id": "55555555-5555-5555-5555-555555555555",
  "val": {
    "bank": "bank name",
    "browser_version": "1.7.4044.138",
    "cpu": "Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz",
    "cpu_architecture": "x86_64",
    "created_at": "2020-07-31T10:21:12.992345225Z",
    "current_address": "address",
    "current_city": null,
    "current_country": "my country",
    "date_of_birth": "1990-03-21",
    "document_number": "55-55-55-55555",
    "document_type": "this type of document",
    "email": "[email protected]",
    "full_name": "some name",
    "gender": "Male",
    "internal_storage": "237.87 GB",
    "joined_date": "2020-07-31",
    "last_logged_in": null,
    "last_worked_on": "2020-12-02",
    "latitude": "Not Found",
    "longitude": "Not Found",
    "memory": "7.74 GB",
    "number_of_processors": "8",
    "app_client_uid": "{55555555-5LLL-5555-55L5-555B5555DB5A}",
    "operating_system": "win 10.0",
    "other_display": null,
    "id_number": "555555555",
    "permanent_address": null,
    "permanent_city": null,
    "permanent_country": null,
    "personal_email": "[email protected]",
    "primary_display": "1920x1080",
    "role_id": "55555555-5555-5555-5555-555555555555",
    "source_system": [
      {
        "id": "astringidthiny",
        "system": "system_name"
      }
    ],
    "status": "Active",
    "type": "worker",
    "updated_at": "2020-12-02T08:32:56Z",
    "webcam": "true",
    "workstream_ids": [
      "55555555-5555-5555-5555-555555555555"
    ]
  }
}