I got this error while doing ELT from mongo-tap to redshift. When I simply extract data from MongoDB to CSV it works fine. but when piping out output to redshift I am getting this _id error. not sure what is the reason. Can anybody guide me More detail https://github.com/singer-io/tap-mongodb/issues/48. As it has been written in python so need help from the python community as well.
I have setup tap-mongo via instructions here. I am running this command to export
~/.virtualenvs/tap-mongodb/bin/tap-mongodb \
--config ~/Documents/tap/config.json \
--catalog ~/Documents/tap/catalog.json | \
~/.virtualenvs/target-redshift/bin/target-redshift \
-c ~/Documents/target/config-redshift.json
running this command gives me this error.
ERROR Exception writing records
Traceback (most recent call last):
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_postgres/postgres.py", line 300, in write_batch
{'version': target_table_version})
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_postgres/sql_base.py", line 808, in write_batch_helper
for table_batch in denest.to_table_batches(schema, key_properties, records):
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_postgres/denest.py", line 21, in to_table_batches
key_properties)
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_postgres/denest.py", line 47, in _get_streamed_table_schemas
key_prop_schemas[key] = schema['properties'][key]
KeyError: '_id'
CRITICAL ('Exception writing records', KeyError('_id'))
Traceback (most recent call last):
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_postgres/postgres.py", line 300, in write_batch
{'version': target_table_version})
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_postgres/sql_base.py", line 808, in write_batch_helper
for table_batch in denest.to_table_batches(schema, key_properties, records):
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_postgres/denest.py", line 21, in to_table_batches
key_properties)
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_postgres/denest.py", line 47, in _get_streamed_table_schemas
key_prop_schemas[key] = schema['properties'][key]
KeyError: '_id'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/macbookpro/.virtualenvs/target-redshift/bin/target-redshift", line 10, in <module>
sys.exit(cli())
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_redshift/__init__.py", line 55, in cli
main(args.config)
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_redshift/__init__.py", line 49, in main
target_tools.main(redshift_target)
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_postgres/target_tools.py", line 28, in main
stream_to_target(input_stream, target, config=config)
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_postgres/target_tools.py", line 77, in stream_to_target
raise e
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_postgres/target_tools.py", line 64, in stream_to_target
line
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_postgres/target_tools.py", line 152, in _line_handler
state_tracker.flush_stream(line_data['stream'])
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_postgres/stream_tracker.py", line 41, in flush_stream
self._write_batch_and_update_watermarks(stream)
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_postgres/stream_tracker.py", line 67, in _write_batch_and_update_watermarks
self.target.write_batch(stream_buffer)
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_redshift/redshift.py", line 72, in write_batch
return PostgresTarget.write_batch(self, nullable_stream_buffer)
File "/Users/macbookpro/.virtualenvs/target-redshift/lib/python3.7/site-packages/target_postgres/postgres.py", line 309, in write_batch
raise PostgresError(message, ex)
target_postgres.exceptions.PostgresError: ('Exception writing records', KeyError('_id'))
Redshift.config
{
"redshift_host":"redshift-cluster-1.cepdr3dnmmcm.us-east-1.redshift.amazonaws.com",
"redshift_port":5439,
"redshift_database":"dev",
"redshift_username":"awsuser",
"redshift_password":"Password",
"redshift_schema":"singer",
"default_column_length":1000,
"target_s3":{
"aws_access_key_id":"*Aws key id*",
"aws_secret_access_key":"*aws access key*",
"bucket":"singer-redshift",
"key_prefix":"__tmp"
},
"schema":{
"properties":{
"_id":{
"minimum":-2147483648,
"inclusion":"automatic",
"maximum":2147483647,
"type":[
"null",
"integer"
]
},
"name":{
"maxLength":255,
"inclusion":"available",
"type":[
"null",
"string"
]
},
"updated_at":{
"inclusion":"available",
"type":[
"string"
],
"format":"date-time"
}
}
}
}
mongo catalog.json
{
"streams": [
{
"table_name": "users",
"stream": "users",
"metadata": [
{
"breadcrumb": [],
"metadata":
{
"selected": true,
"replication-method": "FULL_TABLE",
"table-key-properties": [
"_id"
],
"database-name": "efuse",
"row-count": 434,
"is-view": false,
"valid-replication-keys": [
"_id"
]
}
}
],
"tap_stream_id": "efuse-users",
"schema": {
"type": "object",
"properties": {
"_id": {
"type": "object"
}
}
}
}
]
mongo config.json
{
"password": "root",
"user": "stitch_root",
"host": "localhost",
"port": 27017,
"database": "admin",
"ssl": "false",
"replica_set": "rs0"
}
Thanks