I need to create table in AWS Athena for Apache access logs. I tried below query but it fails. The grok parsing string seems to work well separately but when running in Athena it fails.
Note: I need the time stamp to be without square brackets.
CREATE EXTERNAL TABLE `my_logs.apache_lightsail2`(
client_ip string,
client_id string,
user_id string,
timestamp2 string,
client_request string,
server_status string,
returned_obj_size string
)
ROW FORMAT SERDE
'com.amazonaws.glue.serde.GrokSerDe'
WITH SERDEPROPERTIES (
'input.format'='^%{IPV4:client_ip} %{DATA:client_id} %{USERNAME:user_id} \[%{HTTPDATE:timestamp2}\] %{QUOTEDSTRING:client_request} %{DATA:server_status} %{DATA:returned_obj_size}$'
)
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
's3://my-accesslogs/apache-access-logs/';
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. java.util.regex.PatternSyntaxException: Illegal/unsupported escape sequence near index 268 ^((?<![0-9])(?:(?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})...)(?![0-9])) (.?) ([a-zA-Z0-9._-]+) [((?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])/\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b/(?>\d\d){1,2}:(?!<[0-9])(?:2[0123]|[01]?[0-9]):(?:[0-5][0-9])(?::(?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?))(?![0-9]) (?:[+-]?(?:[0-9]+)))] ((?>(?<!\)(?>"(?>\.|[^\"]+)+"|""|(?>'(?>\.|[^\']+)+')|''|(?>
(?>\\.|[^\\]+)+`)|``))) (.?) (.*?)$ ^