I have a delta table stored in s3 and metadata stored in Glu. I updated that table's schema by adding new column to it. I also updated the schema of that table in Glu. After all these changes I'm able to query the table via Athena UI.
Here is the query
select status from deletes where request_id = '1234'
However when I'm running the same query via AWS Golang SDK, I'm getting follwing exception
DELTA_LAKE_INVALID_SCHEMA: Metadata not found in transaction log for deletes
Here is the golang snippet that I'm using to query
func queryAthena(s *StorageAthena, query string) (string, error) {
var input athena.StartQueryExecutionInput
input.SetQueryString(query)
var executionContext athena.QueryExecutionContext
executionContext.SetDatabase(s.dbName)
var resultConfig athena.ResultConfiguration
resultConfig.SetOutputLocation(s.bucketName)
input.SetQueryExecutionContext(&executionContext)
input.SetResultConfiguration(&resultConfig)
result, err := s.Db.StartQueryExecution(&input)
if err != nil {
log.Error("StartQueryExecution: Error executing the status query", err.Error())
return "", err
}
var queryExecutionInput athena.GetQueryExecutionInput
queryExecutionInput.SetQueryExecutionId(*result.QueryExecutionId)
duration := time.Duration(5) * time.Second
var queryState *athena.GetQueryExecutionOutput
for {
queryState, err = s.Db.GetQueryExecution(&queryExecutionInput)
if err != nil {
log.Error("GetQueryExecution: Error executing the status query", err.Error())
return "", err
}
s := *queryState.QueryExecution.Status.State
if s != "RUNNING" && s != "QUEUED" {
break
}
time.Sleep(duration)
}
if *queryState.QueryExecution.Status.State == "SUCCEEDED" {
var resultsInput athena.GetQueryResultsInput
resultsInput.SetQueryExecutionId(*result.QueryExecutionId)
rows, err := s.Db.GetQueryResults(&resultsInput)
if err != nil {
log.Error("GetQueryResults: Error executing the status query", err.Error())
return "", err
}
resultRows := rows.ResultSet.Rows
if len(resultRows) > 0 {
return resultRows[0].String(), nil // Fetch first row as status query will return only 1 row.
} else {
return "", errors.New(fmt.Sprintf("Error while fetching rows. Fetched %d rows", len(resultRows)))
}
} else {
return "", errors.New(fmt.Sprintf("ExcutionId : %s, State : %s. Reason : %s", *result.QueryExecutionId, *queryState.QueryExecution.Status.State, queryState.QueryExecution.Status.AthenaError.String()))
}
}
StorageAthena is a struct with Athena object and other configurations
type StorageAthena struct {
Db *athena.Athena //exported for testing
bucketName string
dbName string
}
I've already checked that the golang service principal (which is a JSON-RPC service deployed in EKS) has access to Athena, Glu and S3.
Here is the list of permissions on Athena
"athena:BatchGet*",
"athena:Get*",
"athena:List*",
"athena:CancelQueryExecution",
"athena:StartQueryExecution",
for Glu
"glue:BatchGet*",
"glue:Get*",
"glue:List*",
for S3
"s3:Get*",
"s3:List*",
I'm unable to figure out what could be wrong causing this issue. Is there any other place where I have to update the schema/metadata for the table ?