I have created a stored procedure in Snowflake to dynamically pivot the table and create a view from JSON data. The query creates a view with 370 columns as I desired but it's too slow to query over the view in fact querying a single row takes up to 20 mins
create or replace procedure create_pivot_view()
returns string
language javascript
execute as caller as
$$
var cols_query = ` select
concat('\\'',
listagg(
distinct f.value:signalName::STRING,'\\',\\''),'\\'') as signal_name_list
FROM
(select *
FROM
PM_POWER),table(flatten(input=>c1:data:baseData:signals, mode=>'ARRAY')) as f`;
var stmt1 = snowflake.createStatement({sqlText: cols_query});
var results1 = stmt1.execute();
results1.next();
var col_list = results1.getColumnValue(1);
pivot_query = `
create or replace view AWSS3_PM.PUBLIC.PM_POWER_CN8000_V2 as
select * from (
select figures,stats,SignalName,id,Latitude,Longitude,Altitude
from (
select
c1:id::STRING as id,
c1:data:baseData:unitID::varchar as UnitID,
c1:data:baseData:latitude::varchar as Latitude,
c1:data:baseData:longitude::varchar as Longitude,
c1:data:baseData:altitude::varchar as Altitude,
c1:timestamp::varchar as TimeStamp,
f.value:"dataValue"::varchar as SignalDataValue,
f.value:"dataValueValid"::varchar as SignalDataValueValid,
f.value:"signalID"::varchar as SignalID,
f.value:"units"::varchar as SignalUnits,
f.value:"dataValueEnum"::varchar as SignalDataEnum,
f.value:"signalName"::varchar as SignalName
from
(
select *
FROM
PM_POWER), table(flatten(input=>c1:data:baseData:signals, mode=>'ARRAY')) as f
) flt
unpivot (figures for stats in(UnitID,SignalDataValue, SignalDataValueValid, SignalID, SignalUnits, SignalDataEnum, TimeStamp))
) up
pivot (min(up.figures) for up.SignalName in (${col_list}
))
`;
var stmt2 = snowflake.createStatement({sqlText: pivot_query});
stmt2.execute();
return pivot_query;
$$;
call create_pivot_view();
Any suggestions to fasten this approach or an alternative would be really grateful to improve the performance of the query.
A single row of JSON data looks like the below:
{
"data": {
"baseData": {
"altitude": 0,
"altitudeValid": "false",
"customerID": "CN",
"kind": "Power",
"latitude": 0,
"latitudeValid": "false",
"longitude": 0,
"longitudeValid": "false",
"name": "Predictive Maintenance Data.Power",
"signals": [
{
"dataValue": 3112900,
"dataValueValid": "true",
"signalID": 3424,
"signalName": "mainGeneratorPower",
"units": "kW"
},
{
"dataValue": 1.4035000801086426,
"dataValueValid": "true",
"signalID": 380,
"signalName": "DB_HandlePos",
"units": "V"
},
{
"dataValue": 2,
"dataValueEnum": "Reverse",
"dataValueValid": "true",
"signalID": 813,
"signalName": "reverserPos",
"units": "none"
},
{
"dataValue": 291400,
"dataValueValid": "true",
"signalID": 586,
"signalName": "tractiveEffort",
"units": "none"
},
{
"dataValue": 8,
"dataValueEnum": "T 8",
"dataValueValid": "true",
"signalID": 899,
"signalName": "throttlePos",
"units": "none"
},
{
"dataValue": 47.05950164794922,
"dataValueValid": "true",
"signalID": 1805,
"signalName": "AWTF",
"units": "°C"
},
{
"dataValue": 0.2971585690975189,
"dataValueValid": "true",
"signalID": 4925,
"signalName": "ChpDty",
"units": "none"
},
{
"dataValue": 20.14109992980957,
"dataValueValid": "true",
"signalID": 4835,
"signalName": "LDBBLWA",
"units": "A"
},
{
"dataValue": 36.02000045776367,
"dataValueValid": "true",
"signalID": 2669,
"signalName": "IcGVelM",
"units": "km/hr"
},
{
"dataValue": 479185.125,
"dataValueValid": "true",
"signalID": 1070,
"signalName": "WPEgILP",
"units": "PSIG"
},
{
"dataValue": 293026.875,
"dataValueValid": "true",
"signalID": 1799,
"signalName": "WPEgOtP",
"units": "PSIG"
},
{
"dataValue": 926750,
"dataValueValid": "true",
"signalID": 4698,
"signalName": "MR2 Prs",
"units": "PSIG"
},
{
"dataValue": 24,
"dataValueEnum": "ON",
"dataValueValid": "true",
"signalID": 664,
"signalName": "MVCC>",
"units": "none"
},
{
"dataValue": 907422.625,
"dataValueValid": "true",
"signalID": 4804,
"signalName": "SR Pres",
"units": "PSIG"
}
],
"unitID": "CN 8000",
"ver": "1.0.0"
},
"baseType": "PredictiveMaintenanceData"
},
"dataName": "CN8000.Prod.PredictiveMaintenanceData",
"id": "18a89f9e-9620-4453-a546-23412025e7c0",
"tags": {
"iaapl.access.level1": "Private",
"iaapl.access.level2": "OEM",
"iaapl.internal.deviceID": "",
"iaapl.internal.deviceName": "",
"iaapl.internal.encodeTime": "2021-02-25T07:41:19.000Z",
"iaapl.internal.sender": "Intelligent",
"iaapl.software.name": "",
"iaapl.software.partNumber": 0,
"iaapl.software.version": ""
},
"timestamp": "2021-02-25T07:32:31.000Z"
}
The result should look like :
Thanks in Advance
the bits where you type
could just be
Your JSON is malformed.
You are reading from
data:baseData:signals
but there is no signals in you data..If you want help with your View performance, I would remove the whole Stored Procedure part. And get SELECT that get the result you want from the demo data you provide. Because presently.
Update:
So your example data still does not generate the desired output. So if this is a performance improvement project, it's super frustrating, to not even have the
input + code = output
anyways, here's you data and code mixed together:
An Answer:
so this SQL gets the answer in the same structure BUT
timestamp
,unitid
are not rows as those are all the same value, so it seem having them as rows is a mistake, and they should be a column..gives:
which means your Store Procedure needs to build that.. which is you are happy that this is more performant, you can solve how to build that above in your SP.