MySQL Query for Spread/Difference of Energy Counter and missing Value interpolation

83 Views Asked by At

I have an energy counter (kWh) recorded in a MySQL database every 15 minutes. Sometimes the recording fails for several reasons (power outage, computer reboot for updates...) and values are missing.

The table looks as followed:

id      Time                Energy
27800   13.02.2024 23:30:01 651720048
27801   13.02.2024 23:45:00 651720672
(missing)
27802   14.02.2024 00:15:02 651721917
27803   14.02.2024 00:30:00 651722540
27804   14.02.2024 00:45:00 651723129
27805   14.02.2024 01:00:02 651723769
27806   14.02.2024 01:15:01 651724405
27807   14.02.2024 01:30:01 651725030
(missing)
27808   14.02.2024 02:00:01 651726275
27809   14.02.2024 02:15:02 651726880
27810   14.02.2024 02:30:01 651727519
27811   14.02.2024 02:45:00 651728130
27812   14.02.2024 03:00:02 651728751
27813   14.02.2024 03:15:02 651729381

I am looking for a SQL query which returns the consumption (spread/difference between energy counter values) in a certain (variable) time span (eg. 15 minutes, 60 minutes, 24 hours, 1 month...) which also considers missing values by interpolation.

The result should look as showed there in the columns Consumption 15m and Consumption 1h:

id      Time                Energy      Consumption 15m     Consumption 1h
27800   13.02.2024 23:30:01 651720048   -   
27801   13.02.2024 23:45:00 651720672   624 
(missing)                   651721294.5 622.5               -
27802   14.02.2024 00:15:02 651721917   622.5   
27803   14.02.2024 00:30:00 651722540   623 
27804   14.02.2024 00:45:00 651723129   589 
27805   14.02.2024 01:00:02 651723769   640                 2474,5
27806   14.02.2024 01:15:01 651724405   636 
27807   14.02.2024 01:30:01 651725030   625 
(missing)                   651725652.5 622.5   
27808   14.02.2024 02:00:01 651726275   622.5               2506
27809   14.02.2024 02:15:02 651726880   605 
27810   14.02.2024 02:30:01 651727519   639 
27811   14.02.2024 02:45:00 651728130   611 
27812   14.02.2024 03:00:02 651728751   621                 2476
27813   14.02.2024 03:15:02 651729381   630 

I guess it is somewhow required to find the closest two values of two given time points (e.g. 14.02.2024 00:00:00 and 14.02.2024 01:00:00) and create an interpolated value of the energy counter to build then the difference of it.

Which query could achieve that desired result?

2

There are 2 best solutions below

1
ValNik On

See example.

Recursive add missing rows with interpolated time and energy.

with recursive r as( -- anchor part - all existing rows
select id, Time,Energy  
    -- time diff rounded to 15 min intervals count
  ,round(time_to_sec(timediff(lead(time,1,time)over(order by time),time))/900) td15mr
  ,lead(Energy,1,Energy)over(order by time)-Energy eDiff
from test
  union all  -- recursive part  add missing rows only
select id, addtime(Time,'00:15:00'),Energy+(eDiff/td15mr)   
  ,td15mr-1  as td15mr
  ,eDiff-(eDiff/td15mr)
from r where td15mr>1
)
select id, Time,Energy
  ,Energy-lag(Energy,1,Energy)over(order by time) cons15m
  ,case when minute(date_add(time,INTERVAL 1 minute))<15 then
     energy-(min(energy)over(order by time rows between 4 preceding and current row) )
   end cons1h
  ,td15mr,eDiff
from r
order by time;

For test data

create table test (id int,Time datetime, Energy double);
insert into test values
 (27800   ,'2024.02.13 23:30:01', 651720048)
,(27801   ,'2024.02.13 23:45:00', 651720672)
-- (missing)
,(27802   ,'2024.02.14 00:15:02', 651721917)
,(27803   ,'2024.02.14 00:30:00', 651722540)
,(27804   ,'2024.02.14 00:45:00', 651723129)
,(27805   ,'2024.02.14 01:00:02', 651723769)
,(27806   ,'2024.02.14 01:15:01', 651724405)
,(27807   ,'2024.02.14 01:30:01', 651725030)
-- (missing)
 ,(27808   ,'2024.02.14 02:00:01', 651726275)
,(27809   ,'2024.02.14 02:15:02', 651726880)
,(27810   ,'2024.02.14 02:30:01', 651727519)
,(27811   ,'2024.02.14 02:45:00', 651728130)
,(27812   ,'2024.02.14 03:00:02', 651728751)
,(27813   ,'2024.02.14 03:15:02', 651729381)
,(27814   ,'2024.02.14 03:29:58', 651729400)
  -- missing 3 rows
,(27815   ,'2024.02.14 04:30:02', 651731800)
;

Output is

id Time Energy cons15m cons1h td15mr eDiff
27800 2024-02-13 23:30:01 651720048 0 null 1 624
27801 2024-02-13 23:45:00 651720672 624 null 2 1245
27801 2024-02-14 00:00:00 651721294.5 622.5 1246.5 1 622.5
27802 2024-02-14 00:15:02 651721917 622.5 null 1 623
27803 2024-02-14 00:30:00 651722540 623 null 1 589
27804 2024-02-14 00:45:00 651723129 589 null 1 640
27805 2024-02-14 01:00:02 651723769 640 2474.5 1 636
27806 2024-02-14 01:15:01 651724405 636 null 1 625
27807 2024-02-14 01:30:01 651725030 625 null 2 1245
27807 2024-02-14 01:45:01 651725652.5 622.5 null 1 622.5
27808 2024-02-14 02:00:01 651726275 622.5 2506 1 605
27809 2024-02-14 02:15:02 651726880 605 null 1 639
27810 2024-02-14 02:30:01 651727519 639 null 1 611
27811 2024-02-14 02:45:00 651728130 611 null 1 621
27812 2024-02-14 03:00:02 651728751 621 2476 1 630
27813 2024-02-14 03:15:02 651729381 630 null 1 19
27814 2024-02-14 03:29:58 651729400 19 null 4 2400
27814 2024-02-14 03:44:58 651730000 600 null 3 1800
27814 2024-02-14 03:59:58 651730600 600 1849 2 1200
27814 2024-02-14 04:14:58 651731200 600 null 1 600
27815 2024-02-14 04:30:02 651731800 600 null 0 0

Fiddle

0
ValNik On

For MySQL 5.7, where windows functions not applicable, we use self join on on a value that precisely defines the order of the rows. We assume that the Id column satisfies this requirement and we can calculate the previous value (on (t2.id+1)=t1.id).
If we can't do that, we'll have to do SELF JOIN and GROUP BY to detect previous row or rows.
Or (select max(Id) from test t2 where t2.time<t.time) as prevId and join on id=prevId - this is lag() row.
Or (select min(Id) from test t2 where t2.time>t.time) as nextId and join on id=nextId - this is lead() row.

select id
  ,case when coalesce(n,0)>1 then date_add(Time,interval -(n-1)*15 minute) else Time end Time
  ,case when coalesce(n,0)>1 then Energy-(eDiff/td15mr)*(n-1) else energy end energy
  ,case when td15mr>1 then (eDiff/td15mr) else eDiff end cons15m
  ,n,td15mr
  from(
   select t1.id, t1.Time,t1.Energy  
    ,round(time_to_sec(timediff(t1.time,coalesce(t2.time,t1.time)))/900) td15mr
    ,t1.Energy-coalesce(t2.Energy,t1.Energy) eDiff
   from test t1
   left  join test t2 on (t2.id+1)=t1.id
  )a
  left join (select 1 n union all select 2 union all select 3 
             union all select 4 union all select 5)nn
  on nn.n<=a.td15mr

To calculate hour consumption, self join above query.
There added column round(to_seconds(time)/900) intN - interval number for every 15 min intervals. JOIN on intN=(intN-4) - 1 hour=4 intervals

select t1.id, t1.Time,t1.Energy,t1.cons15m,t1.energy-t2.energy cons1h -- ,t1.intN,t2.*
from(
 select * ,cast(round(to_seconds(time)/900) as signed) intN
 from(
  select id
  ,case when coalesce(n,0)>1 then date_add(Time,interval -(n-1)*15 minute) else Time end Time
  ,case when coalesce(n,0)>1 then Energy-(eDiff/td15mr)*(n-1) else energy end energy
  ,case when td15mr>1 then (eDiff/td15mr) else eDiff end cons15m
  ,n,td15mr
  from(
   select t1.id, t1.Time,t1.Energy  
    ,round(time_to_sec(timediff(t1.time,coalesce(t2.time,t1.time)))/900) td15mr
    ,t1.Energy-coalesce(t2.Energy,t1.Energy) eDiff
   from test t1
   left  join test t2 on (t2.id+1)=t1.id
  )a
  left join (select 1 n union all select 2 union all select 3 
             union all select 4 union all select 5)nn
    on nn.n<=a.td15mr
 )b
)t1
left join 
  (
 select * ,cast(round(to_seconds(time)/900) as signed) intN
 from(
  select id
  ,case when coalesce(n,0)>1 then date_add(Time,interval -(n-1)*15 minute) else Time end Time
  ,case when coalesce(n,0)>1 then Energy-(eDiff/td15mr)*(n-1) else energy end energy
  ,case when td15mr>1 then (eDiff/td15mr) else eDiff end cons15m
  ,n,td15mr
  from(
   select t1.id, t1.Time,t1.Energy  
    ,round(time_to_sec(timediff(t1.time,coalesce(t2.time,t1.time)))/900) td15mr
    ,t1.Energy-coalesce(t2.Energy,t1.Energy) eDiff
   from test t1
   left  join test t2 on (t2.id+1)=t1.id
  )a
  left join (select 1 n union all select 2 union all select 3 
             union all select 4 union all select 5)nn
  on nn.n<=a.td15mr
 )b
)t2 on (t2.intN)=(t1.intN-4)
order by t1.id,t1.time,t1.n

Subqueries t1 and t2 there identic.

Output

id Time energy cons15m cons1h
27800 2024-02-13 23:30:01 651720048 0 null
27801 2024-02-13 23:45:00 651720672 624 null
27802 2024-02-14 00:00:02 651721294.5 622.5 null
27802 2024-02-14 00:15:02 651721917 622.5 null
27803 2024-02-14 00:30:00 651722540 623 2492
27804 2024-02-14 00:45:00 651723129 589 2457
27805 2024-02-14 01:00:02 651723769 640 2474.5
27806 2024-02-14 01:15:01 651724405 636 2488
27807 2024-02-14 01:30:01 651725030 625 2490
27808 2024-02-14 01:45:01 651725652.5 622.5 2523.5
27808 2024-02-14 02:00:01 651726275 622.5 2506
27809 2024-02-14 02:15:02 651726880 605 2475
27810 2024-02-14 02:30:01 651727519 639 2489
27811 2024-02-14 02:45:00 651728130 611 2477.5
27812 2024-02-14 03:00:02 651728751 621 2476
27813 2024-02-14 03:15:02 651729381 630 2501
27814 2024-02-14 03:29:01 651729400 19 1881
27815 2024-02-14 03:45:02 651730000 600 1870
27815 2024-02-14 04:00:02 651730600 600 1849
27815 2024-02-14 04:15:02 651731200 600 1819
27815 2024-02-14 04:30:02 651731800 600 2400
27816 2024-02-14 04:38:02 651731999 199 1999
27817 2024-02-14 04:45:02 651732200 201 2200
27818 2024-02-14 05:00:02 651732500 300 1900
27818 2024-02-14 05:15:02 651732800 300 1600

Query is a longread. See fiddle