How to use previous quarter's data when quarter is missing from dataset

44 Views Asked by At

I am trying to write a SQL query in Presto for a cumulative delivery forecast. However, there aren't always deliveries every quarter, so for a quarter with no deliveries, there is no row at all for that quarter's data. I would like to use the previous quarter's data when this happens.

I have tried creating a CTE with all the quarters I want populated and using some window functioning to fill in the missing data. I was trying to use a CASE statement for if null, then LAG(quarter - 1) but the CASE statement in general seemed not to work.

with forecasts as (
    select quarter, type, location, sum(total)
    from delivery_db
    group by 1,2,3
)

quarters AS (
    SELECT
        *
    FROM (
        VALUES
            ('2024-Q1'),
            ('2024-Q2'),
            ('2024-Q3'),
            ('2024-Q4')
    ) v(quarter)
)

SELECT
    q.quarter,
    f.type,
    f.location,
    --CASE(WHEN f.quarter is not null then SUM(total) else LAG(f.quarter, 1))
    SUM(total) OVER(
        PARTITION BY
            f.type
        ORDER BY
            q.quarter
    ) AS total
FROM quarters q
FULL JOIN forecasts f
on f.quarter = q.quarter

DeliveryDB looks like:

Quarter Type Location Total
2024-Q1 TypeA TBD 10
2024-Q1 TypeA TBD 4
2024-Q4 TypeA TBD 5

Actual

Quarter Type Location Total
2024-Q1 TypeA TBD 14
2024-Q2 null null null
2024-Q3 null null null
2024-Q4 TypeA TBD 19

Desired

Quarter Type Location Total
2024-Q1 TypeA TBD 14
2024-Q2 TypeA TBD 14
2024-Q3 TypeA TBD 14
2024-Q4 TypeA TBD 19
1

There are 1 best solutions below

1
ValNik On BEST ANSWER

See example.

  1. First, we create a framework for the result - All quarters X (All Types and Locations)
  2. Then join calculated sums by (quarter,type,location) and sum rolling total
with forecasts as (
  select quarter, type, location, sum(total) total
    from delivery_db
    group by quarter, type, location
)
,quarters AS (
    SELECT *
    FROM (
        VALUES
            ('2024-Q1'),
            ('2024-Q2'),
            ('2024-Q3'),
            ('2024-Q4')
    ) v(quarter)
)
,AllPoints as(
   select * 
   from quarters,
    (select distinct type,location from forecasts)f
)
SELECT
    q.quarter,
    q.type,
    q.location,
    SUM(case when f.quarter=q.quarter then total else 0 end) 
       OVER(PARTITION BY q.type,q.location ORDER BY q.quarter
     ) AS total2,
     f.quarter,
     f.total qtl_total
FROM AllPoints q
left join forecasts f on q.quarter= f.quarter
    and q.type=f.type and q.location=f.location
order by type,location,q.quarter

Output

quarter type location total quarter qtl_total
2024-Q1 TypeA TBD 14 2024-Q1 14
2024-Q2 TypeA TBD 14 null null
2024-Q3 TypeA TBD 14 null null
2024-Q4 TypeA TBD 19 2024-Q4 5
2024-Q1 TypeB TBD 7 2024-Q1 7
2024-Q2 TypeB TBD 7 null null
2024-Q3 TypeB TBD 15 2024-Q3 8
2024-Q4 TypeB TBD 15 null null
2024-Q1 TypeB TBL 0 null null
2024-Q2 TypeB TBL 0 null null
2024-Q3 TypeB TBL 3 2024-Q3 3
2024-Q4 TypeB TBL 3 null null
2024-Q1 TypeB TBX 33 2024-Q1 33
2024-Q2 TypeB TBX 33 null null
2024-Q3 TypeB TBX 33 null null
2024-Q4 TypeB TBX 33 null null
2024-Q1 TypeB TBY 0 null null
2024-Q2 TypeB TBY 0 null null
2024-Q3 TypeB TBY 0 null null
2024-Q4 TypeB TBY 51 2024-Q4 51

With test data

insert into delivery_db values
 ('2024-Q1','TypeA','TBD',  10) -- 1,4 quarters 
,('2024-Q1','TypeA','TBD',  4)  -- 
,('2024-Q4','TypeA','TBD',  5)  --
,('2024-Q1','TypeB','TBD',  7)  -- 1,3 
,('2024-Q3','TypeB','TBD',  8)  --
,('2024-Q3','TypeB','TBL',  3)  -- only 3 q
,('2024-Q1','TypeB','TBX',  33) -- only 1 q
,('2024-Q4','TypeB','TBY',  51) -- only 4 q
;

Demo