SQL Server: How do I assign the correct account to past transactions?

59 Views Asked by At

SQL Gurus, I have a situation where I need to assign transactions to accounts where effective begin dates and effective end dates prior to 3/1/2017 exist in one table (#legacyt2), and anything past that date exists in the current dimension table (#type2). I need to assign the correct skey (found in #type2 and #legacyt2) for the appropriate beg/end effective dates based on the transaction date found in the #fact table.

I've come close, but I haven't been able to assign the accounts correctly. I've provided the build script and the queries that work.

The end result that I am looking for based on the script provided is this:

enter image description here

    drop table #type2;

    create table #type2
    (ckey int,
    skey int,
    lastUpdated date,
    begdate date,
    enddate date
    );

    insert into #type2
     (ckey,skey,lastupdated,begdate,enddate)
    values
     (1,100,'9/26/2017','5/9/2017','5/11/2017');

    insert into #type2
     (ckey,skey,lastupdated,begdate)
    values
     (1,125,'9/26/2017','5/11/2017');

    drop table #legacyt2;

    create table #legacyt2
    (ckey int,
    skey int,
    lastUpdated date,
    begdate date,
    enddate date
    );

    insert into #legacyt2
     (ckey,skey,lastupdated,begdate,enddate)
    values
     (1,100,'3/1/2017','1/22/2016','2/5/2017');

    insert into #legacyt2
     (ckey,skey,lastupdated,begdate)
    values
     (1,125,'3/1/2017','2/5/2017');


    drop table #fact;

    create table #fact
    (ckey int,
     xactDate date,
     acct int,
     amt decimal(10,2)
    );

    insert into #fact
    (ckey,xactDate,acct,amt)
    values
     (1,'1/24/2017',1,10.00);

    insert into #fact
    (ckey,xactDate,acct,amt)
    values
     (1,'1/24/2017',10,10.00);

     insert into #fact
    (ckey,xactDate,acct,amt)
    values
     (1,'1/30/2017',1,20.00);

     insert into #fact
    (ckey,xactDate,acct,amt)
    values
     (1,'2/2/2017',1,80.00);

     insert into #fact
    (ckey,xactDate,acct,amt)
    values
     (1,'2/3/2017',10,20.00);

     insert into #fact
    (ckey,xactDate,acct,amt)
    values
     (1,'2/6/2017',1,50.00);


     select ckey,
            skey,
            lastupdated,
            begdate,
            enddate
      from #type2

    -- Step #1: collect legacy and current dimension type 2 info and rank to identify current and previous versions.
     select ckey,
            skey,
            lastupdated,
            begdate,
            enddate
      from #legacyt2

    drop table #dim;

    with Dimension as
     (  select t2.ckey,
             t2.skey,
             t2.begdate,
             l2.begdate as LegacyBegDate,
             t2.enddate,
             l2.enddate as LegacyEndDate,
             row_number() over (partition by t2.ckey order by max(t2.begdate)) as rnk
        from #type2 t2
         left join #legacyt2 l2
          on l2.ckey = t2.ckey
          and l2.skey = t2.skey
        group by t2.ckey,t2.skey,t2.begdate,t2.enddate,l2.begdate,l2.enddate
     )
       select ckey, 
              skey,
              begdate,
              legacyBegDate,
              enddate,
              legacyenddate,
              rnk
            into #dim  
          from Dimension
         order by ckey,begdate;


    select * from #dim  
2

There are 2 best solutions below

4
Maurício Pontalti Neri On

I think I understood you. Try it:

    ;WITH dimension 
     AS (SELECT ckey 
                ,skey 
                ,begdate xactDate 
         FROM   #type2 
         UNION 
         SELECT ckey 
                ,skey 
                ,enddate xactDate 
         FROM   #type2 
         WHERE  enddate IS NOT NULL 
         UNION 
         SELECT ckey 
                ,skey 
                ,begdate xactDate 
         FROM   #legacyt2 
         UNION 
         SELECT ckey 
                ,skey 
                ,enddate xactDate 
         FROM   #legacyt2 
         WHERE  enddate IS NOT NULL), 
     dimension2 
     AS (SELECT Row_number() 
                  OVER ( 
                    partition BY ckey 
                    ORDER BY skey, xactdate) rn 
                ,ckey 
                ,skey 
                ,xactdate 
         FROM   dimension), 
     fact1 
     AS (SELECT Row_number() 
                  OVER ( 
                    partition BY ckey 
                    ORDER BY xactdate) rn 
                ,ckey 
                ,xactdate 
                ,acct 
                ,amt 
         FROM   #fact) 
SELECT A.ckey 
       ,b.skey 
       ,a.xactdate 
       ,a.acct 
       ,a.amt 
INTO   #dim 
FROM   fact1 a 
       INNER JOIN dimension2 b 
               ON a.ckey = b.ckey 
                  AND a.rn = b.rn 
ORDER  BY a.ckey 
          ,A.rn 



SELECT * 
FROM   #dim 

Result

ckey        skey        xactDate   acct        amt
----------- ----------- ---------- ----------- ---------------------------------------
1           100         2017-01-24 1           10.00
1           100         2017-01-24 10          10.00
1           100         2017-01-30 1           20.00
1           100         2017-02-02 1           80.00
1           125         2017-02-03 10          20.00
1           125         2017-02-06 1           50.00
0
plditallo On

Although @Mauricio Pontalti Neri did a super job, in the end, the simplest and clearest way to move forward with solving for this problem was to work with the #fact staging table by adding a new column for skey for updates and utilizing the #dim. This won't win any awards for eloquence, but it does get the job done.

Here's what the solution looks like:

     -- step #1 dump the legacy/current dimension information into a combined #temp table.

        with Dimension as
     (  select t2.ckey,
             t2.skey,
             t2.begdate,
             l2.begdate as LegacyBegDate,
             t2.enddate,
             l2.enddate as LegacyEndDate,
             row_number() over (partition by t2.ckey order by max(t2.begdate)) as rnk
        from #type2 t2
         left join #legacyt2 l2
          on l2.ckey = t2.ckey
          and l2.skey = t2.skey
        group by t2.ckey,t2.skey,t2.begdate,t2.enddate,l2.begdate,l2.enddate
     )
       select ckey, 
              skey,
              begdate,
              legacyBegDate,
              enddate,
              legacyenddate,
              rnk
            into #dim  
          from Dimension
         order by ckey,begdate;

        -- step #2 update the skey for current ckey/skey pairings.

     update #fact
      set #fact.skey = x.skey
      from (select skey,
                   ckey,
                   LegacyBegDate,
                   LegacyEndDate,
                   begdate,
                   enddate
              from #dim
              where enddate is null
                and legacyenddate is null
            )x
     where #fact.ckey = x.ckey
      and #fact.xactdate between cast(x.legacyBegDate as date) and cast(getdate() as date)

        -- step #3 update the skey for closed ckey/skey pairings.

      update #fact
      set #fact.skey = x.skey
      from (select skey,
                   ckey,
                   LegacyBegDate,
                   LegacyEndDate,
                   begdate,
                   enddate
              from #dim
              where enddate is NOT null
                and legacyenddate is NOT null
            )x
     where #fact.ckey = x.ckey
      and #fact.xactdate between cast(x.legacyBegDate as date) and cast(x.legacyEndDate as date)