AWS Athena would like to join table once instead of 3 times

46 Views Asked by At

Could you please help to rewrite SQL on AWS Athena to use table with one join instead of 3 times I need to get result:

with t1 as (
    select 1 id, 1 id1, 2 id2, 3 id3
    union all
    select 2 id, 4 id1, 2 id2, 4
    union all
    select 3 id, 4 id1, 4 id2, 1),
t2 as (
    select 1 id, 'Text1' txt
    union all
    select 2 id, 'Text2' txt
    union all
    select 3 id, 'Text3' txt)
select t1.*, 
  coalesce(t2.id,t3.id,t4.id) t2_id,
  coalesce(t2.txt,t3.txt,t4.txt) t2_txt
from t1
    left join t2 on t1.id1 = t2.id
    left join t2 t3 on t1.id2 = t3.id and t2.id is null
    left join t2 t4 on t1.id3 = t4.id and t2.id is null and t3.id is null

Required result:

Result

I have tried:

with t1 as (
    select 1 id, 1 id1, 2 id2, 3 id3
    union all
    select 2 id, 4 id1, 2 id2, 4
    union all
    select 3 id, 4 id1, 4 id2, 1),
t2 as (
    select 1 id, 'Text1' txt
    union all
    select 2 id, 'Text2' txt
    union all
    select 3 id, 'Text3' txt)
select t1.*, 
   t2.id  t2_id,
   t2.txt t2_txt
from t1
    left join t2 on 
      case 
        when t1.id1 = t2.id then t2.id -- First condition (1)
        when t1.id2 = t2.id then t2.id -- Should be skiped if 1 is true (2)
        when t1.id3 = t2.id then t2.id -- Should be skiped if 1 or 2 is true (3)
      end = t2.id
order by t1.id,t2.id

Could you please advise. Thank you in advance!!!

1

There are 1 best solutions below

0
On

Not sure if this would be helpful in terms of performance but you can try using or in join condition then grouping by the ids (alternatively you can introduce surrogate unique id with row_number) and then using max_by to select item which is matched by "earliest" id:

-- sample data
with t1(id, id1, id2, id3) as (
    values (1, 1, 2, 3),
     (2, 4, 2, 4),
     (3, 4, 4, 1)),
t2 (id, txt) as (
    values (1, 'Text1'),
    (2, 'Text2'),
    (3, 'Text3'))

-- query
select id, id1, id2, id3,
       max_by(t2_id, id_matched) t2_id,
       max_by(t2_txt, id_matched) t2_txt
from(
    select t1.*,
      t2.id t2_id,
      t2.txt t2_txt,
      case t2.id
        when t1.id1 then 3
        when t1.id2 then 2
        when t1.id3 then 1
      end id_matched -- surrogate order based on "first" matched id
    from t1
        left join t2 on t1.id1 = t2.id
        or (t1.id2 = t2.id and t1.id1 != t2.id)
        or (t1.id3 = t2.id and t1.id1 != t2.id and t1.id2 != t2.id)
    where t2.id is not null
    )
group by id, id1, id2, id3;

Output:

id id1 id2 id3 t2_id t2_txt
3 4 4 1 1 Text1
1 1 2 3 1 Text1
2 4 2 4 2 Text2