snowflake query is taking too much time to run

60 Views Asked by At
SELECT 
    category_tree_master__."category_id" AS category_id,
    --searchchildrenmaster.children,
    --ARRAY_CONTAINS(category_product_nb_stand_nb_product."category_id" ::varchar:: VARIANT ,TO_ARRAY(SPLIT(CAST(searchchildrenmaster."category_id" AS VARCHAR) || COALESCE (', ' || searchchildrenmaster.children, ''), ','))) AS check_,
    --category_product_nb_stand_nb_product.stands AS nb_stands,
    --category_product_nb_stand_nb_product.products AS nb_products
    COUNT(DISTINCT category_product_nb_stand_nb_product.stands) OVER (PARTITION BY category_tree_master__."category_id"),
    COUNT(DISTINCT category_product_nb_stand_nb_product.products) OVER (PARTITION BY category_tree_master__."category_id")
    --IFF (count (DISTINCT category_product_nb_stand_nb_product.stands)  IS NOT NULL , count (DISTINCT category_product_nb_stand_nb_product.stands) , 0) AS nb_stands,
    --IFF (count (DISTINCT category_product_nb_stand_nb_product.products)  IS NOT NULL , count (DISTINCT category_product_nb_stand_nb_product.products) , 0) AS nb_products
FROM 
    (SELECT 
         src."category_id",
         CAT_CHILDREN_LIST AS children
     FROM 
         (SELECT 
              EXPO_MASTER.category_tree_master."category_tree_master_id", 
              EXPO_MASTER.category_tree_master."category_id"
          FROM 
              EXPO_MASTER.category_tree_master
          INNER JOIN 
              (SELECT 
                   EXPO_MASTER.category_lang."category_id",  
                   EXPO_MASTER.category_lang."lang"
               FROM 
                   EXPO_MASTER.category_lang
               WHERE 
                   EXPO_MASTER.category_lang."lang" = 'fr') AS tmp
            ON EXPO_MASTER.category_tree_master."category_id" = tmp."category_id") src
            LEFT JOIN PERFORMANCE.SEARCH_CHILDREN_MASTER  children_cat
                ON CHILDREN_CAT."category_id"  = src."category_id" ) searchchildrenmaster
        INNER JOIN 
        (
        SELECT 
          EXPO_MASTER.category_tree_master."category_tree_master_id", 
          EXPO_MASTER.category_tree_master."category_id", 
          EXPO_MASTER.category_tree_master."parent_id",
          EXPO_MASTER.category_tree_master."gauche", 
          EXPO_MASTER.category_tree_master."droite", 
          EXPO_MASTER.category_tree_master."niveau", 
          EXPO_MASTER.category_tree_master_ref_plateforme_selection."selection_leadgen",
          EXPO_MASTER.category_tree_master_ref_plateforme_selection."selection_stand"
        FROM 
            EXPO_MASTER.category_tree_master
        JOIN 
            EXPO_MASTER.category_tree_master_ref_plateforme_selection 
            ON 
            (
                EXPO_MASTER.category_tree_master_ref_plateforme_selection."category_tree_master_id" = EXPO_MASTER.category_tree_master."category_tree_master_id" AND 
                EXPO_MASTER.category_tree_master_ref_plateforme_selection."ref_plateforme_id" = 1
            )
        )category_tree_master__ ON category_tree_master__."category_id" = 
        CASE WHEN searchchildrenmaster."category_id" IS NOT NULL THEN searchchildrenmaster."category_id" ELSE 1 END
         LEFT JOIN LATERAL
         (
            SELECT
              EXPO_MASTER.category_product."category_id",
              EXPO_MASTER.product."stand_id" AS stands,
              EXPO_MASTER.product."product_id" AS products
            FROM 
                EXPO_MASTER.category_product
            JOIN 
                EXPO_MASTER.product ON (EXPO_MASTER.product."product_id" = EXPO_MASTER.category_product."product_id")
                WHERE 
                ARRAY_CONTAINS(EXPO_MASTER.category_product."category_id" ::varchar:: VARIANT ,TO_ARRAY(SPLIT(CAST(searchchildrenmaster."category_id" AS VARCHAR) || COALESCE (', ' || searchchildrenmaster.children, ''), ',')))
                    )category_product_nb_stand_nb_product
                --GROUP BY category_tree_master__."category_id"

I'm using this query to count the number of stands and the number of products of each category in the EXPO_MASTER.category_tree_master."category_id" where EXPO_MASTER.category_product is in the a list of values.

That's the query, I tried selecting columns without aggregation it works fine but when i try to do the COUNT with partition by or group by the Query takes for ever to run (around 6-8 minutes).

Can someone please tell me what the problem is, and how to fix it?

1

There are 1 best solutions below

3
Simeon Pilgrim On

ok lets first rewrite the SQL to be just a little more readable to me, I will rename things from double quoted to normal and point out unused columns, those IFF's can be swapped for zeroifnull, and lets use some readable size aliases (just to keep it all flowing):

SELECT 
    ctm_.category_id
    --,scm.children,
    --,ARRAY_CONTAINS(cp_s_p.category_id ::varchar::variant ,  scm.array_of_ids  ) AS check_
    --,cp_s_p.stands AS nb_stands
    --,cp_s_p.products AS nb_products
    ,COUNT(DISTINCT cp_s_p.stands) OVER (PARTITION BY ctm_.category_id)
    ,COUNT(DISTINCT cp_s_p.products) OVER (PARTITION BY ctm_.category_id)
    --,zeroifnull(count (DISTINCT cp_s_p.stands)) AS nb_stands
    --,zeroifnull(count (DISTINCT cp_s_p.products)) AS nb_products
FROM (
    SELECT 
        src.category_id,
        NVL(src.category_id, 1) as category_id_or_one,
        cc.cat_children_list AS children,
        SPLIT( scm."category_id"::VARCHAR || COALESCE (', ' || scm.children, ''), ',') as array_of_ids
     FROM (
        SELECT 
            --ctm_a."category_tree_master_id" AS category_tree_master_id, /* not used in scm select */
            ctm_a."category_id" AS category_id
        FROM expo_master.category_tree_master AS ctm_a
        JOIN (
            SELECT 
                cl."category_id" AS category_id  
                --cl."lang" AS lang /* this was not used */
            FROM expo_master.category_lang AS cl
            WHERE cl."lang" = 'fr'
        ) AS tmp
            ON ctm_a.category_id = tmp.category_id
    ) AS src
    LEFT JOIN performance.search_children_master AS cc
        ON cc."category_id"  = src.category_id
) as scm /*searchchildrenmaster*/
JOIN (
    SELECT 
        -- ctm_b."category_tree_master_id" /* not used */
        ctm_b."category_id" as category_id /*,*/ 
        --ctm_b."parent_id", /* not used */
        --ctm_b."gauche", /* not used */
        --ctm_b."droite", /* not used */
        --ctm_b."niveau", /* not used */
        --ctmrps."selection_leadgen", /* not used */
        --ctmrps."selection_stand"  /* not used */
    FROM expo_master.category_tree_master as ctm_b 
    JOIN expo_master.category_tree_master_ref_plateforme_selection as ctmrps
        ON ctmrps."category_tree_master_id" = ctm_b."category_tree_master_id" 
            AND ctmrps."ref_plateforme_id" = 1
) as ctm_ /*category_tree_master__ */
    ON ctm_.category_id = scm.category_id_or_one
LEFT JOIN LATERAL (
    SELECT
        cp."category_id" as category_id,
        p."stand_id" AS stands,
        p."product_id" AS products
    FROM expo_master.category_product AS cp
    JOIN EXPO_MASTER.product AS p 
        ON p."product_id" = cp."product_id"
    WHERE ARRAY_CONTAINS(cp."category_id" ::varchar::variant , scm.array_of_ids)
) as cp_s_p /*category_product_nb_stand_nb_product*/
--GROUP BY ctm_.category_id

I would be VERY inclined to change from the ARRAY_CONTAINS to FLATTENing that "array" and doing a normal equi-join on that, and perhaps throw in a couple of DISTINCTs on the sub-selects to avoid known duplicate explosion's:

SELECT 
    ctm_.category_id
    --,scm.children,
    --,cp_s_p.stands AS nb_stands
    --,cp_s_p.products AS nb_products
    ,COUNT(DISTINCT cp_s_p.stands) OVER (PARTITION BY ctm_.category_id)
    ,COUNT(DISTINCT cp_s_p.products) OVER (PARTITION BY ctm_.category_id)
    --,zeroifnull(count (DISTINCT cp_s_p.stands)) AS nb_stands
    --,zeroifnull(count (DISTINCT cp_s_p.products)) AS nb_products
FROM (
    SELECT 
        src.category_id,
        NVL(src.category_id, 1) as category_id_or_one,
        --cc.cat_children_list AS children,
        try_to_number(trim(s.value)) as split_ids
     FROM (
        SELECT 
            ctm_a."category_id" AS category_id
        FROM expo_master.category_tree_master AS ctm_a
        JOIN (
            SELECT 
                cl."category_id" AS category_id
            FROM expo_master.category_lang AS cl
            WHERE cl."lang" = 'fr'
        ) AS tmp
            ON ctm_a.category_id = tmp.category_id
    ) AS src
    LEFT JOIN performance.search_children_master AS cc
        ON cc."category_id"  = src.category_id
    ,,LATERAL SPLIT_TO_TABLE( src.category_id::VARCHAR || COALESCE (',' || cc.cat_children_list, ''), ',') as s
) as scm /*searchchildrenmaster*/
JOIN (
    SELECT 
        ctm_b."category_id" as category_id
    FROM expo_master.category_tree_master as ctm_b 
    JOIN expo_master.category_tree_master_ref_plateforme_selection as ctmrps
        ON ctmrps."category_tree_master_id" = ctm_b."category_tree_master_id" 
            AND ctmrps."ref_plateforme_id" = 1
) as ctm_ /*category_tree_master__ */
    ON ctm_.category_id = scm.category_id_or_one
LEFT JOIN (
    SELECT DISTINCT
        cp."category_id" as category_id,
        p."stand_id" AS stands,
        p."product_id" AS products
    FROM expo_master.category_product AS cp
    JOIN EXPO_MASTER.product AS p 
        ON p."product_id" = cp."product_id"
) as cp_s_p /*category_product_nb_stand_nb_product*/
    ON cp_s_p.category_id = scm.split_ids
--GROUP BY ctm_.category_id