将 T-SQL 脚本迁移到 Snowflake - 错误标识符无效

Migrating T-SQL script to Snowflake - error invalid identifier

我目前正在尝试将 T-SQL 脚本迁移到 Snowflake。

这是T-SQL脚本:

SELECT  
    * —- a bunch of stuff
FROM
    TABLE_DATE D
LEFT JOIN 
    TABLE_CONST AS CONST ON CODE = 999
                         AND D.DATE BETWEEN CONST.STARTDATE AND ISNULL (CONST.ENDDATE, EOMONTH(GET_DATE()))
LEFT JOIN 
    TABLE_INFO M ON CONST.CODE = M.CODE
OUTER APPLY 
    (SELECT TOP 1 *
     FROM TABLE_A 
     WHERE SECID = CONST.SECID
       AND DATE_ <= D.DATE_
     ORDER BY DATE_ DESC) SEC
OUTER APPLY 
    (SELECT TOP 1 *
     FROM TABLE_B 
     WHERE I_CODE = SEC.I_CODE 
       AND DATE_ <= D.DATE_
     ORDER BY DATE_ DESC) XR
LEFT JOIN 
    TABLE_I I ON CONST.SECID = I.SECID
WHERE 
    D.DATE_ BETWEEN CAST(‘1800-01-01’ AS DATE AND GETDATE()

我已经将脚本的一些部分转换为雪花语法(isnull、eomonth、outer apply),但仍然遇到一些错误。

转换后的脚本

SELECT * —- a bunch of stuff
FROM TABLE_DATE D
LEFT JOIN TABLE_CONST AS CONST
ON CODE = 999
AND D.DATE BETWEEN CONST.STARTDATE AND COALESCE (CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))

LEFT JOIN TABLE_INFO M ON CONST.CODE = M.CODE

LEFT JOIN 
(SELECT TOP 1 *
FROM TABLE_A 
WHERE SECID = CONST.SECID
AND DATE_ <= D.DATE_
ORDER BY DATE_ DESC
) SEC

LEFT JOIN 
( SELECT TOP 1 *
FROM TABLE_B 
WHERE I_CODE = SEC.I_CODE 
AND DATE_ <= D.DATE_
ORDER BY DATE_ DESC
) XR

LEFT JOIN TABLE_I I ON CONST.SECID = I.SECID

WHERE D.DATE_ BETWEEN CAST(‘1800-01-01’ AS DATE AND CURRENT_DATE()

我遇到的错误是

Invalid identifier ‘CONST.SECID’

我是雪花和 sql 的新手,非常感谢任何帮助。

*出于保密目的,脚本已更改,我也无法提供样本数据)

create or replace table TABLE_DATE(DATE_ date);
create or replace table TABLE_CONST(STARTDATE date, ENDDATE date, code number, SECID number);
create or replace table TABLE_INFO(code number);
create or replace table TABLE_A(SECID number, I_CODE number);
create or replace table TABLE_B(date_ date, I_CODE number);
create or replace table TABLE_I(SECID number);

SELECT *
FROM TABLE_DATE AS D
LEFT JOIN TABLE_CONST AS CONST
    ON CODE = 999
    AND D.DATE_ BETWEEN CONST.STARTDATE AND COALESCE(CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))
LEFT JOIN TABLE_INFO AS M 
    ON CONST.CODE = M.CODE
LEFT JOIN LATERAL (
    SELECT TOP 1 *
    FROM TABLE_A 
    WHERE SECID = CONST.SECID
    AND DATE_ <= D.DATE_
    ORDER BY DATE_ DESC
) AS SEC
LEFT JOIN LATERAL (
    SELECT TOP 1 *
    FROM TABLE_B AS B 
    WHERE b.I_CODE = SEC.I_CODE 
    AND b.DATE_ <= D.DATE_
    ORDER BY b.DATE_ DESC
) XR
LEFT JOIN TABLE_I I ON CONST.SECID = I.SECID
WHERE D.DATE_ BETWEEN '1800-01-01'::date AND CURRENT_DATE()

备选方案SQL 2:

所以在这里我放弃了相关的 sub-query 的 TOP 1 并用外部 WHERE 替换,不那么迷人。但它可能会起作用。一旦避免了故障,就可以从那里增加速度。

create or replace table TABLE_DATE(DATE_ date);
create or replace table TABLE_CONST(STARTDATE date, ENDDATE date, code number, SECID number);
create or replace table TABLE_INFO(code number);
create or replace table TABLE_A(date_ date, SECID number, I_CODE number);
create or replace table TABLE_B(date_ date, I_CODE number);
create or replace table TABLE_I(SECID number);

SELECT *
FROM TABLE_DATE AS D
LEFT JOIN TABLE_CONST AS CONST
    ON CODE = 999
    AND D.DATE_ BETWEEN CONST.STARTDATE AND COALESCE(CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))
LEFT JOIN TABLE_INFO AS M 
    ON CONST.CODE = M.CODE
LEFT JOIN LATERAL (
    SELECT *
        ,row_number() over (ORDER BY a.DATE_ DESC) AS RN
    FROM TABLE_A AS A
    WHERE SECID = CONST.SECID
    AND a.DATE_ <= D.DATE_
) AS SEC
LEFT JOIN LATERAL (
    SELECT *
        ,row_number() over (ORDER BY b.DATE_ DESC) AS RN
    FROM TABLE_B AS B 
    WHERE b.I_CODE = SEC.I_CODE 
    AND b.DATE_ <= D.DATE_
    ORDER BY b.DATE_ DESC
) XR
LEFT JOIN TABLE_I I ON CONST.SECID = I.SECID
WHERE D.DATE_ BETWEEN '1800-01-01'::date AND CURRENT_DATE()
and xr.rn = 1 AND sec.rn = 1

触发问题的一些数据:

WITH TABLE_DATE(DATE_) as (
    select * from values
    ('2022-04-01'::date),
    ('2022-03-01'::date),
    ('2022-01-01'::date)
), TABLE_CONST(STARTDATE, ENDDATE, code, SECID) as (
    select * from values
    ('2022-04-01'::date, '2022-04-01'::date, 997, 667),
    ('2022-03-01'::date, '2022-03-01'::date, 998, 668),
    ('2022-01-01'::date, '2022-01-01'::date, 999, 669)
), TABLE_INFO(code) as (
    select * from values
    (999),
    (998)
), TABLE_A(date_, SECID, I_CODE) as (
    select * from values
    ('2022-04-01'::date, 667, 767),
    ('2022-03-29'::date, 667, 767),
    ('2022-03-28'::date, 667, 767),
    ('2022-03-01'::date, 668, 768),
    ('2022-02-28'::date, 668, 768),
    ('2022-02-27'::date, 668, 768),
    ('2022-01-01'::date, 669, 769),
    ('2021-12-29'::date, 669, 769),
    ('2021-12-28'::date, 669, 769)
), TABLE_B(date_, I_CODE) as (
    select * from values
    ('2022-04-01'::date, 767),
    ('2022-03-29'::date, 767),
    ('2022-03-28'::date, 767),
    ('2022-03-01'::date, 768),
    ('2022-02-28'::date, 768),
    ('2022-02-27'::date, 768),
    ('2022-01-01'::date, 769),
    ('2021-12-29'::date, 769),
    ('2021-12-28'::date, 769)
), TABLE_I(SECID ) as (
    select * from values
    (999),
    (998)
)
SELECT *
FROM TABLE_DATE AS D
LEFT JOIN TABLE_CONST AS CONST
    ON CONST.CODE = 999
    AND D.DATE_ BETWEEN CONST.STARTDATE AND COALESCE(CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))
LEFT JOIN TABLE_INFO AS M 
    ON CONST.CODE = M.CODE
LEFT JOIN LATERAL (
    SELECT A.*
        ,row_number() over (ORDER BY a.DATE_ DESC) AS RN
    FROM TABLE_A AS A
    WHERE A.SECID = CONST.SECID
    AND a.DATE_ <= D.DATE_
) AS SEC
LEFT JOIN LATERAL (
    SELECT B.*
        ,row_number() over (ORDER BY b.DATE_ DESC) AS RN
    FROM TABLE_B AS B 
    WHERE b.I_CODE = SEC.I_CODE 
    AND b.DATE_ <= D.DATE_
    ORDER BY b.DATE_ DESC
) XR
LEFT JOIN TABLE_I I ON CONST.SECID = I.SECID
WHERE D.DATE_ BETWEEN '1800-01-01'::date AND CURRENT_DATE()
and xr.rn = 1 AND sec.rn = 1;

现在我们很幸运,我们有一些触发问题的数据,我们可以解决问题..

现在让我们编写一些有效的代码:

所以这删除了 ​​sub-queries,并仅用于计划旧连接,然后在末尾丢弃了一堆行。这太恶心了,我更愿意在处理链的早期阶段做这件事。但是“它编译”是一个加号...

WITH TABLE_DATE(DATE_) as (
    select * from values
    ('2022-04-01'::date),
    ('2022-03-01'::date),
    ('2022-01-01'::date)
), TABLE_CONST(STARTDATE, ENDDATE, code, SECID) as (
    select * from values
    ('2022-04-01'::date, '2022-04-01'::date, 999, 667),
    ('2022-03-01'::date, '2022-03-01'::date, 999, 668),
    ('2022-01-01'::date, '2022-01-01'::date, 999, 669)
), TABLE_INFO(code) as (
    select * from values
    (999),
    (998)
), TABLE_A(date_, SECID, I_CODE) as (
    select * from values
    ('2022-04-01'::date, 667, 767),
    ('2022-03-29'::date, 667, 767),
    ('2022-03-28'::date, 667, 767),
    ('2022-03-01'::date, 668, 768),
    ('2022-02-28'::date, 668, 768),
    ('2022-02-27'::date, 668, 768),
    ('2022-01-01'::date, 669, 769),
    ('2021-12-29'::date, 669, 769),
    ('2021-12-28'::date, 669, 769)
), TABLE_B(date_, I_CODE) as (
    select * from values
    ('2022-04-01'::date, 767),
    ('2022-03-29'::date, 767),
    ('2022-03-28'::date, 767),
    ('2022-03-01'::date, 768),
    ('2022-02-28'::date, 768),
    ('2022-02-27'::date, 768),
    ('2022-01-01'::date, 769),
    ('2021-12-29'::date, 769),
    ('2021-12-28'::date, 769)
), TABLE_I(SECID ) as (
    select * from values
    (669),
    (667)
)
SELECT 
    d.date_ as d_date
    ,CONST.STARTDATE as c_STARTDATE
    ,CONST.ENDDATE as c_ENDDATE
    ,CONST.code as c_code
    ,CONST.SECID as c_SECID
    ,m.code as m_code
    ,sec.date_ as s_date
    ,sec.secid as s_secid
    ,sec.i_code as s_i_code
    ,xr.date_ as xr_date
    ,xr.i_code as xr_icode
    ,i.*
FROM TABLE_DATE AS D
LEFT JOIN TABLE_CONST AS CONST
    ON CONST.CODE = 999
    AND D.DATE_ BETWEEN CONST.STARTDATE AND COALESCE(CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))
LEFT JOIN TABLE_INFO AS M 
    ON CONST.CODE = M.CODE
LEFT JOIN TABLE_A AS SEC
    ON sec.SECID = CONST.SECID and sec.date_ <= D.DATE_ 
LEFT JOIN TABLE_B AS XR
    ON xr.I_CODE = SEC.I_CODE AND xr.DATE_ <= D.DATE_
LEFT JOIN TABLE_I I ON CONST.SECID = I.SECID
WHERE D.DATE_ BETWEEN '1800-01-01'::date AND CURRENT_DATE()
QUALIFY ROW_NUMBER() OVER (partition by D.DATE_, CONST.SECID order by sec.date_ desc, xr.DATE_ DESC) = 1
ORDER BY 1;

让我们试试 CTE:

现在我假装我使用的 CTE 是完整的表,相同的主体可以与那些 sub-queries 写成增量 clean-up 来编写,这样数据就不会爆炸太多。并且为了简单起见,还将使用的值向前推进。我也会尝试不携带,并进行更清洁的连接。但这完全取决于您的数据。

WITH CTE_A AS (
    SELECT     
        d.date_ as d_date
        ,CONST.STARTDATE as c_STARTDATE
        ,CONST.ENDDATE as c_ENDDATE
        ,CONST.code as c_code
        ,CONST.SECID as c_SECID
    FROM TABLE_DATE AS D
    LEFT JOIN TABLE_CONST AS CONST
        ON CONST.CODE = 999
        AND D.DATE_ BETWEEN CONST.STARTDATE AND COALESCE(CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))
    WHERE D.DATE_ BETWEEN '1800-01-01'::date AND CURRENT_DATE()
), CTE_B AS (
    SELECT 
        x.d_date
        ,x.c_STARTDATE
        ,x.c_ENDDATE
        ,x.c_code
        ,x.c_SECID
        ,sec.date_ as s_date
        ,sec.secid as s_secid
        ,sec.i_code as s_i_code
    FROM CTE_A as x
    LEFT JOIN TABLE_A AS SEC
        ON sec.SECID = x.C_SECID and sec.date_ <= x.d_DATE
    QUALIFY ROW_NUMBER() OVER (partition by x.d_DATE, x.C_SECID order by sec.date_ desc) = 1
), CTE_C AS (
    SELECT 
        y.d_date
        ,y.c_STARTDATE
        ,y.c_ENDDATE
        ,y.c_code
        ,y.c_SECID
        ,y.s_date
        ,y.s_secid
        ,y.s_i_code
        ,xr.date_ as xr_date
        ,xr.i_code as xr_icode
    FROM CTE_B as y
    LEFT JOIN TABLE_B AS XR
    ON xr.I_CODE = y.s_i_code AND xr.DATE_ <= y.d_DATE
    QUALIFY ROW_NUMBER() OVER (partition by y.d_DATE, y.C_SECID order by xr.date_ desc) = 1
)
SELECT 
    z.d_date
    ,z.c_STARTDATE
    ,z.c_ENDDATE
    ,z.c_code
    ,z.c_SECID
    ,m.code as m_code
    ,z.s_date
    ,z.s_secid
    ,z.s_i_code
    ,z.xr_date
    ,z.xr_icode
    ,i.secid as i_secid
FROM CTE_C AS z
LEFT JOIN TABLE_INFO AS M 
    ON z.c_CODE = M.CODE
LEFT JOIN TABLE_I I ON z.C_SECID = I.SECID
ORDER BY 1;