尝试避免 T-SQL 中的相关子查询以便改写到 Netezza - MAX(COALESCE()) 在聚合查询中不起作用
Trying to avoid correlated subquery in T-SQL for Netezza re-write - MAX(COALESCE()) not working in aggregate query
看看这个 T-SQL 查询。它有一个嵌套查询。我试图以相同的方式使其在没有嵌套查询的情况下工作。我这样做的原因是因为我想在 Netezza 中编写类似的东西,但是 Netezza 的查询引擎不允许您在嵌套查询中引用外部查询的表。请注意,在我的虚拟数据中,我硬编码了它是树枝还是树叶,但在我的第一个查询中,我计算它是树枝还是树叶,结果是一样的。
在我的问题的最后,我有我的虚拟数据和一个简单的查询,所以你可以看到那里有什么。
-- Per-folder listing built from two correlated scalar subqueries; this is the
-- form that has to be rewritten because both subqueries reference F1.
SELECT DISTINCT
    F1.[PATH],
    F1.BRANCH_OR_LEAF,
    -- Distinct files attached to any folder whose path starts with F1's path
    -- (F1's own path matches its own prefix).
    (
        SELECT COUNT(DISTINCT fl.ID)
        FROM FOLDER AS sub
        INNER JOIN [FILE] AS fl
            ON fl.FOLDER_ID = sub.ID
        WHERE sub.[PATH] LIKE (F1.[PATH] + '%')
    ) AS file_count,
    -- 'B' when the highest ID among file-bearing folders under this prefix
    -- differs from F1.ID. Everything else is 'L', including the case where
    -- no folder matches and MAX yields NULL.
    (
        SELECT
            CASE
                WHEN MAX(COALESCE(sub.ID, -1)) <> COALESCE(F1.ID, -1) THEN 'B'
                ELSE 'L'
            END AS BRANCH_OR_LEAF
        FROM FOLDER AS sub
        INNER JOIN [FILE] AS fl
            ON fl.FOLDER_ID = sub.ID
        WHERE sub.[PATH] LIKE (F1.[PATH] + '%')
    ) AS branch_or_leaf
FROM [FOLDER] AS F1
ORDER BY F1.[PATH]
使用聚合 MAX(COALESCE()) 的查询不起作用:
-- Aggregate (non-correlated) rewrite: one row per folder path with the number
-- of distinct files stored in that folder or in any folder whose path starts
-- with it.
SELECT
    F1.PATH AS FOLDER_PATH,
    COUNT(DISTINCT F.ID) AS FILE_COUNT,
    -- 'A' when at least one file exists under the path, 'H' when there is none.
    CASE
        WHEN COUNT(DISTINCT F.ID) > 0 THEN 'A'
        ELSE 'H'
    END,
    -- Number of '/' characters in the path, minus one.
    (
        LEN(F1.PATH) - LEN(REPLACE(F1.PATH, '/', '')) - 1
    ) AS FOLDER_LEVEL
-- The fragment below is intentionally left commented out: F2.ID is neither
-- aggregated nor listed in GROUP BY, so enabling it raises the
-- "invalid in the select list" error. It also compares the integer ID with
-- the string '-1', which relies on implicit conversion.
--,
--CASE
--    WHEN MAX(coalesce(F1.ID,'-1')) != coalesce(F2.ID,'-1') THEN 'B'
--    ELSE 'L'
--END AS BRANCH_OR_LEAF
FROM
    [FOLDER] F1
    LEFT JOIN [FOLDER] F2 ON F2.PATH LIKE (F1.PATH + '%')
    -- LEFT JOIN (not an inner JOIN) so folders with no files below them are
    -- kept with FILE_COUNT = 0; an inner join here would drop them and make
    -- the 'H' branch above unreachable.
    LEFT JOIN [FILE] F ON F2.ID = F.FOLDER_ID
GROUP BY
    F1.PATH
当我取消注释时,我得到以下错误:
列 'FOLDER.ID' 在 select 列表中无效,因为它未包含在聚合函数或 GROUP BY 子句中。
这是我当前的虚拟数据的样子。
-- Dummy-data listing: every file together with the folder row it belongs to.
SELECT
    F1.ID,
    F1.[PATH],
    F1.BRANCH_OR_LEAF,
    F.ID,
    F.NAME
FROM [FOLDER] AS F1
INNER JOIN [dbo].[FILE] AS F
    ON F.FOLDER_ID = F1.ID
我根据您的虚拟数据创建了一个表,但我甚至没有查看您的查询,而是自己写了一个。结果非常非常接近,但您的虚拟数据似乎缺少路径为“/AA/BB/CC/”的一行。
您的查询应如下所示。我假设您会将 CTE 换成子查询或临时表,但为了可读性,我在这里使用了 CTE。
-- CTE: one row per file, carrying the ID, path and branch/leaf flag of the
-- folder that holds it. Folders without any file do not appear here because
-- of the inner join.
WITH CTE
AS
(
SELECT F1.ID FolderID, F1.[PATH], F1.BRANCH_OR_LEAF, F.ID FileID, F.NAME
FROM [FOLDER] F1
JOIN [dbo].[FILE] F ON F1.ID = F.FOLDER_ID
)
-- Self-join: pair each row A with every row B whose path begins with A's
-- path, then count the distinct files found on the B side.
SELECT A.[Path],A.Branch_or_Leaf,COUNT(DISTINCT B.FileID) file_Count
FROM CTE A
INNER JOIN CTE B
-- Prefix test done with LEFT/LEN instead of LIKE.
ON A.[Path] = LEFT(B.[Path],LEN(A.[Path]))
-- NOTE(review): B.Branch_Or_Leaf is part of the grouping key, so a single
-- path can come back as more than one row (one per B-side flag value), each
-- with a partial count -- confirm whether that split is intended.
GROUP BY A.Path,A.Branch_or_Leaf,B.Branch_Or_Leaf
ORDER BY A.[Path]
结果:
Path Branch_or_Leaf file_Count
------------------------- -------------- -----------
/AA/ B 5
/AA/ B 8
/AA/BB/ B 4
/AA/BB/ B 8
/AA/BB/CC/CC.1/ L 2
/AA/BB/CC/CC.2/ B 1
/AA/BB/CC/CC.2/ B 4
/AA/BB/CC/CC.2/CC.22/ L 3
/AA/BB/CC/CC.2/CC.23/ L 4
/AA/BB/CC/DD/ L 2
这应该可以让我在 Netezza 中完成我想要的。在实际情况下,无法保证仅靠 LEN 函数就足够,但由于我们已经在检查一个字符串是否包含另一个字符串,所以这应该没问题。我本想使用 CHECKSUM_AGG 函数,但 Netezza 不支持它,所以只能这样做了。
-- Step 1: scratch table holding one branch/leaf flag per folder.
CREATE TABLE #branch_or_leaf
(
    [folder_id] int,
    [path] nvarchar(50), -- for reference purposes only
    [branch_or_leaf] [nvarchar](50)
)

-- Step 2: classify every folder.
-- F2 ranges over all folders whose path starts with F1's path, which includes
-- F1 itself. The summed path length therefore equals F1's own path length
-- only when no other folder shares the prefix, i.e. the folder is a leaf.
-- NOTE(review): assumes paths are non-empty, unique, and free of LIKE
-- wildcard characters (%, _, [) -- confirm against the real data.
-- GROUP BY already yields one row per folder, so no DISTINCT is needed.
INSERT INTO #branch_or_leaf ([folder_id], [path], [branch_or_leaf])
SELECT
    F1.ID,
    F1.PATH,
    CASE
        WHEN SUM(LEN(F2.PATH)) = LEN(F1.PATH) THEN 'L'
        ELSE 'B'
    END
FROM
    [FOLDER] F1
    LEFT JOIN [FOLDER] F2 ON F2.PATH LIKE (F1.PATH + '%')
GROUP BY
    F1.ID, F1.PATH

-- Inspection of the intermediate classification.
SELECT [folder_id], [path], [branch_or_leaf] FROM #branch_or_leaf

-- Step 3: per-path file counts combined with the precomputed flag.
SELECT
    F1.PATH AS FOLDER_PATH,
    COUNT(DISTINCT F.ID) AS FILE_COUNT,
    -- 'A' when at least one file exists under the path, 'H' when there is none.
    CASE
        WHEN COUNT(DISTINCT F.ID) > 0 THEN 'A'
        ELSE 'H'
    END,
    -- Number of '/' characters in the path, minus one.
    (
        LEN(F1.PATH) - LEN(REPLACE(F1.PATH, '/', '')) - 1
    ) AS FOLDER_LEVEL,
    BL.branch_or_leaf AS branch_or_leaf
FROM
    [FOLDER] F1
    LEFT JOIN [FOLDER] F2 ON F2.PATH LIKE (F1.PATH + '%')
    -- LEFT JOIN (not an inner JOIN) so folders with no files below them are
    -- kept with FILE_COUNT = 0; an inner join here would drop them and make
    -- the 'H' branch above unreachable.
    LEFT JOIN [FILE] F ON F2.ID = F.FOLDER_ID
    JOIN #branch_or_leaf BL ON F1.ID = BL.folder_id
GROUP BY
    F1.PATH, BL.branch_or_leaf
ORDER BY
    F1.PATH

-- Step 4: clean up the scratch table.
DROP TABLE #branch_or_leaf
编辑:
类似查询,但现在我们在 FROM 子句中有了嵌套查询。 可能在 Netezza 中是合法的。
-- Single-statement variant: the branch/leaf classification is computed in a
-- derived table in the FROM clause, which does not reference the outer query.
SELECT
    F1.PATH AS FOLDER_PATH,
    COUNT(DISTINCT F.ID) AS FILE_COUNT,
    -- 'A' when at least one file exists under the path, 'H' when there is none.
    CASE
        WHEN COUNT(DISTINCT F.ID) > 0 THEN 'A'
        ELSE 'H'
    END,
    -- Number of '/' characters in the path, minus one.
    (
        LEN(F1.PATH) - LEN(REPLACE(F1.PATH, '/', '')) - 1
    ) AS FOLDER_LEVEL,
    BL.branch_or_leaf AS branch_or_leaf
FROM
    [FOLDER] F1
    LEFT JOIN [FOLDER] F2 ON F2.PATH LIKE (F1.PATH + '%')
    -- LEFT JOIN (not an inner JOIN) so folders with no files below them are
    -- kept with FILE_COUNT = 0; an inner join here would drop them and make
    -- the 'H' branch above unreachable.
    LEFT JOIN [FILE] F ON F2.ID = F.FOLDER_ID
    JOIN
    (
        -- One row per folder. P always matches its own prefix, so the summed
        -- length of matching paths equals P's own length only when no other
        -- folder shares the prefix, i.e. the folder is a leaf.
        -- NOTE(review): assumes paths are non-empty, unique, and free of LIKE
        -- wildcard characters (%, _, [) -- confirm against the real data.
        SELECT
            P.ID,
            CASE
                WHEN SUM(LEN(C.PATH)) = LEN(P.PATH) THEN 'L'
                ELSE 'B'
            END AS branch_or_leaf
        FROM
            [FOLDER] P
            LEFT JOIN [FOLDER] C ON C.PATH LIKE (P.PATH + '%')
        GROUP BY
            P.ID, P.PATH
    ) AS BL ON F1.ID = BL.ID
GROUP BY
    F1.PATH, BL.branch_or_leaf
ORDER BY
    F1.PATH
看看这个 T-SQL 查询。它有一个嵌套查询。我试图以相同的方式使其在没有嵌套查询的情况下工作。我这样做的原因是因为我想在 Netezza 中编写类似的东西,但是 Netezza 的查询引擎不允许您在嵌套查询中引用外部查询的表。请注意,在我的虚拟数据中,我硬编码了它是树枝还是树叶,但在我的第一个查询中,我计算它是树枝还是树叶,结果是一样的。
在我的问题的最后,我有我的虚拟数据和一个简单的查询,所以你可以看到那里有什么。
-- Per-folder listing built from two correlated scalar subqueries; this is the
-- form that has to be rewritten because both subqueries reference F1.
SELECT DISTINCT
    F1.[PATH],
    F1.BRANCH_OR_LEAF,
    -- Distinct files attached to any folder whose path starts with F1's path
    -- (F1's own path matches its own prefix).
    (
        SELECT COUNT(DISTINCT fl.ID)
        FROM FOLDER AS sub
        INNER JOIN [FILE] AS fl
            ON fl.FOLDER_ID = sub.ID
        WHERE sub.[PATH] LIKE (F1.[PATH] + '%')
    ) AS file_count,
    -- 'B' when the highest ID among file-bearing folders under this prefix
    -- differs from F1.ID. Everything else is 'L', including the case where
    -- no folder matches and MAX yields NULL.
    (
        SELECT
            CASE
                WHEN MAX(COALESCE(sub.ID, -1)) <> COALESCE(F1.ID, -1) THEN 'B'
                ELSE 'L'
            END AS BRANCH_OR_LEAF
        FROM FOLDER AS sub
        INNER JOIN [FILE] AS fl
            ON fl.FOLDER_ID = sub.ID
        WHERE sub.[PATH] LIKE (F1.[PATH] + '%')
    ) AS branch_or_leaf
FROM [FOLDER] AS F1
ORDER BY F1.[PATH]
使用聚合 MAX(COALESCE()) 的查询不起作用:
-- Aggregate (non-correlated) rewrite: one row per folder path with the number
-- of distinct files stored in that folder or in any folder whose path starts
-- with it.
SELECT
    F1.PATH AS FOLDER_PATH,
    COUNT(DISTINCT F.ID) AS FILE_COUNT,
    -- 'A' when at least one file exists under the path, 'H' when there is none.
    CASE
        WHEN COUNT(DISTINCT F.ID) > 0 THEN 'A'
        ELSE 'H'
    END,
    -- Number of '/' characters in the path, minus one.
    (
        LEN(F1.PATH) - LEN(REPLACE(F1.PATH, '/', '')) - 1
    ) AS FOLDER_LEVEL
-- The fragment below is intentionally left commented out: F2.ID is neither
-- aggregated nor listed in GROUP BY, so enabling it raises the
-- "invalid in the select list" error. It also compares the integer ID with
-- the string '-1', which relies on implicit conversion.
--,
--CASE
--    WHEN MAX(coalesce(F1.ID,'-1')) != coalesce(F2.ID,'-1') THEN 'B'
--    ELSE 'L'
--END AS BRANCH_OR_LEAF
FROM
    [FOLDER] F1
    LEFT JOIN [FOLDER] F2 ON F2.PATH LIKE (F1.PATH + '%')
    -- LEFT JOIN (not an inner JOIN) so folders with no files below them are
    -- kept with FILE_COUNT = 0; an inner join here would drop them and make
    -- the 'H' branch above unreachable.
    LEFT JOIN [FILE] F ON F2.ID = F.FOLDER_ID
GROUP BY
    F1.PATH
当我取消注释时,我得到以下错误:
列 'FOLDER.ID' 在 select 列表中无效,因为它未包含在聚合函数或 GROUP BY 子句中。
这是我当前的虚拟数据的样子。
-- Dummy-data listing: every file together with the folder row it belongs to.
SELECT
    F1.ID,
    F1.[PATH],
    F1.BRANCH_OR_LEAF,
    F.ID,
    F.NAME
FROM [FOLDER] AS F1
INNER JOIN [dbo].[FILE] AS F
    ON F.FOLDER_ID = F1.ID
我根据您的虚拟数据创建了一个表,但我甚至没有查看您的查询,而是自己写了一个。结果非常非常接近,但您的虚拟数据似乎缺少路径为“/AA/BB/CC/”的一行。
您的查询应如下所示。我假设您会将 CTE 换成子查询或临时表,但为了可读性,我在这里使用了 CTE。
-- CTE: one row per file, carrying the ID, path and branch/leaf flag of the
-- folder that holds it. Folders without any file do not appear here because
-- of the inner join.
WITH CTE
AS
(
SELECT F1.ID FolderID, F1.[PATH], F1.BRANCH_OR_LEAF, F.ID FileID, F.NAME
FROM [FOLDER] F1
JOIN [dbo].[FILE] F ON F1.ID = F.FOLDER_ID
)
-- Self-join: pair each row A with every row B whose path begins with A's
-- path, then count the distinct files found on the B side.
SELECT A.[Path],A.Branch_or_Leaf,COUNT(DISTINCT B.FileID) file_Count
FROM CTE A
INNER JOIN CTE B
-- Prefix test done with LEFT/LEN instead of LIKE.
ON A.[Path] = LEFT(B.[Path],LEN(A.[Path]))
-- NOTE(review): B.Branch_Or_Leaf is part of the grouping key, so a single
-- path can come back as more than one row (one per B-side flag value), each
-- with a partial count -- confirm whether that split is intended.
GROUP BY A.Path,A.Branch_or_Leaf,B.Branch_Or_Leaf
ORDER BY A.[Path]
结果:
Path Branch_or_Leaf file_Count
------------------------- -------------- -----------
/AA/ B 5
/AA/ B 8
/AA/BB/ B 4
/AA/BB/ B 8
/AA/BB/CC/CC.1/ L 2
/AA/BB/CC/CC.2/ B 1
/AA/BB/CC/CC.2/ B 4
/AA/BB/CC/CC.2/CC.22/ L 3
/AA/BB/CC/CC.2/CC.23/ L 4
/AA/BB/CC/DD/ L 2
这应该可以让我在 Netezza 中完成我想要的。在实际情况下,无法保证仅靠 LEN 函数就足够,但由于我们已经在检查一个字符串是否包含另一个字符串,所以这应该没问题。我本想使用 CHECKSUM_AGG 函数,但 Netezza 不支持它,所以只能这样做了。
-- Step 1: scratch table holding one branch/leaf flag per folder.
CREATE TABLE #branch_or_leaf
(
    [folder_id] int,
    [path] nvarchar(50), -- for reference purposes only
    [branch_or_leaf] [nvarchar](50)
)

-- Step 2: classify every folder.
-- F2 ranges over all folders whose path starts with F1's path, which includes
-- F1 itself. The summed path length therefore equals F1's own path length
-- only when no other folder shares the prefix, i.e. the folder is a leaf.
-- NOTE(review): assumes paths are non-empty, unique, and free of LIKE
-- wildcard characters (%, _, [) -- confirm against the real data.
-- GROUP BY already yields one row per folder, so no DISTINCT is needed.
INSERT INTO #branch_or_leaf ([folder_id], [path], [branch_or_leaf])
SELECT
    F1.ID,
    F1.PATH,
    CASE
        WHEN SUM(LEN(F2.PATH)) = LEN(F1.PATH) THEN 'L'
        ELSE 'B'
    END
FROM
    [FOLDER] F1
    LEFT JOIN [FOLDER] F2 ON F2.PATH LIKE (F1.PATH + '%')
GROUP BY
    F1.ID, F1.PATH

-- Inspection of the intermediate classification.
SELECT [folder_id], [path], [branch_or_leaf] FROM #branch_or_leaf

-- Step 3: per-path file counts combined with the precomputed flag.
SELECT
    F1.PATH AS FOLDER_PATH,
    COUNT(DISTINCT F.ID) AS FILE_COUNT,
    -- 'A' when at least one file exists under the path, 'H' when there is none.
    CASE
        WHEN COUNT(DISTINCT F.ID) > 0 THEN 'A'
        ELSE 'H'
    END,
    -- Number of '/' characters in the path, minus one.
    (
        LEN(F1.PATH) - LEN(REPLACE(F1.PATH, '/', '')) - 1
    ) AS FOLDER_LEVEL,
    BL.branch_or_leaf AS branch_or_leaf
FROM
    [FOLDER] F1
    LEFT JOIN [FOLDER] F2 ON F2.PATH LIKE (F1.PATH + '%')
    -- LEFT JOIN (not an inner JOIN) so folders with no files below them are
    -- kept with FILE_COUNT = 0; an inner join here would drop them and make
    -- the 'H' branch above unreachable.
    LEFT JOIN [FILE] F ON F2.ID = F.FOLDER_ID
    JOIN #branch_or_leaf BL ON F1.ID = BL.folder_id
GROUP BY
    F1.PATH, BL.branch_or_leaf
ORDER BY
    F1.PATH

-- Step 4: clean up the scratch table.
DROP TABLE #branch_or_leaf
编辑:
类似查询,但现在我们在 FROM 子句中有了嵌套查询。 可能在 Netezza 中是合法的。
-- Single-statement variant: the branch/leaf classification is computed in a
-- derived table in the FROM clause, which does not reference the outer query.
SELECT
    F1.PATH AS FOLDER_PATH,
    COUNT(DISTINCT F.ID) AS FILE_COUNT,
    -- 'A' when at least one file exists under the path, 'H' when there is none.
    CASE
        WHEN COUNT(DISTINCT F.ID) > 0 THEN 'A'
        ELSE 'H'
    END,
    -- Number of '/' characters in the path, minus one.
    (
        LEN(F1.PATH) - LEN(REPLACE(F1.PATH, '/', '')) - 1
    ) AS FOLDER_LEVEL,
    BL.branch_or_leaf AS branch_or_leaf
FROM
    [FOLDER] F1
    LEFT JOIN [FOLDER] F2 ON F2.PATH LIKE (F1.PATH + '%')
    -- LEFT JOIN (not an inner JOIN) so folders with no files below them are
    -- kept with FILE_COUNT = 0; an inner join here would drop them and make
    -- the 'H' branch above unreachable.
    LEFT JOIN [FILE] F ON F2.ID = F.FOLDER_ID
    JOIN
    (
        -- One row per folder. P always matches its own prefix, so the summed
        -- length of matching paths equals P's own length only when no other
        -- folder shares the prefix, i.e. the folder is a leaf.
        -- NOTE(review): assumes paths are non-empty, unique, and free of LIKE
        -- wildcard characters (%, _, [) -- confirm against the real data.
        SELECT
            P.ID,
            CASE
                WHEN SUM(LEN(C.PATH)) = LEN(P.PATH) THEN 'L'
                ELSE 'B'
            END AS branch_or_leaf
        FROM
            [FOLDER] P
            LEFT JOIN [FOLDER] C ON C.PATH LIKE (P.PATH + '%')
        GROUP BY
            P.ID, P.PATH
    ) AS BL ON F1.ID = BL.ID
GROUP BY
    F1.PATH, BL.branch_or_leaf
ORDER BY
    F1.PATH