我正在尝试提高 Oracle SQL 的性能,它正在寻找两个表之间的差异
I am trying to improve the performance of an Oracle SQL that is finding the differences between two tables
我有两个 Oracle 表,我对它们做 UNION 来找出这两个表中存储的数据的差异。但是,当我在 SQL Developer 中运行该查询时,查询非常慢;我在 Informatica 中使用相同的查询,吞吐量也很低。
TABLE 1: W_SALES_INVOICE_LINE_FS EBS(NET_AMT,
INVOICED_QTY,
CREATED_ON_DT,
CHANGED_ON_DT,
INTEGRATION_ID,
'EBS' AS SOURCE_NAME)
TABLE 2: W_SALES_INVOICE_LINE_F DWH (NET_AMT,
INVOICED_QTY,
CREATED_ON_DT,
CHANGED_ON_DT,
INTEGRATION_ID,
'DWH' AS SOURCE_NAME)
我把查询附在问题中:
-- Rows found in only one of the two tables, matched on INTEGRATION_ID.
-- SOURCE_NAME labels the table each row was read from. UNION (rather than
-- UNION ALL) also removes duplicate rows from the combined result.

-- Branch 1: EBS rows that have no DWH row with the same INTEGRATION_ID.
SELECT
    ebs.net_amt,
    -- Non-NULL quantities pass through unchanged. A NULL quantity stays NULL
    -- when NET_AMT is NULL or 0, and is reported as -1 otherwise.
    NVL(
        ebs.invoiced_qty,
        CASE NVL(ebs.net_amt, 0)
            WHEN 0 THEN ebs.invoiced_qty
            ELSE -1
        END
    ) AS invoiced_qty,
    ebs.created_on_dt,
    ebs.changed_on_dt,
    ebs.integration_id,
    'EBS' AS source_name
FROM w_sales_invoice_line_fs ebs
WHERE NOT EXISTS (
    SELECT 1
    FROM w_sales_invoice_line_f dwh
    WHERE dwh.integration_id = ebs.integration_id
)
UNION
-- Branch 2: DWH rows with IS_POS = 'N' that have no EBS row with the same
-- INTEGRATION_ID.
SELECT
    dwh.net_amt,
    dwh.invoiced_qty,
    dwh.created_on_dt,
    dwh.changed_on_dt,
    dwh.integration_id,
    'DWH' AS source_name
FROM w_sales_invoice_line_f dwh
WHERE NOT EXISTS (
    SELECT 1
    FROM w_sales_invoice_line_fs ebs
    WHERE ebs.integration_id = dwh.integration_id
)
  AND dwh.is_pos = 'N';
如果您想查看解释计划,请告诉我。有人可以告诉我如何提高性能,或者让我知道问题是否出在其他方面而不是上述查询!
你执行的不是 JOIN,而是 UNION。但是,您正在执行子查询,而这些可能会降低整体性能。您可以将 EXISTS 更改为 IN,这样可以利用索引(如果存在)。注意:只要子查询返回的 INTEGRATION_ID 中有一个 NULL,NOT IN 就不会返回任何行,因此该列必须不可为空,或者在子查询中过滤掉 NULL。
尝试以下操作:
-- Rows found in only one of the two tables, matched on INTEGRATION_ID,
-- written with NOT IN instead of NOT EXISTS.
--
-- NOT IN evaluates to UNKNOWN for every row as soon as the subquery returns a
-- single NULL, which would make the branch return nothing. Both subqueries
-- therefore filter NULL keys out explicitly.
-- NOTE(review): a driving row whose own INTEGRATION_ID is NULL is still
-- skipped by NOT IN (NOT EXISTS would return it) - confirm the column is
-- declared NOT NULL, or keep the NOT EXISTS form.
--
-- UNION ALL does no duplicate elimination; the two branches cannot produce
-- the same row because their SOURCE_NAME literals differ.

-- Branch 1: EBS rows whose key does not appear in DWH.
SELECT
    ebs.net_amt,
    -- Non-NULL quantities pass through unchanged. A NULL quantity stays NULL
    -- when NET_AMT is NULL or 0, and is reported as -1 otherwise.
    NVL(
        ebs.invoiced_qty,
        CASE NVL(ebs.net_amt, 0)
            WHEN 0 THEN ebs.invoiced_qty
            ELSE -1
        END
    ) AS invoiced_qty,
    ebs.created_on_dt,
    ebs.changed_on_dt,
    ebs.integration_id,
    'EBS' AS source_name
FROM w_sales_invoice_line_fs ebs
WHERE ebs.integration_id NOT IN (
    SELECT dwh.integration_id
    FROM w_sales_invoice_line_f dwh
    WHERE dwh.integration_id IS NOT NULL
)
UNION ALL
-- Branch 2: DWH rows with IS_POS = 'N' whose key does not appear in EBS.
SELECT
    dwh.net_amt,
    dwh.invoiced_qty,
    dwh.created_on_dt,
    dwh.changed_on_dt,
    dwh.integration_id,
    'DWH' AS source_name
FROM w_sales_invoice_line_f dwh
WHERE dwh.is_pos = 'N'
  AND dwh.integration_id NOT IN (
      SELECT ebs.integration_id
      FROM w_sales_invoice_line_fs ebs
      WHERE ebs.integration_id IS NOT NULL
  );
此外,正如其他人在评论中提到的,UNION ALL
可能更合适。
此外,您可以尝试使用 LEFT OUTER JOIN
,如果您有索引,这是执行上述操作的更明确的方法。我无法从我的当前位置访问我的 oracle 来尝试解释计划,但上面和下面实际上可能会进行类似的优化。
-- Rows found in only one of the two tables, written as two anti-joins:
-- outer-join to the other table on INTEGRATION_ID and keep only the rows
-- where the other side's key came back NULL (no match).
-- UNION ALL concatenates the branches without duplicate elimination.

-- Branch 1: EBS rows without a matching DWH row.
SELECT
    ebs.net_amt,
    -- Non-NULL quantities pass through unchanged. A NULL quantity stays NULL
    -- when NET_AMT is NULL or 0, and is reported as -1 otherwise.
    NVL(
        ebs.invoiced_qty,
        CASE NVL(ebs.net_amt, 0)
            WHEN 0 THEN ebs.invoiced_qty
            ELSE -1
        END
    ) AS invoiced_qty,
    ebs.created_on_dt,
    ebs.changed_on_dt,
    ebs.integration_id,
    'EBS' AS source_name
FROM w_sales_invoice_line_fs ebs
LEFT JOIN w_sales_invoice_line_f dwh
    ON ebs.integration_id = dwh.integration_id
WHERE dwh.integration_id IS NULL
UNION ALL
-- Branch 2: DWH rows with IS_POS = 'N' without a matching EBS row.
SELECT
    dwh.net_amt,
    dwh.invoiced_qty,
    dwh.created_on_dt,
    dwh.changed_on_dt,
    dwh.integration_id,
    'DWH' AS source_name
FROM w_sales_invoice_line_f dwh
LEFT JOIN w_sales_invoice_line_fs ebs
    ON dwh.integration_id = ebs.integration_id
WHERE dwh.is_pos = 'N'
  AND ebs.integration_id IS NULL
;
能否简要说明问题中的表?每个表中大约有多少条记录?有哪些索引?是否有任何字段是计算/派生出来的?当您对这些查询或您的原始查询执行解释计划时,它显示的瓶颈在哪里?
NOT EXISTS 和 NOT IN 语句通常是性能瓶颈。解决这个问题的一个性能技巧是使用 LEFT OUTER JOIN,再加上一个要求第二个表的列为 NULL(即没有匹配的行)的条件。所以请尝试:
-- Rows found in only one of the two tables, written as two anti-joins:
-- outer-join to the other table on INTEGRATION_ID and keep only the rows
-- where the other side's key came back NULL (no match).
-- UNION (rather than UNION ALL) also removes duplicate rows from the result.

-- Branch 1: EBS rows without a matching DWH row.
SELECT
    ebs.net_amt,
    -- Non-NULL quantities pass through unchanged. A NULL quantity stays NULL
    -- when NET_AMT is NULL or 0, and is reported as -1 otherwise.
    NVL(
        ebs.invoiced_qty,
        CASE NVL(ebs.net_amt, 0)
            WHEN 0 THEN ebs.invoiced_qty
            ELSE -1
        END
    ) AS invoiced_qty,
    ebs.created_on_dt,
    ebs.changed_on_dt,
    ebs.integration_id,
    'EBS' AS source_name
FROM w_sales_invoice_line_fs ebs
LEFT JOIN w_sales_invoice_line_f dwh
    ON dwh.integration_id = ebs.integration_id
WHERE dwh.integration_id IS NULL
UNION
-- Branch 2: DWH rows with IS_POS = 'N' without a matching EBS row.
SELECT
    dwh.net_amt,
    dwh.invoiced_qty,
    dwh.created_on_dt,
    dwh.changed_on_dt,
    dwh.integration_id,
    'DWH' AS source_name
FROM w_sales_invoice_line_f dwh
LEFT JOIN w_sales_invoice_line_fs ebs
    ON dwh.integration_id = ebs.integration_id
WHERE dwh.is_pos = 'N'
  AND ebs.integration_id IS NULL
您是在手动实现全外连接(FULL OUTER JOIN),而 Oracle 可以自动为此类比较任务完成这一操作(我猜它可能运行得更快):
-- Compare the EBS rows with the DWH rows flagged IS_POS = 'N' in one pass:
-- FULL OUTER JOIN the two sets on INTEGRATION_ID, then keep only the rows
-- that are missing on one side. Each output row carries the EBS and the DWH
-- values side by side; the columns of the missing side are NULL.
SELECT
    ebs.net_amt AS ebs_net_amt,
    dwh.net_amt AS dwh_net_amt,
    -- Non-NULL quantities pass through unchanged. A NULL quantity stays NULL
    -- when NET_AMT is NULL or 0, and is reported as -1 otherwise.
    NVL(
        ebs.invoiced_qty,
        CASE NVL(ebs.net_amt, 0)
            WHEN 0 THEN ebs.invoiced_qty
            ELSE -1
        END
    ) AS invoiced_qty_ebs,
    dwh.invoiced_qty AS invoiced_qty_dwh,
    ebs.created_on_dt AS ebs_created_on_dt,
    dwh.created_on_dt AS dwh_created_on_dt,
    ebs.changed_on_dt AS ebs_changed_on_dt,
    dwh.changed_on_dt AS dwh_changed_on_dt,
    -- Take the key from whichever side actually has the row.
    NVL(ebs.integration_id, dwh.integration_id) AS integration_id,
    -- The first branch cannot fire while the WHERE clause below keeps only
    -- one-sided rows; it labels matched rows if that filter is removed.
    CASE
        WHEN ebs.integration_id IS NOT NULL
             AND dwh.integration_id IS NOT NULL THEN 'EBS and DWH'
        WHEN ebs.integration_id IS NOT NULL THEN 'EBS'
        ELSE 'DWH'
    END AS source_name
FROM w_sales_invoice_line_fs ebs
FULL OUTER JOIN (
    -- Only the columns the outer query reads, restricted to IS_POS = 'N'.
    SELECT
        net_amt,
        invoiced_qty,
        created_on_dt,
        changed_on_dt,
        integration_id
    FROM w_sales_invoice_line_f
    WHERE is_pos = 'N'
) dwh
    ON ebs.integration_id = dwh.integration_id
WHERE ebs.integration_id IS NULL
   OR dwh.integration_id IS NULL -- restrict to records missing on one side
我有两个 Oracle 表,我对它们做 UNION 来找出这两个表中存储的数据的差异。但是,当我在 SQL Developer 中运行该查询时,查询非常慢;我在 Informatica 中使用相同的查询,吞吐量也很低。
TABLE 1: W_SALES_INVOICE_LINE_FS EBS(NET_AMT, INVOICED_QTY, CREATED_ON_DT, CHANGED_ON_DT, INTEGRATION_ID, 'EBS' AS SOURCE_NAME)
TABLE 2: W_SALES_INVOICE_LINE_F DWH (NET_AMT, INVOICED_QTY, CREATED_ON_DT, CHANGED_ON_DT, INTEGRATION_ID, 'DWH' AS SOURCE_NAME)
我把查询附在问题中:
-- Rows found in only one of the two tables, matched on INTEGRATION_ID.
-- SOURCE_NAME labels the table each row was read from. UNION (rather than
-- UNION ALL) also removes duplicate rows from the combined result.

-- Branch 1: EBS rows that have no DWH row with the same INTEGRATION_ID.
SELECT
    ebs.net_amt,
    -- Non-NULL quantities pass through unchanged. A NULL quantity stays NULL
    -- when NET_AMT is NULL or 0, and is reported as -1 otherwise.
    NVL(
        ebs.invoiced_qty,
        CASE NVL(ebs.net_amt, 0)
            WHEN 0 THEN ebs.invoiced_qty
            ELSE -1
        END
    ) AS invoiced_qty,
    ebs.created_on_dt,
    ebs.changed_on_dt,
    ebs.integration_id,
    'EBS' AS source_name
FROM w_sales_invoice_line_fs ebs
WHERE NOT EXISTS (
    SELECT 1
    FROM w_sales_invoice_line_f dwh
    WHERE dwh.integration_id = ebs.integration_id
)
UNION
-- Branch 2: DWH rows with IS_POS = 'N' that have no EBS row with the same
-- INTEGRATION_ID.
SELECT
    dwh.net_amt,
    dwh.invoiced_qty,
    dwh.created_on_dt,
    dwh.changed_on_dt,
    dwh.integration_id,
    'DWH' AS source_name
FROM w_sales_invoice_line_f dwh
WHERE NOT EXISTS (
    SELECT 1
    FROM w_sales_invoice_line_fs ebs
    WHERE ebs.integration_id = dwh.integration_id
)
  AND dwh.is_pos = 'N';
如果您想查看解释计划,请告诉我。有人可以告诉我如何提高性能,或者让我知道问题是否出在其他方面而不是上述查询!
你执行的不是 JOIN,而是 UNION。但是,您正在执行子查询,而这些可能会降低整体性能。您可以将 EXISTS 更改为 IN,这样可以利用索引(如果存在)。注意:只要子查询返回的 INTEGRATION_ID 中有一个 NULL,NOT IN 就不会返回任何行,因此该列必须不可为空,或者在子查询中过滤掉 NULL。
尝试以下操作:
-- Rows found in only one of the two tables, matched on INTEGRATION_ID,
-- written with NOT IN instead of NOT EXISTS.
--
-- NOT IN evaluates to UNKNOWN for every row as soon as the subquery returns a
-- single NULL, which would make the branch return nothing. Both subqueries
-- therefore filter NULL keys out explicitly.
-- NOTE(review): a driving row whose own INTEGRATION_ID is NULL is still
-- skipped by NOT IN (NOT EXISTS would return it) - confirm the column is
-- declared NOT NULL, or keep the NOT EXISTS form.
--
-- UNION ALL does no duplicate elimination; the two branches cannot produce
-- the same row because their SOURCE_NAME literals differ.

-- Branch 1: EBS rows whose key does not appear in DWH.
SELECT
    ebs.net_amt,
    -- Non-NULL quantities pass through unchanged. A NULL quantity stays NULL
    -- when NET_AMT is NULL or 0, and is reported as -1 otherwise.
    NVL(
        ebs.invoiced_qty,
        CASE NVL(ebs.net_amt, 0)
            WHEN 0 THEN ebs.invoiced_qty
            ELSE -1
        END
    ) AS invoiced_qty,
    ebs.created_on_dt,
    ebs.changed_on_dt,
    ebs.integration_id,
    'EBS' AS source_name
FROM w_sales_invoice_line_fs ebs
WHERE ebs.integration_id NOT IN (
    SELECT dwh.integration_id
    FROM w_sales_invoice_line_f dwh
    WHERE dwh.integration_id IS NOT NULL
)
UNION ALL
-- Branch 2: DWH rows with IS_POS = 'N' whose key does not appear in EBS.
SELECT
    dwh.net_amt,
    dwh.invoiced_qty,
    dwh.created_on_dt,
    dwh.changed_on_dt,
    dwh.integration_id,
    'DWH' AS source_name
FROM w_sales_invoice_line_f dwh
WHERE dwh.is_pos = 'N'
  AND dwh.integration_id NOT IN (
      SELECT ebs.integration_id
      FROM w_sales_invoice_line_fs ebs
      WHERE ebs.integration_id IS NOT NULL
  );
此外,正如其他人在评论中提到的,UNION ALL
可能更合适。
此外,您可以尝试使用 LEFT OUTER JOIN
,如果您有索引,这是执行上述操作的更明确的方法。我无法从我的当前位置访问我的 oracle 来尝试解释计划,但上面和下面实际上可能会进行类似的优化。
-- Rows found in only one of the two tables, written as two anti-joins:
-- outer-join to the other table on INTEGRATION_ID and keep only the rows
-- where the other side's key came back NULL (no match).
-- UNION ALL concatenates the branches without duplicate elimination.

-- Branch 1: EBS rows without a matching DWH row.
SELECT
    ebs.net_amt,
    -- Non-NULL quantities pass through unchanged. A NULL quantity stays NULL
    -- when NET_AMT is NULL or 0, and is reported as -1 otherwise.
    NVL(
        ebs.invoiced_qty,
        CASE NVL(ebs.net_amt, 0)
            WHEN 0 THEN ebs.invoiced_qty
            ELSE -1
        END
    ) AS invoiced_qty,
    ebs.created_on_dt,
    ebs.changed_on_dt,
    ebs.integration_id,
    'EBS' AS source_name
FROM w_sales_invoice_line_fs ebs
LEFT JOIN w_sales_invoice_line_f dwh
    ON ebs.integration_id = dwh.integration_id
WHERE dwh.integration_id IS NULL
UNION ALL
-- Branch 2: DWH rows with IS_POS = 'N' without a matching EBS row.
SELECT
    dwh.net_amt,
    dwh.invoiced_qty,
    dwh.created_on_dt,
    dwh.changed_on_dt,
    dwh.integration_id,
    'DWH' AS source_name
FROM w_sales_invoice_line_f dwh
LEFT JOIN w_sales_invoice_line_fs ebs
    ON dwh.integration_id = ebs.integration_id
WHERE dwh.is_pos = 'N'
  AND ebs.integration_id IS NULL
;
能否简要说明问题中的表?每个表中大约有多少条记录?有哪些索引?是否有任何字段是计算/派生出来的?当您对这些查询或您的原始查询执行解释计划时,它显示的瓶颈在哪里?
NOT EXISTS 和 NOT IN 语句通常是性能瓶颈。解决这个问题的一个性能技巧是使用 LEFT OUTER JOIN,再加上一个要求第二个表的列为 NULL(即没有匹配的行)的条件。所以请尝试:
-- Rows found in only one of the two tables, written as two anti-joins:
-- outer-join to the other table on INTEGRATION_ID and keep only the rows
-- where the other side's key came back NULL (no match).
-- UNION (rather than UNION ALL) also removes duplicate rows from the result.

-- Branch 1: EBS rows without a matching DWH row.
SELECT
    ebs.net_amt,
    -- Non-NULL quantities pass through unchanged. A NULL quantity stays NULL
    -- when NET_AMT is NULL or 0, and is reported as -1 otherwise.
    NVL(
        ebs.invoiced_qty,
        CASE NVL(ebs.net_amt, 0)
            WHEN 0 THEN ebs.invoiced_qty
            ELSE -1
        END
    ) AS invoiced_qty,
    ebs.created_on_dt,
    ebs.changed_on_dt,
    ebs.integration_id,
    'EBS' AS source_name
FROM w_sales_invoice_line_fs ebs
LEFT JOIN w_sales_invoice_line_f dwh
    ON dwh.integration_id = ebs.integration_id
WHERE dwh.integration_id IS NULL
UNION
-- Branch 2: DWH rows with IS_POS = 'N' without a matching EBS row.
SELECT
    dwh.net_amt,
    dwh.invoiced_qty,
    dwh.created_on_dt,
    dwh.changed_on_dt,
    dwh.integration_id,
    'DWH' AS source_name
FROM w_sales_invoice_line_f dwh
LEFT JOIN w_sales_invoice_line_fs ebs
    ON dwh.integration_id = ebs.integration_id
WHERE dwh.is_pos = 'N'
  AND ebs.integration_id IS NULL
您是在手动实现全外连接(FULL OUTER JOIN),而 Oracle 可以自动为此类比较任务完成这一操作(我猜它可能运行得更快):
-- Compare the EBS rows with the DWH rows flagged IS_POS = 'N' in one pass:
-- FULL OUTER JOIN the two sets on INTEGRATION_ID, then keep only the rows
-- that are missing on one side. Each output row carries the EBS and the DWH
-- values side by side; the columns of the missing side are NULL.
SELECT
    ebs.net_amt AS ebs_net_amt,
    dwh.net_amt AS dwh_net_amt,
    -- Non-NULL quantities pass through unchanged. A NULL quantity stays NULL
    -- when NET_AMT is NULL or 0, and is reported as -1 otherwise.
    NVL(
        ebs.invoiced_qty,
        CASE NVL(ebs.net_amt, 0)
            WHEN 0 THEN ebs.invoiced_qty
            ELSE -1
        END
    ) AS invoiced_qty_ebs,
    dwh.invoiced_qty AS invoiced_qty_dwh,
    ebs.created_on_dt AS ebs_created_on_dt,
    dwh.created_on_dt AS dwh_created_on_dt,
    ebs.changed_on_dt AS ebs_changed_on_dt,
    dwh.changed_on_dt AS dwh_changed_on_dt,
    -- Take the key from whichever side actually has the row.
    NVL(ebs.integration_id, dwh.integration_id) AS integration_id,
    -- The first branch cannot fire while the WHERE clause below keeps only
    -- one-sided rows; it labels matched rows if that filter is removed.
    CASE
        WHEN ebs.integration_id IS NOT NULL
             AND dwh.integration_id IS NOT NULL THEN 'EBS and DWH'
        WHEN ebs.integration_id IS NOT NULL THEN 'EBS'
        ELSE 'DWH'
    END AS source_name
FROM w_sales_invoice_line_fs ebs
FULL OUTER JOIN (
    -- Only the columns the outer query reads, restricted to IS_POS = 'N'.
    SELECT
        net_amt,
        invoiced_qty,
        created_on_dt,
        changed_on_dt,
        integration_id
    FROM w_sales_invoice_line_f
    WHERE is_pos = 'N'
) dwh
    ON ebs.integration_id = dwh.integration_id
WHERE ebs.integration_id IS NULL
   OR dwh.integration_id IS NULL -- restrict to records missing on one side