嵌套循环是否有可能将不同数据连接到不同循环中的相同 id
Is it possible that nested loop joins different data to same id in different loops
我们有一个有趣的现象,sql 和我们无法重现的 oracle 数据库。这个例子被简化了。我们不相信,但可能过于简单化了。
主要问题:给定一个嵌套循环,其中内部(不是驱动)table有一个解析函数,其结果不明确(多行可能是order by 的第一行),对于不同的外部循环,所述分析函数可以 return 不同的结果是否可行?
第二个问题:如果是,我们如何重现此行为?
如果没有,您是否有任何其他想法为什么此查询会为同一家公司生成多行。
不是问题:如果错误的假设是正确的,那么更正 sql 将很容易。只需在分析函数中明确排序即可,例如通过添加 id 列作为第二个条件。
问题:
公司与所有者有 n:m 关系,与地址有 1:n 关系。
SQL 加入所有 table,同时使用分析函数 row_number() 只读取每个公司的单个地址,按公司和地址分组并累积所有者名称。
我们将查询用于多种目的,其他目的涉及读取“最佳”地址,有问题的地址则没有。我们收到多个错误报告,结果如下:
A公司有所有者N1、N2、N3。
结果是
Company
Owner list
A
N1
A
N2, N3
报告的所有案例都涉及具有多个“最佳”地址的公司,因此理论上,应该提供单个地址的子查询不知何故被破坏了。但是我们无法重现结果。
详细信息:
(对于较小的数字,listagg() 是使用的原始函数,但对于较大的数字它会失败。count(*) 应该是 suitable 替换)
--cleanup
DROP TABLE rau_companyowner;
DROP TABLE rau_owner;
DROP TABLE rau_address;
DROP TABLE rau_company;
--create structure
CREATE TABLE rau_company (
id NUMBER CONSTRAINT pk_rau_company PRIMARY KEY USING INDEX (CREATE UNIQUE INDEX idx_rau_company_p ON rau_company(id))
);
CREATE TABLE rau_owner (
id NUMBER CONSTRAINT pk_rau_owner PRIMARY KEY USING INDEX (CREATE UNIQUE INDEX idx_rau_owner_p ON rau_owner(id)),
name varchar2(1000)
);
CREATE TABLE rau_companyowner (
company_id NUMBER,
owner_id NUMBER,
CONSTRAINT pk_rau_companyowner PRIMARY KEY (company_id, owner_id) USING INDEX (CREATE UNIQUE INDEX idx_rau_companyowner_p ON rau_companyowner(company_id, owner_id)),
CONSTRAINT fk_companyowner_company FOREIGN KEY (company_id) REFERENCES rau_company(id),
CONSTRAINT fk_companyowner_owner FOREIGN KEY (owner_id) REFERENCES rau_owner(id)
);
CREATE TABLE rau_address (
id NUMBER CONSTRAINT pk_rau_address PRIMARY KEY USING INDEX (CREATE UNIQUE INDEX idx_rau_address_p ON rau_address(id)),
company_id NUMBER,
prio NUMBER NOT NULL,
street varchar2(1000),
CONSTRAINT fk_address_company FOREIGN KEY (company_id) REFERENCES rau_company(id)
);
--create testdata
DECLARE
TYPE t_address IS TABLE OF rau_address%rowtype INDEX BY pls_integer;
address t_address;
TYPE t_owner IS TABLE OF rau_owner%rowtype INDEX BY pls_integer;
owner t_owner;
TYPE t_companyowner IS TABLE OF rau_companyowner%rowtype INDEX BY pls_integer;
companyowner t_companyowner;
ii pls_integer;
company_id pls_integer := 1;
test_count PLS_INTEGER := 10000;
--test_count PLS_INTEGER := 50;
BEGIN
--rau_company
INSERT INTO rau_company VALUES (company_id);
--rau_owner,rau_companyowner
FOR ii IN 1 .. test_count
LOOP
owner(ii).id:=ii;
owner(ii).name:='N'||to_char(ii);
companyowner(ii).company_id:=company_id;
companyowner(ii).owner_id:=ii;
END LOOP;
forall ii IN owner.FIRST .. owner.LAST
INSERT INTO rau_owner VALUES (owner(ii).id, owner(ii).name);
forall ii IN companyowner.FIRST .. companyowner.LAST
INSERT INTO rau_companyowner VALUES (companyowner(ii).company_id, companyowner(ii).owner_id);
--rau_address
FOR ii IN 1 .. test_count
LOOP
address(ii).id:=ii;
address(ii).company_id:=company_id;
address(ii).prio:=1;
address(ii).street:='S'||to_char(ii);
END LOOP;
forall ii IN address.FIRST .. address.LAST
INSERT INTO rau_address VALUES (address(ii).id, address(ii).company_id, address(ii).prio, address(ii).street);
COMMIT;
END;
-- check testdata
SELECT 'rau_company' tab, COUNT(*) count FROM rau_company
UNION all
SELECT 'rau_owner', COUNT(*) FROM rau_owner
UNION all
SELECT 'rau_companyowner', COUNT(*) FROM rau_companyowner
UNION all
SELECT 'rau_address', COUNT(*) FROM rau_address;
-- the sql: NL with address as inner loop enforced
-- ‘order BY prio’ is ambiguous because all addresses have the same prio
-- => the single row in ad could be any row
SELECT /*+ leading(hh hhoo oo ad) use_hash(hhoo oo) USE_NL(hh ad) */
hh.id company,
ad.street,
-- LISTAGG(oo.name || ', ') within group (order by oo.name) owner_list,
count(oo.id) owner_count
FROM rau_company hh
LEFT JOIN rau_companyowner hhoo ON hh.id = hhoo.company_id
LEFT JOIN rau_owner oo ON hhoo.owner_id = oo.id
LEFT JOIN (
SELECT *
FROM (
SELECT company_id, street,
row_number() over ( partition by company_id order BY prio asc ) as row_num
FROM rau_address
)
WHERE row_num = 1
) ad ON hh.id = ad.company_id
GROUP BY hh.id,
ad.street;
Cris Saxon 很高兴回答我的问题:https://asktom.oracle.com/pls/apex/f?p=100:11:::::P11_QUESTION_ID:9546263400346154452
简而言之:只要 order by 不明确 (non-deterministic),即使在相同的 sql.
中,也总会有不同结果的机会
要重现,请将此添加到我的测试数据中:
更改 TABLE rau_address 并行 8;
并尝试底部的 select,它应该提供多行。
我们有一个有趣的现象,sql 和我们无法重现的 oracle 数据库。这个例子被简化了。我们不相信,但可能过于简单化了。
主要问题:给定一个嵌套循环,其中内部(不是驱动)table有一个解析函数,其结果不明确(多行可能是order by 的第一行),对于不同的外部循环,所述分析函数可以 return 不同的结果是否可行?
第二个问题:如果是,我们如何重现此行为? 如果没有,您是否有任何其他想法为什么此查询会为同一家公司生成多行。 不是问题:如果错误的假设是正确的,那么更正 sql 将很容易。只需在分析函数中明确排序即可,例如通过添加 id 列作为第二个条件。
问题:
公司与所有者有 n:m 关系,与地址有 1:n 关系。
SQL 加入所有 table,同时使用分析函数 row_number() 只读取每个公司的单个地址,按公司和地址分组并累积所有者名称。
我们将查询用于多种目的,其他目的涉及读取“最佳”地址,有问题的地址则没有。我们收到多个错误报告,结果如下:
A公司有所有者N1、N2、N3。
结果是
Company | Owner list |
---|---|
A | N1 |
A | N2, N3 |
报告的所有案例都涉及具有多个“最佳”地址的公司,因此理论上,应该提供单个地址的子查询不知何故被破坏了。但是我们无法重现结果。
详细信息: (对于较小的数字,listagg() 是使用的原始函数,但对于较大的数字它会失败。count(*) 应该是 suitable 替换)
--cleanup
DROP TABLE rau_companyowner;
DROP TABLE rau_owner;
DROP TABLE rau_address;
DROP TABLE rau_company;
--create structure
CREATE TABLE rau_company (
id NUMBER CONSTRAINT pk_rau_company PRIMARY KEY USING INDEX (CREATE UNIQUE INDEX idx_rau_company_p ON rau_company(id))
);
CREATE TABLE rau_owner (
id NUMBER CONSTRAINT pk_rau_owner PRIMARY KEY USING INDEX (CREATE UNIQUE INDEX idx_rau_owner_p ON rau_owner(id)),
name varchar2(1000)
);
CREATE TABLE rau_companyowner (
company_id NUMBER,
owner_id NUMBER,
CONSTRAINT pk_rau_companyowner PRIMARY KEY (company_id, owner_id) USING INDEX (CREATE UNIQUE INDEX idx_rau_companyowner_p ON rau_companyowner(company_id, owner_id)),
CONSTRAINT fk_companyowner_company FOREIGN KEY (company_id) REFERENCES rau_company(id),
CONSTRAINT fk_companyowner_owner FOREIGN KEY (owner_id) REFERENCES rau_owner(id)
);
CREATE TABLE rau_address (
id NUMBER CONSTRAINT pk_rau_address PRIMARY KEY USING INDEX (CREATE UNIQUE INDEX idx_rau_address_p ON rau_address(id)),
company_id NUMBER,
prio NUMBER NOT NULL,
street varchar2(1000),
CONSTRAINT fk_address_company FOREIGN KEY (company_id) REFERENCES rau_company(id)
);
--create testdata
DECLARE
TYPE t_address IS TABLE OF rau_address%rowtype INDEX BY pls_integer;
address t_address;
TYPE t_owner IS TABLE OF rau_owner%rowtype INDEX BY pls_integer;
owner t_owner;
TYPE t_companyowner IS TABLE OF rau_companyowner%rowtype INDEX BY pls_integer;
companyowner t_companyowner;
ii pls_integer;
company_id pls_integer := 1;
test_count PLS_INTEGER := 10000;
--test_count PLS_INTEGER := 50;
BEGIN
--rau_company
INSERT INTO rau_company VALUES (company_id);
--rau_owner,rau_companyowner
FOR ii IN 1 .. test_count
LOOP
owner(ii).id:=ii;
owner(ii).name:='N'||to_char(ii);
companyowner(ii).company_id:=company_id;
companyowner(ii).owner_id:=ii;
END LOOP;
forall ii IN owner.FIRST .. owner.LAST
INSERT INTO rau_owner VALUES (owner(ii).id, owner(ii).name);
forall ii IN companyowner.FIRST .. companyowner.LAST
INSERT INTO rau_companyowner VALUES (companyowner(ii).company_id, companyowner(ii).owner_id);
--rau_address
FOR ii IN 1 .. test_count
LOOP
address(ii).id:=ii;
address(ii).company_id:=company_id;
address(ii).prio:=1;
address(ii).street:='S'||to_char(ii);
END LOOP;
forall ii IN address.FIRST .. address.LAST
INSERT INTO rau_address VALUES (address(ii).id, address(ii).company_id, address(ii).prio, address(ii).street);
COMMIT;
END;
-- check testdata
SELECT 'rau_company' tab, COUNT(*) count FROM rau_company
UNION all
SELECT 'rau_owner', COUNT(*) FROM rau_owner
UNION all
SELECT 'rau_companyowner', COUNT(*) FROM rau_companyowner
UNION all
SELECT 'rau_address', COUNT(*) FROM rau_address;
-- the sql: NL with address as inner loop enforced
-- ‘order BY prio’ is ambiguous because all addresses have the same prio
-- => the single row in ad could be any row
SELECT /*+ leading(hh hhoo oo ad) use_hash(hhoo oo) USE_NL(hh ad) */
hh.id company,
ad.street,
-- LISTAGG(oo.name || ', ') within group (order by oo.name) owner_list,
count(oo.id) owner_count
FROM rau_company hh
LEFT JOIN rau_companyowner hhoo ON hh.id = hhoo.company_id
LEFT JOIN rau_owner oo ON hhoo.owner_id = oo.id
LEFT JOIN (
SELECT *
FROM (
SELECT company_id, street,
row_number() over ( partition by company_id order BY prio asc ) as row_num
FROM rau_address
)
WHERE row_num = 1
) ad ON hh.id = ad.company_id
GROUP BY hh.id,
ad.street;
Cris Saxon 很高兴回答我的问题:https://asktom.oracle.com/pls/apex/f?p=100:11:::::P11_QUESTION_ID:9546263400346154452
简而言之:只要 order by 不明确 (non-deterministic),即使在相同的 sql.
中,也总会有不同结果的机会要重现,请将此添加到我的测试数据中: 更改 TABLE rau_address 并行 8; 并尝试底部的 select,它应该提供多行。