T-SQL |比 WHERE NOT EXISTS 更好的选择
T-SQL | Better alternative to WHERE NOT EXISTS
我有以下查询:
-- Inserts cleaned-up rows selected from [Table A] into [Table A], skipping rows
-- for which the NOT EXISTS subquery finds an existing look-alike row.
-- NOTE(review): the column list names 5 columns but the SELECT list below has
-- 6 expressions (GETDATE() has no target column) -- reconcile with the real schema.
INSERT INTO [Table A] ([student_name], [class_id],[contact_detail], [birth_date],[note_average])
-- Keep only the part of [student_name] before the first '.', if there is one.
SELECT [student_name] = case when CHARINDEX('.', [student_name])>0 then LEFT([student_name],CHARINDEX('.', [student_name])-1)
else [student_name] end
,[class_id]
-- Take the tail of [contact_detail] starting at its last ':'; when that tail is
-- just ':' (the value ends with a colon), append '|'.
,case when reverse(SUBSTRING(REVERSE([contact_detail]),1,CHARINDEX(':', REVERSE([contact_detail])))) like ':'
then ([contact_detail] + '|')
else [contact_detail]
end as [contact_detail]
,[birth_date]
,CAST([note_average] AS decimal(13,2)) as [note_average]
,GETDATE()
FROM [Table A]
-- Only rows whose birth date is not in the future and whose name is at least 5 characters long.
WHERE CAST([birth_date] AS DATE) <= CAST(GETDATE() AS DATE)
AND LEN([student_name]) >= 5
AND NOT EXISTS
(
-- EXISTS only tests whether a row comes back; the selected columns are not used.
SELECT [student_name]
,[class_id]
,[contact_detail]
,[birth_date]
FROM [Table A] a
-- NOTE(review): the alias ods is not declared anywhere in this statement;
-- presumably the outer FROM was meant to carry it -- confirm.
-- NOTE(review): in `x LIKE y` the pattern is y, so the '%' wildcards below sit
-- on the matched string, not on the pattern -- verify this is intended.
WHERE '%' + ods.[student_name] + '%' LIKE a.[student_name]
AND '%' + ods.[class_id] + '%' LIKE a.[class_id]
AND '%' + ods.[contact_detail] + '%' LIKE a.[contact_detail]
AND ods.[birth_date] = a.[birth_date]
)
-- Batch separator.
GO
我不想插入重复值,而且我的表中没有键(主键)。我的问题是:这个查询插入新值需要花费很长时间。我要插入大约 1000000 行。
我有哪些选择?
非常感谢!
假设您确实只想排除完全相同的重复行,可以尝试如下写法:
-- Insert cleaned-up rows into [Table A], skipping any row that already has an
-- exact duplicate. Duplicates are detected with a LEFT JOIN anti-join: an ods
-- row with no match in a comes back with a.* = NULL, and only those are kept.
-- NOTE(review): the column list and the SELECT list do not line up positionally
-- ([os_name] has no source expression, GETDATE() has no target column) --
-- reconcile both lists with the real table definition before running.
-- NOTE(review): as written, ods and a are the same table, so every row with
-- non-NULL key columns matches itself; presumably ods is meant to be a separate
-- source table -- confirm.
INSERT INTO [Table A] ([student_name], [os_name], [class_id],[contact_detail], [birth_date],[note_average])
SELECT
    -- Keep only the part of the name before the first '.', if there is one.
    -- A column alias cannot be table-qualified, so it is plain [student_name].
    CASE
        WHEN CHARINDEX('.', ods.[student_name]) > 0
            THEN LEFT(ods.[student_name], CHARINDEX('.', ods.[student_name]) - 1)
        ELSE ods.[student_name]
    END AS [student_name]
    ,ods.[class_id]
    -- Take the tail of [contact_detail] starting at its last ':'; when that tail
    -- is just ':' (the value ends with a colon), append '|'.
    ,CASE
        WHEN REVERSE(SUBSTRING(REVERSE(ods.[contact_detail]), 1, CHARINDEX(':', REVERSE(ods.[contact_detail])))) LIKE ':'
            THEN (ods.[contact_detail] + '|')
        ELSE ods.[contact_detail]
    END AS [contact_detail]
    ,ods.[birth_date]
    ,CAST(ods.[note_average] AS decimal(13,2)) AS [note_average]
    ,GETDATE()
FROM [Table A] AS ods
-- T-SQL has no bare OUTER JOIN; the anti-join needs LEFT JOIN so that unmatched
-- ods rows survive with NULLs on the a side.
-- NOTE(review): '=' never matches NULL, so rows with a NULL key column are
-- never treated as duplicates -- confirm that is acceptable.
LEFT JOIN [Table A] AS a
    ON  ods.[student_name]   = a.[student_name]
    AND ods.[os_name]        = a.[os_name]
    AND ods.[class_id]       = a.[class_id]
    AND ods.[contact_detail] = a.[contact_detail]
    AND ods.[birth_date]     = a.[birth_date]
WHERE CAST(ods.[birth_date] AS DATE) <= CAST(GETDATE() AS DATE)
    AND LEN(ods.[student_name]) >= 5
    -- Only include when no matching duplicate is found.
    AND a.[student_name] IS NULL
如果愿意,您也可以继续使用子查询和 NOT EXISTS,这同样没有问题;不过子查询中使用 SELECT 0 之类的写法即可,不必选出所有这些列,因为 EXISTS 只检查是否存在匹配的行。我认为,把比较从 LIKE 改为 = 就能得到您想要的结果,并且在必要时可以借助索引来优化查询。
-- Anti-duplicate filter: keep the ods row only when [Table A] holds no row with
-- the same five key columns. EXISTS only tests for a row, so a constant is selected.
AND NOT EXISTS (
    SELECT 0
    FROM [Table A] AS existing_row
    WHERE existing_row.[student_name]   = ods.[student_name]
      AND existing_row.[os_name]        = ods.[os_name]
      AND existing_row.[class_id]       = ods.[class_id]
      AND existing_row.[contact_detail] = ods.[contact_detail]
      AND existing_row.[birth_date]     = ods.[birth_date]
)
该查询能够正常工作之后,如果还需要更好的性能,可以考虑添加索引。也许只需在 [birth_date] 或 [student_name] 上添加索引,就能获得可以接受的查询性能。
我有以下查询:
-- Inserts cleaned-up rows selected from [Table A] into [Table A], skipping rows
-- for which the NOT EXISTS subquery finds an existing look-alike row.
-- NOTE(review): the column list names 5 columns but the SELECT list below has
-- 6 expressions (GETDATE() has no target column) -- reconcile with the real schema.
INSERT INTO [Table A] ([student_name], [class_id],[contact_detail], [birth_date],[note_average])
-- Keep only the part of [student_name] before the first '.', if there is one.
SELECT [student_name] = case when CHARINDEX('.', [student_name])>0 then LEFT([student_name],CHARINDEX('.', [student_name])-1)
else [student_name] end
,[class_id]
-- Take the tail of [contact_detail] starting at its last ':'; when that tail is
-- just ':' (the value ends with a colon), append '|'.
,case when reverse(SUBSTRING(REVERSE([contact_detail]),1,CHARINDEX(':', REVERSE([contact_detail])))) like ':'
then ([contact_detail] + '|')
else [contact_detail]
end as [contact_detail]
,[birth_date]
,CAST([note_average] AS decimal(13,2)) as [note_average]
,GETDATE()
FROM [Table A]
-- Only rows whose birth date is not in the future and whose name is at least 5 characters long.
WHERE CAST([birth_date] AS DATE) <= CAST(GETDATE() AS DATE)
AND LEN([student_name]) >= 5
AND NOT EXISTS
(
-- EXISTS only tests whether a row comes back; the selected columns are not used.
SELECT [student_name]
,[class_id]
,[contact_detail]
,[birth_date]
FROM [Table A] a
-- NOTE(review): the alias ods is not declared anywhere in this statement;
-- presumably the outer FROM was meant to carry it -- confirm.
-- NOTE(review): in `x LIKE y` the pattern is y, so the '%' wildcards below sit
-- on the matched string, not on the pattern -- verify this is intended.
WHERE '%' + ods.[student_name] + '%' LIKE a.[student_name]
AND '%' + ods.[class_id] + '%' LIKE a.[class_id]
AND '%' + ods.[contact_detail] + '%' LIKE a.[contact_detail]
AND ods.[birth_date] = a.[birth_date]
)
-- Batch separator.
GO
我不想插入重复值,而且我的表中没有键(主键)。我的问题是:这个查询插入新值需要花费很长时间。我要插入大约 1000000 行。
我有哪些选择?
非常感谢!
假设您确实只想排除完全相同的重复行,可以尝试如下写法:
-- Insert cleaned-up rows into [Table A], skipping any row that already has an
-- exact duplicate. Duplicates are detected with a LEFT JOIN anti-join: an ods
-- row with no match in a comes back with a.* = NULL, and only those are kept.
-- NOTE(review): the column list and the SELECT list do not line up positionally
-- ([os_name] has no source expression, GETDATE() has no target column) --
-- reconcile both lists with the real table definition before running.
-- NOTE(review): as written, ods and a are the same table, so every row with
-- non-NULL key columns matches itself; presumably ods is meant to be a separate
-- source table -- confirm.
INSERT INTO [Table A] ([student_name], [os_name], [class_id],[contact_detail], [birth_date],[note_average])
SELECT
    -- Keep only the part of the name before the first '.', if there is one.
    -- A column alias cannot be table-qualified, so it is plain [student_name].
    CASE
        WHEN CHARINDEX('.', ods.[student_name]) > 0
            THEN LEFT(ods.[student_name], CHARINDEX('.', ods.[student_name]) - 1)
        ELSE ods.[student_name]
    END AS [student_name]
    ,ods.[class_id]
    -- Take the tail of [contact_detail] starting at its last ':'; when that tail
    -- is just ':' (the value ends with a colon), append '|'.
    ,CASE
        WHEN REVERSE(SUBSTRING(REVERSE(ods.[contact_detail]), 1, CHARINDEX(':', REVERSE(ods.[contact_detail])))) LIKE ':'
            THEN (ods.[contact_detail] + '|')
        ELSE ods.[contact_detail]
    END AS [contact_detail]
    ,ods.[birth_date]
    ,CAST(ods.[note_average] AS decimal(13,2)) AS [note_average]
    ,GETDATE()
FROM [Table A] AS ods
-- T-SQL has no bare OUTER JOIN; the anti-join needs LEFT JOIN so that unmatched
-- ods rows survive with NULLs on the a side.
-- NOTE(review): '=' never matches NULL, so rows with a NULL key column are
-- never treated as duplicates -- confirm that is acceptable.
LEFT JOIN [Table A] AS a
    ON  ods.[student_name]   = a.[student_name]
    AND ods.[os_name]        = a.[os_name]
    AND ods.[class_id]       = a.[class_id]
    AND ods.[contact_detail] = a.[contact_detail]
    AND ods.[birth_date]     = a.[birth_date]
WHERE CAST(ods.[birth_date] AS DATE) <= CAST(GETDATE() AS DATE)
    AND LEN(ods.[student_name]) >= 5
    -- Only include when no matching duplicate is found.
    AND a.[student_name] IS NULL
如果愿意,您也可以继续使用子查询和 NOT EXISTS,这同样没有问题;不过子查询中使用 SELECT 0 之类的写法即可,不必选出所有这些列,因为 EXISTS 只检查是否存在匹配的行。我认为,把比较从 LIKE 改为 = 就能得到您想要的结果,并且在必要时可以借助索引来优化查询。
-- Anti-duplicate filter: keep the ods row only when [Table A] holds no row with
-- the same five key columns. EXISTS only tests for a row, so a constant is selected.
AND NOT EXISTS (
    SELECT 0
    FROM [Table A] AS existing_row
    WHERE existing_row.[student_name]   = ods.[student_name]
      AND existing_row.[os_name]        = ods.[os_name]
      AND existing_row.[class_id]       = ods.[class_id]
      AND existing_row.[contact_detail] = ods.[contact_detail]
      AND existing_row.[birth_date]     = ods.[birth_date]
)
该查询能够正常工作之后,如果还需要更好的性能,可以考虑添加索引。也许只需在 [birth_date] 或 [student_name] 上添加索引,就能获得可以接受的查询性能。