'Merge Fields' - 相似 SQL 服务器功能
'Merge Fields' - alike SQL Server function
我试图找到一种方法让 SGBD 在长文本中执行大量合并字段。
创建结构:
CREATE TABLE [dbo].[store]
(
[id] [int] NOT NULL,
[text] [nvarchar](MAX) NOT NULL
)
CREATE TABLE [dbo].[statement]
(
[id] [int] NOT NULL,
[store_id] [int] NOT NULL
)
CREATE TABLE [dbo].[statement_merges]
(
[statement_id] [int] NOT NULL,
[merge_field] [nvarchar](30) NOT NULL,
[user_data] [nvarchar](MAX) NOT NULL
)
现在,创建测试值
INSERT INTO [store] (id, text)
VALUES (1, 'Waw, Whosebug is an amazing library of lost people in the IT hell, and i have the feeling that $$PERC_SAT$$ of the users found a solution, personally I asked $$ASKED$$ questions.')
INSERT INTO [statement] (id, store_id)
VALUES (1, 1)
INSERT INTO [statement_merges] (statement_id, merge_field, user_data)
VALUES (1, '$$PERC_SAT$$', '85%')
INSERT INTO [statement_merges] (statement_id, merge_field, user_data)
VALUES (1, '$$ASKED$$', '12')
目前我的应用程序正在传递最终语句,循环合并,替换存储的文本和输出
Waw, Whosebug is an amazing library of lost people in the IT
hell, and i have the feeling that 85% of the users found a solution,
personally I asked 12 questions.
我试图找到一种独立于代码并在单个查询中提供输出的方法,正如您所理解的那样,select 一条语句,其中存储的文本已填充了用户数据。我希望我已经清楚了。
我查看了 TRANSLATE 函数,但它看起来像是一个字符替换,所以我有两个选择:
- 我尝试一个递归函数,一个一个替换,直到计算出的文本中没有
merge_fields
;但我对这种方法的性能有疑问;
- 有魔法可以做到,但我需要你的知识...
考虑到我想要这个是因为真实的文本很长,我不想在我的数据库中多次存储它。您可以想象一份只有 12 个参数的 3 页合同,例如开始日期、开票金额等...为了合规性,其他一切都无法更改。
感谢您的宝贵时间!
编辑:
感谢 Randy 的帮助,这看起来可以解决问题:
WITH cte_replace_tokens AS (
SELECT replace(r.text, m.merge_field, m.user_data) as [final], m.merge_field, s.id, 1 AS i
FROM store r
INNER JOIN statement s ON s.store_id = r.id
INNER JOIN statement_merges m ON m.statement_id = s.id
WHERE m.statement_id = 1
UNION ALL
SELECT replace(r.final, m.merge_field, m.user_data) as [final], m.merge_field, r.id, r.i + 1 AS i
FROM cte_replace_tokens r
INNER JOIN statement_merges m ON m.statement_id = r.id
WHERE m.merge_field > r.merge_field
)
select TOP 1 final from cte_replace_tokens ORDER BY i DESC
如果性能好,我会检查更大的数据库...
至少,我可以“填充”一个语句,我还需要弄清楚才能提取一个列表。
再次感谢!
不建议在 sql 引擎中执行此类任务,但如果你想这样做,你需要在函数或存储过程中使用游标循环执行,如下所示:
DECLARE @merge_field nvarchar(30)
, @user_data nvarchar(MAX)
, @statementid INT = 1
, @text varchar(MAX) = 'Waw, Whosebug is an amazing library of lost people in the IT hell, and i have the feeling that $$PERC_SAT$$ of the users found a solution, personally I asked $$ASKED$$ questions.'
DECLARE merge_statements CURSOR FAST_FORWARD
FOR SELECT
sm.merge_field
, sm.user_data
FROM dbo.statement_merges AS sm
WHERE sm.statement_id = @statementid
OPEN merge_statements
FETCH NEXT FROM merge_statements
INTO @merge_field , @user_data
WHILE @@FETCH_STATUS = 0
BEGIN
set @text = REPLACE(@text , @merge_field, @user_data )
FETCH NEXT FROM merge_statements
INTO @merge_field , @user_data
END
CLOSE merge_statements
DEALLOCATE merge_statements
SELECT @text
如果一条记录被同一次更新多次更新,最后一次获胜。 None 的更新受其他更新影响 - 无累积效应。在某些情况下,可以使用局部变量来欺骗 SQL 以获得累积效果,但这很棘手且不推荐。 (顺序变得重要并且在更新中不可靠。)
一个替代方案是 CTE 中的递归。随着每个令牌被替换,直到没有令牌为止,从先前生成一个新记录。这是一个工作示例,将 1 替换为 A,将 2 替换为 B,等等。(我想知道是否有一些棘手的 xml 也可以做到这一点。)
if not object_id('tempdb..#Raw') is null drop table #Raw
CREATE TABLE #Raw(
[test] [varchar](100) NOT NULL PRIMARY KEY CLUSTERED,
)
if not object_id('tempdb..#Token') is null drop table #Token
CREATE TABLE #Token(
[id] [int] NOT NULL PRIMARY KEY CLUSTERED,
[token] [char](1) NOT NULL,
[value] [char](1) NOT NULL,
)
insert into #Raw values('123456'), ('1122334456')
insert into #Token values(1, '1', 'A'), (2, '2', 'B'), (3, '3', 'C'), (4, '4', 'D'), (5, '5', 'E'), (6, '6', 'F');
WITH cte_replace_tokens AS (
SELECT r.test, replace(r.test, l.token, l.value) as [final], l.id
FROM [Raw] r
CROSS JOIN #Token l
WHERE l.id = 1
UNION ALL
SELECT r.test, replace(r.final, l.token, l.value) as [final], l.id
FROM cte_replace_tokens r
CROSS JOIN #Token l
WHERE l.id = r.id + 1
)
select * from cte_replace_tokens where id = 6
这是一个递归的解决方案。
SQL Fiddle
MS SQL Server 2017 架构设置:
CREATE TABLE [dbo].[store]
(
[id] [int] NOT NULL,
[text] [nvarchar](MAX) NOT NULL
)
CREATE TABLE [dbo].[statement]
(
[id] [int] NOT NULL,
[store_id] [int] NOT NULL
)
CREATE TABLE [dbo].[statement_merges]
(
[statement_id] [int] NOT NULL,
[merge_field] [nvarchar](30) NOT NULL,
[user_data] [nvarchar](MAX) NOT NULL
)
INSERT INTO store (id, text)
VALUES (1, '$$(*)$$, Whosebug...$$PERC_SAT$$...$$ASKED$$ questions.')
INSERT INTO store (id, text)
VALUES (2, 'Use The @_@')
INSERT INTO statement (id, store_id) VALUES (1, 1)
INSERT INTO statement (id, store_id) VALUES (2, 2)
INSERT INTO statement_merges (statement_id, merge_field, user_data) VALUES (1, '$$PERC_SAT$$', '85%')
INSERT INTO statement_merges (statement_id, merge_field, user_data) VALUES (1, '$$ASKED$$', '12')
INSERT INTO statement_merges (statement_id, merge_field, user_data) VALUES (1, '$$(*)$$', 'Wow')
INSERT INTO statement_merges (statement_id, merge_field, user_data) VALUES (2, ' @_@', 'Flux!')
查询 1:
;WITH Normalized AS
(
SELECT
store_id=store.id,
store.text,
sm.merge_field,
sm.user_data,
RowNumber = ROW_NUMBER() OVER(PARTITION BY store.id,sm.statement_id ORDER BY merge_field),
statement_id = st.id
FROM
store store
INNER JOIN statement st ON st.store_id = store.id
INNER JOIN statement_merges sm ON sm.statement_id = st.id
)
, Recurse AS
(
SELECT
store_id, statement_id, old_text = text, merge_field,user_data, RowNumber,
Iteration=1,
new_text = REPLACE(text, merge_field, user_data)
FROM
Normalized
WHERE
RowNumber=1
UNION ALL
SELECT
n.store_id, n.statement_id, r.old_text, n.merge_field, n.user_data,
RowNumber=r.RowNumber+1,
Iteration=Iteration+1,
new_text = REPLACE(r.new_text, n.merge_field, n.user_data)
FROM
Normalized n
INNER JOIN Recurse r ON r.RowNumber = n.RowNumber AND r.statement_id = n.statement_id
)
,ReverseOnIteration AS
(
SELECT *,
ReverseIteration = ROW_NUMBER() OVER(PARTITION BY statement_id ORDER BY Iteration DESC)
FROM
Recurse
)
SELECT
store_id, statement_id, new_text, old_text
FROM
ReverseOnIteration
WHERE
ReverseIteration=1
| store_id | statement_id | new_text | old_text |
|----------|--------------|------------------------------------------|--------------------------------------------------------------|
| 1 | 1 | Wow, Whosebug...85%...12 questions. | $$(*)$$, Whosebug...$$PERC_SAT$$...$$ASKED$$ questions. |
| 2 | 2 | Use TheFlux! | Use The @_@ |
在Randy的帮助下,我想我已经实现了我想做的事情!
据了解,我的真实案例是一份合同,其中有几条语句可能是:
- 自由文本
- 没有任何合并的存储文本
- 存储的文本带有一个或
几次合并
这个 CTE 可以胜任!
WITH cte_replace_tokens AS (
-- The initial query dont join on merges neither on store because can be a free text
SELECT COALESCE(r.text, s.part_text) AS [final], CAST('' AS NVARCHAR) AS merge_field, s.id, 1 AS i, s.contract_id
FROM statement s
LEFT JOIN store r ON s.store_id = r.id
UNION ALL
-- We loop till the last merge field, output contains iteration to be able to keep the last record ( all fields updated )
SELECT replace(r.final, m.merge_field, m.user_data) as [final], m.merge_field, r.id, r.i + 1 AS i, r.contract_id
FROM cte_replace_tokens r
INNER JOIN statement_merges m ON m.statement_id = r.id
WHERE m.merge_field > r.merge_field AND r.final LIKE '%' + m.merge_field + '%'
-- spare lost replacements by forcing only one merge_field per loop
AND NOT EXISTS( SELECT mm.statement_id FROM statement_merges mm WHERE mm.statement_id = m.statement_id AND mm.merge_field > r.merge_field AND mm.merge_field < m.merge_field)
)
select s.id,
(select top 1 final from cte_replace_tokens t WHERE t.contract_id = s.contract_id AND t.id = s.id ORDER BY i DESC) as res
FROM statement s
where contract_id = 1
如果带有交叉连接的 CTE 解决方案太慢,另一种解决方案是动态构建一个标量 fn,它具有令牌 table 所需的每个 REPLACE。每个记录的一个标量 fn 调用是 order(N)。我得到了和以前一样的结果。
函数很简单,可能不会太长,具体取决于令牌的大小 table 变成...256 MB 的批量限制。我已经看到尝试动态创建查询以提高性能适得其反 - 将问题转移到编译时。这里应该不是问题。
if not object_id('tempdb..#Raw') is null drop table #Raw
CREATE TABLE #Raw(
[test] [varchar](100) NOT NULL PRIMARY KEY CLUSTERED,
)
if not object_id('tempdb..#Token') is null drop table #Token
CREATE TABLE #Token(
[id] [int] NOT NULL PRIMARY KEY CLUSTERED,
[token] [char](1) NOT NULL,
[value] [char](1) NOT NULL,
)
insert into #Raw values('123456'), ('1122334456')
insert into #Token values(1, '1', 'A'), (2, '2', 'B'), (3, '3', 'C'), (4, '4', 'D'), (5, '5', 'E'), (6, '6', 'F');
DECLARE @sql varchar(max) = 'CREATE FUNCTION dbo.fn_ReplaceTokens(@raw varchar(8000)) RETURNS varchar(8000) AS BEGIN RETURN ';
WITH cte_replace_statement AS (
SELECT a.id, CAST('replace(@raw,''' + a.token + ''',''' + a.value + ''')' as varchar(max)) as [statement]
FROM #Token a
WHERE a.id = 1
UNION ALL
SELECT n.id, CAST(replace(l.[statement], '@raw', 'replace(@raw,''' + n.token + ''',''' + n.value + ''')') as varchar(max)) as [statement]
FROM #Token n
INNER JOIN cte_replace_statement l
ON n.id = l.id + 1
)
select @sql += [statement] + ' END' from cte_replace_statement where id = 6
print @sql
if not object_id('dbo.fn_ReplaceTokens') is null drop function dbo.fn_ReplaceTokens
execute (@sql)
SELECT r.test, dbo.fn_ReplaceTokens(r.test) as [final] FROM [Raw] r
我试图找到一种方法让 SGBD 在长文本中执行大量合并字段。
创建结构:
CREATE TABLE [dbo].[store]
(
[id] [int] NOT NULL,
[text] [nvarchar](MAX) NOT NULL
)
CREATE TABLE [dbo].[statement]
(
[id] [int] NOT NULL,
[store_id] [int] NOT NULL
)
CREATE TABLE [dbo].[statement_merges]
(
[statement_id] [int] NOT NULL,
[merge_field] [nvarchar](30) NOT NULL,
[user_data] [nvarchar](MAX) NOT NULL
)
现在,创建测试值
INSERT INTO [store] (id, text)
VALUES (1, 'Waw, Whosebug is an amazing library of lost people in the IT hell, and i have the feeling that $$PERC_SAT$$ of the users found a solution, personally I asked $$ASKED$$ questions.')
INSERT INTO [statement] (id, store_id)
VALUES (1, 1)
INSERT INTO [statement_merges] (statement_id, merge_field, user_data)
VALUES (1, '$$PERC_SAT$$', '85%')
INSERT INTO [statement_merges] (statement_id, merge_field, user_data)
VALUES (1, '$$ASKED$$', '12')
目前我的应用程序正在传递最终语句,循环合并,替换存储的文本和输出
Waw, Whosebug is an amazing library of lost people in the IT hell, and i have the feeling that 85% of the users found a solution, personally I asked 12 questions.
我试图找到一种独立于代码并在单个查询中提供输出的方法,正如您所理解的那样,select 一条语句,其中存储的文本已填充了用户数据。我希望我已经清楚了。
我查看了 TRANSLATE 函数,但它看起来像是一个字符替换,所以我有两个选择:
- 我尝试一个递归函数,一个一个替换,直到计算出的文本中没有
merge_fields
;但我对这种方法的性能有疑问; - 有魔法可以做到,但我需要你的知识...
考虑到我想要这个是因为真实的文本很长,我不想在我的数据库中多次存储它。您可以想象一份只有 12 个参数的 3 页合同,例如开始日期、开票金额等...为了合规性,其他一切都无法更改。
感谢您的宝贵时间!
编辑:
感谢 Randy 的帮助,这看起来可以解决问题:
WITH cte_replace_tokens AS (
SELECT replace(r.text, m.merge_field, m.user_data) as [final], m.merge_field, s.id, 1 AS i
FROM store r
INNER JOIN statement s ON s.store_id = r.id
INNER JOIN statement_merges m ON m.statement_id = s.id
WHERE m.statement_id = 1
UNION ALL
SELECT replace(r.final, m.merge_field, m.user_data) as [final], m.merge_field, r.id, r.i + 1 AS i
FROM cte_replace_tokens r
INNER JOIN statement_merges m ON m.statement_id = r.id
WHERE m.merge_field > r.merge_field
)
select TOP 1 final from cte_replace_tokens ORDER BY i DESC
如果性能好,我会检查更大的数据库...
至少,我可以“填充”一个语句,我还需要弄清楚才能提取一个列表。
再次感谢!
不建议在 sql 引擎中执行此类任务,但如果你想这样做,你需要在函数或存储过程中使用游标循环执行,如下所示:
DECLARE @merge_field nvarchar(30)
, @user_data nvarchar(MAX)
, @statementid INT = 1
, @text varchar(MAX) = 'Waw, Whosebug is an amazing library of lost people in the IT hell, and i have the feeling that $$PERC_SAT$$ of the users found a solution, personally I asked $$ASKED$$ questions.'
DECLARE merge_statements CURSOR FAST_FORWARD
FOR SELECT
sm.merge_field
, sm.user_data
FROM dbo.statement_merges AS sm
WHERE sm.statement_id = @statementid
OPEN merge_statements
FETCH NEXT FROM merge_statements
INTO @merge_field , @user_data
WHILE @@FETCH_STATUS = 0
BEGIN
set @text = REPLACE(@text , @merge_field, @user_data )
FETCH NEXT FROM merge_statements
INTO @merge_field , @user_data
END
CLOSE merge_statements
DEALLOCATE merge_statements
SELECT @text
如果一条记录被同一次更新多次更新,最后一次获胜。 None 的更新受其他更新影响 - 无累积效应。在某些情况下,可以使用局部变量来欺骗 SQL 以获得累积效果,但这很棘手且不推荐。 (顺序变得重要并且在更新中不可靠。)
一个替代方案是 CTE 中的递归。随着每个令牌被替换,直到没有令牌为止,从先前生成一个新记录。这是一个工作示例,将 1 替换为 A,将 2 替换为 B,等等。(我想知道是否有一些棘手的 xml 也可以做到这一点。)
if not object_id('tempdb..#Raw') is null drop table #Raw
CREATE TABLE #Raw(
[test] [varchar](100) NOT NULL PRIMARY KEY CLUSTERED,
)
if not object_id('tempdb..#Token') is null drop table #Token
CREATE TABLE #Token(
[id] [int] NOT NULL PRIMARY KEY CLUSTERED,
[token] [char](1) NOT NULL,
[value] [char](1) NOT NULL,
)
insert into #Raw values('123456'), ('1122334456')
insert into #Token values(1, '1', 'A'), (2, '2', 'B'), (3, '3', 'C'), (4, '4', 'D'), (5, '5', 'E'), (6, '6', 'F');
WITH cte_replace_tokens AS (
SELECT r.test, replace(r.test, l.token, l.value) as [final], l.id
FROM [Raw] r
CROSS JOIN #Token l
WHERE l.id = 1
UNION ALL
SELECT r.test, replace(r.final, l.token, l.value) as [final], l.id
FROM cte_replace_tokens r
CROSS JOIN #Token l
WHERE l.id = r.id + 1
)
select * from cte_replace_tokens where id = 6
这是一个递归的解决方案。 SQL Fiddle
MS SQL Server 2017 架构设置:
CREATE TABLE [dbo].[store]
(
[id] [int] NOT NULL,
[text] [nvarchar](MAX) NOT NULL
)
CREATE TABLE [dbo].[statement]
(
[id] [int] NOT NULL,
[store_id] [int] NOT NULL
)
CREATE TABLE [dbo].[statement_merges]
(
[statement_id] [int] NOT NULL,
[merge_field] [nvarchar](30) NOT NULL,
[user_data] [nvarchar](MAX) NOT NULL
)
INSERT INTO store (id, text)
VALUES (1, '$$(*)$$, Whosebug...$$PERC_SAT$$...$$ASKED$$ questions.')
INSERT INTO store (id, text)
VALUES (2, 'Use The @_@')
INSERT INTO statement (id, store_id) VALUES (1, 1)
INSERT INTO statement (id, store_id) VALUES (2, 2)
INSERT INTO statement_merges (statement_id, merge_field, user_data) VALUES (1, '$$PERC_SAT$$', '85%')
INSERT INTO statement_merges (statement_id, merge_field, user_data) VALUES (1, '$$ASKED$$', '12')
INSERT INTO statement_merges (statement_id, merge_field, user_data) VALUES (1, '$$(*)$$', 'Wow')
INSERT INTO statement_merges (statement_id, merge_field, user_data) VALUES (2, ' @_@', 'Flux!')
查询 1:
;WITH Normalized AS
(
SELECT
store_id=store.id,
store.text,
sm.merge_field,
sm.user_data,
RowNumber = ROW_NUMBER() OVER(PARTITION BY store.id,sm.statement_id ORDER BY merge_field),
statement_id = st.id
FROM
store store
INNER JOIN statement st ON st.store_id = store.id
INNER JOIN statement_merges sm ON sm.statement_id = st.id
)
, Recurse AS
(
SELECT
store_id, statement_id, old_text = text, merge_field,user_data, RowNumber,
Iteration=1,
new_text = REPLACE(text, merge_field, user_data)
FROM
Normalized
WHERE
RowNumber=1
UNION ALL
SELECT
n.store_id, n.statement_id, r.old_text, n.merge_field, n.user_data,
RowNumber=r.RowNumber+1,
Iteration=Iteration+1,
new_text = REPLACE(r.new_text, n.merge_field, n.user_data)
FROM
Normalized n
INNER JOIN Recurse r ON r.RowNumber = n.RowNumber AND r.statement_id = n.statement_id
)
,ReverseOnIteration AS
(
SELECT *,
ReverseIteration = ROW_NUMBER() OVER(PARTITION BY statement_id ORDER BY Iteration DESC)
FROM
Recurse
)
SELECT
store_id, statement_id, new_text, old_text
FROM
ReverseOnIteration
WHERE
ReverseIteration=1
| store_id | statement_id | new_text | old_text |
|----------|--------------|------------------------------------------|--------------------------------------------------------------|
| 1 | 1 | Wow, Whosebug...85%...12 questions. | $$(*)$$, Whosebug...$$PERC_SAT$$...$$ASKED$$ questions. |
| 2 | 2 | Use TheFlux! | Use The @_@ |
在Randy的帮助下,我想我已经实现了我想做的事情!
据了解,我的真实案例是一份合同,其中有几条语句可能是:
- 自由文本
- 没有任何合并的存储文本
- 存储的文本带有一个或 几次合并
这个 CTE 可以胜任!
WITH cte_replace_tokens AS (
-- The initial query dont join on merges neither on store because can be a free text
SELECT COALESCE(r.text, s.part_text) AS [final], CAST('' AS NVARCHAR) AS merge_field, s.id, 1 AS i, s.contract_id
FROM statement s
LEFT JOIN store r ON s.store_id = r.id
UNION ALL
-- We loop till the last merge field, output contains iteration to be able to keep the last record ( all fields updated )
SELECT replace(r.final, m.merge_field, m.user_data) as [final], m.merge_field, r.id, r.i + 1 AS i, r.contract_id
FROM cte_replace_tokens r
INNER JOIN statement_merges m ON m.statement_id = r.id
WHERE m.merge_field > r.merge_field AND r.final LIKE '%' + m.merge_field + '%'
-- spare lost replacements by forcing only one merge_field per loop
AND NOT EXISTS( SELECT mm.statement_id FROM statement_merges mm WHERE mm.statement_id = m.statement_id AND mm.merge_field > r.merge_field AND mm.merge_field < m.merge_field)
)
select s.id,
(select top 1 final from cte_replace_tokens t WHERE t.contract_id = s.contract_id AND t.id = s.id ORDER BY i DESC) as res
FROM statement s
where contract_id = 1
如果带有交叉连接的 CTE 解决方案太慢,另一种解决方案是动态构建一个标量 fn,它具有令牌 table 所需的每个 REPLACE。每个记录的一个标量 fn 调用是 order(N)。我得到了和以前一样的结果。
函数很简单,可能不会太长,具体取决于令牌的大小 table 变成...256 MB 的批量限制。我已经看到尝试动态创建查询以提高性能适得其反 - 将问题转移到编译时。这里应该不是问题。
if not object_id('tempdb..#Raw') is null drop table #Raw
CREATE TABLE #Raw(
[test] [varchar](100) NOT NULL PRIMARY KEY CLUSTERED,
)
if not object_id('tempdb..#Token') is null drop table #Token
CREATE TABLE #Token(
[id] [int] NOT NULL PRIMARY KEY CLUSTERED,
[token] [char](1) NOT NULL,
[value] [char](1) NOT NULL,
)
insert into #Raw values('123456'), ('1122334456')
insert into #Token values(1, '1', 'A'), (2, '2', 'B'), (3, '3', 'C'), (4, '4', 'D'), (5, '5', 'E'), (6, '6', 'F');
DECLARE @sql varchar(max) = 'CREATE FUNCTION dbo.fn_ReplaceTokens(@raw varchar(8000)) RETURNS varchar(8000) AS BEGIN RETURN ';
WITH cte_replace_statement AS (
SELECT a.id, CAST('replace(@raw,''' + a.token + ''',''' + a.value + ''')' as varchar(max)) as [statement]
FROM #Token a
WHERE a.id = 1
UNION ALL
SELECT n.id, CAST(replace(l.[statement], '@raw', 'replace(@raw,''' + n.token + ''',''' + n.value + ''')') as varchar(max)) as [statement]
FROM #Token n
INNER JOIN cte_replace_statement l
ON n.id = l.id + 1
)
select @sql += [statement] + ' END' from cte_replace_statement where id = 6
print @sql
if not object_id('dbo.fn_ReplaceTokens') is null drop function dbo.fn_ReplaceTokens
execute (@sql)
SELECT r.test, dbo.fn_ReplaceTokens(r.test) as [final] FROM [Raw] r