使用 SQL 根据固定字符长度将文本拆分为多行块
Split text into multiline chunks based on fixed character length using SQL
我正在寻找 SQL 接受一串文本(最多 3 行,但通常为 1 行)并将其分成最多 3 行,每行最多 30 个字符。 (背景是我试图将文本插入地址行字段,有时传入的值超过 30 个字符,需要拆分成更小的块并向下移动到另一行)。
我什至不知道从哪里开始。由于它是地址文本,因此存在相当大的差异,理想情况下,如果可能的话,它不应该在单词中间拆分。这将在 SQL Server 2012 中。有什么建议吗?
这应该使用递归 CTE 完成您想要的:
CREATE PROCEDURE REDISTRIBUTE_TEXT
@IN_LINE_1 VARCHAR(200)
,@IN_LINE_2 VARCHAR(200)
,@IN_LINE_3 VARCHAR(200)
,@MAX_LINE_LENGTH INTEGER
,@OUT_LINE_1 VARCHAR(200) OUT
,@OUT_LINE_2 VARCHAR(200) OUT
,@OUT_LINE_3 VARCHAR(200) OUT
AS
BEGIN
SET NOCOUNT ON;
DECLARE @s VARCHAR(1000);
SET @s = @IN_LINE_1 + ' ' + @IN_LINE_2 + ' ' + @IN_LINE_3;
WITH CHAR_ARRAY AS (
SELECT v.number + 1 Char_ID
,SUBSTRING(a.b, v.number+1, 1) Char_Value
FROM (SELECT @s b) a
JOIN master..spt_values v
ON v.number < len(a.b)
WHERE v.type = 'P'
)
, WORD_ARRAY AS (
SELECT S.ID AS Word_ID
,S.Value AS Word_Value
,LEN(S.VALUE) AS Word_Length
FROM dbo.String_Split_1(@s, ' ') S -- This could be replaced with built-in STRING_SPLIT function in MSSQL 2016+
)
, WORD_EXT AS (
SELECT x.*
,(
SELECT SUM(y.Word_Length) + COUNT(*)
FROM WORD_ARRAY y
WHERE y.Word_ID <= x.Word_ID
) AS RunningLength
FROM WORD_ARRAY x
)
, CHAR_EXT AS (
SELECT C.*
,W2.Word_ID AS Next_Word_ID
,W2.Word_Length AS Next_Word_Length
FROM (
SELECT C.*
,W.*
,ROW_NUMBER() OVER (PARTITION BY C.Char_ID ORDER BY RunningLength ASC) RN
FROM CHAR_ARRAY C
CROSS JOIN WORD_EXT W
WHERE C.Char_ID <= W.RunningLength
) C
LEFT JOIN WORD_ARRAY W2
ON W2.Word_ID = C.Word_ID + 1
WHERE RN = 1
)
, RECUR (CURR_CHAR_ID, START_WORD_ID, CURR_WORD_ID, LINE_NUM, LINE_TEXT, LINE_LENGTH) AS (
SELECT C.Char_ID
,W.Word_ID
,W.Word_ID
,1
,CAST(C.Char_Value AS VARCHAR)
,LEN(C.Char_Value)
FROM CHAR_EXT C
JOIN WORD_EXT W
ON C.Word_ID = W.Word_ID
WHERE C.Char_ID = 1
UNION ALL
SELECT CHAR_ID CURR_CHAR_ID
,CASE WHEN WRAP_END_WORD = 1 THEN Next_Word_ID ELSE START_WORD_ID END START_WORD_ID
,CASE WHEN WRAP_END_WORD = 1 THEN Next_Word_ID ELSE Word_ID END CURR_WORD_ID
,CASE WHEN WRAP_END_WORD = 1 OR WRAP_MID_WORD = 1 THEN LINE_NUM + 1 ELSE LINE_NUM END LINE_NUM
,CAST(
CASE WHEN WRAP_END_WORD = 1 OR WRAP_MID_WORD = 1
THEN (CASE WHEN C.Char_Value = ' ' THEN '' ELSE C.Char_Value END )
ELSE LINE_TEXT + C.Char_Value
END AS VARCHAR) LINE_TEXT
,CASE WHEN WRAP_END_WORD = 1 OR WRAP_MID_WORD = 1 THEN 1 ELSE LINE_LENGTH + 1 END LINE_LENGTH
FROM (
SELECT R.*
,C.*
,CASE WHEN R.START_WORD_ID = C.WORD_ID AND LINE_LENGTH > @MAX_LINE_LENGTH
THEN 1 ELSE 0
END AS WRAP_MID_WORD
,CASE WHEN C.Char_Value = ' ' AND (
LEN(R.LINE_TEXT) + 1 + C.Next_Word_Length > @MAX_LINE_LENGTH
)
THEN 1 ELSE 0
END AS WRAP_END_WORD
FROM RECUR R
JOIN CHAR_EXT C
ON C.Char_ID = R.CURR_CHAR_ID + 1
) C
)
SELECT *
INTO #LINES
FROM (
SELECT *
,ROW_NUMBER() OVER (PARTITION BY LINE_NUM ORDER BY LEN(LINE_TEXT) DESC) RN
FROM RECUR R
) R
WHERE RN = 1
;
SELECT @OUT_LINE_1 = LINE_TEXT FROM #LINES WHERE LINE_NUM = 1;
SELECT @OUT_LINE_2 = LINE_TEXT FROM #LINES WHERE LINE_NUM = 2;
SELECT @OUT_LINE_3 = LINE_TEXT FROM #LINES WHERE LINE_NUM = 3;
DROP TABLE #LINES;
END
GO
CREATE Function [String_Split_1]( @string varchar(4000), @delimeter varchar(10))
Returns
@Result Table (ID int identity(1,1) not null, value varchar(100))
As
Begin
declare @len int, @loc int = 1
While @loc <= len(@string)
Begin
Set @len = CHARINDEX(@delimeter, @string, @loc) - @loc
If @Len < 0 Set @Len = len(@string)
Insert Into @Result Values (SUBSTRING(@string,@loc,@len))
Set @loc = @loc + @len + 1
End
Return
End
我正在寻找 SQL 接受一串文本(最多 3 行,但通常为 1 行)并将其分成最多 3 行,每行最多 30 个字符。 (背景是我试图将文本插入地址行字段,有时传入的值超过 30 个字符,需要拆分成更小的块并向下移动到另一行)。
我什至不知道从哪里开始。由于它是地址文本,因此存在相当大的差异,理想情况下,如果可能的话,它不应该在单词中间拆分。这将在 SQL Server 2012 中。有什么建议吗?
这应该使用递归 CTE 完成您想要的:
CREATE PROCEDURE REDISTRIBUTE_TEXT
@IN_LINE_1 VARCHAR(200)
,@IN_LINE_2 VARCHAR(200)
,@IN_LINE_3 VARCHAR(200)
,@MAX_LINE_LENGTH INTEGER
,@OUT_LINE_1 VARCHAR(200) OUT
,@OUT_LINE_2 VARCHAR(200) OUT
,@OUT_LINE_3 VARCHAR(200) OUT
AS
BEGIN
SET NOCOUNT ON;
DECLARE @s VARCHAR(1000);
SET @s = @IN_LINE_1 + ' ' + @IN_LINE_2 + ' ' + @IN_LINE_3;
WITH CHAR_ARRAY AS (
SELECT v.number + 1 Char_ID
,SUBSTRING(a.b, v.number+1, 1) Char_Value
FROM (SELECT @s b) a
JOIN master..spt_values v
ON v.number < len(a.b)
WHERE v.type = 'P'
)
, WORD_ARRAY AS (
SELECT S.ID AS Word_ID
,S.Value AS Word_Value
,LEN(S.VALUE) AS Word_Length
FROM dbo.String_Split_1(@s, ' ') S -- This could be replaced with built-in STRING_SPLIT function in MSSQL 2016+
)
, WORD_EXT AS (
SELECT x.*
,(
SELECT SUM(y.Word_Length) + COUNT(*)
FROM WORD_ARRAY y
WHERE y.Word_ID <= x.Word_ID
) AS RunningLength
FROM WORD_ARRAY x
)
, CHAR_EXT AS (
SELECT C.*
,W2.Word_ID AS Next_Word_ID
,W2.Word_Length AS Next_Word_Length
FROM (
SELECT C.*
,W.*
,ROW_NUMBER() OVER (PARTITION BY C.Char_ID ORDER BY RunningLength ASC) RN
FROM CHAR_ARRAY C
CROSS JOIN WORD_EXT W
WHERE C.Char_ID <= W.RunningLength
) C
LEFT JOIN WORD_ARRAY W2
ON W2.Word_ID = C.Word_ID + 1
WHERE RN = 1
)
, RECUR (CURR_CHAR_ID, START_WORD_ID, CURR_WORD_ID, LINE_NUM, LINE_TEXT, LINE_LENGTH) AS (
SELECT C.Char_ID
,W.Word_ID
,W.Word_ID
,1
,CAST(C.Char_Value AS VARCHAR)
,LEN(C.Char_Value)
FROM CHAR_EXT C
JOIN WORD_EXT W
ON C.Word_ID = W.Word_ID
WHERE C.Char_ID = 1
UNION ALL
SELECT CHAR_ID CURR_CHAR_ID
,CASE WHEN WRAP_END_WORD = 1 THEN Next_Word_ID ELSE START_WORD_ID END START_WORD_ID
,CASE WHEN WRAP_END_WORD = 1 THEN Next_Word_ID ELSE Word_ID END CURR_WORD_ID
,CASE WHEN WRAP_END_WORD = 1 OR WRAP_MID_WORD = 1 THEN LINE_NUM + 1 ELSE LINE_NUM END LINE_NUM
,CAST(
CASE WHEN WRAP_END_WORD = 1 OR WRAP_MID_WORD = 1
THEN (CASE WHEN C.Char_Value = ' ' THEN '' ELSE C.Char_Value END )
ELSE LINE_TEXT + C.Char_Value
END AS VARCHAR) LINE_TEXT
,CASE WHEN WRAP_END_WORD = 1 OR WRAP_MID_WORD = 1 THEN 1 ELSE LINE_LENGTH + 1 END LINE_LENGTH
FROM (
SELECT R.*
,C.*
,CASE WHEN R.START_WORD_ID = C.WORD_ID AND LINE_LENGTH > @MAX_LINE_LENGTH
THEN 1 ELSE 0
END AS WRAP_MID_WORD
,CASE WHEN C.Char_Value = ' ' AND (
LEN(R.LINE_TEXT) + 1 + C.Next_Word_Length > @MAX_LINE_LENGTH
)
THEN 1 ELSE 0
END AS WRAP_END_WORD
FROM RECUR R
JOIN CHAR_EXT C
ON C.Char_ID = R.CURR_CHAR_ID + 1
) C
)
SELECT *
INTO #LINES
FROM (
SELECT *
,ROW_NUMBER() OVER (PARTITION BY LINE_NUM ORDER BY LEN(LINE_TEXT) DESC) RN
FROM RECUR R
) R
WHERE RN = 1
;
SELECT @OUT_LINE_1 = LINE_TEXT FROM #LINES WHERE LINE_NUM = 1;
SELECT @OUT_LINE_2 = LINE_TEXT FROM #LINES WHERE LINE_NUM = 2;
SELECT @OUT_LINE_3 = LINE_TEXT FROM #LINES WHERE LINE_NUM = 3;
DROP TABLE #LINES;
END
GO
CREATE Function [String_Split_1]( @string varchar(4000), @delimeter varchar(10))
Returns
@Result Table (ID int identity(1,1) not null, value varchar(100))
As
Begin
declare @len int, @loc int = 1
While @loc <= len(@string)
Begin
Set @len = CHARINDEX(@delimeter, @string, @loc) - @loc
If @Len < 0 Set @Len = len(@string)
Insert Into @Result Values (SUBSTRING(@string,@loc,@len))
Set @loc = @loc + @len + 1
End
Return
End