如何在 SQL Server 中使用查找表替换列中的子字符串

How to replace substring in a column using lookup table in sql server

我有一个表,其中有两列的数据包含代码点,例如 <U+9876>。这些代码点需要替换为对应的日文字符。我有一个把这些代码点映射到日文字符的查找表。但问题是,在这两列中,单行数据里可能包含多个代码点。

主要table:-

Id    body                                      subject
1    <U+9876> Hi <U+1234>No <U+6543>           <U+9876> Hi <U+1234>No <U+6543>
2    <U+9826> <U+5678><U+FA32> data            <U+9006> <U+6502>

查找table:-

char     value
<U+9876>  だ
<U+9826>  づ

我尝试过在内部联接中使用 LIKE 运算符来编写更新查询,但它花费了很长时间,因为主表中有 14K 行,而查找表中有 6K 个值。

您可以创建一个自定义函数,循环遍历替换其代码点的任何文本。

-- Replaces every "<...>" code found in @STRING with the value registered for it
-- in MYLOOKUPTABLE ([CHAR] = code, [VALUE] = replacement text).
-- Codes that have no row in the lookup table are left in the text unchanged.
-- NOTE: the parameter and the result are nvarchar(1000); longer input is silently
-- truncated by SQL Server before the function body runs.
CREATE FUNCTION DecodeString( @STRING nvarchar(1000) )
RETURNS nvarchar(1000)
AS
BEGIN
  DECLARE @POS int              -- position of the '<' currently being examined
  DECLARE @END int              -- position of the matching '>' after @POS
  DECLARE @CODE nvarchar(20)    -- candidate code, e.g. '<U+9876>'
  DECLARE @VALUE nvarchar(1000) -- replacement text found for @CODE (NULL = none)

  SET @POS = CHARINDEX('<', @STRING);
  WHILE @POS > 0 BEGIN
    -- Search for the closing bracket starting at @POS, so a stray '>' earlier
    -- in the text cannot produce a wrong (or negative) length.
    SET @END = CHARINDEX('>', @STRING, @POS);
    IF @END = 0 BREAK; -- unterminated '<': nothing more to decode

    SET @CODE = SUBSTRING(@STRING, @POS, @END - @POS + 1);

    SET @VALUE = NULL;
    SELECT @VALUE = [VALUE]
    FROM MYLOOKUPTABLE
    WHERE [CHAR] = @CODE;

    IF @VALUE IS NULL
      -- Unknown code: skip this '<' instead of finding it again forever.
      SET @POS = CHARINDEX('<', @STRING, @POS + 1);
    ELSE BEGIN
      SET @STRING = STUFF(@STRING, @POS, @END - @POS + 1, @VALUE);
      -- Resume after the inserted text so replacement values are never re-scanned.
      -- DATALENGTH / 2 is used because LEN ignores trailing spaces.
      SET @POS = CHARINDEX('<', @STRING, @POS + DATALENGTH(@VALUE) / 2);
    END
  END

  RETURN @STRING;
END
GO

现在您可以使用该函数来获取或更新结果字符串,它只会在每个字符串上查找所需的键。

-- Scalar user-defined functions must be invoked with at least a two-part
-- (schema-qualified) name. Assumes the function was created in the default
-- dbo schema; adjust the prefix if it lives elsewhere.
SELECT Body, dbo.DecodeString(Body) as JapaneseBody,
       Subject, dbo.DecodeString(Subject) as JapaneseSubject
FROM MYTABLE

请记住在查找表的 "char" 列上建立索引,这样这些查找才会是最优的。

如果性能真的很重要,您需要提前将数据物化。这可以通过创建单独的表并使用触发器,或者修改填充原始表的例程来完成。如果您的记录不是成批插入/更新的,这样做不会明显影响 CRUD 的执行时间。

您也可以很容易地写一条简短的 T-SQL 语句,用来生成执行 6K 次更新的动态代码,不妨试一试:不要使用 LIKE 或复杂条件,每个查找值只需要一条简单的 UPDATE ... REPLACE 语句。

在某些情况下,我使用 SQL CLR 函数进行此类替换。例如:

-- Demo: decode every code in [body] and [subject] with a single UPDATE by
-- handing the whole lookup list to SQL CLR helpers.

-- Sample of the table whose text columns contain the codes.
DECLARE @Main TABLE
(
    [id] INT -- INT rather than TINYINT: the real table holds far more than 255 rows
   ,[body] NVARCHAR(MAX)
   ,[subject] NVARCHAR(MAX)
);

-- Sample of the code -> character lookup table.
DECLARE @Lookup TABLE
(
    [id] INT -- you can use row_number to order; INT because the real lookup has thousands of rows
   ,[char] NVARCHAR(32)
   ,[value] NVARCHAR(32)
);

INSERT INTO @Main ([id], [body], [subject])
VALUES (1, '<U+9876> Hi <U+1234>No <U+6543>', '<U+9876> Hi <U+1234>No <U+6543>')
      ,(2, '<U+9826> <U+5678><U+FA32> data', '<U+9006> <U+6502>');

INSERT INTO @Lookup ([id], [char], [value])
VALUES (1, '<U+9876>', N'だ')
      ,(2, '<U+9826>', N'づ');

DECLARE @Pattern NVARCHAR(MAX)
       ,@Replacement NVARCHAR(MAX);

-- Build two '|'-delimited lists. Both are ordered by the same [id], so the
-- n-th pattern lines up with the n-th replacement.
SELECT @Pattern = [dbo].[ConcatenateWithOrderAndDelimiter] ([id], [char], '|')
      ,@Replacement = [dbo].[ConcatenateWithOrderAndDelimiter] ([id], [value], '|')
FROM @Lookup;


-- One pass over the table: each column value is decoded by the CLR function.
UPDATE @Main
SET [body] = [dbo].[fn_Utils_ReplaceStrings] ([body], @Pattern, @Replacement, '|')
   ,[subject] = [dbo].[fn_Utils_ReplaceStrings] ([subject], @Pattern, @Replacement, '|');

 SELECT [id]
       ,[body]
       ,[subject]
 FROM @Main;

我正在向您展示下面的代码,但这只是一个想法。你可以自由地自己实现一些东西,以满足你的性能要求。

在这里可以看到 SQL CLR 函数是如何创建的。下面是聚合函数的一种变体,它按指定顺序进行连接:

/// <summary>
/// SQL CLR user-defined aggregate that concatenates text values into one string.
/// Each row supplies an integer position (used to order the values), the text itself
/// and a delimiter. The delimiter is taken from the first row seen and falls back
/// to "," when that row's delimiter is NULL.
/// </summary>
[Serializable]
[
    Microsoft.SqlServer.Server.SqlUserDefinedAggregate
    (
        Microsoft.SqlServer.Server.Format.UserDefined,
        IsInvariantToNulls = true,
        IsInvariantToDuplicates = false,
        IsInvariantToOrder = false,
        IsNullIfEmpty = false,
        MaxByteSize = -1
    )
]
public class ConcatenateWithOrderAndDelimiter : Microsoft.SqlServer.Server.IBinarySerialize
{
    private const string DefaultDelimiter = ",";

    // (position, text) pairs collected so far; sorted only when the result is produced.
    private List<Tuple<int, string>> intermediateResult;
    private string delimiter;
    // True until a delimiter has been captured from an accumulated row or a merged instance.
    private bool isDelimiterNotDefined;

    /// <summary>
    /// Resets the aggregate to its empty state.
    /// </summary>
    public void Init()
    {
        this.delimiter = DefaultDelimiter;
        this.isDelimiterNotDefined = true;
        this.intermediateResult = new List<Tuple<int, string>>();
    }

    /// <summary>
    /// Adds one row to the aggregate.
    /// </summary>
    /// <param name="position">Sort key of the value; rows with a NULL position are ignored.</param>
    /// <param name="text">Value to concatenate; rows with a NULL text are ignored.</param>
    /// <param name="delimiter">Delimiter to join with; only the first row's delimiter is used.</param>
    public void Accumulate(SqlInt32 position, SqlString text, SqlString delimiter)
    {
        if (this.isDelimiterNotDefined)
        {
            this.delimiter = delimiter.IsNull ? DefaultDelimiter : delimiter.Value;
            this.isDelimiterNotDefined = false;
        }

        if (!(position.IsNull || text.IsNull))
        {
            this.intermediateResult.Add(new Tuple<int, string>(position.Value, text.Value));
        }
    }

    /// <summary>
    /// Folds the partial result of another instance into this one.
    /// </summary>
    /// <param name="other">Instance whose collected values are appended; ignored when null.</param>
    public void Merge(ConcatenateWithOrderAndDelimiter other)
    {
        if (other == null)
        {
            return;
        }

        // If this instance never saw a row it still carries the default delimiter;
        // adopt the one captured by the other instance so it is not lost.
        if (this.isDelimiterNotDefined && !other.isDelimiterNotDefined)
        {
            this.delimiter = other.delimiter;
            this.isDelimiterNotDefined = false;
        }

        this.intermediateResult.AddRange(other.intermediateResult);
    }

    /// <summary>
    /// Produces the final string: values ordered by position (ties ordered by text),
    /// joined with the delimiter.
    /// </summary>
    /// <returns>The concatenated text; an empty string when nothing was accumulated.</returns>
    public SqlString Terminate()
    {
        this.intermediateResult.Sort();
        return new SqlString(String.Join(this.delimiter, this.intermediateResult.Select(tuple => tuple.Item2)));
    }

    /// <summary>
    /// Restores the aggregate state previously produced by <see cref="Write"/>.
    /// </summary>
    /// <param name="r">Reader positioned at the start of the serialized state.</param>
    /// <exception cref="ArgumentNullException"><paramref name="r"/> is null.</exception>
    public void Read(BinaryReader r)
    {
        if (r == null) throw new ArgumentNullException("r");

        int count = r.ReadInt32();
        this.intermediateResult = new List<Tuple<int, string>>(count);

        for (int i = 0; i < count; i++)
        {
            this.intermediateResult.Add(new Tuple<int, string>(r.ReadInt32(), r.ReadString()));
        }

        this.delimiter = r.ReadString();
        // Without this flag an instance serialized before its first row would come back
        // believing the delimiter was already chosen and ignore the caller's delimiter.
        this.isDelimiterNotDefined = r.ReadBoolean();
    }

    /// <summary>
    /// Serializes the aggregate state: value count, the (position, text) pairs,
    /// the delimiter and whether the delimiter is still undefined.
    /// </summary>
    /// <param name="w">Writer that receives the serialized state.</param>
    /// <exception cref="ArgumentNullException"><paramref name="w"/> is null.</exception>
    public void Write(BinaryWriter w)
    {
        if (w == null) throw new ArgumentNullException("w");

        w.Write(this.intermediateResult.Count);
        foreach (Tuple<int, string> record in this.intermediateResult)
        {
            w.Write(record.Item1);
            w.Write(record.Item2);
        }
        w.Write(this.delimiter);
        w.Write(this.isDelimiterNotDefined);
    }
}

这是执行替换的函数的一种变体:

/// <summary>
/// Replaces one or more literal substrings in <paramref name="input"/>.
/// Without a separator, <paramref name="pattern"/> is replaced by <paramref name="replacement"/>.
/// With a separator, both are split into lists and the n-th pattern is replaced by the
/// n-th replacement, one after another in list order.
/// </summary>
/// <param name="input">Text to process.</param>
/// <param name="pattern">Substring to find, or a separator-delimited list of substrings.</param>
/// <param name="replacement">Replacement text, or a separator-delimited list of replacements.</param>
/// <param name="separator">List separator; NULL or empty means single-pattern mode.</param>
/// <returns>
/// The processed text, or NULL when <paramref name="input"/>, <paramref name="pattern"/>
/// or <paramref name="replacement"/> is NULL.
/// </returns>
[SqlFunction(DataAccess = DataAccessKind.None, IsDeterministic = true)]
public static SqlString ReplaceStrings( SqlString input, SqlString pattern, SqlString replacement, SqlString separator ){
    if( input.IsNull || pattern.IsNull || replacement.IsNull ){
        return SqlString.Null;
    }

    StringBuilder tempBuilder = new StringBuilder( input.Value );

    if( separator.IsNull || String.IsNullOrEmpty( separator.Value ) ){
        // StringBuilder.Replace throws on an empty search string; nothing to replace then.
        if( pattern.Value.Length > 0 ){
            tempBuilder.Replace( pattern.Value, replacement.Value );
        }
    }
    else{
        string[] separators = new[]{ separator.Value };
        string[] vals = pattern.Value.Split( separators, StringSplitOptions.None ),
            newVals = replacement.Value.Split( separators, StringSplitOptions.None );

        // The lists are expected to have the same number of elements. A pattern without a
        // matching replacement is left untouched instead of failing with an index error.
        int count = Math.Min( vals.Length, newVals.Length );

        for( int index = 0; index < count; index++ ){
            // Empty entries (e.g. from a doubled separator) cannot be searched for.
            if( vals[ index ].Length == 0 ){
                continue;
            }

            tempBuilder.Replace( vals[ index ], newVals[ index ] );
        }
    }

    return new SqlString( tempBuilder.ToString() );
}

或这个但使用正则表达式:

/// <summary>
/// Replaces every occurrence of any of the given literal patterns in a single pass over
/// the input, using one regular expression built from the escaped patterns.
/// Because the input is scanned once, text produced by a replacement is never replaced again.
/// </summary>
/// <param name="sqlInput">Text to process.</param>
/// <param name="sqlPattern">Separator-delimited list of literal substrings to find.</param>
/// <param name="sqlReplacement">Separator-delimited list of replacements, matched to the patterns by index.</param>
/// <param name="sqlSeparator">Separator used in both lists.</param>
/// <returns>
/// The processed text. If any argument is NULL no replacement is performed and
/// <paramref name="sqlInput"/> is returned as is (NULL when the input itself is NULL).
/// </returns>
[SqlFunction(DataAccess = DataAccessKind.None, IsDeterministic = true, Name = "RegexReplaceStrings")]
public static SqlString ReplaceStrings(SqlString sqlInput, SqlString sqlPattern, SqlString sqlReplacement, SqlString sqlSeparator)
{
    // If any of the input parameters is NULL no replacement is performed at all.
    // Returning sqlInput directly avoids reading .Value of a NULL SqlString, which throws.
    if (sqlInput.IsNull || sqlPattern.IsNull || sqlReplacement.IsNull || sqlSeparator.IsNull)
    {
        return sqlInput;
    }

    string[] separators = new string[] { sqlSeparator.Value };
    string[] patterns = sqlPattern.Value.Split(separators, StringSplitOptions.None);
    string[] replacements = sqlReplacement.Value.Split(separators, StringSplitOptions.None);

    var map = new Dictionary<string, string>();

    // A pattern with no corresponding entry in "replacements" maps to itself, so a match
    // of that pattern leaves the input unchanged. Empty patterns are skipped: an empty
    // alternative would match at every position of the input.
    for (int index = 0; index < patterns.Length; index++)
    {
        if (patterns[index].Length == 0)
        {
            continue;
        }

        map[patterns[index]] = index < replacements.Length ? replacements[index] : patterns[index];
    }

    if (map.Count == 0)
    {
        return sqlInput;
    }

    // Longest pattern first so that it wins over a shorter pattern that is its prefix.
    // Regex.Escape makes every pattern literal; e.g. the '+' in "<U+9876>" must not act
    // as a quantifier.
    string alternation = String.Join("|", map.Keys
        .OrderByDescending(key => key.Length)
        .Select(key => Regex.Escape(key))
        .ToArray());

    string returnValue = Regex.Replace(sqlInput.Value, alternation, match =>
    {
        string currentValue;

        if (!map.TryGetValue(match.Value, out currentValue))
        {
            currentValue = match.Value;
        }

        return currentValue;
    });

    return new SqlString(returnValue);
}