区分大小写和不区分大小写的字典

Dictionary both case sensitive and insensitive

我需要一个像 Dictionary<string,T> 这样的数据结构,我可以在其中进行区分大小写和不区分大小写的搜索。

我希望通过使用区分大小写或不区分大小写的 StringComparer 迭代 foreach 来改进 List<Tuple<string,T>> 可以获得的 O(n) 时间。

这是我希望最终用户在 Search 方法调用时 select 区分大小写的库。 (否则我可以在 class 构造函数中创建一个具有敏感性 on/off 的不同字典)

有什么想法吗?

您可以只使用普通字典,但定义一个扩展方法来执行不区分大小写的搜索:

static class ExtensionMethods
{
    static public T GetValue<T>(this Dictionary<string,T> source, string key, bool caseSensitive)
    {
        if (caseSensitive) return source[key];
        key = source.Keys.FirstOrDefault( k => String.Compare(key, k, StringComparison.CurrentCultureIgnoreCase) == 0);
        if (key == null) throw new KeyNotFoundException();
        return source[key];
    }
}

或者,如果您确实需要,可以将字典子类化并使上面的内容成为适当的实例成员。

您绝对不会绕过编写自己的字典(派生词)。第一个值是一个键。因此,它仅适用于 exact 匹配,而不是不区分大小写的匹配。其实更糟的是:

我最近了解到字典是也是我们的通用哈希表。它使用 Hashtable 方法(为每个键和输入获取哈希并首先比较那个)来加速比较,尤其是在字符串之类的东西上。因此,在查找密钥时,它会通过密钥集合和:

  1. 比较哈希值。如果不匹配,这就不是关键。
  2. 如果匹配,则对类型进行完整比较。哈希冲突是一回事,因此哈希只能用于早期 "not a match" 过滤。

您的要求有点打破了这一点。绝地。由于散列,当它应该匹配时,你实际上会以不匹配结束。

第一个解决方案 将停止尝试在代码中执行此操作,而是转到适当的 DBMS。他们倾向于支持您可能想到的所有奇怪的比较。有很多方法可以加快它们的速度,比如索引。那里应该有一个进程内数据库。但很少有人愿意走那条路。

我能想到的第二种解决方案是尝试重写Dictionary,尽可能少地工作。一些想法:

  • 密钥应该只以大写或小写形式内部存储,而不是用户输入的内容。我将采用小写字母,因为这对我来说似乎很直观,只需调用 .toLower() 即可。
  • 您需要存储完整的密钥、大小写和所有内容。为简单起见,我会在值中添加一个 htat 字段,假设您真的确定没有人会修改那个。
  • 查找键时,首先使用输入的小写版本的内置匹配。然后(如果需要)在报告 match/not 匹配之前还要检查原始密钥。

你基本上是在我上面的列表中添加了第 3 步:

  1. 如果输入的小写匹配(小写)键并且需要区分大小写,现在检查存储的大小写键与大小写输入

希望只修改添加和查找例程。像 remove should 这样的东西使用 find 函数首先找到元素。这有点老套。理想情况下,您希望向用户隐藏您如何执行此操作的内部结构,因此大小写键列表应该是私有的。当然,这意味着必须接触更多代码。

经过进一步思考并阅读评论,我认为最好的实现是使用新的不区分大小写的属性和方法来扩展看似区分大小写的 Dictionary。由于实现基于不区分大小写的 Dictionary 持有区分大小写的子词典,并且 C# 没有私有继承,因此最好只实现一个新的 Dictionary 包装器。

public class CaseDictionary<TValue> : IDictionary<string, TValue>, IDictionary, IReadOnlyDictionary<string, TValue> {
    #region Members
    Dictionary<string, Dictionary<string, TValue>> CIDict;
    #endregion

    #region Constructors
    public CaseDictionary() {
        CIDict = new Dictionary<string, Dictionary<string, TValue>>(StringComparer.OrdinalIgnoreCase);
    }

    public CaseDictionary(int init) {
        CIDict = new Dictionary<string, Dictionary<string, TValue>>(init, StringComparer.OrdinalIgnoreCase);
    }

    public CaseDictionary(IDictionary<string, TValue> init)
        : this(init != null ? init.Count : 0) {
        foreach (var kvp in init)
            Add(kvp.Key, kvp.Value);
    }
    #endregion

    #region Properties
    public ICollection<string> Keys => CIDict.Values.SelectMany(v => v.Keys).ToList();
    public ICollection<TValue> Values => CIDict.Values.SelectMany(v => v.Values).ToList();
    public int Count => CIDict.Values.Select(v => v.Count).Sum();

    public TValue this[string aKey]
    {
        get
        {
            if (CIDict.TryGetValue(aKey, out var possibles) && possibles.TryGetValue(aKey, out var theValue))
                return theValue;
            throw new KeyNotFoundException();
        }
        set
        {
            if (CIDict.TryGetValue(aKey, out var possibles)) {
                if (possibles.ContainsKey(aKey))
                    possibles[aKey] = value;
                else
                    possibles.Add(aKey, value);
            }
            else
                CIDict.Add(aKey, new Dictionary<string, TValue>() { { aKey, value } });
        }
    }
    #endregion

    #region Methods
    public void Add(string aKey, TValue aValue) {
        if (CIDict.TryGetValue(aKey, out var values))
            values.Add(aKey, aValue);
        else
            CIDict.Add(aKey, new Dictionary<string, TValue>() { { aKey, aValue } });
    }

    public bool ContainsKey(string aKey) {
        if (CIDict.TryGetValue(aKey, out var possibles))
            return possibles.ContainsKey(aKey);
        else
            return false;
    }

    public bool Remove(string aKey) {
        if (CIDict.TryGetValue(aKey, out var possibles))
            return possibles.Remove(aKey);
        else
            return false;
    }

    public bool TryGetValue(string aKey, out TValue theValue) {
        if (CIDict.TryGetValue(aKey, out var possibles))
            return possibles.TryGetValue(aKey, out theValue);
        else {
            theValue = default(TValue);
            return false;
        }
    }
    #endregion

    #region ICollection<KeyValuePair<,>> Properties and Methods
    bool ICollection<KeyValuePair<string, TValue>>.IsReadOnly => false;

    void ICollection<KeyValuePair<string, TValue>>.Add(KeyValuePair<string, TValue> item) => Add(item.Key, item.Value);
    public void Clear() => CIDict.Clear();

    bool ICollection<KeyValuePair<string, TValue>>.Contains(KeyValuePair<string, TValue> item) {
        if (CIDict.TryGetValue(item.Key, out var possibles))
            return ((ICollection<KeyValuePair<string, TValue>>)possibles).Contains(item);
        else
            return false;
    }

    bool ICollection<KeyValuePair<string, TValue>>.Remove(KeyValuePair<string, TValue> item) {
        if (CIDict.TryGetValue(item.Key, out var possibles))
            return ((ICollection<KeyValuePair<string, TValue>>)possibles).Remove(item);
        else
            return false;
    }

    public void CopyTo(KeyValuePair<string, TValue>[] array, int index) {
        if (array == null)
            throw new ArgumentNullException("array");
        if (index < 0 || index > array.Length)
            throw new ArgumentException("index must be non-negative and within array argument Length");
        if (array.Length - index < Count)
            throw new ArgumentException("array argument plus index offset is too small");

        foreach (var subd in CIDict.Values)
            foreach (var kvp in subd)
                array[index++] = kvp;
    }
    #endregion

    #region IDictionary Methods
    bool IDictionary.IsFixedSize => false;
    bool IDictionary.IsReadOnly => false;
    ICollection IDictionary.Keys => (ICollection)Keys;
    ICollection IDictionary.Values => (ICollection)Values;

    object IDictionary.this[object key]
    {
        get
        {
            if (key == null)
                throw new ArgumentNullException("key");
            if (key is string aKey)
                if (CIDict.TryGetValue(aKey, out var possibles))
                    if (possibles.TryGetValue(aKey, out var theValue))
                        return theValue;

            return null;
        }
        set
        {
            if (key == null)
                throw new ArgumentNullException("key");
            if (value == null && default(TValue) != null)
                throw new ArgumentNullException("value");
            if (key is string aKey) {
                if (value is TValue aValue)
                    this[aKey] = aValue;
                else
                    throw new ArgumentException("value argument has wrong type");
            }
            else
                throw new ArgumentException("key argument has wrong type");
        }
    }

    void IDictionary.Add(object key, object value) {
        if (key == null)
            throw new ArgumentNullException("key");
        if (value == null && default(TValue) != null)
            throw new ArgumentNullException("value");
        if (key is string aKey) {
            if (value is TValue aValue)
                Add(aKey, aValue);
            else
                throw new ArgumentException("value argument has wrong type");
        }
        else
            throw new ArgumentException("key argument has wrong type");
    }

    bool IDictionary.Contains(object key) {
        if (key == null)
            throw new ArgumentNullException("key");

        if (key is string aKey)
            if (CIDict.TryGetValue(aKey, out var possibles))
                return possibles.ContainsKey(aKey);

        return false;
    }

    void IDictionary.Remove(object key) {
        if (key == null)
            throw new ArgumentNullException("key");

        if (key is string aKey)
            Remove(aKey);
    }
    #endregion

    #region ICollection Methods
    bool ICollection.IsSynchronized => false;
    object ICollection.SyncRoot => throw new NotImplementedException();

    void ICollection.CopyTo(Array array, int index) {
        if (array == null)
            throw new ArgumentNullException("array");
        if (array.Rank != 1)
            throw new ArgumentException("array argument can not be multi-dimensional");
        if (array.GetLowerBound(0) != 0)
            throw new ArgumentException("array argument has non-zero lower bound");

        if (array is KeyValuePair<string, TValue>[] kvps) {
            CopyTo(kvps, index);
        }
        else {
            if (index < 0 || index > array.Length)
                throw new ArgumentException("index must be non-negative and within array argument Length");
            if (array.Length - index < Count)
                throw new ArgumentException("array argument plus index offset is too small");
            if (array is DictionaryEntry[] des) {
                foreach (var subd in CIDict.Values)
                    foreach (var kvp in subd)
                        des[index++] = new DictionaryEntry(kvp.Key, kvp.Value);
            }
            else if (array is object[] objects) {   
                foreach (var subd in CIDict.Values)
                    foreach (var kvp in subd)
                        objects[index++] = kvp;
            }
            else
                throw new ArgumentException("array argument is an invalid type");
        }
    }
    #endregion

    #region IReadOnlyDictionary<,> Methods
    IEnumerable<string> IReadOnlyDictionary<string, TValue>.Keys => CIDict.Values.SelectMany(v => v.Keys);
    IEnumerable<TValue> IReadOnlyDictionary<string, TValue>.Values => CIDict.Values.SelectMany(v => v.Values);
    #endregion

    #region Case-Insensitive Properties and Methods
    public ICollection<string> KeysCI => CIDict.Keys;
    public IndexerPropertyAtCI AtCI => new IndexerPropertyAtCI(this);

    public bool ContainsKeyCI(string aKey) => CIDict.ContainsKey(aKey);
    public bool TryGetValueCI(string aKey, out ICollection<TValue> rtnValues) {
        if (CIDict.TryGetValue(aKey, out var theValues)) {
            rtnValues = theValues.Select(v => v.Value).ToList();
            return true;
        }
        else {
            rtnValues = default(List<TValue>);
            return false;
        }
    }

    public class IndexerPropertyAtCI {
        CaseDictionary<TValue> myDict;

        public IndexerPropertyAtCI(CaseDictionary<TValue> d) => myDict = d;

        public ICollection<TValue> this[string aKey] => myDict.CIDict[aKey].Select(v => v.Value).ToList();
    }
    #endregion

    #region IEnumerable Methods
    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    public IEnumerator<KeyValuePair<string, TValue>> GetEnumerator() {
        foreach (var subd in CIDict.Values)
            foreach (var kvp in subd)
                yield return kvp;
    }

    IDictionaryEnumerator IDictionary.GetEnumerator() => new CaseDictionaryEnumerator(GetEnumerator());

    struct CaseDictionaryEnumerator : IDictionaryEnumerator {
        private IEnumerator<KeyValuePair<string, TValue>> en;

        public CaseDictionaryEnumerator(IEnumerator<KeyValuePair<string, TValue>> anEn) => en = anEn;

        public DictionaryEntry Entry => new DictionaryEntry(en.Current.Key, en.Current.Value);
        public object Current => Entry;

        public bool MoveNext() => en.MoveNext();
        public void Reset() => en.Reset();

        public object Key => en.Current.Key;
        public object Value => en.Current.Value;
    }
    #endregion
}

鉴于此 class,它可以用作:

var d = new CaseDictionary<int>();
d.Add("word", 1);
d.Add("Word", 2);
d.Add("WOrd", 3);
d.Add("word2", 4);
d.Add("worD2", 5);

Console.WriteLine(d.ContainsKey("WOrd"));
Console.WriteLine(d.ContainsKey("WOrd2"));
Console.WriteLine(d.ContainsKeyCI("WOrd2"));
Console.WriteLine(d["word2"]);
d["word2"] = 6;
Console.WriteLine(d["word2"]);

Console.WriteLine();
foreach (var w in d.AtCI["word2"])
    Console.WriteLine(w);

输出为:

True
False
True
4
6

6
5

您可以使用 new Dictionary<string,(string CaseSensitiveKey,T Data),其中键总是小写(见下文),但是...

一个。用户更友好的搜索 string.ContainsRegex.IsMatch

(这个是我后来加的)

我认为您最终可能会使用 string.Contains(或者甚至 Regex.IsMatch),以便您的搜索可以捕获部分匹配项。

Regex.IsMatch

var d = new Dictionary<string, string>() {
      { "First Last", "Some data" },
      { "Fir La", "Some data 2" } };

while (true)
{
    var term = Console.ReadLine();

    // Case-sensitive flag would control RegexOptions
    var results = d.Where( kvp => Regex.IsMatch(kvp.Key, term, RegexOptions.IgnoreCase)).ToList();

    if (results.Any())
        foreach (var kvp in results)
            Console.WriteLine($"\t{kvp.Key}:{kvp.Value}");
    else
        Console.WriteLine("Not found");
}
fi.*la
        First Last:Some data
        Fir La:Some data 2
fir.*t
        First Last:Some data

包含

    // Case-sensitive flag would control `StrinComparison` flag.
    var results = d.Where(
      kvp => kvp.Key.ToLower().Contains(term.ToLower(), StringComparison.InvariantCultureIgnoreCase))
      .ToList();
}
Fi
Found First Last:Some data
Found Fir La:Some data 2
First
Found First Last:Some data
Fal
Not found

乙。我想要字典搜索。和快速

您可以使用 new Dictionary<string,(string CaseSensitiveKey,T Data),其中键总是小写。

如果字典中可能有 'Gerardo Grignoli' 和 'gerardo grignoli',这将不起作用,但我怀疑你的情况并非如此,因为如果你'重新要求查找键,你不会在部分匹配之后。这显然只是一个假设。

如果您正在寻求完全匹配的快速解决方案并处理仅因大小写而异的条目,请参阅 Dictionary<string, Dictionary<string, TValue>>.[=26= 的其他答案]

public static T LowerCaseKeyWay<T>(Dictionary<string, (string CaseSensitiveKey, T Data)> d, string term, bool isCS)
          => d.TryGetValue(term.ToLower(), out var item)
                  ? !isCS
                        ? item.Data
                        : term == item.CaseSensitiveKey ? item.Data : default
                  : default;

使用示例。

class SO
{
    public int Number { get; set; }
    public int Rep { get; set; }
}


public static void Main(string[] args)
{

  var d = new Dictionary<string,(string CaseSensitiveKey,SO Data)>() {
    { "Gerardo Grignoli".ToLower(), ("Gerardo Grignoli", new SO { Number=97471, Rep=7987} )},
    { "John Wu".ToLower(), ("John Wu", new SO { Number=2791540, Rep=34973})}
  };

  foreach( var searchTerm in new []{ "Gerardo Grignoli",  "Gerardo Grignoli".ToLower()} )
  foreach( var isSearchCaseSensitive in new[]{true,false} ) {
      Console.WriteLine($"{searchTerm}/case-sensitive:{isSearchCaseSensitive}: {Search(d, searchTerm, isSearchCaseSensitive)?.Rep}");
  }

}

输出

Gerardo Grignoli/case-sensitive:True: 7987
Gerardo Grignoli/case-sensitive:False: 7987
gerardo grignoli/case-sensitive:True: 
gerardo grignoli/case-sensitive:False: 7987

原始速度测试

结果

noOfSearches: 1000
  noOfItems: 100
    Lowercase key way:        Elapsed 4ms, count found: 1500
    Linq way                  Elapsed 57ms, count found: 1500
noOfSearches: 1000
  noOfItems: 1000
    Lowercase key way:        Elapsed 3ms, count found: 3000
    Linq way                  Elapsed 454ms, count found: 3000
noOfSearches: 10000
  noOfItems: 100
    Lowercase key way:        Elapsed 11ms, count found: 15000
    Linq way                  Elapsed 447ms, count found: 15000
noOfSearches: 10000
  noOfItems: 1000
    Lowercase key way:        Elapsed 10ms, count found: 15000
    Linq way                  Elapsed 5156ms, count found: 15000
noOfSearches: 100000
  noOfItems: 100
    Lowercase key way:        Elapsed 113ms, count found: 150000
    Linq way                  Elapsed 5059ms, count found: 150000
noOfSearches: 100000
  noOfItems: 1000
    Lowercase key way:        Elapsed 83ms, count found: 150000
    Linq way                  Elapsed 48855ms, count found: 150000
noOfSearches: 1000000
  noOfItems: 100
    Lowercase key way:        Elapsed 1279ms, count found: 1500000
    Linq way                  Elapsed 49558ms, count found: 1500000
noOfSearches: 1000000
  noOfItems: 1000
    Lowercase key way:        Elapsed 961ms, count found: 1500000
(...)

测试代码(我很高兴这个被撕开)

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace ConsoleApp4
{

    class SO
    {
        public int Number { get; set; }

        public int Rep { get; set; }
    }

  class Program
  {
      public static void Main(string[] args)
      {
        // Preload linq
        var _ = new []{"•`_´•"}.FirstOrDefault( k => k == "(O_O)" );

        foreach( int noOfSearches in new []{1000, 10000, 100000, 1000000} ) 
          foreach( int noOfItems in new []{100, 1000} ) 
          {
            var d1 = new Dictionary<string, SO>();

            for(int i = 0; i < noOfItems; i++) {
              d1.Add($"Name {i}", new SO {Number = i, Rep = i *2});
            }

            var d2 = new Dictionary<string, (string CaseSensitiveKey, SO Data)>();
            foreach (var entry in d1)
            {
                d2.Add(entry.Key.ToLower(), (entry.Key, entry.Value));
            }


            Console.WriteLine($"noOfSearches: {noOfSearches}");
            Console.WriteLine($"  noOfItems: {noOfItems}");

            Console.Write("    Lowercase key way:".PadRight(30));
            PrimitiveSpeedTest( (term, isCS) => LowerCaseKeyWay(d2, term, isCS), noOfItems, noOfSearches);
            Console.Write("    Linq way".PadRight(30));
            PrimitiveSpeedTest( (term, isCS) => LinqWay(d1, term, isCS), noOfItems, noOfSearches);
          }

      }

      private static void PrimitiveSpeedTest(Func<string, bool, SO> search, int noOfItems, int noOfSearches)
      {
          var count = 0;
          Stopwatch sw = Stopwatch.StartNew();
          for (int i = 0; i < noOfSearches; i++)
          {
            var originalTerm = $"Name {i % (noOfItems*2)}"; // Some found, some not found
              foreach (var term in new[] { originalTerm, originalTerm.ToLower() })
                  foreach (var isCS in new[] { true, false })
                  {
                      var so = search(term, isCS);
                      if (so != null) count++;
                      //Console.WriteLine($"{term}/case-sensitive:{isCS}: {Search(d, term, isCS)?.Rep}");
                  }

          }
          var elapsed = sw.Elapsed;

          Console.WriteLine($"Elapsed {sw.ElapsedMilliseconds}ms, count found: {count} ");
      }

      public static SO LowerCaseKeyWay(Dictionary<string, (string CaseSensitiveKey, SO Data)> d, string term, bool isCS)
        => d.TryGetValue(term.ToLower(), out var item)
                ? !isCS
                      ? item.Data
                      : term == item.CaseSensitiveKey ? item.Data : null
                : null;

      static public T LinqWay<T>(Dictionary<string,T> source, string key, bool caseSensitive)
      {
          //Original: if (caseSensitive) return source[key];
          if(caseSensitive) return source.ContainsKey(key) ? source[key] : default;
          key = source.Keys.FirstOrDefault( k => String.Compare(key, k, StringComparison.CurrentCultureIgnoreCase) == 0);
          //Original: if (key == null) throw new KeyNotFoundException();
          if (key == null) return default;
          return source[key];
      }
  }
}

由于字典对密钥进行哈希处理,因此您应该使用 Dictionary<String, Dictionary<String, T>>


添加密钥:

  • 将给定的混合大小写密钥转换为全部小写;
  • 获取全部小写键的字典;
  • 将 加入该字典。

不区分大小写的搜索:

  • 将混合大小写的键转换为全部小写;
  • 获取此全小写键的字典;
  • 迭代字典中的值。

区分大小写搜索

  • 将混合大小写的键转换为全部小写;
  • 获取此全小写键的字典;
  • 在上述步骤得到的字典中搜索大小写混合键。