使用 utf16Offset 处理特殊字符时出错
Error handling special characters using utf16Offset
我有一个函数,用于搜索并返回字符串中第一次出现 searchStr 的索引,但是每当字符串包含任何特殊字符(例如 ç)时,它就会崩溃。错误似乎发生在 utf16Offset 调用中,我无法弄清楚原因……这是我正在使用的代码:
/// Searches for `aString` in the receiver, starting `position` steps after `startIndex`.
///
/// - Parameters:
///   - aString: The string to look for; matched with the `.literal` option.
///   - position: Offset from `startIndex`, passed to `index(_:offsetBy:)`. Defaults to `0`.
/// - Returns: The lower bound of the range found by `range(of:options:range:locale:)`,
///   or `nil` when `position` is `nil`, the bounds check below fails, or nothing is found.
func index(of aString: String, startingFrom position: Int? = 0) -> String.Index? {
guard let position = position else {
return nil
}
// NOTE(review): the indices come from `self`, but their UTF-16 offsets are computed
// in `aString`, which is a different string. This is the call reported to trap.
if self.startIndex.utf16Offset(in: aString) + position > self.endIndex.utf16Offset(in: aString) {
return nil
} // produces fatal error when special character encountered
// Advance `position` steps from the start to get the lower bound of the search range.
let start: String.Index = self.index(self.startIndex, offsetBy: position)
// Restrict the search to the span from `start` up to `endIndex`.
let range: Range<Index> = Range<Index>.init(uncheckedBounds: (lower: start, upper: self.endIndex))
return self.range(of: aString, options: .literal, range: range, locale: nil)?.lowerBound
}
这部分我觉得有问题
if self.startIndex.utf16Offset(in: aString) + position > self.endIndex.utf16Offset(in: aString) {
return nil
}
您正在获取 self 上的起始索引,并将其转换为它在 aString 中的 UTF-16 偏移量。self 和 aString 是两个不相关的字符串,所以这很可能是未定义的行为(这可能就是您在某些情况下看到它崩溃的原因)。
此 if 语句的目的似乎是确保生成有效范围(lower <= upper):
let start: String.Index = self.index(self.startIndex, offsetBy: position)
let range: Range<Index> = Range<Index>.init(uncheckedBounds: (lower: start, upper: self.endIndex))
实际上,您可以像这样直接比较两个 Index:
let start: String.Index = self.index(self.startIndex, offsetBy: position)
guard start < self.endIndex else {
return nil
}
// Range is guaranteed to have valid boundaries now
let range: Range<Index> = Range<Index>.init(uncheckedBounds: (lower: start, upper: self.endIndex))
完整示例:
extension String {
    /// Returns the index of the first occurrence of `aString` at or after a character offset.
    ///
    /// - Parameters:
    ///   - aString: The substring to search for; matched with the `.literal` option.
    ///   - position: Number of characters from `startIndex` at which the search begins.
    ///     Defaults to `0`.
    /// - Returns: The lower bound of the first match, or `nil` when `position` is `nil`,
    ///   negative, at or past the end of the string, or when there is no match.
    func index(of aString: String, startingFrom position: Int? = 0) -> String.Index? {
        // A negative offset would walk before `startIndex` and trap, so reject it up front.
        guard let position = position, position >= 0 else {
            return nil
        }
        // `index(_:offsetBy:)` traps when the offset runs past `endIndex`; the
        // `limitedBy:` variant returns nil instead, so out-of-range positions are safe.
        guard let start = index(startIndex, offsetBy: position, limitedBy: endIndex),
              start < endIndex else {
            return nil
        }
        // `start < endIndex` holds here, so the half-open range is well-formed.
        return range(of: aString, options: .literal, range: start..<endIndex, locale: nil)?.lowerBound
    }
}
// Doesn't crash anymore
let foobar = "aaç"
// Bind the optional result instead of force-unwrapping it.
if let match = foobar.index(of: "ç", startingFrom: 0) {
    print(foobar.distance(from: foobar.startIndex, to: match)) // 2
}
我有一个函数,用于搜索并返回字符串中第一次出现 searchStr 的索引,但是每当字符串包含任何特殊字符(例如 ç)时,它就会崩溃。错误似乎发生在 utf16Offset 调用中,我无法弄清楚原因……这是我正在使用的代码:
/// Searches for `aString` in the receiver, starting `position` steps after `startIndex`.
///
/// - Parameters:
///   - aString: The string to look for; matched with the `.literal` option.
///   - position: Offset from `startIndex`, passed to `index(_:offsetBy:)`. Defaults to `0`.
/// - Returns: The lower bound of the range found by `range(of:options:range:locale:)`,
///   or `nil` when `position` is `nil`, the bounds check below fails, or nothing is found.
func index(of aString: String, startingFrom position: Int? = 0) -> String.Index? {
guard let position = position else {
return nil
}
// NOTE(review): the indices come from `self`, but their UTF-16 offsets are computed
// in `aString`, which is a different string. This is the call reported to trap.
if self.startIndex.utf16Offset(in: aString) + position > self.endIndex.utf16Offset(in: aString) {
return nil
} // produces fatal error when special character encountered
// Advance `position` steps from the start to get the lower bound of the search range.
let start: String.Index = self.index(self.startIndex, offsetBy: position)
// Restrict the search to the span from `start` up to `endIndex`.
let range: Range<Index> = Range<Index>.init(uncheckedBounds: (lower: start, upper: self.endIndex))
return self.range(of: aString, options: .literal, range: range, locale: nil)?.lowerBound
}
这部分我觉得有问题
if self.startIndex.utf16Offset(in: aString) + position > self.endIndex.utf16Offset(in: aString) {
return nil
}
您正在获取 self 上的起始索引,并将其转换为它在 aString 中的 UTF-16 偏移量。self 和 aString 是两个不相关的字符串,所以这很可能是未定义的行为(这可能就是您在某些情况下看到它崩溃的原因)。
此 if 语句的目的似乎是确保生成有效范围(lower <= upper):
let start: String.Index = self.index(self.startIndex, offsetBy: position)
let range: Range<Index> = Range<Index>.init(uncheckedBounds: (lower: start, upper: self.endIndex))
实际上,您可以像这样直接比较两个 Index:
let start: String.Index = self.index(self.startIndex, offsetBy: position)
guard start < self.endIndex else {
return nil
}
// Range is guaranteed to have valid boundaries now
let range: Range<Index> = Range<Index>.init(uncheckedBounds: (lower: start, upper: self.endIndex))
完整示例:
extension String {
    /// Returns the index of the first occurrence of `aString` at or after a character offset.
    ///
    /// - Parameters:
    ///   - aString: The substring to search for; matched with the `.literal` option.
    ///   - position: Number of characters from `startIndex` at which the search begins.
    ///     Defaults to `0`.
    /// - Returns: The lower bound of the first match, or `nil` when `position` is `nil`,
    ///   negative, at or past the end of the string, or when there is no match.
    func index(of aString: String, startingFrom position: Int? = 0) -> String.Index? {
        // A negative offset would walk before `startIndex` and trap, so reject it up front.
        guard let position = position, position >= 0 else {
            return nil
        }
        // `index(_:offsetBy:)` traps when the offset runs past `endIndex`; the
        // `limitedBy:` variant returns nil instead, so out-of-range positions are safe.
        guard let start = index(startIndex, offsetBy: position, limitedBy: endIndex),
              start < endIndex else {
            return nil
        }
        // `start < endIndex` holds here, so the half-open range is well-formed.
        return range(of: aString, options: .literal, range: start..<endIndex, locale: nil)?.lowerBound
    }
}
// Doesn't crash anymore
let foobar = "aaç"
// Bind the optional result instead of force-unwrapping it.
if let match = foobar.index(of: "ç", startingFrom: 0) {
    print(foobar.distance(from: foobar.startIndex, to: match)) // 2
}