使用 NSRegularExpression 匹配的奇怪字符串范围行为
strange string range behavior using NSRegularExpression matches
我正在尝试解析原始 HTTP 响应,但在尝试将 NSRange 转换为范围时得到的范围不正确。这是来自 playground 的相关代码:
// The question's original NSRange -> Range<String.Index> conversion.
// It walks `str.characters`, so `location` and `length` are applied as
// Character offsets.
public extension NSRange {
/// Builds a `Range<String.Index>` by advancing `location` Characters from the
/// start of `str` and then `length` Characters further.
///
/// - Parameter str: The string the range refers to.
/// - Returns: The resulting range, or `nil` when the bounds check below fails.
public func toStringRange(_ str: String) -> Range<String.Index>? {
// NOTE(review): the first condition compares the Character count against
// `length - location`; `location + length` looks like the intended upper
// bound — confirm.
guard str.characters.count >= length - location && location < str.characters.count else { return nil }
// Offsets are counted in Characters here, not in UTF-16 units.
let fromIdx = str.characters.index(str.startIndex, offsetBy: self.location)
// Debug output: the requested location next to the index actually reached.
print("from: \(self.location) = \(fromIdx)")
let toIdx = str.characters.index(fromIdx, offsetBy: self.length)
return fromIdx..<toIdx
}
}
// Sample raw HTTP response; every line is terminated by CRLF.
let responseString = "HTTP/1.0 200 OK\r\nContent-Length: 193\r\nContent-Type: application/json\r\n"
// NSRegularExpression measures ranges in UTF-16 units, so the search range is
// sized with utf16.count ("\r\n" is one Character but two UTF-16 units).
let responseRange = NSRange(location: 0, length: responseString.utf16.count)
// Capture groups: 1 = HTTP version, 2 = status code, 3 = remainder of the
// status line up to and including CRLF, 4 = text after the status line.
// `\\d` is written with a doubled backslash so the regex engine receives `\d`;
// a bare `\d` is not a valid escape in a Swift string literal.
let responseRegex = try! NSRegularExpression(pattern: "^(HTTP/1.\\d) (\\d+) (.*?\r\n)(.*)", options: [.anchorsMatchLines])
// numberOfRanges is checked against 5: the whole match plus the four groups.
guard let matchResult = responseRegex.firstMatch(in: responseString, options: [], range: responseRange),
matchResult.numberOfRanges == 5,
let versionRange = matchResult.rangeAt(1).toStringRange(responseString),
let statusRange = matchResult.rangeAt(2).toStringRange(responseString),
let headersRange = matchResult.rangeAt(4).toStringRange(responseString)
else { fatalError() }
toStringRange() 中打印的输出是
from: 0 = Index(_base: Swift.String.UnicodeScalarView.Index(_position: 0), _countUTF16: 1)
from: 9 = Index(_base: Swift.String.UnicodeScalarView.Index(_position: 9), _countUTF16: 1)
from: 17 = Index(_base: Swift.String.UnicodeScalarView.Index(_position: 18), _countUTF16: 1)
为什么第 3 次 toStringRange() 调用返回的字符串范围从 18 而不是 17 开始?
你从 NSRange 到 Range<String.Index> 的转换方法,对于扩展字素簇以及 "basic multilingual plane"(基本多文种平面)之外的字符(表情符号、旗帜等)无法正常工作。
NSRange 按 UTF-16 代码单元计数(对应于 NSString 中的 unichar 表示)。Range<String.Index> 按 Swift 的 Character 计数,而 Character 表示的是扩展字素簇。
在您的具体情况下,"\r\n" 算作两个 UTF-16 代码单元,但只算作一个 Character,这会导致不希望出现的偏移("shift")。
这是一个简化的例子:
// Minimal reproduction: a "\r\n" precedes the substring being searched for.
let responseString = "OK\r\nContent-Length"
// The range comes from NSString, i.e. it is expressed in NSString's units.
let nsRange = (responseString as NSString).range(of: "Content")
print(nsRange.location, nsRange.length) // 4 7
// Converting through the Character-based toStringRange gives a range that is
// shifted by one position, as the printed substring shows.
if let sRange1 = nsRange.toStringRange(responseString) {
print(responseString.substring(with: sRange1)) // "ontent-"
}
改用下面这个方法:
extension String {
    /// Converts an `NSRange` into the equivalent `Range<String.Index>` of this string.
    ///
    /// The offsets are walked in the string's UTF-16 view and each bound is then
    /// mapped back to a `String.Index`.
    ///
    /// - Parameter nsRange: The range to convert.
    /// - Returns: The matching string range, or `nil` if either bound lies past the
    ///   end of the UTF-16 view or cannot be expressed as a `String.Index`.
    func range(from nsRange: NSRange) -> Range<String.Index>? {
        let units = utf16

        // Lower bound: `location` UTF-16 units from the start.
        guard let lower16 = units.index(units.startIndex, offsetBy: nsRange.location, limitedBy: units.endIndex) else {
            return nil
        }
        // Upper bound: `length` UTF-16 units past the lower bound.
        guard let upper16 = units.index(lower16, offsetBy: nsRange.length, limitedBy: units.endIndex) else {
            return nil
        }
        // Map both UTF-16 positions back into String indices.
        guard let lower = String.Index(lower16, within: self),
              let upper = String.Index(upper16, within: self) else {
            return nil
        }
        return lower ..< upper
    }
}
用它把 NSRange 转换为 Range<String.Index>,你就会得到预期的结果:
// Convert nsRange through the UTF-16 based String.range(from:) and print the
// substring it selects.
if let sRange2 = responseString.range(from: nsRange) {
print(responseString.substring(with: sRange2)) // "Content"
}
我正在尝试解析原始 HTTP 响应,但在尝试将 NSRange 转换为范围时得到的范围不正确。这是来自 playground 的相关代码:
// The question's original NSRange -> Range<String.Index> conversion.
// It walks `str.characters`, so `location` and `length` are applied as
// Character offsets.
public extension NSRange {
/// Builds a `Range<String.Index>` by advancing `location` Characters from the
/// start of `str` and then `length` Characters further.
///
/// - Parameter str: The string the range refers to.
/// - Returns: The resulting range, or `nil` when the bounds check below fails.
public func toStringRange(_ str: String) -> Range<String.Index>? {
// NOTE(review): the first condition compares the Character count against
// `length - location`; `location + length` looks like the intended upper
// bound — confirm.
guard str.characters.count >= length - location && location < str.characters.count else { return nil }
// Offsets are counted in Characters here, not in UTF-16 units.
let fromIdx = str.characters.index(str.startIndex, offsetBy: self.location)
// Debug output: the requested location next to the index actually reached.
print("from: \(self.location) = \(fromIdx)")
let toIdx = str.characters.index(fromIdx, offsetBy: self.length)
return fromIdx..<toIdx
}
}
// Sample raw HTTP response; every line is terminated by CRLF.
let responseString = "HTTP/1.0 200 OK\r\nContent-Length: 193\r\nContent-Type: application/json\r\n"
// NSRegularExpression measures ranges in UTF-16 units, so the search range is
// sized with utf16.count ("\r\n" is one Character but two UTF-16 units).
let responseRange = NSRange(location: 0, length: responseString.utf16.count)
// Capture groups: 1 = HTTP version, 2 = status code, 3 = remainder of the
// status line up to and including CRLF, 4 = text after the status line.
// `\\d` is written with a doubled backslash so the regex engine receives `\d`;
// a bare `\d` is not a valid escape in a Swift string literal.
let responseRegex = try! NSRegularExpression(pattern: "^(HTTP/1.\\d) (\\d+) (.*?\r\n)(.*)", options: [.anchorsMatchLines])
// numberOfRanges is checked against 5: the whole match plus the four groups.
guard let matchResult = responseRegex.firstMatch(in: responseString, options: [], range: responseRange),
matchResult.numberOfRanges == 5,
let versionRange = matchResult.rangeAt(1).toStringRange(responseString),
let statusRange = matchResult.rangeAt(2).toStringRange(responseString),
let headersRange = matchResult.rangeAt(4).toStringRange(responseString)
else { fatalError() }
toStringRange() 中打印的输出是
from: 0 = Index(_base: Swift.String.UnicodeScalarView.Index(_position: 0), _countUTF16: 1)
from: 9 = Index(_base: Swift.String.UnicodeScalarView.Index(_position: 9), _countUTF16: 1)
from: 17 = Index(_base: Swift.String.UnicodeScalarView.Index(_position: 18), _countUTF16: 1)
为什么第 3 次 toStringRange() 调用返回的字符串范围从 18 而不是 17 开始?
你从 NSRange 到 Range<String.Index> 的转换方法,对于扩展字素簇以及 "basic multilingual plane"(基本多文种平面)之外的字符(表情符号、旗帜等)无法正常工作。
NSRange 按 UTF-16 代码单元计数(对应于 NSString 中的 unichar 表示)。Range<String.Index> 按 Swift 的 Character 计数,而 Character 表示的是扩展字素簇。
在您的具体情况下,"\r\n" 算作两个 UTF-16 代码单元,但只算作一个 Character,这会导致不希望出现的偏移("shift")。
这是一个简化的例子:
// Minimal reproduction: a "\r\n" precedes the substring being searched for.
let responseString = "OK\r\nContent-Length"
// The range comes from NSString, i.e. it is expressed in NSString's units.
let nsRange = (responseString as NSString).range(of: "Content")
print(nsRange.location, nsRange.length) // 4 7
// Converting through the Character-based toStringRange gives a range that is
// shifted by one position, as the printed substring shows.
if let sRange1 = nsRange.toStringRange(responseString) {
print(responseString.substring(with: sRange1)) // "ontent-"
}
改用下面这个方法:
extension String {
    /// Converts an `NSRange` into the equivalent `Range<String.Index>` of this string.
    ///
    /// The offsets are walked in the string's UTF-16 view and each bound is then
    /// mapped back to a `String.Index`.
    ///
    /// - Parameter nsRange: The range to convert.
    /// - Returns: The matching string range, or `nil` if either bound lies past the
    ///   end of the UTF-16 view or cannot be expressed as a `String.Index`.
    func range(from nsRange: NSRange) -> Range<String.Index>? {
        let units = utf16

        // Lower bound: `location` UTF-16 units from the start.
        guard let lower16 = units.index(units.startIndex, offsetBy: nsRange.location, limitedBy: units.endIndex) else {
            return nil
        }
        // Upper bound: `length` UTF-16 units past the lower bound.
        guard let upper16 = units.index(lower16, offsetBy: nsRange.length, limitedBy: units.endIndex) else {
            return nil
        }
        // Map both UTF-16 positions back into String indices.
        guard let lower = String.Index(lower16, within: self),
              let upper = String.Index(upper16, within: self) else {
            return nil
        }
        return lower ..< upper
    }
}
用它把 NSRange 转换为 Range<String.Index>,你就会得到预期的结果:
// Convert nsRange through the UTF-16 based String.range(from:) and print the
// substring it selects.
if let sRange2 = responseString.range(from: nsRange) {
print(responseString.substring(with: sRange2)) // "Content"
}