HTML 字符串中的标签,转换为普通字符串
HTML tags in String, convert to normal String
API 响应返回的字符串中带有 HTML 标签,我需要显示格式化后的文本,而不是标签本身。以下是我尝试过的代码:
html 字符串:
"<span class="st"><em>Bread<\/em> is a staple food, usually by baking. Throughout ... <em>Sourdough<\/em> is a type of <em>bread<\/em> produced by dough using naturally occurring yeasts and lactobacilli. ... List of <em>toast<\/em> dishes<\/span>",
尝试过的代码:
// Render the HTML description into `infoDescription` as styled text.
// The description is unwrapped with optional binding instead of `!`, so a
// missing value leaves the view untouched rather than crashing.
if let html = vm.description,
   let attributedString = try? NSAttributedString(
       data: Data(html.utf8),
       options: [
           .documentType: NSAttributedString.DocumentType.html,
           // The data is UTF-8; declare it so the HTML importer does not
           // have to guess the encoding of non-ASCII characters.
           .characterEncoding: String.Encoding.utf8.rawValue
       ],
       documentAttributes: nil
   ) {
    infoDescription.attributedText = attributedString
}
尝试过的其他方法:
extension String {
    /// The receiver parsed as a UTF-8 encoded HTML document.
    ///
    /// Evaluates to `nil` when the string cannot be converted to UTF-8 data
    /// or when the attributed-string initializer throws.
    var htmlToAttributedString: NSAttributedString? {
        guard let htmlData = data(using: .utf8) else { return nil }
        let readingOptions: [NSAttributedString.DocumentReadingOptionKey: Any] = [
            .documentType: NSAttributedString.DocumentType.html,
            .characterEncoding: String.Encoding.utf8.rawValue
        ]
        return try? NSAttributedString(data: htmlData, options: readingOptions, documentAttributes: nil)
    }

    /// The plain text of `htmlToAttributedString`, or an empty string when
    /// that property is `nil`.
    var htmlToString: String {
        guard let parsed = htmlToAttributedString else { return "" }
        return parsed.string
    }
}
请指导我做错了什么或遗漏了什么。谢谢
使用下面的 String 扩展,可以从字符串中删除 HTML 标签:
extension String {
    /// The receiver's text after being parsed as a UTF-8 encoded HTML document.
    ///
    /// Falls back to the unmodified receiver when the string cannot be
    /// converted to UTF-8 data or when the HTML cannot be parsed.
    public var withoutHtml: String {
        let readingOptions: [NSAttributedString.DocumentReadingOptionKey: Any] = [
            .documentType: NSAttributedString.DocumentType.html,
            .characterEncoding: String.Encoding.utf8.rawValue
        ]
        guard
            let htmlData = data(using: .utf8),
            let parsed = try? NSAttributedString(data: htmlData, options: readingOptions, documentAttributes: nil)
        else {
            return self
        }
        return parsed.string
    }
}
用法
// `yourString` is accessed through optional chaining, so the result is an
// optional: nil stays nil, otherwise it is `withoutHtml` of the wrapped string.
let formattedStr = yourString?.withoutHtml
您需要先解码 HTML 实体,然后就可以使用当前的实现来获取带样式的字符串。
对于HTML实体解码你可以参考这个:
但我发现您也可以使用 NSAttributedString 来获得相同的结果。
// Sample HTML fragment used as the input for the conversion calls below.
let html1 = """
<span class="st"><em>Bread</em> is a staple food, usually by baking. Throughout ... <em>Sourdough</em> is a type of <em>bread</em> produced by dough using naturally occurring yeasts and lactobacilli. ... List of <em>toast</em> dishes</span>
"""
extension String {
    /// The receiver parsed as a UTF-8 encoded HTML document, or `nil` when
    /// the attributed-string initializer throws.
    ///
    /// The encoding is passed explicitly via `.characterEncoding` so that
    /// non-ASCII characters in the UTF-8 data are decoded as UTF-8.
    var toAttributedString: NSAttributedString? {
        let readingOptions: [NSAttributedString.DocumentReadingOptionKey: Any] = [
            .documentType: NSAttributedString.DocumentType.html,
            .characterEncoding: String.Encoding.utf8.rawValue
        ]
        // `Data(utf8)` cannot fail, so no force unwrap is needed.
        return try? NSAttributedString(data: Data(utf8), options: readingOptions, documentAttributes: nil)
    }
}
// First pass: parse the input as HTML and keep only the resulting plain text.
// Falls back to the raw input instead of force-unwrapping, so a parse failure
// does not crash.
let output1 = html1.toAttributedString?.string ?? html1
// Second pass: parse that text as HTML again to obtain the styled string.
let output2 = output1.toAttributedString
对我来说也有点奇怪,但它有效...
我使用 Scanner 将 HTML 文本转换为普通文本,效果很好。
此函数会去除 < 和 > 之间的内容(即标签本身),保留标签之外的文本。
/// Returns `rawString` with every `<...>` span removed.
///
/// A span starts at a `<` and runs through the next `>`. A `<` that is never
/// closed by a `>` is left untouched. Only the markup is removed: the text
/// between tags is kept, and HTML entities such as `&amp;` are not decoded.
///
/// - Parameter rawString: Text that may contain HTML tags.
/// - Returns: The text with all tags stripped.
func stripHTML(fromString rawString: String) -> String {
    // A single regular-expression pass removes every tag, which avoids
    // searching the whole string again for each tag that is found.
    return rawString.replacingOccurrences(
        of: "<[^>]*>",
        with: "",
        options: .regularExpression
    )
}
可在此处(here)查看 Scanner 工作原理的详细说明。
按下面的代码使用它。祝使用愉快 :)
// Strip the tags from `yourHtmlText`, leaving only the text between them.
let normalText = stripHTML(fromString: yourHtmlText)
API 响应返回的字符串中带有 HTML 标签,我需要显示格式化后的文本,而不是标签本身。以下是我尝试过的代码:
html 字符串:
"<span class="st"><em>Bread<\/em> is a staple food, usually by baking. Throughout ... <em>Sourdough<\/em> is a type of <em>bread<\/em> produced by dough using naturally occurring yeasts and lactobacilli. ... List of <em>toast<\/em> dishes<\/span>",
尝试过的代码:
// Render the HTML description into `infoDescription` as styled text.
// The description is unwrapped with optional binding instead of `!`, so a
// missing value leaves the view untouched rather than crashing.
if let html = vm.description,
   let attributedString = try? NSAttributedString(
       data: Data(html.utf8),
       options: [
           .documentType: NSAttributedString.DocumentType.html,
           // The data is UTF-8; declare it so the HTML importer does not
           // have to guess the encoding of non-ASCII characters.
           .characterEncoding: String.Encoding.utf8.rawValue
       ],
       documentAttributes: nil
   ) {
    infoDescription.attributedText = attributedString
}
尝试过的其他方法:
extension String {
    /// The receiver parsed as a UTF-8 encoded HTML document.
    ///
    /// Evaluates to `nil` when the string cannot be converted to UTF-8 data
    /// or when the attributed-string initializer throws.
    var htmlToAttributedString: NSAttributedString? {
        guard let htmlData = data(using: .utf8) else { return nil }
        let readingOptions: [NSAttributedString.DocumentReadingOptionKey: Any] = [
            .documentType: NSAttributedString.DocumentType.html,
            .characterEncoding: String.Encoding.utf8.rawValue
        ]
        return try? NSAttributedString(data: htmlData, options: readingOptions, documentAttributes: nil)
    }

    /// The plain text of `htmlToAttributedString`, or an empty string when
    /// that property is `nil`.
    var htmlToString: String {
        guard let parsed = htmlToAttributedString else { return "" }
        return parsed.string
    }
}
请指导我做错了什么或遗漏了什么。谢谢
使用下面的 String 扩展,可以从字符串中删除 HTML 标签:
extension String {
    /// The receiver's text after being parsed as a UTF-8 encoded HTML document.
    ///
    /// Falls back to the unmodified receiver when the string cannot be
    /// converted to UTF-8 data or when the HTML cannot be parsed.
    public var withoutHtml: String {
        let readingOptions: [NSAttributedString.DocumentReadingOptionKey: Any] = [
            .documentType: NSAttributedString.DocumentType.html,
            .characterEncoding: String.Encoding.utf8.rawValue
        ]
        guard
            let htmlData = data(using: .utf8),
            let parsed = try? NSAttributedString(data: htmlData, options: readingOptions, documentAttributes: nil)
        else {
            return self
        }
        return parsed.string
    }
}
用法
// `yourString` is accessed through optional chaining, so the result is an
// optional: nil stays nil, otherwise it is `withoutHtml` of the wrapped string.
let formattedStr = yourString?.withoutHtml
您需要先解码 HTML 实体,然后就可以使用当前的实现来获取带样式的字符串。
对于HTML实体解码你可以参考这个:
但我发现您也可以使用 NSAttributedString 来获得相同的结果。
// Sample HTML fragment used as the input for the conversion calls below.
let html1 = """
<span class="st"><em>Bread</em> is a staple food, usually by baking. Throughout ... <em>Sourdough</em> is a type of <em>bread</em> produced by dough using naturally occurring yeasts and lactobacilli. ... List of <em>toast</em> dishes</span>
"""
extension String {
    /// The receiver parsed as a UTF-8 encoded HTML document, or `nil` when
    /// the attributed-string initializer throws.
    ///
    /// The encoding is passed explicitly via `.characterEncoding` so that
    /// non-ASCII characters in the UTF-8 data are decoded as UTF-8.
    var toAttributedString: NSAttributedString? {
        let readingOptions: [NSAttributedString.DocumentReadingOptionKey: Any] = [
            .documentType: NSAttributedString.DocumentType.html,
            .characterEncoding: String.Encoding.utf8.rawValue
        ]
        // `Data(utf8)` cannot fail, so no force unwrap is needed.
        return try? NSAttributedString(data: Data(utf8), options: readingOptions, documentAttributes: nil)
    }
}
// First pass: parse the input as HTML and keep only the resulting plain text.
// Falls back to the raw input instead of force-unwrapping, so a parse failure
// does not crash.
let output1 = html1.toAttributedString?.string ?? html1
// Second pass: parse that text as HTML again to obtain the styled string.
let output2 = output1.toAttributedString
对我来说也有点奇怪,但它有效...
我使用 Scanner 将 HTML 文本转换为普通文本,效果很好。
此函数会去除 < 和 > 之间的内容(即标签本身),保留标签之外的文本。
/// Returns `rawString` with every `<...>` span removed.
///
/// A span starts at a `<` and runs through the next `>`. A `<` that is never
/// closed by a `>` is left untouched. Only the markup is removed: the text
/// between tags is kept, and HTML entities such as `&amp;` are not decoded.
///
/// - Parameter rawString: Text that may contain HTML tags.
/// - Returns: The text with all tags stripped.
func stripHTML(fromString rawString: String) -> String {
    // A single regular-expression pass removes every tag, which avoids
    // searching the whole string again for each tag that is found.
    return rawString.replacingOccurrences(
        of: "<[^>]*>",
        with: "",
        options: .regularExpression
    )
}
可在此处(here)查看 Scanner 工作原理的详细说明。
按下面的代码使用它。祝使用愉快 :)
// Strip the tags from `yourHtmlText`, leaving only the text between them.
let normalText = stripHTML(fromString: yourHtmlText)