如何将一个句子拆分成单词以及标点符号和空格? [Swift]
How to split a sentence into words as well as punctuations and spaces? [Swift]
我想使用 Swift 5 以编程方式将句子拆分为单词和标点符号,并且保留其中的空格。
- 输入:
"Hello, I am Albert Einstein."
- 输出:
["Hello", ",", " ", "I", " ", "am", " ", "Albert", " ", "Einstein", "."]
我采用了 @Duyen-Hoa 在 "Split text into array while maintaining the punctuation in Swift" 中提供的代码,并稍作修改得到下面的代码(基本上只是去掉了删除空格的那部分)。但是,我无法让空格作为独立的数组元素出现;相反,空格被附加到了其后每个单词的开头。
/// Splits a sentence into words, punctuation marks and spaces.
///
/// Every separator (space, `,`, `.`, `;`, `!`, `?`) becomes its own
/// single-character element; the characters between separators are grouped
/// into words. Concatenating the result reproduces the input exactly.
///
/// - Parameter text_input: The sentence to tokenize. May be empty.
/// - Returns: The tokens in their original order, e.g.
///   `"Hi, you."` yields `["Hi", ",", " ", "you", "."]`.
func sentenceSplitter(text_input: String) -> [String] {
    // Characters that always form a token of their own.
    let separators: Set<Character> = [" ", ",", ".", ";", "!", "?"]

    var list = [String]()
    var currentWord = ""

    // Iterating a String yields extended grapheme clusters, so composed
    // characters (accents, emoji) are never split apart.
    for character in text_input {
        if separators.contains(character) {
            // A separator closes the pending word (if any) and is then
            // emitted on its own, so it never sticks to a neighbouring word.
            if !currentWord.isEmpty {
                list.append(currentWord)
                currentWord = ""
            }
            list.append(String(character))
        } else {
            currentWord.append(character)
        }
    }

    // Flush the last word when the input does not end with a separator.
    if !currentWord.isEmpty {
        list.append(currentWord)
    }
    return list
}
你能告诉我我做错了什么吗?
原始代码来自:Duyen-Hoa
// Tokenizes a fixed sample sentence into words and punctuation marks,
// discarding the spaces between them. The tokens are collected in `list`.
var str = "Hello, I am Albert Einstein."
var list: [String] = []
var currentSubString = ""

// Walk the sentence one composed character sequence at a time so that
// ".", ",", ";" and " " are each seen individually.
str.enumerateSubstrings(in: str.startIndex..<str.endIndex, options: .byComposedCharacterSequences) { piece, _, _, _ in
    guard let piece = piece else { return }
    let endsToken = [" ", ",", ".", ";"].contains(piece)

    guard endsToken, !currentSubString.isEmpty else {
        // Not at a token boundary: keep accumulating, but never store a space.
        if piece != " " {
            currentSubString.append(piece)
        }
        return
    }

    // Boundary reached: flush the pending token, then start the next one with
    // the delimiter itself (a space trims down to the empty string).
    list.append(currentSubString)
    currentSubString = piece.trimmingCharacters(in: .whitespaces)
}

// Flush whatever is still pending after the last character.
if !currentSubString.isEmpty {
    list.append(currentSubString)
}
这里不需要删除 `.trimmingCharacters(in: CharacterSet.whitespaces)`。
相反,您需要找到代码中忽略空格(即 `_subString` 为 " ")的位置,并在这些位置添加以下代码:
if _subString == " " {
list.append(_subString)
}
您的代码如下所示:
// Tokenizes a fixed sample sentence into words, punctuation marks and
// single spaces, then prints the resulting array.
let str = "Hello, I am Albert Einstein."
var list: [String] = []
var currentSubString = ""

str.enumerateSubstrings(in: str.startIndex..<str.endIndex, options: .byComposedCharacterSequences) { piece, _, _, _ in
    guard let piece = piece else { return }

    let isSpace = piece == " "
    let isBoundary = isSpace || piece == "," || piece == "." || piece == ";"

    if isBoundary && !currentSubString.isEmpty {
        // A delimiter closes the pending token.
        list.append(currentSubString)
        // A space is emitted immediately as its own element ...
        if isSpace {
            list.append(piece)
        }
        // ... while punctuation becomes the start of the next pending token
        // (trimming turns a space into the empty string).
        currentSubString = piece.trimmingCharacters(in: .whitespaces)
    } else if isSpace {
        // A space with nothing pending (e.g. at the very start) is still kept.
        list.append(piece)
    } else {
        currentSubString.append(piece)
    }
}

// Flush the token that is still pending after the last character.
if !currentSubString.isEmpty {
    list.append(currentSubString)
}
print(list)
这里输入的是"Hello, I am Albert Einstein."
输出将是:
["Hello", ",", " ", "I", " ", "am", " ", "Albert", " ", "Einstein", "."]
希望对您有所帮助。
我想使用 Swift 5 以编程方式将句子拆分为单词和标点符号,并且保留其中的空格。
- 输入:
"Hello, I am Albert Einstein."
- 输出:
["Hello", ",", " ", "I", " ", "am", " ", "Albert", " ", "Einstein", "."]
我采用了 @Duyen-Hoa 在 "Split text into array while maintaining the punctuation in Swift" 中提供的代码,并稍作修改得到下面的代码(基本上只是去掉了删除空格的那部分)。但是,我无法让空格作为独立的数组元素出现;相反,空格被附加到了其后每个单词的开头。
/// Splits a sentence into words, punctuation marks and spaces.
///
/// Every separator (space, `,`, `.`, `;`, `!`, `?`) becomes its own
/// single-character element; the characters between separators are grouped
/// into words. Concatenating the result reproduces the input exactly.
///
/// - Parameter text_input: The sentence to tokenize. May be empty.
/// - Returns: The tokens in their original order, e.g.
///   `"Hi, you."` yields `["Hi", ",", " ", "you", "."]`.
func sentenceSplitter(text_input: String) -> [String] {
    // Characters that always form a token of their own.
    let separators: Set<Character> = [" ", ",", ".", ";", "!", "?"]

    var list = [String]()
    var currentWord = ""

    // Iterating a String yields extended grapheme clusters, so composed
    // characters (accents, emoji) are never split apart.
    for character in text_input {
        if separators.contains(character) {
            // A separator closes the pending word (if any) and is then
            // emitted on its own, so it never sticks to a neighbouring word.
            if !currentWord.isEmpty {
                list.append(currentWord)
                currentWord = ""
            }
            list.append(String(character))
        } else {
            currentWord.append(character)
        }
    }

    // Flush the last word when the input does not end with a separator.
    if !currentWord.isEmpty {
        list.append(currentWord)
    }
    return list
}
你能告诉我我做错了什么吗?
原始代码来自:Duyen-Hoa
// Tokenizes a fixed sample sentence into words and punctuation marks,
// discarding the spaces between them. The tokens are collected in `list`.
var str = "Hello, I am Albert Einstein."
var list: [String] = []
var currentSubString = ""

// Walk the sentence one composed character sequence at a time so that
// ".", ",", ";" and " " are each seen individually.
str.enumerateSubstrings(in: str.startIndex..<str.endIndex, options: .byComposedCharacterSequences) { piece, _, _, _ in
    guard let piece = piece else { return }
    let endsToken = [" ", ",", ".", ";"].contains(piece)

    guard endsToken, !currentSubString.isEmpty else {
        // Not at a token boundary: keep accumulating, but never store a space.
        if piece != " " {
            currentSubString.append(piece)
        }
        return
    }

    // Boundary reached: flush the pending token, then start the next one with
    // the delimiter itself (a space trims down to the empty string).
    list.append(currentSubString)
    currentSubString = piece.trimmingCharacters(in: .whitespaces)
}

// Flush whatever is still pending after the last character.
if !currentSubString.isEmpty {
    list.append(currentSubString)
}
这里不需要删除 `.trimmingCharacters(in: CharacterSet.whitespaces)`。
相反,您需要找到代码中忽略空格(即 `_subString` 为 " ")的位置,并在这些位置添加以下代码:
if _subString == " " {
list.append(_subString)
}
您的代码如下所示:
// Tokenizes a fixed sample sentence into words, punctuation marks and
// single spaces, then prints the resulting array.
let str = "Hello, I am Albert Einstein."
var list: [String] = []
var currentSubString = ""

str.enumerateSubstrings(in: str.startIndex..<str.endIndex, options: .byComposedCharacterSequences) { piece, _, _, _ in
    guard let piece = piece else { return }

    let isSpace = piece == " "
    let isBoundary = isSpace || piece == "," || piece == "." || piece == ";"

    if isBoundary && !currentSubString.isEmpty {
        // A delimiter closes the pending token.
        list.append(currentSubString)
        // A space is emitted immediately as its own element ...
        if isSpace {
            list.append(piece)
        }
        // ... while punctuation becomes the start of the next pending token
        // (trimming turns a space into the empty string).
        currentSubString = piece.trimmingCharacters(in: .whitespaces)
    } else if isSpace {
        // A space with nothing pending (e.g. at the very start) is still kept.
        list.append(piece)
    } else {
        currentSubString.append(piece)
    }
}

// Flush the token that is still pending after the last character.
if !currentSubString.isEmpty {
    list.append(currentSubString)
}
print(list)
这里输入的是"Hello, I am Albert Einstein."
输出将是:
["Hello", ",", " ", "I", " ", "am", " ", "Albert", " ", "Einstein", "."]
希望对您有所帮助。