使用 set/multiset 查找以 's' 开头的单词
Find words beginning with 's' using set/multiset
我在这段代码中遇到的问题源于 get_words_beginning_s 函数的最后一段代码。
/*
Name: xx
Date: xx
Purpose:Read text from file, count number of words, unique words, word frequency, & number of words that begin with letter 's'
*/
#include <iostream>
#include <fstream>
#include <string>
#include <set>
using namespace std;
multiset<string> display_and_load_words(string filename);
set<string> get_unique_words(multiset<string>& words);
set<string> get_words_beginning_s(multiset<string>& words);
// Program entry point: loads the words from "dickens.txt" and prints the
// total word count, the unique words, the number of occurrences of each
// unique word, and the words returned by get_words_beginning_s.
int main() {
    std::cout << "The Word Counter program\n\n";

    const std::string filename = "dickens.txt";
    std::cout << "FILE TEXT: ";
    auto words = display_and_load_words(filename);
    std::cout << "WORD COUNT: " << words.size() << std::endl << std::endl;

    // Both derived sets are computed before any of them is printed.
    const auto unique_words = get_unique_words(words);
    const auto words_beginning_s = get_words_beginning_s(words);

    std::cout << unique_words.size() << " UNIQUE WORDS: ";
    for (const auto& entry : unique_words) {
        std::cout << entry << ' ';
    }
    std::cout << std::endl << std::endl;

    std::cout << "COUNT PER WORD: ";
    for (const auto& entry : unique_words) {
        // The multiset keeps duplicates, so count() yields the frequency.
        std::cout << entry << '=' << words.count(entry) << ' ';
    }
    std::cout << std::endl << std::endl;

    std::cout << "WORDS THAT BEGIN WITH 'S': ";
    for (const auto& entry : words_beginning_s) {
        std::cout << entry << ' ';
    }
    std::cout << std::endl << std::endl;
}
// Reads whitespace-separated tokens from `filename`, echoes each raw token to
// stdout followed by a space, and returns the normalized words.
//
// Normalization removes every '.' and ',' and lowercases the remaining
// characters. A token that consists only of that punctuation normalizes to an
// empty string and is skipped, so it is not counted as a word.
//
// filename: path of the text file to read.
// Returns a multiset with one entry per normalized word (duplicates kept).
// When the file cannot be opened nothing is printed and the result is empty.
std::multiset<std::string> display_and_load_words(std::string filename) {
    std::multiset<std::string> words;
    std::ifstream infile(filename);
    if (!infile) {
        return words;
    }

    std::string word;
    while (infile >> word) {
        std::cout << word << ' ';

        std::string new_word;
        for (char c : word) {
            if (c == '.' || c == ',') {
                continue; // remove punctuation
            }
            // The <cctype> functions need a value representable as
            // unsigned char; passing a negative plain char is undefined.
            new_word += static_cast<char>(tolower(static_cast<unsigned char>(c)));
        }

        if (!new_word.empty()) {
            words.insert(new_word);
        }
    }
    std::cout << std::endl << std::endl;
    return words; // the ifstream closes itself when it goes out of scope
}
// Returns the distinct words contained in `words`.
//
// words: the loaded words, possibly with duplicates; it is only read.
// Returns a set holding each word exactly once.
std::set<std::string> get_unique_words(std::multiset<std::string>& words) {
    // A set stores one element per key, so building it from the whole range
    // drops the duplicates.
    return std::set<std::string>(words.begin(), words.end());
}
// Returns the distinct words from `words` whose first character is the
// lowercase letter 's'.
//
// words: the loaded words, possibly with duplicates; it is only read.
// Returns a set holding each matching word exactly once. Empty strings never
// match.
std::set<std::string> get_words_beginning_s(std::multiset<std::string>& words) {
    std::set<std::string> words_beginning_s;
    for (const std::string& word : words) {
        // Test the word itself: a set has no operator[], and the result set
        // starts out empty, so it cannot be the thing that is inspected.
        if (!word.empty() && word.front() == 's') {
            words_beginning_s.insert(word); // insert ignores duplicates
        }
    }
    return words_beginning_s;
}
如果使用 set/multiset,如何检查容器中每个单词自身的首字母,而不是对整个容器按位置取值?以文本文件中的示例字符串 "John goes to the store" 为例:对于普通字符串,通常可以用一个简单的 for 循环,通过起始位置(下标 0)比较字符并统计它出现的次数(类似下面的写法):
for (int i = 0; i < words_beginning_s.length(); ++i) {
if (words_beginning_s[0] == 's') {
++s_word;
}
这在使用 set/multiset 时不起作用。对此很陌生,如果这个问题看起来很愚蠢,我很抱歉。
您可以使用 multiset 的成员函数 lower_bound 获取界定某个范围的两个迭代器,然后用该范围构造一个 set。
示例:
#include <iostream>
#include <set>
#include <string>
// Returns the distinct elements of `words` that are ordered at or after "s"
// and before "t", i.e. the range delimited by lower_bound("s") and
// lower_bound("t").
std::set<std::string> get_words_beginning_s(const std::multiset<std::string>& words) {
    const auto first_s = words.lower_bound("s"); // first element not less than "s"
    const auto first_t = words.lower_bound("t"); // first element not less than "t"
    return std::set<std::string>(first_s, first_t);
}
// Demo driver: builds a small multiset of sample words and prints, one per
// line, every word returned by get_words_beginning_s.
int main() {
    const std::multiset<std::string> words{
        "foo", "slayer", "bar", "sepultura", "tesseract", "skinny puppy", "yello"
    };

    const auto s_words = get_words_beginning_s(words);
    for (const auto& entry : s_words) {
        std::cout << entry << '\n';
    }
}
输出:
sepultura
skinny puppy
slayer
我在这段代码中遇到的问题源于 get_words_beginning_s 函数的最后一段代码。
/*
Name: xx
Date: xx
Purpose:Read text from file, count number of words, unique words, word frequency, & number of words that begin with letter 's'
*/
#include <iostream>
#include <fstream>
#include <string>
#include <set>
using namespace std;
multiset<string> display_and_load_words(string filename);
set<string> get_unique_words(multiset<string>& words);
set<string> get_words_beginning_s(multiset<string>& words);
// Program entry point: loads the words from "dickens.txt" and prints the
// total word count, the unique words, the number of occurrences of each
// unique word, and the words returned by get_words_beginning_s.
int main() {
    std::cout << "The Word Counter program\n\n";

    const std::string filename = "dickens.txt";
    std::cout << "FILE TEXT: ";
    auto words = display_and_load_words(filename);
    std::cout << "WORD COUNT: " << words.size() << std::endl << std::endl;

    // Both derived sets are computed before any of them is printed.
    const auto unique_words = get_unique_words(words);
    const auto words_beginning_s = get_words_beginning_s(words);

    std::cout << unique_words.size() << " UNIQUE WORDS: ";
    for (const auto& entry : unique_words) {
        std::cout << entry << ' ';
    }
    std::cout << std::endl << std::endl;

    std::cout << "COUNT PER WORD: ";
    for (const auto& entry : unique_words) {
        // The multiset keeps duplicates, so count() yields the frequency.
        std::cout << entry << '=' << words.count(entry) << ' ';
    }
    std::cout << std::endl << std::endl;

    std::cout << "WORDS THAT BEGIN WITH 'S': ";
    for (const auto& entry : words_beginning_s) {
        std::cout << entry << ' ';
    }
    std::cout << std::endl << std::endl;
}
// Reads whitespace-separated tokens from `filename`, echoes each raw token to
// stdout followed by a space, and returns the normalized words.
//
// Normalization removes every '.' and ',' and lowercases the remaining
// characters. A token that consists only of that punctuation normalizes to an
// empty string and is skipped, so it is not counted as a word.
//
// filename: path of the text file to read.
// Returns a multiset with one entry per normalized word (duplicates kept).
// When the file cannot be opened nothing is printed and the result is empty.
std::multiset<std::string> display_and_load_words(std::string filename) {
    std::multiset<std::string> words;
    std::ifstream infile(filename);
    if (!infile) {
        return words;
    }

    std::string word;
    while (infile >> word) {
        std::cout << word << ' ';

        std::string new_word;
        for (char c : word) {
            if (c == '.' || c == ',') {
                continue; // remove punctuation
            }
            // The <cctype> functions need a value representable as
            // unsigned char; passing a negative plain char is undefined.
            new_word += static_cast<char>(tolower(static_cast<unsigned char>(c)));
        }

        if (!new_word.empty()) {
            words.insert(new_word);
        }
    }
    std::cout << std::endl << std::endl;
    return words; // the ifstream closes itself when it goes out of scope
}
// Returns the distinct words contained in `words`.
//
// words: the loaded words, possibly with duplicates; it is only read.
// Returns a set holding each word exactly once.
std::set<std::string> get_unique_words(std::multiset<std::string>& words) {
    // A set stores one element per key, so building it from the whole range
    // drops the duplicates.
    return std::set<std::string>(words.begin(), words.end());
}
// Returns the distinct words from `words` whose first character is the
// lowercase letter 's'.
//
// words: the loaded words, possibly with duplicates; it is only read.
// Returns a set holding each matching word exactly once. Empty strings never
// match.
std::set<std::string> get_words_beginning_s(std::multiset<std::string>& words) {
    std::set<std::string> words_beginning_s;
    for (const std::string& word : words) {
        // Test the word itself: a set has no operator[], and the result set
        // starts out empty, so it cannot be the thing that is inspected.
        if (!word.empty() && word.front() == 's') {
            words_beginning_s.insert(word); // insert ignores duplicates
        }
    }
    return words_beginning_s;
}
如果使用 set/multiset,如何检查容器中每个单词自身的首字母,而不是对整个容器按位置取值?以文本文件中的示例字符串 "John goes to the store" 为例:对于普通字符串,通常可以用一个简单的 for 循环,通过起始位置(下标 0)比较字符并统计它出现的次数(类似下面的写法):
for (int i = 0; i < words_beginning_s.length(); ++i) {
if (words_beginning_s[0] == 's') {
++s_word;
}
这在使用 set/multiset 时不起作用。对此很陌生,如果这个问题看起来很愚蠢,我很抱歉。
您可以使用 multiset 的成员函数 lower_bound 获取界定某个范围的两个迭代器,然后用该范围构造一个 set。
示例:
#include <iostream>
#include <set>
#include <string>
// Returns the distinct elements of `words` that are ordered at or after "s"
// and before "t", i.e. the range delimited by lower_bound("s") and
// lower_bound("t").
std::set<std::string> get_words_beginning_s(const std::multiset<std::string>& words) {
    const auto first_s = words.lower_bound("s"); // first element not less than "s"
    const auto first_t = words.lower_bound("t"); // first element not less than "t"
    return std::set<std::string>(first_s, first_t);
}
// Demo driver: builds a small multiset of sample words and prints, one per
// line, every word returned by get_words_beginning_s.
int main() {
    const std::multiset<std::string> words{
        "foo", "slayer", "bar", "sepultura", "tesseract", "skinny puppy", "yello"
    };

    const auto s_words = get_words_beginning_s(words);
    for (const auto& entry : s_words) {
        std::cout << entry << '\n';
    }
}
输出:
sepultura
skinny puppy
slayer