使用 set/multiset 查找以 's' 开头的单词

Find words beginning with 's' using set/multiset

我在这段代码中遇到的问题源于 get_words_beginning_s 函数的最后一段代码。

    /*
Name:   xx
Date:   xx

Purpose:Read text from file, count number of words, unique words, word frequency, & number of words that begin with letter 's'
*/

#include <cctype>
#include <fstream>
#include <iostream>
#include <set>
#include <string>

using namespace std;

// Echoes the tokens read from `filename` to stdout and returns the words found,
// lowercased and with '.' and ',' removed.
multiset<string> display_and_load_words(string filename);
// Returns one copy of each distinct word contained in `words`.
set<string> get_unique_words(multiset<string>& words);
// Intended to return the distinct words in `words` that begin with the letter 's'.
set<string> get_words_beginning_s(multiset<string>& words);

// Entry point of the word counter: loads "dickens.txt", then reports the total
// word count, the distinct words, the number of occurrences of each distinct
// word, and the words that begin with 's'.
int main() {
    const string filename = "dickens.txt";

    cout << "The Word Counter program\n\n";

    // The loader echoes the raw file text itself, so only the label is printed here.
    cout << "FILE TEXT: ";
    auto words = display_and_load_words(filename);
    cout << "WORD COUNT: " << words.size() << endl << endl;

    const set<string> unique_words = get_unique_words(words);
    const set<string> words_beginning_s = get_words_beginning_s(words);

    cout << unique_words.size() << " UNIQUE WORDS: ";
    for (const string& entry : unique_words) {
        cout << entry << ' ';
    }
    cout << endl << endl;

    // multiset::count gives the number of occurrences of each distinct word.
    cout << "COUNT PER WORD: ";
    for (const string& entry : unique_words) {
        cout << entry << '=' << words.count(entry) << ' ';
    }
    cout << endl << endl;

    cout << "WORDS THAT BEGIN WITH 'S': ";
    for (const string& entry : words_beginning_s) {
        cout << entry << ' ';
    }
    cout << endl << endl;
}

// Reads whitespace-separated tokens from `filename`, echoes each raw token to
// stdout followed by a space, and returns the normalized words.
//
// Normalization: '.' and ',' are dropped and every remaining character is
// lowercased. Tokens that normalize to nothing (for example a lone ",") are
// skipped so they are not counted as words.
//
// If the file cannot be opened, a message is written to stderr and an empty
// multiset is returned.
multiset<string> display_and_load_words(string filename) {
    multiset<string> words;
    ifstream infile(filename);

    if (!infile) {
        cerr << "Could not open file: " << filename << '\n';
        return words;
    }

    string word;
    while (infile >> word) {
        cout << word << ' ';

        string new_word;
        new_word.reserve(word.size());
        for (char c : word) {
            if (c == '.' || c == ',') {
                continue;  // remove punctuation
            }
            // <cctype> functions require a value representable as unsigned char;
            // passing a negative char is undefined behaviour. tolower leaves
            // characters that are not uppercase letters unchanged.
            new_word += static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
        }

        if (!new_word.empty()) {
            words.insert(new_word);
        }
    }
    cout << endl << endl;

    // The stream closes itself when `infile` goes out of scope.
    return words;
}

// Collapses `words` into a set holding exactly one copy of each distinct word.
set<string> get_unique_words(multiset<string>& words) {
    // Constructing a set from the range discards duplicates.
    return set<string>(words.begin(), words.end());
}


// Returns the distinct words from `words` whose first character is a lowercase
// 's'. Empty strings are ignored.
set<string> get_words_beginning_s(multiset<string>& words) {
    set<string> words_beginning_s;

    for (const string& word : words) {
        // Index into the word itself, not into the set: std::set has no
        // operator[], but each element is a std::string whose first character
        // can be inspected directly.
        if (!word.empty() && word.front() == 's') {
            words_beginning_s.insert(word);  // the set discards duplicates
        }
    }

    return words_beginning_s;
}

使用 set/multiset 时,如何检查其中每个单词自身的首字母,而不是对整个容器按位置取值?以文本文件中的字符串 "John goes to the store" 为例:对于普通的 string,通常可以用一个简单的 for 循环配合下标来比较字符并统计出现次数(类似下面这样) -

for (int i = 0; i < words_beginning_s.length(); ++i) {
            if (words_beginning_s[0] == 's') {
            ++s_word;
            }

但这种写法在 set/multiset 上行不通(set 不支持下标访问)。我对这方面还很陌生,如果这个问题显得很初级,还请见谅。

您可以使用 multiset 的成员函数 lower_bound 获取界定某个范围的一对迭代器,然后用该范围构造一个 set。

示例:

#include <iostream>
#include <set>
#include <string>

// Returns the distinct words from `words` that sort at or after "s" and before
// "t", i.e. the words beginning with a lowercase 's'.
std::set<std::string> get_words_beginning_s(const std::multiset<std::string>& words) {
    // The multiset is ordered, so the matching words form one contiguous range.
    const auto first_s_word = words.lower_bound("s");
    const auto first_t_word = words.lower_bound("t");
    return std::set<std::string>(first_s_word, first_t_word);
}

// Demo: builds a small multiset of names and prints, one per line, those that
// begin with 's'.
int main() {
    const std::multiset<std::string> words{
        "foo", "slayer", "bar", "sepultura", "tesseract", "skinny puppy", "yello"
    };

    const std::set<std::string> s_words = get_words_beginning_s(words);
    for (const auto& entry : s_words) {
        std::cout << entry << '\n';
    }
}

输出:

sepultura
skinny puppy
slayer