需要帮助修复代码，以便正确输出文件中不重复单词的数量
Need help fixing code to output correct unique word count from a file
我正在尝试统计文本文件中不重复单词的数量。但不知为何，我得到的数字总是不对。不过，我的普通单词计数结果是正确的。
我的字符串数组 wordArr
包含文件中的所有单词。
我尝试将每个单词分配给另一个数组,发现它不是唯一的,然后我遍历我已经通过的单词列表,看看它是否与当前显示的单词匹配。如果单词匹配,我将 oldWord
设置为 true,并且该单词不计入我的 unique
计数。
//New portion
// Program from the question: opens the file named by argv[1], splits its
// contents into words, and prints the total and the unique word counts.
// The NOTE(review) comments below mark spots where the code as written does
// not do what the surrounding description says it should.
int main(int argc, char *argv[]) {
//File Paths
ifstream fp;
fp.open(argv[1]);
// Report the failure and exit when the stream is in a failed state after open().
if (fp.fail()) {
cout << "Error No file" << endl;
return 0;
}
// wordArr collects every word found; wordCount is the number of slots filled.
string wordArr[10000];
string words;
// NOTE(review): temp is only ever appended to below and is never cleared,
// so it keeps accumulating characters across tokens.
string temp;
int wordCount = 0;
// Extract whitespace-delimited tokens one at a time.
while (fp >> words) {
// Stays 0 unless the token contains a non-alphabetic character.
int newWord = 0;
// NOTE(review): i is declared without an initializer, so its starting value
// is indeterminate.
for (int i; i < words.length(); i++) {
if (isalpha(words[i])) {
} else {
// Non-alphabetic character at index i: store everything before it as a word.
// The scan then continues over the same token (there is no break).
wordArr[wordCount++] = words.substr(0, i);
//wordCount++;
newWord = 1;
// NOTE(review): the character literal on the next line looks garbled
// (possibly '\0' originally) -- confirm against the original post. The
// expression also adds 1 to the character words[i]; it does not read
// words[i + 1].
if(words[i] + 1 != '[=10=]') {
// Append the remainder of the token (after index i) to temp.
for (int j = i + 1; j < words.length(); j++) {
temp = temp +words[j];
}
wordArr[wordCount++] = temp;
//wordCount++;
}
}
}
// Token consisted of alphabetic characters only: store it unchanged.
if (newWord == 0) {
wordArr[wordCount] = words;
wordCount++;
}
}
cout << "Number of words found was: " << wordCount << endl;
//New portion
// makes all lower
for(int k=0; k<wordCount;k++){ //need to find size of array
for(int l=0; l<wordArr[k].length(); l++){
// NOTE(review): the value returned by tolower() is discarded, so this loop
// leaves wordArr unchanged.
tolower(wordArr[k].at(l));
}
}
//unique count
// NOTE(review): nothing in this function ever assigns to tempArr, so every
// element stays a default-constructed (empty) string.
string tempArr[10000];
int unique=0;
// NOTE(review): oldWord is initialized once here and never set back to 0
// inside the loop below.
int oldWord=0;
for(int m=0; m<wordCount;m++ ) {
for (int n = 0; n < wordCount; n++) {
if (wordArr[m] == tempArr[n]) {
oldWord = 1;
}
}
if(oldWord==0){
// NOTE(review): n is declared by the inner for statement and is not in
// scope here; the assignment also copies from tempArr into wordArr rather
// than the other way round.
wordArr[m] = tempArr[n];
unique++;
}
}
cout << "Unique word count is: " << unique << endl;
}
我希望从我的测试用例中得到 52 个独特的单词,但最终只得到 37 个。
测试用例:
Cryptography is both the practice and study of the techniques used to
communicate and/or store information or data privately and
securely, without being intercepted by third parties. This can include
processes such as encryption, hashing, and steganography. Until the
modern era, cryptography almost exclusively referred to encryption, but
now cryptography is a broad field with applications in many critical
areas of our lives.
您需要在外层循环的每次迭代中重置 oldWord 变量:
//unique count
// Excerpt from the first answer: the unique-count section of the question's
// main(), with a reset of oldWord added at the end of each outer iteration.
// NOTE(review): nothing in this excerpt assigns to tempArr, so its elements
// stay default-constructed (empty) strings.
string tempArr[10000];
int unique=0;
int oldWord=0;
for(int m=0; m<wordCount;m++ ) {
// Set oldWord when wordArr[m] equals any of the first wordCount tempArr entries.
for (int n = 0; n < wordCount; n++) {
if (wordArr[m] == tempArr[n]) {
oldWord = 1;
}
}
if(oldWord==0){
// NOTE(review): n is declared by the inner for statement and is not in
// scope here; the assignment also copies from tempArr into wordArr rather
// than the other way round.
wordArr[m] = tempArr[n];
unique++;
}
// reset the oldWord variable here
oldWord=0;
}
cout << "Unique word count is: " << unique << endl;
// Presumably closes the enclosing main() from the question; its opening
// brace is not part of this excerpt.
}
您的解析代码在逻辑上是错误的(事实上,它甚至无法通过编译)。按非字母字符拆分单词的方式、查找并记录重复单词的方式,甚至将单词转换为小写的方式,都存在逻辑错误。
简而言之,整段代码充满了需要修复的错误,可以改成例如下面这样:
#include <iostream>
#include <fstream>
#include <string>
#include <ctype.h>
using namespace std;
// Upper bound on how many words the fixed-size word buffers can hold.
static const int kMaxWords = 10000;

// Returns true when c is an alphabetic character.
// The cast matters: passing a negative char value to isalpha() is undefined.
static bool isLetter(char c) {
    return isalpha(static_cast<unsigned char>(c)) != 0;
}

// Splits token into maximal runs of letters and appends each run, lower-cased,
// to out starting at index count. Never writes at or beyond index capacity.
// Returns the updated number of words stored in out.
static int appendWords(const std::string &token, std::string out[], int count, int capacity) {
    const std::string::size_type len = token.length();
    std::string::size_type pos = 0;
    while (pos < len && count < capacity) {
        // Skip separators first so that a leading or repeated non-letter
        // (e.g. "(foo" or "--") never produces an empty "word".
        while (pos < len && !isLetter(token[pos])) {
            ++pos;
        }
        const std::string::size_type start = pos;
        while (pos < len && isLetter(token[pos])) {
            ++pos;
        }
        if (pos > start) {
            std::string word = token.substr(start, pos - start);
            for (std::string::size_type k = 0; k < word.length(); ++k) {
                word[k] = static_cast<char>(tolower(static_cast<unsigned char>(word[k])));
            }
            out[count++] = word;
        }
    }
    return count;
}

// Returns the number of distinct strings among words[0..count).
// seen is caller-provided scratch space with room for at least count entries;
// on return its first <result> elements hold the distinct words in order of
// first appearance.
static int countUnique(const std::string words[], int count, std::string seen[]) {
    int unique = 0;
    for (int m = 0; m < count; ++m) {
        bool alreadySeen = false;
        // Only the entries recorded so far can match; stop at the first hit.
        for (int n = 0; n < unique; ++n) {
            if (words[m] == seen[n]) {
                alreadySeen = true;
                break;
            }
        }
        if (!alreadySeen) {
            seen[unique++] = words[m];
        }
    }
    return unique;
}

// Reads the text file named by argv[1], splits it into case-insensitive words
// (maximal runs of letters), and prints the total number of words followed by
// the number of distinct words. At most kMaxWords words are processed.
// Returns 0 on success and 1 when no readable file was given.
int main(int argc, char *argv[]) {
    // argv[1] is a null pointer when no argument is given; never hand that
    // to the stream.
    if (argc < 2) {
        std::cout << "Error No file" << std::endl;
        return 1;
    }

    //File Paths
    std::ifstream fp(argv[1]);
    if (!fp.is_open()) {
        std::cout << "Error No file" << std::endl;
        return 1;
    }

    // Static storage keeps these large buffers off the stack.
    static std::string wordArr[kMaxWords];
    static std::string tempArr[kMaxWords];

    int wordCount = 0;
    std::string token;
    // Check the capacity before extracting so no token is read and then dropped.
    while (wordCount < kMaxWords && (fp >> token)) {
        wordCount = appendWords(token, wordArr, wordCount, kMaxWords);
    }
    std::cout << "Number of words found was: " << wordCount << '\n';

    //unique count
    const int unique = countUnique(wordArr, wordCount, tempArr);
    std::cout << "Unique word count is: " << unique << '\n';
    return 0;
}
Now the code works as expected:
Number of words found was: 64
Unique word count is: 52
我正在尝试统计文本文件中不重复单词的数量。但不知为何，我得到的数字总是不对。不过，我的普通单词计数结果是正确的。
我的字符串数组 wordArr
包含文件中的所有单词。
我尝试将每个单词分配给另一个数组,发现它不是唯一的,然后我遍历我已经通过的单词列表,看看它是否与当前显示的单词匹配。如果单词匹配,我将 oldWord
设置为 true,并且该单词不计入我的 unique
计数。
//New portion
// Program from the question: opens the file named by argv[1], splits its
// contents into words, and prints the total and the unique word counts.
// The NOTE(review) comments below mark spots where the code as written does
// not do what the surrounding description says it should.
int main(int argc, char *argv[]) {
//File Paths
ifstream fp;
fp.open(argv[1]);
// Report the failure and exit when the stream is in a failed state after open().
if (fp.fail()) {
cout << "Error No file" << endl;
return 0;
}
// wordArr collects every word found; wordCount is the number of slots filled.
string wordArr[10000];
string words;
// NOTE(review): temp is only ever appended to below and is never cleared,
// so it keeps accumulating characters across tokens.
string temp;
int wordCount = 0;
// Extract whitespace-delimited tokens one at a time.
while (fp >> words) {
// Stays 0 unless the token contains a non-alphabetic character.
int newWord = 0;
// NOTE(review): i is declared without an initializer, so its starting value
// is indeterminate.
for (int i; i < words.length(); i++) {
if (isalpha(words[i])) {
} else {
// Non-alphabetic character at index i: store everything before it as a word.
// The scan then continues over the same token (there is no break).
wordArr[wordCount++] = words.substr(0, i);
//wordCount++;
newWord = 1;
// NOTE(review): the character literal on the next line looks garbled
// (possibly '\0' originally) -- confirm against the original post. The
// expression also adds 1 to the character words[i]; it does not read
// words[i + 1].
if(words[i] + 1 != '[=10=]') {
// Append the remainder of the token (after index i) to temp.
for (int j = i + 1; j < words.length(); j++) {
temp = temp +words[j];
}
wordArr[wordCount++] = temp;
//wordCount++;
}
}
}
// Token consisted of alphabetic characters only: store it unchanged.
if (newWord == 0) {
wordArr[wordCount] = words;
wordCount++;
}
}
cout << "Number of words found was: " << wordCount << endl;
//New portion
// makes all lower
for(int k=0; k<wordCount;k++){ //need to find size of array
for(int l=0; l<wordArr[k].length(); l++){
// NOTE(review): the value returned by tolower() is discarded, so this loop
// leaves wordArr unchanged.
tolower(wordArr[k].at(l));
}
}
//unique count
// NOTE(review): nothing in this function ever assigns to tempArr, so every
// element stays a default-constructed (empty) string.
string tempArr[10000];
int unique=0;
// NOTE(review): oldWord is initialized once here and never set back to 0
// inside the loop below.
int oldWord=0;
for(int m=0; m<wordCount;m++ ) {
for (int n = 0; n < wordCount; n++) {
if (wordArr[m] == tempArr[n]) {
oldWord = 1;
}
}
if(oldWord==0){
// NOTE(review): n is declared by the inner for statement and is not in
// scope here; the assignment also copies from tempArr into wordArr rather
// than the other way round.
wordArr[m] = tempArr[n];
unique++;
}
}
cout << "Unique word count is: " << unique << endl;
}
我希望从我的测试用例中得到 52 个独特的单词,但最终只得到 37 个。
测试用例:
Cryptography is both the practice and study of the techniques used to communicate and/or store information or data privately and securely, without being intercepted by third parties. This can include processes such as encryption, hashing, and steganography. Until the modern era, cryptography almost exclusively referred to encryption, but now cryptography is a broad field with applications in many critical areas of our lives.
您需要在外层循环的每次迭代中重置 oldWord 变量:
//unique count
// Excerpt from the first answer: the unique-count section of the question's
// main(), with a reset of oldWord added at the end of each outer iteration.
// NOTE(review): nothing in this excerpt assigns to tempArr, so its elements
// stay default-constructed (empty) strings.
string tempArr[10000];
int unique=0;
int oldWord=0;
for(int m=0; m<wordCount;m++ ) {
// Set oldWord when wordArr[m] equals any of the first wordCount tempArr entries.
for (int n = 0; n < wordCount; n++) {
if (wordArr[m] == tempArr[n]) {
oldWord = 1;
}
}
if(oldWord==0){
// NOTE(review): n is declared by the inner for statement and is not in
// scope here; the assignment also copies from tempArr into wordArr rather
// than the other way round.
wordArr[m] = tempArr[n];
unique++;
}
// reset the oldWord variable here
oldWord=0;
}
cout << "Unique word count is: " << unique << endl;
// Presumably closes the enclosing main() from the question; its opening
// brace is not part of this excerpt.
}
您的解析代码在逻辑上是错误的(事实上,它甚至无法通过编译)。按非字母字符拆分单词的方式、查找并记录重复单词的方式,甚至将单词转换为小写的方式,都存在逻辑错误。
简而言之,整段代码充满了需要修复的错误,可以改成例如下面这样:
#include <iostream>
#include <fstream>
#include <string>
#include <ctype.h>
using namespace std;
// Upper bound on how many words the fixed-size word buffers can hold.
static const int kMaxWords = 10000;

// Returns true when c is an alphabetic character.
// The cast matters: passing a negative char value to isalpha() is undefined.
static bool isLetter(char c) {
    return isalpha(static_cast<unsigned char>(c)) != 0;
}

// Splits token into maximal runs of letters and appends each run, lower-cased,
// to out starting at index count. Never writes at or beyond index capacity.
// Returns the updated number of words stored in out.
static int appendWords(const std::string &token, std::string out[], int count, int capacity) {
    const std::string::size_type len = token.length();
    std::string::size_type pos = 0;
    while (pos < len && count < capacity) {
        // Skip separators first so that a leading or repeated non-letter
        // (e.g. "(foo" or "--") never produces an empty "word".
        while (pos < len && !isLetter(token[pos])) {
            ++pos;
        }
        const std::string::size_type start = pos;
        while (pos < len && isLetter(token[pos])) {
            ++pos;
        }
        if (pos > start) {
            std::string word = token.substr(start, pos - start);
            for (std::string::size_type k = 0; k < word.length(); ++k) {
                word[k] = static_cast<char>(tolower(static_cast<unsigned char>(word[k])));
            }
            out[count++] = word;
        }
    }
    return count;
}

// Returns the number of distinct strings among words[0..count).
// seen is caller-provided scratch space with room for at least count entries;
// on return its first <result> elements hold the distinct words in order of
// first appearance.
static int countUnique(const std::string words[], int count, std::string seen[]) {
    int unique = 0;
    for (int m = 0; m < count; ++m) {
        bool alreadySeen = false;
        // Only the entries recorded so far can match; stop at the first hit.
        for (int n = 0; n < unique; ++n) {
            if (words[m] == seen[n]) {
                alreadySeen = true;
                break;
            }
        }
        if (!alreadySeen) {
            seen[unique++] = words[m];
        }
    }
    return unique;
}

// Reads the text file named by argv[1], splits it into case-insensitive words
// (maximal runs of letters), and prints the total number of words followed by
// the number of distinct words. At most kMaxWords words are processed.
// Returns 0 on success and 1 when no readable file was given.
int main(int argc, char *argv[]) {
    // argv[1] is a null pointer when no argument is given; never hand that
    // to the stream.
    if (argc < 2) {
        std::cout << "Error No file" << std::endl;
        return 1;
    }

    //File Paths
    std::ifstream fp(argv[1]);
    if (!fp.is_open()) {
        std::cout << "Error No file" << std::endl;
        return 1;
    }

    // Static storage keeps these large buffers off the stack.
    static std::string wordArr[kMaxWords];
    static std::string tempArr[kMaxWords];

    int wordCount = 0;
    std::string token;
    // Check the capacity before extracting so no token is read and then dropped.
    while (wordCount < kMaxWords && (fp >> token)) {
        wordCount = appendWords(token, wordArr, wordCount, kMaxWords);
    }
    std::cout << "Number of words found was: " << wordCount << '\n';

    //unique count
    const int unique = countUnique(wordArr, wordCount, tempArr);
    std::cout << "Unique word count is: " << unique << '\n';
    return 0;
}
Now the code works as expected:
Number of words found was: 64 Unique word count is: 52