C 文本格式化程序的字符计数不正确
C text formatter characters count is off
我有一个任务:根据规则格式化文本,并最终打印出字符串中的字符数(包括 \n 和空格,但不包括字符串末尾的 \0)。
基本上,一个有效的输入以一个点结束,但我认为在最后一个点之后还残留了几个空白字符。
我尝试了几种方法,例如从字符串末尾开始循环,用 \0 替换空格。
虽然似乎没有任何效果...
编辑 要求是:
- 将双点 (..) 转换为新行
- 删除多个space只留下一个,
- 确保逗号或点前没有 space
- 确保逗号或点后有单个 space。
- 不更改撇号之间的原始内容。
- 并验证大写字母只出现在正确的位置(新 line\paragraph)。
我们必须在 main 函数中完成所有代码(不幸的是),而我的代码经常会把字符数多算 1-2 个(可能是在最后一个点之后多出了额外的空格)。
这是我的代码无法正确计算字符数的输入示例:
the LANGUAGE "C" is a procedural programming language .It was initially developed by "Dennis Ritchie".. the Main feAtures of "C" language include low-level access to memory, simple set of keywords, and clean style .
/*
 * Text cleaner from the question: reads one line into `tin`, normalizes it in
 * place with a sequence of strchr-driven passes, builds the case-corrected
 * result in `tout`, and prints it with character/word/sentence/paragraph
 * counts. Repeats while the user answers 'y'.
 *
 * The NOTE(review) comments mark behaviour that explains the wrong character
 * count discussed in the question; the logic itself is left as the asker
 * wrote it.
 */
int main() {
    char ans;
    printf("*************** Welcome to the text cleaner ***************\n\n");
    do
    {
        int length, i, j = 0;
        int word, sentence, para, space;
        char tin[601], tout[601], * dex, * pos;
        printf("\nPlease enter text:\n");
        gets_s(tin, 600);
        length = strlen(tin);

        // Pass 1: turn the second dot of every ".." into a newline.
        dex = strchr(tin, '.');
        while (dex != NULL)
        {
            if (tin[dex - tin + 1] == '.') {
                tin[dex - tin + 1] = '\n';
            }
            dex = strchr(dex + 1, '.');
        }
        length = strlen(tin);

        // Pass 2: collapse runs of spaces into a single space. Each removal
        // shifts the tail left by one without moving the terminator, so j
        // counts the removals and the string is re-terminated afterwards.
        dex = strchr(tin, ' ');
        while (dex != NULL)
        {
            while (dex != NULL && tin[dex - tin + 1] == ' ')
            {
                for (i = dex - tin + 1; i < strlen(tin); i++)
                {
                    tin[i - 1] = tin[i];
                }
                dex = strchr(dex, ' ');
                j++;
            }
            dex = strchr(dex + 1, ' ');
        }
        tin[length - j] = '\0';
        j = 0;

        // Pass 3: delete the space at the beginning of a new row.
        // NOTE(review): the loop condition also tests for the space, so the
        // scan stops at the first newline that is not followed by a space.
        dex = strchr(tin, '\n');
        while (dex != NULL && tin[dex-tin+1] == ' ')
        {
            for (i = dex - tin + 1;i < strlen(tin);i++) {
                tin[i] = tin[i + 1];
            }
            dex = strchr(dex + 1, '\n');
        }

        // Pass 4: delete the space before a comma.
        // NOTE(review): stops at the first comma without a preceding space,
        // and reads tin[-1] when the comma is the very first character.
        dex = strchr(tin, ',');
        while (dex != NULL && tin[dex - tin - 1] == ' ')
        {
            for (i = dex - tin - 1; i < strlen(tin); i++)
            {
                tin[i] = tin[i+1];
            }
            dex = strchr(dex+1, ',');
        }

        // Pass 5: delete the space before a dot (same caveats as pass 4).
        dex = strchr(tin, '.');
        while (dex != NULL && tin[dex - tin - 1] == ' ')
        {
            for (i = dex - tin - 1; i < strlen(tin); i++)
            {
                tin[i] = tin[i+1];
            }
            dex = strchr(dex + 1, '.');
        }

        // Pass 6: add a space after a comma by shifting the tail right.
        // NOTE(review): when the comma is followed by '\n' nothing in the
        // loop body advances dex, so the loop never terminates.
        dex = strchr(tin, ',');
        while (dex != NULL && tin[dex - tin + 1] != ' ')
        {
            if (tin[dex - tin + 1] != '\n')
            {
                tin[strlen(tin) + 1] = '\0';
                for (i = strlen(tin); i > dex - tin; i--)
                {
                    if (i == dex - tin + 1)
                    {
                        tin[i] = ' ';
                    }
                    else
                    {
                        tin[i] = tin[i - 1];
                    }
                }
                dex = strchr(dex + 1, ',');
            }
        }

        // Pass 7: add a space after a dot.
        // NOTE(review): when the dot is the last character, the byte after it
        // is the terminator, which is neither ' ' nor '\n', so a space is
        // appended after the final dot. Nothing below removes it, which makes
        // the reported character count one too high.
        dex = strchr(tin, '.');
        while (dex != NULL && tin[dex - tin + 1] != ' ')
        {
            tin[strlen(tin) + 1] = '\0';
            if (tin[dex - tin + 1] == '\n')
            {
                dex = strchr(dex + 1, '.');
            }
            else
            {
                for (i = strlen(tin); i > dex - tin; i--)
                {
                    if (i == dex - tin + 1)
                    {
                        tin[i] = ' ';
                    }
                    else
                    {
                        tin[i] = tin[i - 1];
                    }
                }
                dex = strchr(dex + 1, '.');
            }
        }

        // Copy the text and lower-case the copy.
        strcpy_s(tout, sizeof(tout), tin);
        _strlwr_s(tout,sizeof(tout));

        // Restore the original characters between each pair of double quotes
        // so quoted text keeps its case.
        // NOTE(review): an opening quote without a closing quote leaves pos
        // NULL, and pos is then used in pointer arithmetic.
        dex = strchr(tin, '"');
        if (dex != NULL) {
            pos = strchr(dex + 1, '"');
            while (dex != NULL)
            {
                for (i = dex - tin; i < pos - tin; i++) {
                    tout[i] = tin[i];
                }
                dex = strchr(pos + 1, '"');
                if (dex)
                {
                    pos = strchr(dex + 1, '"');
                }
            }
        }

        // Upper-case the source, then copy the character two positions after
        // every dot (the one following ". ") so sentences start with a capital.
        _strupr_s(tin, sizeof(tin));
        dex = strchr(tout, '.');
        pos = strchr(tin, '.');
        while (dex != NULL && pos != NULL)
        {
            tout[dex - tout + 2] = tin[pos - tin + 2];
            dex = strchr(dex + 1, '.');
            pos = strchr(pos + 1, '.');
        }

        // Delete the space before dots in the result.
        dex = strchr(tout, '.');
        while (dex != NULL)
        {
            if (tout[dex - tout - 1] == ' ')
            {
                for (i = dex - tout - 1; i < strlen(tout); i++)
                {
                    tout[i] = tout[i+1];
                }
            }
            dex = strchr(dex + 1, '.');
        }

        // Handle a single space at the beginning of the string.
        if (tout[0] == ' ') {
            for (i = 0 ;i < strlen(tout); i++) {
                tout[i] = tout[i + 1];
            }
        }

        // The first letter is always a capital.
        if (tout[0] >= 'a' && tout[0] <= 'z') {
            tout[0] -= 32;
        }

        word = 0;
        sentence = 0;
        para = 1;
        space = 0;
        length = strlen(tout);

        // Words are counted as spaces that are not followed by another space.
        for (i = 0; tout[i] != '\0';i++)
        {
            if (tout[i] == ' ' && tout[i + 1] != ' ')
                word++;
        }

        // One sentence per dot.
        dex = strchr(tout, '.');
        while (dex != NULL)
        {
            sentence++;
            dex = strchr(dex + 1, '.');
        }

        // One extra paragraph (and word) per newline.
        dex = strchr(tout, '\n');
        while (dex != NULL)
        {
            space++;
            para++;
            word++;
            dex = strchr(dex + 1, '\n');
        }
        //dex = strchr(tout, '-');
        //while (dex != NULL)
        //{
        // word++;
        // dex = strchr(dex + 1, '-');
        //}
        printf_s("\nText after cleaning:\n------------------------------------------------------------------------------------------------\n");
        printf_s("%s\n\n", tout);
        printf_s("characters: %d | words: %d | sentences: %d | paragraphs: %d\n------------------------------------------------------------------------------------------------\n",length, word, sentence, para);
        printf_s("\nIf you want to clean another string press (y): ");
        scanf_s(" %c", &ans, 1);
        if (ans == 'y')
        {
            // consume the rest of the answer line before the next prompt
            gets_s(tin, 600);
        }
    } while (ans =='y');
    return 0;
}
这是一个可能的解决方案。
希望这会表明您不需要所有的重复。
我只用给定的示例对其进行了测试,很可能仍然存在它可能会中断的边缘情况。您可能想要分配 buffer
而不是使用特定值,但您可能需要检查是否存在可能导致扩展的输入。
关于大写在正确的地方,除了一行之外,原文没有任何范围,所以没有段落的概念。因此,我在句子开头使用了大写字母。
注意: OP 没有指定正确的输出是什么,因为 post 的标题是“错误的字符数”,所以这是基于最佳猜测关于要求和 OP 代码的一些见解(如前所述,可能没有产生正确的结果)。
我不认为这里的重点是修复 OP 的错误,而是说明更接近或实现解决方案的替代方法。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
// Sample text from the question; main() below cleans it.
char original[] = "the LANGUAGE \"C\" is a procedural programming language .It was initially developed by \"Dennis Ritchie\".. the Main feAtures of \"C\" language include low-level access to memory, simple set of keywords, and clean style .";

/*
 * Cleans `original` in a single pass driven by a few state flags, then prints
 * the input, the cleaned text and the character/word/sentence/paragraph
 * counts.
 *
 * Outside double quotes: spaces and commas are held back as pending flags, a
 * pending comma is written as ", ", ".." is written as ".\n" and starts a new
 * paragraph, a pending space is dropped at the start of a paragraph, the
 * first character of a sentence or paragraph is upper-cased and every other
 * character is lower-cased. Text between double quotes is copied unchanged.
 */
int main() {
    // The output is written to its own buffer because it can be longer than
    // the input. Each input character yields at most two output characters
    // (", " or ".\n"); the extra bytes hold the final '\n' and terminator.
    char buffer[2 * sizeof original + 2];
    const char *src = original;  // read cursor
    char *dest = buffer;         // write cursor
    int quoted = 0;              // inside a "..." section
    int sentence = 0;            // a period was seen; a sentence starts next
    int periods = 0;             // periods seen but not yet written
    int space = 0;               // a space is pending
    int paragraph = 1;           // the next character starts a paragraph
    int comma = 0;               // a comma is pending
    int letters = 0;             // lower-case letters seen in the current word
    int words = 0;
    int sentences = 0;
    int paragraphs = 0;

    for ( ; *src ; src++) {
        if (quoted) {
            if (*src == '"') {
                quoted = 0;
            }
            // NOTE(review): only 'a'..'z' count as letters, so an upper-case
            // letter ends the current word; the word total is approximate.
            if (*src >= 'a' && *src <= 'z') {
                letters++;
            } else if (letters) {
                words++;
                letters = 0;
            }
            *dest++ = *src;
        } else {
            switch (*src) {
            case '"':
                quoted = 1;
                break;
            case ',':
                comma = 1;
                continue;
            case ' ':
                if (letters) {
                    words++;
                    letters = 0;
                }
                space = 1;
                continue;
            case '.':
                if (++periods == 2) {
                    *dest++ = '.';
                    *dest++ = '\n';
                    periods = 0;
                    paragraph = 1;
                } else {
                    sentence = 1;
                }
                continue;
            default:
                break;
            }

            // Flush whatever punctuation/spacing is pending before the
            // current character is written.
            if (comma) {
                *dest++ = ',';
                *dest++ = ' ';
                comma = space = 0;
            }
            if (periods) {
                *dest++ = '.';
                periods = 0;
            }
            if (space) {
                if (!paragraph) {
                    *dest++ = ' ';
                }
                space = 0;
            }

            if (*src >= 'a' && *src <= 'z') {
                letters++;
            } else if (letters) {
                words++;
                letters = 0;
            }

            // <ctype.h> functions need an unsigned char value.
            *dest++ = (sentence || paragraph)
                ? toupper((unsigned char)*src)
                : tolower((unsigned char)*src);

            if (sentence || paragraph) {
                if (letters) {
                    words++;
                }
                letters = 0;
            }
            if (sentence) {
                sentences++;
            }
            if (paragraph) {
                paragraphs++;
            }
            sentence = paragraph = 0;
        }
    }

    // Account for state still pending at the end of the input.
    if (sentence) {
        sentences++;
    }
    if (paragraph) {
        paragraphs++;
    }
    if (periods) {
        *dest++ = '.';
    }
    *dest++ = '\n';
    *dest = '\0';

    printf("\nInput Chars=%d\n\n\"%s\"\n", (int)strlen(original), original);
    printf("\nOutput Chars=%d, Words=%d, Sentences=%d, Paragraphs=%d\n\n\"%s\"\n", (int)strlen(buffer), words, sentences, paragraphs, buffer);
    return 0;
}
这会产生:
Input Chars=259
"the LANGUAGE "C" is a procedural programming language .It was initially developed by "Dennis Ritchie".. the Main feAtures of "C" language include low-level access to memory, simple set of keywords, and clean style ."
Output Chars=214, Words=34, Sentences=3, Paragraphs=2
"The language "C" is a procedural programming language. It was initially developed by "Dennis Ritchie".
The main features of "C" language include low-level access to memory, simple set of keywords, and clean style.
"
正如我在热门评论中提到的,这可以在一个带有状态变量的循环中完成。
一些假设:
- 每当我们看到 `..`(转换为换行符)时,它就会开始一个新段落
- 你所谓的“apostrophe”,我称之为双引号(因为这是唯一有意义的东西)。
- 引号内没有任何内容被转换
- 引用[自己]被复制过来(即没有被剥离)
不幸的是,我不得不完全重构代码。代码带有注释。我知道您必须仅使用 main。额外的函数只是为了调试,所以它们“不算数”:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
int opt_d; // debug
int opt_quo; // preserve quote

// dbgprt(fmt, ...): printf-style trace, printed only when built with DEBUG
// and opt_d is set; otherwise it expands to an empty statement.
#if DEBUG
#define dbgprt(...) \
    do { \
        if (opt_d) \
            printf(__VA_ARGS__); \
    } while (0)
#else
#define dbgprt(...) do { } while (0)
#endif

// COPYX(c): append c at the caller's write cursor `dst` and trace it.
#define COPYX(_chr) \
    do { \
        *dst++ = (_chr); \
        dbgprt("COPY %2.2X/%s\n",(_chr),showchr(_chr)); \
    } while (0)

// COPY: append the caller's current character `chr`.
#define COPY \
    COPYX(chr)

// WHITEOUT: if a space is pending (`white`), emit one space, clear the flag
// and count a word. The `break` only leaves the macro's own do/while.
#define WHITEOUT \
    do { \
        if (! white) \
            break; \
        COPYX(' '); \
        white = 0; \
        ctr_word += 1; \
    } while (0)
/*
 * Returns a printable form of chr: the character itself for 0x20..0x7E,
 * otherwise its low byte as "{XX}" in hex.
 *
 * The result lives in a static buffer that the next call overwrites.
 */
const char *
showchr(int chr)
{
    static char buf[10];

    if ((chr >= 0x20) && (chr <= 0x7E))
        snprintf(buf,sizeof(buf),"%c",chr);
    else {
        // mask to one byte: a negative value (a sign-extended char) would
        // otherwise print eight hex digits and not fit in buf
        snprintf(buf,sizeof(buf),"{%2.2X}",(unsigned int) chr & 0xFFu);
    }

    return buf;
}
/*
 * Prints "<who>: '<text>'" followed by a newline, with every character of
 * buf rendered through showchr() so non-printable bytes are visible.
 */
void
showbuf(const char *buf,const char *who)
{
    const char *sep = "'";

    printf("%s: %s",who,sep);

    // walk the text as unsigned bytes so showchr() never sees a negative value
    for (const unsigned char *cur = (const unsigned char *) buf; *cur != 0; ++cur)
        printf("%s",showchr(*cur));

    printf("%s\n",sep);
}
/*
 * Reads the input file line by line, cleans each line with a single-pass
 * state machine and prints the original line, the cleaned line and the
 * length/sentence/paragraph/word totals.
 *
 * Arguments: leading "-d" toggles opt_d, leading "-q" toggles opt_quo; the
 * first non-option argument is the input file ("inp.txt" when omitted).
 * Returns 0, or exits with status 1 when the file cannot be opened.
 */
int
main(int argc,char **argv)
{
    char inp[1000];
    // The cleaned text is built in its own buffer: a space is forced after
    // every period, so the output can be longer than the input (at most two
    // output characters per input character).
    char buf[2 * sizeof(inp)];
    char *src;
    char *dst;

    --argc;
    ++argv;

    for (; argc > 0; --argc, ++argv) {
        char *cp = *argv;
        if (*cp != '-')
            break;

        cp += 2;
        switch (cp[-1]) {
        case 'd':
            opt_d = ! opt_d;
            break;
        case 'q':
            opt_quo = ! opt_quo;
            break;
        }
    }

    // opt_quo starts at zero, so quotes are copied unless -q was given
    opt_quo = ! opt_quo;

    const char *file;
    if (argc > 0)
        file = *argv;
    else
        file = "inp.txt";

    FILE *xfsrc = fopen(file,"r");
    if (xfsrc == NULL) {
        perror(file);
        exit(1);
    }

    while (fgets(inp,sizeof(inp),xfsrc) != NULL) {
        src = inp;
        dst = buf;

        int quo = 0;            // 1=inside a quoted string
        int white = 0;          // 1=a space is pending
        int dot = 1;            // 1=at the start of a sentence

        int ctr_sent = 0;
        int ctr_word = 0;
        int ctr_para = 1;

        for (int chr = *src++; chr != 0; chr = *src++) {
            dbgprt("LOOP %2.2X/%s quo=%d white=%d dot=%d word=%d sent=%d para=%d\n",
                chr,showchr(chr),quo,white,dot,
                ctr_word,ctr_sent,ctr_para);

            // got a quote
            if (chr == '"') {
                if (! quo)
                    WHITEOUT;
                if (opt_quo)
                    COPY;
                quo = ! quo;
                continue;
            }

            // non-quote -- inside a quoted string everything is copied as is
            if (quo) {
                COPY;
                continue;
            }

            // got a dot
            if (chr == '.') {
                dot = 1;

                // double dot --> newline (new paragraph)
                if (*src == '.') {
                    COPYX('\n');
                    ++src;
                    ctr_para += 1;
                    continue;
                }

                COPY;
                // ensure a space after the dot
                white = 1;
                continue;
            }

            // from fgets, this can _only_ occur at the end of the buffer
            if (chr == '\n') {
                dot = 1;
                white = 1;
                COPY;
                break;
            }

            // accumulate/skip over whitespace
            if (chr == ' ') {
                white = 1;
                continue;
            }

            // output accumulated whitespace
            WHITEOUT;

            // got uppercase -- convert to lowercase if we're not at the start
            // of a sentence (ctype functions need an unsigned char value)
            if (isupper((unsigned char) chr)) {
                if (! dot)
                    chr = tolower((unsigned char) chr);
            }
            // got lowercase -- capitalize if we're just starting a sentence
            else if (islower((unsigned char) chr)) {
                if (dot)
                    chr = toupper((unsigned char) chr);
            }

            COPY;

            // count sentences
            if (dot)
                ctr_sent += 1;
            dot = 0;
        }

        *dst = 0;

        showbuf(inp,"inp");
        showbuf(buf,"buf");

#if 0
        if (dot)
            ctr_word += 1;
#endif

        printf("TOTAL: length=%zu sentences=%d paragraphs=%d words=%d\n",
            strlen(buf),ctr_sent,ctr_para,ctr_word);
    }

    fclose(xfsrc);

    return 0;
}
程序输出如下:
inp: 'the LANGUAGE "C" is a procedural programming language .It was initially developed by "Dennis Ritchie".. the Main feAtures of "C" language include low-level access to memory, simple set of keywords, and clean style .{0A}'
buf: 'The language "C" is a procedural programming language. It was initially developed by "Dennis Ritchie"{0A} The main features of "C" language include low-level access to memory, simple set of keywords, and clean style.{0A}'
TOTAL: length=214 sentences=3 paragraphs=2 words=31
更新:
Great, thank you! I've taken what you've done and implemented it on my code, basically, the professor restricted us to us only stdio.h and string.h libraries so I couldn't use any other functions... –
Nitai Dan
不客气!
很高兴您能够将我的代码合并到您的代码中。这是所有可能的学习场景中最好的。
我不确定我是否添加了足够的注释来使我的算法清晰,所以我对它进行了一些清理,并在考虑是否要 post 它。它在下面 posted。
根据您刚才所说的,我[仍然]不确定是否允许创建您自己的函数。正如我所说,debug 函数如果不更改算法,[可能] 没问题。
无论如何,这是更新后的代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
int opt_d; // debug
int opt_quo; // preserve quote
int opt_x; // show space as hex

// _dbgprt(fmt, ...): unconditional printf-style trace output.
#define _dbgprt(...) \
    printf(__VA_ARGS__)

// dbgprt(fmt, ...): trace output only when built with DEBUG and opt_d is set.
#if DEBUG
#define dbgprt(...) \
    do { \
        if (opt_d) \
            _dbgprt(__VA_ARGS__); \
    } while (0)
#else
#define dbgprt(...) do { } while (0)
#endif

// COPYX(c): append c at the caller's write cursor `dst`. The DEBUG build also
// sets the caller's `copy` flag and traces c when it differs from the
// character just read (src[-1]).
#if DEBUG
#define COPYX(_chr) \
    do { \
        *dst++ = (_chr); \
        copy = 1; \
        if ((_chr) != src[-1]) \
            _dbgprt(" COPY %s\n",_showchr(_chr,1)); \
    } while (0)
#else
#define COPYX(_chr) \
    do { \
        *dst++ = (_chr); \
    } while (0)
#endif

// COPY: append the caller's current character `chr`.
#define COPY \
    COPYX(chr)

// output accumulated white space and increment count of number of words
// (the `break` only leaves the macro's own do/while)
#define SPACEOUT \
    do { \
        if (! spc) \
            break; \
        spc = 0; \
        COPYX(' '); \
        ctr_word += 1; \
    } while (0)
/*
 * Returns a printable form of chr: the character itself when it lies in the
 * printable range, otherwise its low byte as "{XX}" in hex. With a non-zero
 * xflg the printable range starts at 0x21, so a space is shown as "{20}".
 *
 * The result lives in a static buffer that the next call overwrites.
 */
const char *
_showchr(int chr,int xflg)
{
    static char buf[10];
    int lo = xflg ? 0x21 : 0x20;

    if ((chr >= lo) && (chr <= 0x7E))
        snprintf(buf,sizeof(buf),"%c",chr);
    else {
        // mask to one byte: a negative value (a sign-extended char) would
        // otherwise print eight hex digits and not fit in buf
        snprintf(buf,sizeof(buf),"{%2.2X}",(unsigned int) chr & 0xFFu);
    }

    return buf;
}
/*
 * Printable form of chr, using the global opt_x setting to decide whether a
 * space is shown in hex. Returns _showchr()'s static buffer.
 */
const char *
showchr(int chr)
{
    const char *text = _showchr(chr,opt_x);

    return text;
}
/*
 * Prints "<who>: '<text>'" followed by a newline, with every character of
 * buf rendered through showchr() so non-printable bytes are visible.
 */
void
showbuf(const char *buf,const char *who)
{
    const char *sep = "'";

    printf("%s: %s",who,sep);

    // walk the text as unsigned bytes so showchr() never sees a negative value
    for (const unsigned char *cur = (const unsigned char *) buf; *cur != 0; ++cur)
        printf("%s",showchr(*cur));

    printf("%s\n",sep);
}
/*
 * Traces one named value on the current debug line.
 *
 * A name starting with an upper-case letter is always printed, followed by
 * its value. Any other name is treated as a flag: it is printed (name only)
 * when val is non-zero and skipped otherwise.
 */
void
dbgint(const char *sym,int val)
{
    // ctype functions need an unsigned char value
    int with_value = isupper((unsigned char) sym[0]);

    // a flag that is not set produces no output
    if (! with_value && ! val)
        return;

    _dbgprt(" %s",sym);
    if (with_value)
        _dbgprt("%d",val);
}
/*
 * Reads the input file line by line, cleans each line with a single-pass
 * state machine and prints the original line, the cleaned line and the
 * length/sentence/paragraph/word totals.
 *
 * Arguments: leading "-d" toggles opt_d, "-q" toggles opt_quo, "-x[N]" sets
 * opt_x to N (1 when N is omitted); the first non-option argument is the
 * input file ("inp.txt" when omitted).
 * Returns 0, or exits with status 1 when the file cannot be opened.
 */
int
main(int argc,char **argv)
{
    char inp[1000];
    // The cleaned text is built in its own buffer: a space is forced after
    // every period, so the output can be longer than the input (at most two
    // output characters per input character).
    char buf[2 * sizeof(inp)];
    char *src;
    char *dst;

    --argc;
    ++argv;

    for (; argc > 0; --argc, ++argv) {
        char *cp = *argv;
        if (*cp != '-')
            break;

        cp += 2;
        switch (cp[-1]) {
        case 'd':
            opt_d = ! opt_d;
            break;
        case 'q':
            opt_quo = ! opt_quo;
            break;
        case 'x':
            opt_x = (*cp != 0) ? atoi(cp) : 1;
            break;
        }
    }

    // opt_quo starts at zero, so quotes are copied unless -q was given
    opt_quo = ! opt_quo;

    const char *file;
    if (argc > 0)
        file = *argv;
    else
        file = "inp.txt";

    FILE *xfsrc = fopen(file,"r");
    if (xfsrc == NULL) {
        perror(file);
        exit(1);
    }

    while (fgets(inp,sizeof(inp),xfsrc) != NULL) {
        src = inp;
        dst = buf;

        // state variables
        int quo = 0; // 1=within quoted string
        int spc = 0; // 1=space seen
        int dot = 1; // 1=period/newline seen

        // counters
        int ctr_sent = 0; // number of sentences
        int ctr_word = 0; // number of words
        int ctr_para = 1; // number of paragraphs

#if DEBUG
        int copy = 0;
        int ochr = 0;
#endif

        for (int chr = *src++; chr != 0; chr = *src++) {
#if DEBUG
            if (opt_d) {
                // show if we skipped the prior char (and it was _not_ a space)
                if ((! copy) && (ochr != ' '))
                    _dbgprt("SKIP\n");
                copy = 0;
                ochr = chr;

                _dbgprt("LOOP %s",showchr(chr));
                dbgint("W:",ctr_word);
                dbgint("S:",ctr_sent);
                dbgint("P:",ctr_para);
                dbgint("quo",quo);
                dbgint("spc",spc);
                dbgint("dot",dot);
                _dbgprt("\n");
            }
#endif

            // got a quote
            if (chr == '"') {
                // flush whitespace if starting a quoted string
                if (! quo)
                    SPACEOUT;

                // copy the quote
                if (opt_quo)
                    COPY;

                // flip the quote mode
                quo = ! quo;
                continue;
            }

            // non-quote -- if inside a quoted string, just copy out the char
            // verbatim
            if (quo) {
                COPY;
                continue;
            }

            // got a dot
            if (chr == '.') {
                dot = 1;

                // double dot --> newline (new paragraph)
                if (*src == '.') {
                    COPYX('\n');
                    ++src;
                    ctr_para += 1;
                    continue;
                }

                COPY;

                // force whitespace mode (ensure space after dot)
                // (e.g.) change:
                // i go.he goes.
                // into:
                // i go. he goes.
                spc = 1;
                continue;
            }

            // from fgets, this can _only_ occur at the end of the buffer
            if (chr == '\n') {
                dot = 1;
                spc = 1;
                COPY;
                break;
            }

            // accumulate/skip over whitespace
            if (chr == ' ') {
                spc = 1;
                continue;
            }

            // output accumulated whitespace
            SPACEOUT;

            // convert case (ctype functions need an unsigned char value)
            if (dot)
                chr = toupper((unsigned char) chr);
            else
                chr = tolower((unsigned char) chr);

            // output the current character -- it's _not_ special
            COPY;

            // count sentences
            if (dot)
                ctr_sent += 1;

            // we're no longer at the start of a sentence
            dot = 0;
        }

        *dst = 0;

        // a bare -x only affects the debug trace, not the summary below
        if (opt_x == 1)
            opt_x = 0;

        showbuf(inp,"inp");
        showbuf(buf,"buf");

#if 0
        if (dot)
            ctr_word += 1;
#endif

        printf("TOTAL: length=%zu sentences=%d paragraphs=%d words=%d\n",
            strlen(buf),ctr_sent,ctr_para,ctr_word);
    }

    fclose(xfsrc);

    return 0;
}
使用 -d 选项时的输出:
SKIP
LOOP t W:0 S:0 P:1 dot
LOOP h W:0 S:1 P:1
LOOP e W:0 S:1 P:1
LOOP W:0 S:1 P:1
LOOP L W:0 S:1 P:1 spc
COPY {20}
LOOP A W:1 S:1 P:1
LOOP N W:1 S:1 P:1
LOOP G W:1 S:1 P:1
LOOP U W:1 S:1 P:1
LOOP A W:1 S:1 P:1
LOOP G W:1 S:1 P:1
LOOP E W:1 S:1 P:1
LOOP W:1 S:1 P:1
LOOP W:1 S:1 P:1 spc
LOOP " W:1 S:1 P:1 spc
COPY {20}
LOOP C W:2 S:1 P:1 quo
LOOP " W:2 S:1 P:1 quo
LOOP W:2 S:1 P:1
LOOP i W:2 S:1 P:1 spc
COPY {20}
LOOP s W:3 S:1 P:1
LOOP W:3 S:1 P:1
LOOP a W:3 S:1 P:1 spc
COPY {20}
LOOP W:4 S:1 P:1
LOOP p W:4 S:1 P:1 spc
COPY {20}
LOOP r W:5 S:1 P:1
LOOP o W:5 S:1 P:1
LOOP c W:5 S:1 P:1
LOOP e W:5 S:1 P:1
LOOP d W:5 S:1 P:1
LOOP u W:5 S:1 P:1
LOOP r W:5 S:1 P:1
LOOP a W:5 S:1 P:1
LOOP l W:5 S:1 P:1
LOOP W:5 S:1 P:1
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP p W:5 S:1 P:1 spc
COPY {20}
LOOP r W:6 S:1 P:1
LOOP o W:6 S:1 P:1
LOOP g W:6 S:1 P:1
LOOP r W:6 S:1 P:1
LOOP a W:6 S:1 P:1
LOOP m W:6 S:1 P:1
LOOP m W:6 S:1 P:1
LOOP i W:6 S:1 P:1
LOOP n W:6 S:1 P:1
LOOP g W:6 S:1 P:1
LOOP W:6 S:1 P:1
LOOP l W:6 S:1 P:1 spc
COPY {20}
LOOP a W:7 S:1 P:1
LOOP n W:7 S:1 P:1
LOOP g W:7 S:1 P:1
LOOP u W:7 S:1 P:1
LOOP a W:7 S:1 P:1
LOOP g W:7 S:1 P:1
LOOP e W:7 S:1 P:1
LOOP W:7 S:1 P:1
LOOP W:7 S:1 P:1 spc
LOOP W:7 S:1 P:1 spc
LOOP W:7 S:1 P:1 spc
LOOP W:7 S:1 P:1 spc
LOOP . W:7 S:1 P:1 spc
LOOP I W:7 S:1 P:1 spc dot
COPY {20}
LOOP t W:8 S:2 P:1
LOOP W:8 S:2 P:1
LOOP w W:8 S:2 P:1 spc
COPY {20}
LOOP a W:9 S:2 P:1
LOOP s W:9 S:2 P:1
LOOP W:9 S:2 P:1
LOOP i W:9 S:2 P:1 spc
COPY {20}
LOOP n W:10 S:2 P:1
LOOP i W:10 S:2 P:1
LOOP t W:10 S:2 P:1
LOOP i W:10 S:2 P:1
LOOP a W:10 S:2 P:1
LOOP l W:10 S:2 P:1
LOOP l W:10 S:2 P:1
LOOP y W:10 S:2 P:1
LOOP W:10 S:2 P:1
LOOP d W:10 S:2 P:1 spc
COPY {20}
LOOP e W:11 S:2 P:1
LOOP v W:11 S:2 P:1
LOOP e W:11 S:2 P:1
LOOP l W:11 S:2 P:1
LOOP o W:11 S:2 P:1
LOOP p W:11 S:2 P:1
LOOP e W:11 S:2 P:1
LOOP d W:11 S:2 P:1
LOOP W:11 S:2 P:1
LOOP b W:11 S:2 P:1 spc
COPY {20}
LOOP y W:12 S:2 P:1
LOOP W:12 S:2 P:1
LOOP " W:12 S:2 P:1 spc
COPY {20}
LOOP D W:13 S:2 P:1 quo
LOOP e W:13 S:2 P:1 quo
LOOP n W:13 S:2 P:1 quo
LOOP n W:13 S:2 P:1 quo
LOOP i W:13 S:2 P:1 quo
LOOP s W:13 S:2 P:1 quo
LOOP W:13 S:2 P:1 quo
LOOP R W:13 S:2 P:1 quo
LOOP i W:13 S:2 P:1 quo
LOOP t W:13 S:2 P:1 quo
LOOP c W:13 S:2 P:1 quo
LOOP h W:13 S:2 P:1 quo
LOOP i W:13 S:2 P:1 quo
LOOP e W:13 S:2 P:1 quo
LOOP " W:13 S:2 P:1 quo
LOOP . W:13 S:2 P:1
COPY {0A}
LOOP W:13 S:2 P:2 dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP t W:13 S:2 P:2 spc dot
COPY {20}
COPY T
LOOP h W:14 S:3 P:2
LOOP e W:14 S:3 P:2
LOOP W:14 S:3 P:2
LOOP M W:14 S:3 P:2 spc
COPY {20}
COPY m
LOOP a W:15 S:3 P:2
LOOP i W:15 S:3 P:2
LOOP n W:15 S:3 P:2
LOOP W:15 S:3 P:2
LOOP f W:15 S:3 P:2 spc
COPY {20}
LOOP e W:16 S:3 P:2
LOOP A W:16 S:3 P:2
COPY a
LOOP t W:16 S:3 P:2
LOOP u W:16 S:3 P:2
LOOP r W:16 S:3 P:2
LOOP e W:16 S:3 P:2
LOOP s W:16 S:3 P:2
LOOP W:16 S:3 P:2
LOOP o W:16 S:3 P:2 spc
COPY {20}
LOOP f W:17 S:3 P:2
LOOP W:17 S:3 P:2
LOOP " W:17 S:3 P:2 spc
COPY {20}
LOOP C W:18 S:3 P:2 quo
LOOP " W:18 S:3 P:2 quo
LOOP W:18 S:3 P:2
LOOP l W:18 S:3 P:2 spc
COPY {20}
LOOP a W:19 S:3 P:2
LOOP n W:19 S:3 P:2
LOOP g W:19 S:3 P:2
LOOP u W:19 S:3 P:2
LOOP a W:19 S:3 P:2
LOOP g W:19 S:3 P:2
LOOP e W:19 S:3 P:2
LOOP W:19 S:3 P:2
LOOP i W:19 S:3 P:2 spc
COPY {20}
LOOP n W:20 S:3 P:2
LOOP c W:20 S:3 P:2
LOOP l W:20 S:3 P:2
LOOP u W:20 S:3 P:2
LOOP d W:20 S:3 P:2
LOOP e W:20 S:3 P:2
LOOP W:20 S:3 P:2
LOOP l W:20 S:3 P:2 spc
COPY {20}
LOOP o W:21 S:3 P:2
LOOP w W:21 S:3 P:2
LOOP - W:21 S:3 P:2
LOOP l W:21 S:3 P:2
LOOP e W:21 S:3 P:2
LOOP v W:21 S:3 P:2
LOOP e W:21 S:3 P:2
LOOP l W:21 S:3 P:2
LOOP W:21 S:3 P:2
LOOP a W:21 S:3 P:2 spc
COPY {20}
LOOP c W:22 S:3 P:2
LOOP c W:22 S:3 P:2
LOOP e W:22 S:3 P:2
LOOP s W:22 S:3 P:2
LOOP s W:22 S:3 P:2
LOOP W:22 S:3 P:2
LOOP t W:22 S:3 P:2 spc
COPY {20}
LOOP o W:23 S:3 P:2
LOOP W:23 S:3 P:2
LOOP m W:23 S:3 P:2 spc
COPY {20}
LOOP e W:24 S:3 P:2
LOOP m W:24 S:3 P:2
LOOP o W:24 S:3 P:2
LOOP r W:24 S:3 P:2
LOOP y W:24 S:3 P:2
LOOP , W:24 S:3 P:2
LOOP W:24 S:3 P:2
LOOP s W:24 S:3 P:2 spc
COPY {20}
LOOP i W:25 S:3 P:2
LOOP m W:25 S:3 P:2
LOOP p W:25 S:3 P:2
LOOP l W:25 S:3 P:2
LOOP e W:25 S:3 P:2
LOOP W:25 S:3 P:2
LOOP s W:25 S:3 P:2 spc
COPY {20}
LOOP e W:26 S:3 P:2
LOOP t W:26 S:3 P:2
LOOP W:26 S:3 P:2
LOOP o W:26 S:3 P:2 spc
COPY {20}
LOOP f W:27 S:3 P:2
LOOP W:27 S:3 P:2
LOOP k W:27 S:3 P:2 spc
COPY {20}
LOOP e W:28 S:3 P:2
LOOP y W:28 S:3 P:2
LOOP w W:28 S:3 P:2
LOOP o W:28 S:3 P:2
LOOP r W:28 S:3 P:2
LOOP d W:28 S:3 P:2
LOOP s W:28 S:3 P:2
LOOP , W:28 S:3 P:2
LOOP W:28 S:3 P:2
LOOP a W:28 S:3 P:2 spc
COPY {20}
LOOP n W:29 S:3 P:2
LOOP d W:29 S:3 P:2
LOOP W:29 S:3 P:2
LOOP c W:29 S:3 P:2 spc
COPY {20}
LOOP l W:30 S:3 P:2
LOOP e W:30 S:3 P:2
LOOP a W:30 S:3 P:2
LOOP n W:30 S:3 P:2
LOOP W:30 S:3 P:2
LOOP s W:30 S:3 P:2 spc
COPY {20}
LOOP t W:31 S:3 P:2
LOOP y W:31 S:3 P:2
LOOP l W:31 S:3 P:2
LOOP e W:31 S:3 P:2
LOOP W:31 S:3 P:2
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP . W:31 S:3 P:2 spc
LOOP {0A} W:31 S:3 P:2 spc dot
inp: 'the LANGUAGE "C" is a procedural programming language .It was initially developed by "Dennis Ritchie".. the Main feAtures of "C" language include low-level access to memory, simple set of keywords, and clean style .{0A}'
buf: 'The language "C" is a procedural programming language. It was initially developed by "Dennis Ritchie"{0A} The main features of "C" language include low-level access to memory, simple set of keywords, and clean style.{0A}'
TOTAL: length=214 sentences=3 paragraphs=2 words=31
我有一个任务:根据规则格式化文本,并最终打印出字符串中的字符数(包括 \n 和空格,但不包括字符串末尾的 \0)。
基本上,一个有效的输入以一个点结束,但我认为在最后一个点之后还残留了几个空白字符。
我尝试了几种方法,例如从字符串末尾开始循环,用 \0 替换空格。
虽然似乎没有任何效果...
编辑 要求是:
- 将双点 (..) 转换为新行
- 删除多个space只留下一个,
- 确保逗号或点前没有 space
- 确保逗号或点后有单个 space。
- 不更改撇号之间的原始内容。
- 并验证大写字母只出现在正确的位置(新 line\paragraph)。
我们必须在 main 函数中完成所有代码(不幸的是),而我的代码经常会把字符数多算 1-2 个(可能是在最后一个点之后多出了额外的空格)。
这是我的代码无法正确计算字符数的输入示例:
the LANGUAGE "C" is a procedural programming language .It was initially developed by "Dennis Ritchie".. the Main feAtures of "C" language include low-level access to memory, simple set of keywords, and clean style .
/*
 * Text cleaner from the question: reads one line into `tin`, normalizes it in
 * place with a sequence of strchr-driven passes, builds the case-corrected
 * result in `tout`, and prints it with character/word/sentence/paragraph
 * counts. Repeats while the user answers 'y'.
 *
 * The NOTE(review) comments mark behaviour that explains the wrong character
 * count discussed in the question; the logic itself is left as the asker
 * wrote it.
 */
int main() {
    char ans;
    printf("*************** Welcome to the text cleaner ***************\n\n");
    do
    {
        int length, i, j = 0;
        int word, sentence, para, space;
        char tin[601], tout[601], * dex, * pos;
        printf("\nPlease enter text:\n");
        gets_s(tin, 600);
        length = strlen(tin);

        // Pass 1: turn the second dot of every ".." into a newline.
        dex = strchr(tin, '.');
        while (dex != NULL)
        {
            if (tin[dex - tin + 1] == '.') {
                tin[dex - tin + 1] = '\n';
            }
            dex = strchr(dex + 1, '.');
        }
        length = strlen(tin);

        // Pass 2: collapse runs of spaces into a single space. Each removal
        // shifts the tail left by one without moving the terminator, so j
        // counts the removals and the string is re-terminated afterwards.
        dex = strchr(tin, ' ');
        while (dex != NULL)
        {
            while (dex != NULL && tin[dex - tin + 1] == ' ')
            {
                for (i = dex - tin + 1; i < strlen(tin); i++)
                {
                    tin[i - 1] = tin[i];
                }
                dex = strchr(dex, ' ');
                j++;
            }
            dex = strchr(dex + 1, ' ');
        }
        tin[length - j] = '\0';
        j = 0;

        // Pass 3: delete the space at the beginning of a new row.
        // NOTE(review): the loop condition also tests for the space, so the
        // scan stops at the first newline that is not followed by a space.
        dex = strchr(tin, '\n');
        while (dex != NULL && tin[dex-tin+1] == ' ')
        {
            for (i = dex - tin + 1;i < strlen(tin);i++) {
                tin[i] = tin[i + 1];
            }
            dex = strchr(dex + 1, '\n');
        }

        // Pass 4: delete the space before a comma.
        // NOTE(review): stops at the first comma without a preceding space,
        // and reads tin[-1] when the comma is the very first character.
        dex = strchr(tin, ',');
        while (dex != NULL && tin[dex - tin - 1] == ' ')
        {
            for (i = dex - tin - 1; i < strlen(tin); i++)
            {
                tin[i] = tin[i+1];
            }
            dex = strchr(dex+1, ',');
        }

        // Pass 5: delete the space before a dot (same caveats as pass 4).
        dex = strchr(tin, '.');
        while (dex != NULL && tin[dex - tin - 1] == ' ')
        {
            for (i = dex - tin - 1; i < strlen(tin); i++)
            {
                tin[i] = tin[i+1];
            }
            dex = strchr(dex + 1, '.');
        }

        // Pass 6: add a space after a comma by shifting the tail right.
        // NOTE(review): when the comma is followed by '\n' nothing in the
        // loop body advances dex, so the loop never terminates.
        dex = strchr(tin, ',');
        while (dex != NULL && tin[dex - tin + 1] != ' ')
        {
            if (tin[dex - tin + 1] != '\n')
            {
                tin[strlen(tin) + 1] = '\0';
                for (i = strlen(tin); i > dex - tin; i--)
                {
                    if (i == dex - tin + 1)
                    {
                        tin[i] = ' ';
                    }
                    else
                    {
                        tin[i] = tin[i - 1];
                    }
                }
                dex = strchr(dex + 1, ',');
            }
        }

        // Pass 7: add a space after a dot.
        // NOTE(review): when the dot is the last character, the byte after it
        // is the terminator, which is neither ' ' nor '\n', so a space is
        // appended after the final dot. Nothing below removes it, which makes
        // the reported character count one too high.
        dex = strchr(tin, '.');
        while (dex != NULL && tin[dex - tin + 1] != ' ')
        {
            tin[strlen(tin) + 1] = '\0';
            if (tin[dex - tin + 1] == '\n')
            {
                dex = strchr(dex + 1, '.');
            }
            else
            {
                for (i = strlen(tin); i > dex - tin; i--)
                {
                    if (i == dex - tin + 1)
                    {
                        tin[i] = ' ';
                    }
                    else
                    {
                        tin[i] = tin[i - 1];
                    }
                }
                dex = strchr(dex + 1, '.');
            }
        }

        // Copy the text and lower-case the copy.
        strcpy_s(tout, sizeof(tout), tin);
        _strlwr_s(tout,sizeof(tout));

        // Restore the original characters between each pair of double quotes
        // so quoted text keeps its case.
        // NOTE(review): an opening quote without a closing quote leaves pos
        // NULL, and pos is then used in pointer arithmetic.
        dex = strchr(tin, '"');
        if (dex != NULL) {
            pos = strchr(dex + 1, '"');
            while (dex != NULL)
            {
                for (i = dex - tin; i < pos - tin; i++) {
                    tout[i] = tin[i];
                }
                dex = strchr(pos + 1, '"');
                if (dex)
                {
                    pos = strchr(dex + 1, '"');
                }
            }
        }

        // Upper-case the source, then copy the character two positions after
        // every dot (the one following ". ") so sentences start with a capital.
        _strupr_s(tin, sizeof(tin));
        dex = strchr(tout, '.');
        pos = strchr(tin, '.');
        while (dex != NULL && pos != NULL)
        {
            tout[dex - tout + 2] = tin[pos - tin + 2];
            dex = strchr(dex + 1, '.');
            pos = strchr(pos + 1, '.');
        }

        // Delete the space before dots in the result.
        dex = strchr(tout, '.');
        while (dex != NULL)
        {
            if (tout[dex - tout - 1] == ' ')
            {
                for (i = dex - tout - 1; i < strlen(tout); i++)
                {
                    tout[i] = tout[i+1];
                }
            }
            dex = strchr(dex + 1, '.');
        }

        // Handle a single space at the beginning of the string.
        if (tout[0] == ' ') {
            for (i = 0 ;i < strlen(tout); i++) {
                tout[i] = tout[i + 1];
            }
        }

        // The first letter is always a capital.
        if (tout[0] >= 'a' && tout[0] <= 'z') {
            tout[0] -= 32;
        }

        word = 0;
        sentence = 0;
        para = 1;
        space = 0;
        length = strlen(tout);

        // Words are counted as spaces that are not followed by another space.
        for (i = 0; tout[i] != '\0';i++)
        {
            if (tout[i] == ' ' && tout[i + 1] != ' ')
                word++;
        }

        // One sentence per dot.
        dex = strchr(tout, '.');
        while (dex != NULL)
        {
            sentence++;
            dex = strchr(dex + 1, '.');
        }

        // One extra paragraph (and word) per newline.
        dex = strchr(tout, '\n');
        while (dex != NULL)
        {
            space++;
            para++;
            word++;
            dex = strchr(dex + 1, '\n');
        }
        //dex = strchr(tout, '-');
        //while (dex != NULL)
        //{
        // word++;
        // dex = strchr(dex + 1, '-');
        //}
        printf_s("\nText after cleaning:\n------------------------------------------------------------------------------------------------\n");
        printf_s("%s\n\n", tout);
        printf_s("characters: %d | words: %d | sentences: %d | paragraphs: %d\n------------------------------------------------------------------------------------------------\n",length, word, sentence, para);
        printf_s("\nIf you want to clean another string press (y): ");
        scanf_s(" %c", &ans, 1);
        if (ans == 'y')
        {
            // consume the rest of the answer line before the next prompt
            gets_s(tin, 600);
        }
    } while (ans =='y');
    return 0;
}
这是一个可能的解决方案。
希望这会表明您不需要所有的重复。
我只用给定的示例对其进行了测试,很可能仍然存在它可能会中断的边缘情况。您可能想要分配 buffer
而不是使用特定值,但您可能需要检查是否存在可能导致扩展的输入。
关于大写在正确的地方,除了一行之外,原文没有任何范围,所以没有段落的概念。因此,我在句子开头使用了大写字母。
注意: OP 没有指定正确的输出是什么,因为 post 的标题是“错误的字符数”,所以这是基于最佳猜测关于要求和 OP 代码的一些见解(如前所述,可能没有产生正确的结果)。
我不认为这里的重点是修复 OP 的错误,而是说明更接近或实现解决方案的替代方法。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
// Sample text from the question; main() below cleans it.
char original[] = "the LANGUAGE \"C\" is a procedural programming language .It was initially developed by \"Dennis Ritchie\".. the Main feAtures of \"C\" language include low-level access to memory, simple set of keywords, and clean style .";

/*
 * Cleans `original` in a single pass driven by a few state flags, then prints
 * the input, the cleaned text and the character/word/sentence/paragraph
 * counts.
 *
 * Outside double quotes: spaces and commas are held back as pending flags, a
 * pending comma is written as ", ", ".." is written as ".\n" and starts a new
 * paragraph, a pending space is dropped at the start of a paragraph, the
 * first character of a sentence or paragraph is upper-cased and every other
 * character is lower-cased. Text between double quotes is copied unchanged.
 */
int main() {
    // The output is written to its own buffer because it can be longer than
    // the input. Each input character yields at most two output characters
    // (", " or ".\n"); the extra bytes hold the final '\n' and terminator.
    char buffer[2 * sizeof original + 2];
    const char *src = original;  // read cursor
    char *dest = buffer;         // write cursor
    int quoted = 0;              // inside a "..." section
    int sentence = 0;            // a period was seen; a sentence starts next
    int periods = 0;             // periods seen but not yet written
    int space = 0;               // a space is pending
    int paragraph = 1;           // the next character starts a paragraph
    int comma = 0;               // a comma is pending
    int letters = 0;             // lower-case letters seen in the current word
    int words = 0;
    int sentences = 0;
    int paragraphs = 0;

    for ( ; *src ; src++) {
        if (quoted) {
            if (*src == '"') {
                quoted = 0;
            }
            // NOTE(review): only 'a'..'z' count as letters, so an upper-case
            // letter ends the current word; the word total is approximate.
            if (*src >= 'a' && *src <= 'z') {
                letters++;
            } else if (letters) {
                words++;
                letters = 0;
            }
            *dest++ = *src;
        } else {
            switch (*src) {
            case '"':
                quoted = 1;
                break;
            case ',':
                comma = 1;
                continue;
            case ' ':
                if (letters) {
                    words++;
                    letters = 0;
                }
                space = 1;
                continue;
            case '.':
                if (++periods == 2) {
                    *dest++ = '.';
                    *dest++ = '\n';
                    periods = 0;
                    paragraph = 1;
                } else {
                    sentence = 1;
                }
                continue;
            default:
                break;
            }

            // Flush whatever punctuation/spacing is pending before the
            // current character is written.
            if (comma) {
                *dest++ = ',';
                *dest++ = ' ';
                comma = space = 0;
            }
            if (periods) {
                *dest++ = '.';
                periods = 0;
            }
            if (space) {
                if (!paragraph) {
                    *dest++ = ' ';
                }
                space = 0;
            }

            if (*src >= 'a' && *src <= 'z') {
                letters++;
            } else if (letters) {
                words++;
                letters = 0;
            }

            // <ctype.h> functions need an unsigned char value.
            *dest++ = (sentence || paragraph)
                ? toupper((unsigned char)*src)
                : tolower((unsigned char)*src);

            if (sentence || paragraph) {
                if (letters) {
                    words++;
                }
                letters = 0;
            }
            if (sentence) {
                sentences++;
            }
            if (paragraph) {
                paragraphs++;
            }
            sentence = paragraph = 0;
        }
    }

    // Account for state still pending at the end of the input.
    if (sentence) {
        sentences++;
    }
    if (paragraph) {
        paragraphs++;
    }
    if (periods) {
        *dest++ = '.';
    }
    *dest++ = '\n';
    *dest = '\0';

    printf("\nInput Chars=%d\n\n\"%s\"\n", (int)strlen(original), original);
    printf("\nOutput Chars=%d, Words=%d, Sentences=%d, Paragraphs=%d\n\n\"%s\"\n", (int)strlen(buffer), words, sentences, paragraphs, buffer);
    return 0;
}
这会产生:
Input Chars=259
"the LANGUAGE "C" is a procedural programming language .It was initially developed by "Dennis Ritchie".. the Main feAtures of "C" language include low-level access to memory, simple set of keywords, and clean style ."
Output Chars=214, Words=34, Sentences=3, Paragraphs=2
"The language "C" is a procedural programming language. It was initially developed by "Dennis Ritchie".
The main features of "C" language include low-level access to memory, simple set of keywords, and clean style.
"
正如我在热门评论中提到的,这可以在一个带有状态变量的循环中完成。
一些假设:
- 每当我们看到
..
(转换为换行符)时,它就会开始一个新段落 - 你所谓的“apostrophe”,我称之为双引号(因为这是唯一有意义的东西)。
- 引号内没有任何内容被转换
- 引用[自己]被复制过来(即没有被剥离)
不幸的是,我不得不完全重构代码。代码带有注释。我知道您必须仅使用 main。额外的函数只是为了调试,所以它们“不算数”:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
int opt_d; // debug
int opt_quo; // preserve quote

// dbgprt(fmt, ...): printf-style trace, printed only when built with DEBUG
// and opt_d is set; otherwise it expands to an empty statement.
#if DEBUG
#define dbgprt(...) \
    do { \
        if (opt_d) \
            printf(__VA_ARGS__); \
    } while (0)
#else
#define dbgprt(...) do { } while (0)
#endif

// COPYX(c): append c at the caller's write cursor `dst` and trace it.
#define COPYX(_chr) \
    do { \
        *dst++ = (_chr); \
        dbgprt("COPY %2.2X/%s\n",(_chr),showchr(_chr)); \
    } while (0)

// COPY: append the caller's current character `chr`.
#define COPY \
    COPYX(chr)

// WHITEOUT: if a space is pending (`white`), emit one space, clear the flag
// and count a word. The `break` only leaves the macro's own do/while.
#define WHITEOUT \
    do { \
        if (! white) \
            break; \
        COPYX(' '); \
        white = 0; \
        ctr_word += 1; \
    } while (0)
/*
 * Returns a printable form of chr: the character itself for 0x20..0x7E,
 * otherwise its low byte as "{XX}" in hex.
 *
 * The result lives in a static buffer that the next call overwrites.
 */
const char *
showchr(int chr)
{
    static char buf[10];

    if ((chr >= 0x20) && (chr <= 0x7E))
        snprintf(buf,sizeof(buf),"%c",chr);
    else {
        // mask to one byte: a negative value (a sign-extended char) would
        // otherwise print eight hex digits and not fit in buf
        snprintf(buf,sizeof(buf),"{%2.2X}",(unsigned int) chr & 0xFFu);
    }

    return buf;
}
/*
 * Prints "<who>: '<text>'" followed by a newline, with every character of
 * buf rendered through showchr() so non-printable bytes are visible.
 */
void
showbuf(const char *buf,const char *who)
{
    const char *sep = "'";

    printf("%s: %s",who,sep);

    // walk the text as unsigned bytes so showchr() never sees a negative value
    for (const unsigned char *cur = (const unsigned char *) buf; *cur != 0; ++cur)
        printf("%s",showchr(*cur));

    printf("%s\n",sep);
}
/*
 * Reads the input file line by line, cleans each line with a single-pass
 * state machine and prints the original line, the cleaned line and the
 * length/sentence/paragraph/word totals.
 *
 * Arguments: leading "-d" toggles opt_d, leading "-q" toggles opt_quo; the
 * first non-option argument is the input file ("inp.txt" when omitted).
 * Returns 0, or exits with status 1 when the file cannot be opened.
 */
int
main(int argc,char **argv)
{
    char inp[1000];
    // The cleaned text is built in its own buffer: a space is forced after
    // every period, so the output can be longer than the input (at most two
    // output characters per input character).
    char buf[2 * sizeof(inp)];
    char *src;
    char *dst;

    --argc;
    ++argv;

    for (; argc > 0; --argc, ++argv) {
        char *cp = *argv;
        if (*cp != '-')
            break;

        cp += 2;
        switch (cp[-1]) {
        case 'd':
            opt_d = ! opt_d;
            break;
        case 'q':
            opt_quo = ! opt_quo;
            break;
        }
    }

    // opt_quo starts at zero, so quotes are copied unless -q was given
    opt_quo = ! opt_quo;

    const char *file;
    if (argc > 0)
        file = *argv;
    else
        file = "inp.txt";

    FILE *xfsrc = fopen(file,"r");
    if (xfsrc == NULL) {
        perror(file);
        exit(1);
    }

    while (fgets(inp,sizeof(inp),xfsrc) != NULL) {
        src = inp;
        dst = buf;

        int quo = 0;            // 1=inside a quoted string
        int white = 0;          // 1=a space is pending
        int dot = 1;            // 1=at the start of a sentence

        int ctr_sent = 0;
        int ctr_word = 0;
        int ctr_para = 1;

        for (int chr = *src++; chr != 0; chr = *src++) {
            dbgprt("LOOP %2.2X/%s quo=%d white=%d dot=%d word=%d sent=%d para=%d\n",
                chr,showchr(chr),quo,white,dot,
                ctr_word,ctr_sent,ctr_para);

            // got a quote
            if (chr == '"') {
                if (! quo)
                    WHITEOUT;
                if (opt_quo)
                    COPY;
                quo = ! quo;
                continue;
            }

            // non-quote -- inside a quoted string everything is copied as is
            if (quo) {
                COPY;
                continue;
            }

            // got a dot
            if (chr == '.') {
                dot = 1;

                // double dot --> newline (new paragraph)
                if (*src == '.') {
                    COPYX('\n');
                    ++src;
                    ctr_para += 1;
                    continue;
                }

                COPY;
                // ensure a space after the dot
                white = 1;
                continue;
            }

            // from fgets, this can _only_ occur at the end of the buffer
            if (chr == '\n') {
                dot = 1;
                white = 1;
                COPY;
                break;
            }

            // accumulate/skip over whitespace
            if (chr == ' ') {
                white = 1;
                continue;
            }

            // output accumulated whitespace
            WHITEOUT;

            // got uppercase -- convert to lowercase if we're not at the start
            // of a sentence (ctype functions need an unsigned char value)
            if (isupper((unsigned char) chr)) {
                if (! dot)
                    chr = tolower((unsigned char) chr);
            }
            // got lowercase -- capitalize if we're just starting a sentence
            else if (islower((unsigned char) chr)) {
                if (dot)
                    chr = toupper((unsigned char) chr);
            }

            COPY;

            // count sentences
            if (dot)
                ctr_sent += 1;
            dot = 0;
        }

        *dst = 0;

        showbuf(inp,"inp");
        showbuf(buf,"buf");

#if 0
        if (dot)
            ctr_word += 1;
#endif

        printf("TOTAL: length=%zu sentences=%d paragraphs=%d words=%d\n",
            strlen(buf),ctr_sent,ctr_para,ctr_word);
    }

    fclose(xfsrc);

    return 0;
}
程序输出如下:
inp: 'the LANGUAGE "C" is a procedural programming language .It was initially developed by "Dennis Ritchie".. the Main feAtures of "C" language include low-level access to memory, simple set of keywords, and clean style .{0A}'
buf: 'The language "C" is a procedural programming language. It was initially developed by "Dennis Ritchie"{0A} The main features of "C" language include low-level access to memory, simple set of keywords, and clean style.{0A}'
TOTAL: length=214 sentences=3 paragraphs=2 words=31
更新:
Great, thank you! I've taken what you've done and implemented it in my code. Basically, the professor restricted us to use only the stdio.h and string.h libraries, so I couldn't use any other functions... – Nitai Dan
不客气!
很高兴您能够将我的代码合并到您的代码中。这是所有可能的学习场景中最好的。
我不确定我是否添加了足够的注释来使我的算法清晰,所以我对它进行了一些清理,并考虑过是否要把它发布出来。现在把它发布在下面。
根据您刚才所说的,我[仍然]不确定是否允许创建您自己的函数。正如我所说,debug 函数如果不更改算法,[可能] 没问题。
无论如何,这是更新后的代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
// command-line option flags, set by the option loop in main
int opt_d; // debug: nonzero enables the trace output (DEBUG builds only)
int opt_quo; // preserve quote: nonzero copies '"' characters to the output
int opt_x; // show space as hex: nonzero makes showchr print ' ' as {20}
// _dbgprt -- unconditional debug print; takes printf-style arguments
// (uses the standard C99 __VA_ARGS__ form rather than the GNU-only named
// variadic parameter)
#define _dbgprt(...) \
    printf(__VA_ARGS__)

// dbgprt -- debug print that fires only in a DEBUG build with opt_d set;
// in a non-DEBUG build it expands to an empty statement
#if DEBUG
#define dbgprt(...) \
    do { \
        if (opt_d) \
            _dbgprt(__VA_ARGS__); \
    } while (0)
#else
#define dbgprt(...) do { } while (0)
#endif
// COPYX -- append one character to the output through the caller's dst
//
// The DEBUG variant also sets the caller's copy flag and traces the
// character whenever it differs from the input character just consumed
// (src[-1]).  The argument is evaluated exactly once and parenthesized, so
// an expression argument behaves the same in both variants.
#if DEBUG
#define COPYX(_chr) \
    do { \
        int copyx_val_ = (_chr); \
        *dst++ = copyx_val_; \
        copy = 1; \
        if (copyx_val_ != src[-1]) \
            _dbgprt(" COPY %s\n",_showchr(copyx_val_,1)); \
    } while (0)
#else
#define COPYX(_chr) \
    do { \
        *dst++ = (_chr); \
    } while (0)
#endif

// COPY -- append the current input character (the caller's chr)
#define COPY \
    COPYX(chr)
// SPACEOUT -- flush a pending space
//
// When the caller's spc flag is set: clear it, emit exactly one ' ' through
// COPYX and add one to ctr_word.  When spc is clear: do nothing.
#define SPACEOUT \
    do { \
        if (spc) { \
            spc = 0; \
            COPYX(' '); \
            ctr_word += 1; \
        } \
    } while (0)
// _showchr -- render one character as a short printable string
//
// chr:  character value to show
// xflg: zero     -- 0x20..0x7E are returned as the character itself
//       nonzero  -- only 0x21..0x7E are, so a space comes back as "{20}"
//
// Every other value is returned as "{XX}" in uppercase hex.
//
// A negative value (a high byte fetched through a signed char) is folded to
// its unsigned-byte value first.  Without that, "%X" would print eight hex
// digits and "{FFFFFFE9}" plus its terminator does not fit in ten bytes.
//
// returns: pointer to a static buffer that the next call overwrites
const char *
_showchr(int chr,int xflg)
{
    static char buf[16];
    int lo = xflg ? 0x21 : 0x20;

    if (chr < 0)
        chr = (unsigned char) chr;

    if ((chr >= lo) && (chr <= 0x7E))
        snprintf(buf,sizeof(buf),"%c",chr);
    else
        snprintf(buf,sizeof(buf),"{%2.2X}",(unsigned int) chr);

    return buf;
}
// showchr -- printable form of chr as produced by _showchr, with the space
// rendering selected by the global opt_x flag
//
// returns: the pointer handed back by _showchr
const char *
showchr(int chr)
{
    const char *text = _showchr(chr,opt_x);

    return text;
}
// showbuf -- print one line of the form  who: '<text>'
//
// buf: NUL-terminated text; each character is rendered through showchr()
// who: label printed in front of the text
void
showbuf(const char *buf,const char *who)
{
    const char *quote = "'";
    const char *cur = buf;

    printf("%s: %s",who,quote);

    // walk the string up to (not including) the terminator
    while (*cur != 0) {
        int ch = *cur;

        ++cur;
        printf("%s",showchr(ch));
    }

    printf("%s\n",quote);
}
// dbgint -- print one field of the debug trace line
//
// sym: field label; a label starting with an uppercase letter is treated as
//      a counter, any other label as a flag
// val: field value
//
// A counter is always printed as " <sym><val>".  A flag is printed as
// " <sym>" only when val is nonzero, and is omitted otherwise.
void
dbgint(const char *sym,int val)
{
    // <ctype.h> functions need a value in unsigned char range
    int ctr = isupper((unsigned char) sym[0]);

    // a flag that is off leaves no trace
    if ((! ctr) && (! val))
        return;

    _dbgprt(" %s",sym);

    if (ctr)
        _dbgprt("%d",val);
}
// main -- text cleaner driver
//
// usage: prog [-d] [-q] [-x[N]] [file]
//   -d     toggle opt_d (per-character trace, DEBUG builds only)
//   -q     toggle opt_quo (whether '"' characters are copied to the output)
//   -x[N]  set opt_x to N, or to 1 when no number follows
//   file   input file (default "inp.txt")
//
// Every line of the file is cleaned on its own and then shown together with
// its length and its sentence/paragraph/word counters:
//   - runs of spaces collapse, and a pending space is dropped before a '.'
//   - after a '.' a space is made pending for whatever text follows
//   - ".." becomes a newline and starts a new paragraph
//   - text between double quotes is copied verbatim
//   - the first letter of a sentence is uppercased, other letters lowercased
//
// The cleaned text is built in a buffer separate from the input.  Cleaning
// can make a line longer ("a.b" becomes "A. B"), so rewriting the line in
// place lets the write pointer overtake the read pointer, clobber the
// terminator and run off the end of the buffer.
//
// returns: 0 on success; exits with status 1 if the file cannot be opened
int
main(int argc,char **argv)
{
    char inp[1000];
    // each input character yields at most itself plus one pending space in
    // front of it (SPACEOUT emits a single space), so twice the input size
    // is always enough
    char buf[2 * sizeof(inp)];
    char *src;
    char *dst;

    // skip the program name
    --argc;
    ++argv;

    // consume leading "-x" style options
    for (; argc > 0; --argc, ++argv) {
        char *cp = *argv;

        if (*cp != '-')
            break;

        // step over the '-' and the option letter; cp[-1] is the letter and
        // cp points at whatever follows it
        cp += 2;
        switch (cp[-1]) {
        case 'd':
            opt_d = ! opt_d;
            break;
        case 'q':
            opt_quo = ! opt_quo;
            break;
        case 'x':
            opt_x = (*cp != 0) ? atoi(cp) : 1;
            break;
        default:
            break;
        }
    }

    // invert the flag so that -q turns quote copying off rather than on
    opt_quo = ! opt_quo;

    const char *file;

    if (argc > 0)
        file = *argv;
    else
        file = "inp.txt";

    FILE *xfsrc = fopen(file,"r");

    if (xfsrc == NULL) {
        perror(file);
        exit(1);
    }

    while (fgets(inp,sizeof(inp),xfsrc) != NULL) {
        // read from the pristine input, write to the separate output buffer
        src = inp;
        dst = buf;

        // state variables
        int quo = 0;                    // 1=within quoted string
        int spc = 0;                    // 1=space seen
        int dot = 1;                    // 1=period/newline seen

        // counters
        int ctr_sent = 0;               // number of sentences
        int ctr_word = 0;               // number of words
        int ctr_para = 1;               // number of paragraphs

#if DEBUG
        int copy = 0;                   // 1=COPYX ran for the prior char
        int ochr = 0;                   // prior input char
#endif

        for (int chr = *src++; chr != 0; chr = *src++) {
#if DEBUG
            if (opt_d) {
                // show if we skipped the prior char (and it was _not_ a space)
                if ((! copy) && (ochr != ' '))
                    _dbgprt("SKIP\n");
                copy = 0;
                ochr = chr;

                _dbgprt("LOOP %s",showchr(chr));
                dbgint("W:",ctr_word);
                dbgint("S:",ctr_sent);
                dbgint("P:",ctr_para);
                dbgint("quo",quo);
                dbgint("spc",spc);
                dbgint("dot",dot);
                _dbgprt("\n");
            }
#endif

            // got a quote
            if (chr == '"') {
                // flush whitespace if starting a quoted string
                if (! quo) {
                    SPACEOUT;
                }

                // copy the quote
                if (opt_quo) {
                    COPY;
                }

                // flip the quote mode
                quo = ! quo;
                continue;
            }

            // if inside a quoted string, just copy out the char verbatim
            if (quo) {
                COPY;
                continue;
            }

            // got a dot
            if (chr == '.') {
                dot = 1;

                // double dot --> newline (new paragraph)
                if (*src == '.') {
                    COPYX('\n');
                    ++src;
                    ctr_para += 1;
                    continue;
                }

                // the dot is copied without flushing the pending space, so
                // no space precedes it
                COPY;

                // force whitespace mode (ensure space after dot)
                // (e.g.) change:
                //   i go.he goes.
                // into:
                //   i go. he goes.
                spc = 1;
                continue;
            }

            // from fgets, this can _only_ occur at the end of the buffer
            if (chr == '\n') {
                dot = 1;
                spc = 1;
                COPY;
                break;
            }

            // accumulate/skip over whitespace
            if (chr == ' ') {
                spc = 1;
                continue;
            }

            // output accumulated whitespace
            SPACEOUT;

            // convert case -- only letters are touched, and <ctype.h> gets a
            // value in unsigned char range
            unsigned char uch = (unsigned char) chr;

            if (dot) {
                if (islower(uch))
                    chr = toupper(uch);
            }
            else {
                if (isupper(uch))
                    chr = tolower(uch);
            }

            // output the current character -- it's _not_ special
            COPY;

            // count sentences
            if (dot)
                ctr_sent += 1;

            // we're no longer at the start of a sentence
            dot = 0;
        }

        *dst = 0;

        // an opt_x of exactly 1 is cleared before the buffers are shown (and
        // stays cleared for later lines); larger values persist
        if (opt_x == 1)
            opt_x = 0;

        showbuf(inp,"inp");
        showbuf(buf,"buf");

#if 0
        if (dot)
            ctr_word += 1;
#endif

        printf("TOTAL: length=%zu sentences=%d paragraphs=%d words=%d\n",
            strlen(buf),ctr_sent,ctr_para,ctr_word);
    }

    fclose(xfsrc);

    return 0;
}
使用 -d 选项运行时的输出:
SKIP
LOOP t W:0 S:0 P:1 dot
LOOP h W:0 S:1 P:1
LOOP e W:0 S:1 P:1
LOOP W:0 S:1 P:1
LOOP L W:0 S:1 P:1 spc
COPY {20}
LOOP A W:1 S:1 P:1
LOOP N W:1 S:1 P:1
LOOP G W:1 S:1 P:1
LOOP U W:1 S:1 P:1
LOOP A W:1 S:1 P:1
LOOP G W:1 S:1 P:1
LOOP E W:1 S:1 P:1
LOOP W:1 S:1 P:1
LOOP W:1 S:1 P:1 spc
LOOP " W:1 S:1 P:1 spc
COPY {20}
LOOP C W:2 S:1 P:1 quo
LOOP " W:2 S:1 P:1 quo
LOOP W:2 S:1 P:1
LOOP i W:2 S:1 P:1 spc
COPY {20}
LOOP s W:3 S:1 P:1
LOOP W:3 S:1 P:1
LOOP a W:3 S:1 P:1 spc
COPY {20}
LOOP W:4 S:1 P:1
LOOP p W:4 S:1 P:1 spc
COPY {20}
LOOP r W:5 S:1 P:1
LOOP o W:5 S:1 P:1
LOOP c W:5 S:1 P:1
LOOP e W:5 S:1 P:1
LOOP d W:5 S:1 P:1
LOOP u W:5 S:1 P:1
LOOP r W:5 S:1 P:1
LOOP a W:5 S:1 P:1
LOOP l W:5 S:1 P:1
LOOP W:5 S:1 P:1
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP p W:5 S:1 P:1 spc
COPY {20}
LOOP r W:6 S:1 P:1
LOOP o W:6 S:1 P:1
LOOP g W:6 S:1 P:1
LOOP r W:6 S:1 P:1
LOOP a W:6 S:1 P:1
LOOP m W:6 S:1 P:1
LOOP m W:6 S:1 P:1
LOOP i W:6 S:1 P:1
LOOP n W:6 S:1 P:1
LOOP g W:6 S:1 P:1
LOOP W:6 S:1 P:1
LOOP l W:6 S:1 P:1 spc
COPY {20}
LOOP a W:7 S:1 P:1
LOOP n W:7 S:1 P:1
LOOP g W:7 S:1 P:1
LOOP u W:7 S:1 P:1
LOOP a W:7 S:1 P:1
LOOP g W:7 S:1 P:1
LOOP e W:7 S:1 P:1
LOOP W:7 S:1 P:1
LOOP W:7 S:1 P:1 spc
LOOP W:7 S:1 P:1 spc
LOOP W:7 S:1 P:1 spc
LOOP W:7 S:1 P:1 spc
LOOP . W:7 S:1 P:1 spc
LOOP I W:7 S:1 P:1 spc dot
COPY {20}
LOOP t W:8 S:2 P:1
LOOP W:8 S:2 P:1
LOOP w W:8 S:2 P:1 spc
COPY {20}
LOOP a W:9 S:2 P:1
LOOP s W:9 S:2 P:1
LOOP W:9 S:2 P:1
LOOP i W:9 S:2 P:1 spc
COPY {20}
LOOP n W:10 S:2 P:1
LOOP i W:10 S:2 P:1
LOOP t W:10 S:2 P:1
LOOP i W:10 S:2 P:1
LOOP a W:10 S:2 P:1
LOOP l W:10 S:2 P:1
LOOP l W:10 S:2 P:1
LOOP y W:10 S:2 P:1
LOOP W:10 S:2 P:1
LOOP d W:10 S:2 P:1 spc
COPY {20}
LOOP e W:11 S:2 P:1
LOOP v W:11 S:2 P:1
LOOP e W:11 S:2 P:1
LOOP l W:11 S:2 P:1
LOOP o W:11 S:2 P:1
LOOP p W:11 S:2 P:1
LOOP e W:11 S:2 P:1
LOOP d W:11 S:2 P:1
LOOP W:11 S:2 P:1
LOOP b W:11 S:2 P:1 spc
COPY {20}
LOOP y W:12 S:2 P:1
LOOP W:12 S:2 P:1
LOOP " W:12 S:2 P:1 spc
COPY {20}
LOOP D W:13 S:2 P:1 quo
LOOP e W:13 S:2 P:1 quo
LOOP n W:13 S:2 P:1 quo
LOOP n W:13 S:2 P:1 quo
LOOP i W:13 S:2 P:1 quo
LOOP s W:13 S:2 P:1 quo
LOOP W:13 S:2 P:1 quo
LOOP R W:13 S:2 P:1 quo
LOOP i W:13 S:2 P:1 quo
LOOP t W:13 S:2 P:1 quo
LOOP c W:13 S:2 P:1 quo
LOOP h W:13 S:2 P:1 quo
LOOP i W:13 S:2 P:1 quo
LOOP e W:13 S:2 P:1 quo
LOOP " W:13 S:2 P:1 quo
LOOP . W:13 S:2 P:1
COPY {0A}
LOOP W:13 S:2 P:2 dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP t W:13 S:2 P:2 spc dot
COPY {20}
COPY T
LOOP h W:14 S:3 P:2
LOOP e W:14 S:3 P:2
LOOP W:14 S:3 P:2
LOOP M W:14 S:3 P:2 spc
COPY {20}
COPY m
LOOP a W:15 S:3 P:2
LOOP i W:15 S:3 P:2
LOOP n W:15 S:3 P:2
LOOP W:15 S:3 P:2
LOOP f W:15 S:3 P:2 spc
COPY {20}
LOOP e W:16 S:3 P:2
LOOP A W:16 S:3 P:2
COPY a
LOOP t W:16 S:3 P:2
LOOP u W:16 S:3 P:2
LOOP r W:16 S:3 P:2
LOOP e W:16 S:3 P:2
LOOP s W:16 S:3 P:2
LOOP W:16 S:3 P:2
LOOP o W:16 S:3 P:2 spc
COPY {20}
LOOP f W:17 S:3 P:2
LOOP W:17 S:3 P:2
LOOP " W:17 S:3 P:2 spc
COPY {20}
LOOP C W:18 S:3 P:2 quo
LOOP " W:18 S:3 P:2 quo
LOOP W:18 S:3 P:2
LOOP l W:18 S:3 P:2 spc
COPY {20}
LOOP a W:19 S:3 P:2
LOOP n W:19 S:3 P:2
LOOP g W:19 S:3 P:2
LOOP u W:19 S:3 P:2
LOOP a W:19 S:3 P:2
LOOP g W:19 S:3 P:2
LOOP e W:19 S:3 P:2
LOOP W:19 S:3 P:2
LOOP i W:19 S:3 P:2 spc
COPY {20}
LOOP n W:20 S:3 P:2
LOOP c W:20 S:3 P:2
LOOP l W:20 S:3 P:2
LOOP u W:20 S:3 P:2
LOOP d W:20 S:3 P:2
LOOP e W:20 S:3 P:2
LOOP W:20 S:3 P:2
LOOP l W:20 S:3 P:2 spc
COPY {20}
LOOP o W:21 S:3 P:2
LOOP w W:21 S:3 P:2
LOOP - W:21 S:3 P:2
LOOP l W:21 S:3 P:2
LOOP e W:21 S:3 P:2
LOOP v W:21 S:3 P:2
LOOP e W:21 S:3 P:2
LOOP l W:21 S:3 P:2
LOOP W:21 S:3 P:2
LOOP a W:21 S:3 P:2 spc
COPY {20}
LOOP c W:22 S:3 P:2
LOOP c W:22 S:3 P:2
LOOP e W:22 S:3 P:2
LOOP s W:22 S:3 P:2
LOOP s W:22 S:3 P:2
LOOP W:22 S:3 P:2
LOOP t W:22 S:3 P:2 spc
COPY {20}
LOOP o W:23 S:3 P:2
LOOP W:23 S:3 P:2
LOOP m W:23 S:3 P:2 spc
COPY {20}
LOOP e W:24 S:3 P:2
LOOP m W:24 S:3 P:2
LOOP o W:24 S:3 P:2
LOOP r W:24 S:3 P:2
LOOP y W:24 S:3 P:2
LOOP , W:24 S:3 P:2
LOOP W:24 S:3 P:2
LOOP s W:24 S:3 P:2 spc
COPY {20}
LOOP i W:25 S:3 P:2
LOOP m W:25 S:3 P:2
LOOP p W:25 S:3 P:2
LOOP l W:25 S:3 P:2
LOOP e W:25 S:3 P:2
LOOP W:25 S:3 P:2
LOOP s W:25 S:3 P:2 spc
COPY {20}
LOOP e W:26 S:3 P:2
LOOP t W:26 S:3 P:2
LOOP W:26 S:3 P:2
LOOP o W:26 S:3 P:2 spc
COPY {20}
LOOP f W:27 S:3 P:2
LOOP W:27 S:3 P:2
LOOP k W:27 S:3 P:2 spc
COPY {20}
LOOP e W:28 S:3 P:2
LOOP y W:28 S:3 P:2
LOOP w W:28 S:3 P:2
LOOP o W:28 S:3 P:2
LOOP r W:28 S:3 P:2
LOOP d W:28 S:3 P:2
LOOP s W:28 S:3 P:2
LOOP , W:28 S:3 P:2
LOOP W:28 S:3 P:2
LOOP a W:28 S:3 P:2 spc
COPY {20}
LOOP n W:29 S:3 P:2
LOOP d W:29 S:3 P:2
LOOP W:29 S:3 P:2
LOOP c W:29 S:3 P:2 spc
COPY {20}
LOOP l W:30 S:3 P:2
LOOP e W:30 S:3 P:2
LOOP a W:30 S:3 P:2
LOOP n W:30 S:3 P:2
LOOP W:30 S:3 P:2
LOOP s W:30 S:3 P:2 spc
COPY {20}
LOOP t W:31 S:3 P:2
LOOP y W:31 S:3 P:2
LOOP l W:31 S:3 P:2
LOOP e W:31 S:3 P:2
LOOP W:31 S:3 P:2
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP . W:31 S:3 P:2 spc
LOOP {0A} W:31 S:3 P:2 spc dot
inp: 'the LANGUAGE "C" is a procedural programming language .It was initially developed by "Dennis Ritchie".. the Main feAtures of "C" language include low-level access to memory, simple set of keywords, and clean style .{0A}'
buf: 'The language "C" is a procedural programming language. It was initially developed by "Dennis Ritchie"{0A} The main features of "C" language include low-level access to memory, simple set of keywords, and clean style.{0A}'
TOTAL: length=214 sentences=3 paragraphs=2 words=31