C - 字母的多线程计数频率导致内存错误
C - Multithreading count frequency of letters causes memory error
我正在尝试使用 C 多线程找出文本文件中每个字母的出现频率。作业是:1) 编写一个函数来读取文本中以“.”结尾的每个句子。 2) 编写一个函数,在二维数组中加载一个句子 3) 编写一个函数,为每个句子的每个字母生成一个 pthread(pthread 函数为该字母的计数器加 1)。
编辑:我通过 Valgrind 发现问题出在 sentence
函数中,我不明白为什么。
代码如下:
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/types.h>
char alphabet[26] = "abcdefghijklmnopqrstuvwxyz";
int count[26];
char* sentence(char * s){
char* p;
char* q;
char* arr;
int i;
p = s;
q = malloc(100);
arr = q;
for (i=0; *p != '.'; i++){
*q = *p;
q++;
p++;
}
*q = '[=10=]';
return arr;
}
char** load_sentence(char* p, char** q, int i){
q[i] = malloc(strlen(p)+1);
strcpy(q[i], p);
return q;
}
void* count_letter(void * s){
char* p = (char*) s;
int i;
for (i=0; i<26; i++){
if (*p == alphabet[i]){
count[i]++;
}
}
}
void frequency(char* str){
char* s = str;
int i, j, l;
l = strlen(str);
pthread_t tid[l];
for (i=0; i<l; i++){
pthread_create(&tid[i], NULL, count_letter, (void*) s);
s++;
}
for (j=0; j<l; j++){
pthread_join(tid[j], NULL);
}
}
int main(int argc, char* argv[]){
int fd;
char buff[100];
fd = open(argv[1], O_RDONLY);
char ** text = malloc(10*sizeof(char*));
read(fd, buff, sizeof(buff));
char* start = buff;
int i = 0; //number of phrases!
char* p = NULL;
while (*(p = sentence(start)) != '[=10=]'){
text = load_sentence(p, text, i);
start += strlen(p)+1;
i++;
}
int j, k;
for (k=0; k<i; k++){
frequency(text[k]);
}
for (j=0; j<26; j++){
printf("%c : %d times\n", alphabet[j], count[j]);
}
}
看起来像这样的案例:
hope it's a good reading. bye.
输出正确:
a : 2 times
b : 1 times
c : 0 times
d : 2 times
e : 3 times
f : 0 times
g : 3 times
h : 1 times
i : 2 times
j : 0 times
k : 0 times
l : 0 times
m : 0 times
n : 1 times
o : 3 times
p : 1 times
q : 0 times
r : 1 times
s : 1 times
t : 1 times
u : 0 times
v : 0 times
w : 0 times
x : 0 times
y : 1 times
z : 0 times
对于其他人,"memory error" 以 free() : invalid next size (normal)
开头。该错误有许多内存映射行并以流产结束。
我对C很陌生,很抱歉我没有经验。
这种情况有必要引入一个mutex
吗?
埃里卡,
由于我不太了解您的作业,请将此视为从 1000 中计算字符数的另一种方法。我没有检查它是否有错误,请根据您的需要重写。无论如何,这就是我解决它的方式。如果内存稀疏,我会从文件中逐个字符地读取,直到“.”。无论如何希望它能帮助你并取得好成绩:-)...
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <stdatomic.h>
#define MAX_THREADS 100
atomic_int threadCount;
#define NCHAR 26
char alphabet[NCHAR] = "abcdefghijklmnopqrstuvwxyz";
atomic_int count[NCHAR];
void* count_letter(void * s){
threadCount++;
char* p = (char*) s;
for (int i=0; i<NCHAR; i++)
if (*p == alphabet[i])
count[i]++;
threadCount--;
return NULL;
}
int main(int argc, char* argv[]){
//Init variables
FILE *file;
char *myText;
unsigned long fileLen;
int deadLockGuard=0;
threadCount=0;
//Open the file
file = fopen(argv[1], "rb");
if (!file) {
fprintf(stderr, "Unable to open file %s", argv[1]);
return EXIT_FAILURE;
}
fseek(file, 0, SEEK_END);
fileLen=ftell(file);
rewind(file);
//reserve memory and read the file
myText=(char *)malloc(fileLen+1);
if (!myText) {
fprintf(stderr, "Memory error!");
fclose(file);
return EXIT_FAILURE;
}
fread(myText, fileLen, 1, file);
fclose(file);
//Get each sentence ending with a . and then for each character look at the count for each character in it's own thread.
char *subString = strtok(myText, "."); //This is your sentence/load_sentence method
while (subString != NULL) {
for (int v = 0;v<strlen(subString);v++) { //This is your frequency method
deadLockGuard=0;
while (threadCount >= MAX_THREADS) {
usleep(100); //Sleep 0.1ms
if(deadLockGuard++ == 10000) {
printf("Dead-lock guard1 triggered.. Call Bill Gates for help!"); //No free threads after a second.. Either the computer is DEAD SLOW or we got some creepy crawler in da house.
return EXIT_FAILURE;
}
}
pthread_t tid; //Yes you can overwrite it.. I use a counter to join the workers.
pthread_create(&tid, NULL, count_letter, (void*) subString+v);
}
subString = strtok(NULL, ".");
}
deadLockGuard=0;
//pthread_join all the still woring threads
while (threadCount) {
usleep(1000); //sleep a milli
if(deadLockGuard++ == 2*1000) {
printf("Dead-lock guard2 triggered.. Call Bill Gates for help!"); //Threads are running after 2 seconds.. Exit!!
return EXIT_FAILURE;
}
}
//Garbage collect and print the results.
free(myText);
for (int j=0; j<NCHAR; j++)
printf("%c : %d times\n", alphabet[j], count[j]);
return EXIT_SUCCESS;
}
根据 reference:
,您之前使用 mutex
的版本有未定义的行为,因为您多次初始化互斥量
Attempting to initialize an already initialized mutex results in
undefined behavior.
您正在并发访问count
,因此您必须使用互斥来制作线程安全代码。你在count_letter
中调用了pthread_mutex_init
是不正确的,这个函数是你线程的主体(多次初始化互斥量而不销毁它会导致UB),你应该只调用一次pthread_mutex_init
,因为实例作为主函数中的第一行:
int main() {
pthread_mutex_init(&mtx,NULL);
在return之前添加
pthread_mutex_destroy(&mtx);
您的 count_letter 函数中的关键部分是第
行
count[i]++;
你应该修改如下
pthread_mutex_lock(&mtx);
count[i]++;
pthread_mutex_unlock(&mtx);
现在,return 到 sentence
的实现,在与 .
比较之前,您需要检查 *p
是否指向空终止符:
for (i=0; *p && *p != '.'; i++){
^^ added
未经测试,[=23=]
!= .
return 为真,您的循环将继续...
我正在尝试使用 C 多线程找出文本文件中每个字母的出现频率。作业是:1) 编写一个函数来读取文本中以“.”结尾的每个句子。 2) 编写一个函数,在二维数组中加载一个句子 3) 编写一个函数,为每个句子的每个字母生成一个 pthread(pthread 函数为该字母的计数器加 1)。
编辑:我通过 Valgrind 发现问题出在 sentence
函数中,我不明白为什么。
代码如下:
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/types.h>
char alphabet[26] = "abcdefghijklmnopqrstuvwxyz";
int count[26];
char* sentence(char * s){
char* p;
char* q;
char* arr;
int i;
p = s;
q = malloc(100);
arr = q;
for (i=0; *p != '.'; i++){
*q = *p;
q++;
p++;
}
*q = '[=10=]';
return arr;
}
char** load_sentence(char* p, char** q, int i){
q[i] = malloc(strlen(p)+1);
strcpy(q[i], p);
return q;
}
void* count_letter(void * s){
char* p = (char*) s;
int i;
for (i=0; i<26; i++){
if (*p == alphabet[i]){
count[i]++;
}
}
}
void frequency(char* str){
char* s = str;
int i, j, l;
l = strlen(str);
pthread_t tid[l];
for (i=0; i<l; i++){
pthread_create(&tid[i], NULL, count_letter, (void*) s);
s++;
}
for (j=0; j<l; j++){
pthread_join(tid[j], NULL);
}
}
int main(int argc, char* argv[]){
int fd;
char buff[100];
fd = open(argv[1], O_RDONLY);
char ** text = malloc(10*sizeof(char*));
read(fd, buff, sizeof(buff));
char* start = buff;
int i = 0; //number of phrases!
char* p = NULL;
while (*(p = sentence(start)) != '[=10=]'){
text = load_sentence(p, text, i);
start += strlen(p)+1;
i++;
}
int j, k;
for (k=0; k<i; k++){
frequency(text[k]);
}
for (j=0; j<26; j++){
printf("%c : %d times\n", alphabet[j], count[j]);
}
}
看起来像这样的案例:
hope it's a good reading. bye.
输出正确:
a : 2 times
b : 1 times
c : 0 times
d : 2 times
e : 3 times
f : 0 times
g : 3 times
h : 1 times
i : 2 times
j : 0 times
k : 0 times
l : 0 times
m : 0 times
n : 1 times
o : 3 times
p : 1 times
q : 0 times
r : 1 times
s : 1 times
t : 1 times
u : 0 times
v : 0 times
w : 0 times
x : 0 times
y : 1 times
z : 0 times
对于其他人,"memory error" 以 free() : invalid next size (normal)
开头。该错误有许多内存映射行并以流产结束。
我对C很陌生,很抱歉我没有经验。
这种情况有必要引入一个mutex
吗?
埃里卡,
由于我不太了解您的作业,请将此视为从 1000 中计算字符数的另一种方法。我没有检查它是否有错误,请根据您的需要重写。无论如何,这就是我解决它的方式。如果内存稀疏,我会从文件中逐个字符地读取,直到“.”。无论如何希望它能帮助你并取得好成绩:-)...
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <stdatomic.h>
#define MAX_THREADS 100
atomic_int threadCount;
#define NCHAR 26
char alphabet[NCHAR] = "abcdefghijklmnopqrstuvwxyz";
atomic_int count[NCHAR];
void* count_letter(void * s){
threadCount++;
char* p = (char*) s;
for (int i=0; i<NCHAR; i++)
if (*p == alphabet[i])
count[i]++;
threadCount--;
return NULL;
}
int main(int argc, char* argv[]){
//Init variables
FILE *file;
char *myText;
unsigned long fileLen;
int deadLockGuard=0;
threadCount=0;
//Open the file
file = fopen(argv[1], "rb");
if (!file) {
fprintf(stderr, "Unable to open file %s", argv[1]);
return EXIT_FAILURE;
}
fseek(file, 0, SEEK_END);
fileLen=ftell(file);
rewind(file);
//reserve memory and read the file
myText=(char *)malloc(fileLen+1);
if (!myText) {
fprintf(stderr, "Memory error!");
fclose(file);
return EXIT_FAILURE;
}
fread(myText, fileLen, 1, file);
fclose(file);
//Get each sentence ending with a . and then for each character look at the count for each character in it's own thread.
char *subString = strtok(myText, "."); //This is your sentence/load_sentence method
while (subString != NULL) {
for (int v = 0;v<strlen(subString);v++) { //This is your frequency method
deadLockGuard=0;
while (threadCount >= MAX_THREADS) {
usleep(100); //Sleep 0.1ms
if(deadLockGuard++ == 10000) {
printf("Dead-lock guard1 triggered.. Call Bill Gates for help!"); //No free threads after a second.. Either the computer is DEAD SLOW or we got some creepy crawler in da house.
return EXIT_FAILURE;
}
}
pthread_t tid; //Yes you can overwrite it.. I use a counter to join the workers.
pthread_create(&tid, NULL, count_letter, (void*) subString+v);
}
subString = strtok(NULL, ".");
}
deadLockGuard=0;
//pthread_join all the still woring threads
while (threadCount) {
usleep(1000); //sleep a milli
if(deadLockGuard++ == 2*1000) {
printf("Dead-lock guard2 triggered.. Call Bill Gates for help!"); //Threads are running after 2 seconds.. Exit!!
return EXIT_FAILURE;
}
}
//Garbage collect and print the results.
free(myText);
for (int j=0; j<NCHAR; j++)
printf("%c : %d times\n", alphabet[j], count[j]);
return EXIT_SUCCESS;
}
根据 reference:
,您之前使用mutex
的版本有未定义的行为,因为您多次初始化互斥量
Attempting to initialize an already initialized mutex results in undefined behavior.
您正在并发访问count
,因此您必须使用互斥来制作线程安全代码。你在count_letter
中调用了pthread_mutex_init
是不正确的,这个函数是你线程的主体(多次初始化互斥量而不销毁它会导致UB),你应该只调用一次pthread_mutex_init
,因为实例作为主函数中的第一行:
int main() {
pthread_mutex_init(&mtx,NULL);
在return之前添加
pthread_mutex_destroy(&mtx);
您的 count_letter 函数中的关键部分是第
行count[i]++;
你应该修改如下
pthread_mutex_lock(&mtx);
count[i]++;
pthread_mutex_unlock(&mtx);
现在,return 到 sentence
的实现,在与 .
比较之前,您需要检查 *p
是否指向空终止符:
for (i=0; *p && *p != '.'; i++){
^^ added
未经测试,[=23=]
!= .
return 为真,您的循环将继续...