我自己实现的 linux tail 函数输出错误
wrong output with my own implementation of the linux tail function
我写了一个程序,应该打印出文件的最后 n 个字节。它应该被称为“./tail -n 文件名”。这是完整的代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
int get_lines(char** argv);
int get_bytes(int lines, int fd);
int main(int argc, char* argv[])
{
if (argc != 3) {
printf("ungültige Anzahl an args\n");
return EXIT_FAILURE;
}
int lines = get_lines(argv);
if (lines == -1) return EXIT_FAILURE;
// open file
char* path = argv[2];
int fd = open(path, O_RDONLY);
if (fd == -1) {
printf("file konnte nicht geöffnet werden oder existiert nicht\n");
return EXIT_FAILURE;
}
// get stat of fiĺe
struct stat infos;
if (stat(path, &infos) == -1) {
printf("stat failed\n");
return EXIT_FAILURE;
}
// set offset to last byte
if (lseek(fd, -1, SEEK_END) == -1) {
printf("lseek 1 failed\n");
return EXIT_FAILURE;
}
// determine number of bytes corresponding to number of lines
int bytes = get_bytes(lines, fd);
// printf("lines: %d\nbytes: %d\n", lines, bytes);
if (bytes == -1) return EXIT_FAILURE;
// set offset to beginning of tail and save tail of file in buffer tail
char tail[bytes];
if (lseek(fd, -bytes, SEEK_END) == -1) {
printf("lseek 1 failed\n");
return EXIT_FAILURE;
}
if (read(fd, tail, bytes) == -1){
printf("read failed\n");
return EXIT_FAILURE;
}
printf("%s\n", tail);
return 0;
}
int get_lines(char** argv) {
// cast string without - to int
char* substr = &argv[1][1];
int lines = atoi(substr);
if (lines == 0) {
printf("-n mit n = Anzahl Zeilen\n");
return -1;
}
return lines;
}
int get_bytes(int lines, int fd) {
int bytes = 0;
char buff[1];
while (1) {
if (read(fd, buff, 1) == -1){
printf("read failed\n");
return -1;
}
if (buff[0] == '\n') {
lines--;
if (lines <= 0) break;
}
if (lseek(fd, -2, SEEK_CUR) == -1) {
// n bytes was bigger than bytes of file
break;
}
bytes++;
}
return bytes;
}
它可以工作,但最后会打印出一些额外的字符。例如,当我使用尾部实现的源代码调用程序时,如“./tail -4 tail.c”,我得到如下输出:
bytes++;
}
return bytes;
}�E0V
或
bytes++;
}
return bytes;
}�z�U
所以我的猜测是,我在最后读取了 tail 变量中的很多字节。但是我找不到错误。我计算的字节数不正确吗?
您似乎正在将 bytes
个字节读入 tail
并将 tail
传递给 %s
。
这将导致未定义的行为,因为%s
需要一个指向字符串的指针(null-terminated序列字符)而 tail
将不包含任何终止 null-character,因此它将读取 out-of-bounds,寻找终止 null-character.
要解决此问题,您可以指定要打印的长度。
尝试 printf("%.*s\n", bytes, tail);
而不是 printf("%s\n", tail);
。
我写了一个程序,应该打印出文件的最后 n 个字节。它应该被称为“./tail -n 文件名”。这是完整的代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
int get_lines(char** argv);
int get_bytes(int lines, int fd);
int main(int argc, char* argv[])
{
if (argc != 3) {
printf("ungültige Anzahl an args\n");
return EXIT_FAILURE;
}
int lines = get_lines(argv);
if (lines == -1) return EXIT_FAILURE;
// open file
char* path = argv[2];
int fd = open(path, O_RDONLY);
if (fd == -1) {
printf("file konnte nicht geöffnet werden oder existiert nicht\n");
return EXIT_FAILURE;
}
// get stat of fiĺe
struct stat infos;
if (stat(path, &infos) == -1) {
printf("stat failed\n");
return EXIT_FAILURE;
}
// set offset to last byte
if (lseek(fd, -1, SEEK_END) == -1) {
printf("lseek 1 failed\n");
return EXIT_FAILURE;
}
// determine number of bytes corresponding to number of lines
int bytes = get_bytes(lines, fd);
// printf("lines: %d\nbytes: %d\n", lines, bytes);
if (bytes == -1) return EXIT_FAILURE;
// set offset to beginning of tail and save tail of file in buffer tail
char tail[bytes];
if (lseek(fd, -bytes, SEEK_END) == -1) {
printf("lseek 1 failed\n");
return EXIT_FAILURE;
}
if (read(fd, tail, bytes) == -1){
printf("read failed\n");
return EXIT_FAILURE;
}
printf("%s\n", tail);
return 0;
}
int get_lines(char** argv) {
// cast string without - to int
char* substr = &argv[1][1];
int lines = atoi(substr);
if (lines == 0) {
printf("-n mit n = Anzahl Zeilen\n");
return -1;
}
return lines;
}
int get_bytes(int lines, int fd) {
int bytes = 0;
char buff[1];
while (1) {
if (read(fd, buff, 1) == -1){
printf("read failed\n");
return -1;
}
if (buff[0] == '\n') {
lines--;
if (lines <= 0) break;
}
if (lseek(fd, -2, SEEK_CUR) == -1) {
// n bytes was bigger than bytes of file
break;
}
bytes++;
}
return bytes;
}
它可以工作,但最后会打印出一些额外的字符。例如,当我使用尾部实现的源代码调用程序时,如“./tail -4 tail.c”,我得到如下输出:
bytes++;
}
return bytes;
}�E0V
或
bytes++;
}
return bytes;
}�z�U
所以我的猜测是,我在最后读取了 tail 变量中的很多字节。但是我找不到错误。我计算的字节数不正确吗?
您似乎正在将 bytes
个字节读入 tail
并将 tail
传递给 %s
。
这将导致未定义的行为,因为%s
需要一个指向字符串的指针(null-terminated序列字符)而 tail
将不包含任何终止 null-character,因此它将读取 out-of-bounds,寻找终止 null-character.
要解决此问题,您可以指定要打印的长度。
尝试 printf("%.*s\n", bytes, tail);
而不是 printf("%s\n", tail);
。