如何消除 Linux 文件中的空洞
how to eliminate hole in file on linux
假设我有一个像这样有洞的文件:
+-------------+-----------+--------------+----------+------------+
|****data1****| hole |****data2*****| hole |****data3***|
+-------------+-----------+--------------+----------+------------+
然后,我想把文件转换成这样:
+-------------+--------------+------------+
|****data1****|****data2*****|****data3***|
+-------------+--------------+------------+
原地消除文件中的空洞,使数据保持紧凑连续。
有什么简单高效的方法吗?对性能有要求。
下面是一个小的 C 程序,演示了如何使用 Linux 特有的 lseek()
标志 SEEK_DATA
和 SEEK_HOLE
来查找文件中的空洞,以及如何使用 copy_file_range()
来复制数据段。请注意,空洞检测依赖于底层文件系统的支持;受支持的文件系统列表请参阅文档。
// squashholes.c
// Compile with gcc -o squashholes -O -Wall -Wextra squashholes.c
// Run as ./squashholes file-with-holes new-holeless-file
#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
/*
 * Copy the data extents of a sparse input file back-to-back into a newly
 * created output file, skipping the holes between them.
 *
 * Usage: squashholes inputfile outputfile
 *
 * Returns 0 on success, and 1 on a usage error or when any system call
 * fails (the failing call is reported on stderr).
 */
int main(int argc, char **argv) {
    /* Cap on one copy_file_range() request so the length always fits in size_t. */
    enum { MAX_CHUNK = 1 << 30 };
    struct stat s;
    off_t startpos = 0; // Start at the beginning of the file
    int rc = 1;
    int ifd = -1;
    int ofd = -1;

    if (argc != 3) {
        printf("Usage: %s inputfile outputfile\n", argv[0]);
        return 1;
    }

    ifd = open(argv[1], O_RDONLY);
    if (ifd < 0) {
        perror("open in");
        goto out;
    }

    ofd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (ofd < 0) {
        perror("open out");
        goto out;
    }

    if (fstat(ifd, &s) < 0) {
        perror("fstat");
        goto out;
    }
    printf("Input file size is %lld bytes.\n", (long long)s.st_size);

    while (startpos < s.st_size) {
        // Find the start of the first data chunk at or after the current offset
        startpos = lseek(ifd, startpos, SEEK_DATA);
        if (startpos == (off_t)-1) {
            // ENXIO means only a hole remains up to end-of-file: nothing
            // left to copy, which is a normal way for the scan to finish.
            if (errno == ENXIO) {
                break;
            }
            perror("lseek data");
            goto out;
        }

        // Find the start of the first hole after the current data chunk
        off_t endpos = lseek(ifd, startpos, SEEK_HOLE);
        if (endpos == (off_t)-1) {
            perror("lseek hole");
            goto out;
        }

        // Reset file offset to the start of the data chunk
        if (lseek(ifd, startpos, SEEK_SET) == (off_t)-1) {
            perror("lseek start of block");
            goto out;
        }

        printf("Copying %lld bytes (range %lld to %lld)\n",
               (long long)(endpos - startpos), (long long)startpos, (long long)endpos);

        // copy_file_range() may transfer fewer bytes than requested, so keep
        // going until the whole extent has been copied. Both descriptors
        // advance their own file offsets because the offset pointers are NULL.
        off_t remaining = endpos - startpos;
        while (remaining > 0) {
            size_t want = remaining > MAX_CHUNK ? (size_t)MAX_CHUNK : (size_t)remaining;
            ssize_t copied = copy_file_range(ifd, NULL, ofd, NULL, want, 0);
            if (copied < 0) {
                perror("copy_file_range");
                goto out;
            }
            if (copied == 0) {
                // End of input reached before the extent was fully copied
                // (e.g. the file shrank while we were reading it).
                fprintf(stderr, "copy_file_range: unexpected end of input\n");
                goto out;
            }
            remaining -= copied;
        }

        // Update the location to start looking for the next data chunk at.
        startpos = endpos;
    }
    rc = 0;

out:
    if (ifd >= 0) {
        close(ifd);
    }
    // A failed close() on the output can be the only report of a write error.
    if (ofd >= 0 && close(ofd) < 0) {
        perror("close out");
        rc = 1;
    }
    return rc;
}
假设我有一个像这样有洞的文件:
+-------------+-----------+--------------+----------+------------+
|****data1****| hole |****data2*****| hole |****data3***|
+-------------+-----------+--------------+----------+------------+
然后,我想把文件转换成这样:
+-------------+--------------+------------+
|****data1****|****data2*****|****data3***|
+-------------+--------------+------------+
原地消除文件中的空洞,使数据保持紧凑连续。
有什么简单高效的方法吗?对性能有要求。
下面是一个小的 C 程序,演示了如何使用 Linux 特有的 lseek()
标志 SEEK_DATA
和 SEEK_HOLE
来查找文件中的空洞,以及如何使用 copy_file_range()
来复制数据段。请注意,空洞检测依赖于底层文件系统的支持;受支持的文件系统列表请参阅文档。
// squashholes.c
// Compile with gcc -o squashholes -O -Wall -Wextra squashholes.c
// Run as ./squashholes file-with-holes new-holeless-file
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
/*
 * Copy the data extents of a sparse input file back-to-back into a newly
 * created output file, skipping the holes between them.
 *
 * Usage: squashholes inputfile outputfile
 *
 * Returns 0 on success, and 1 on a usage error or when any system call
 * fails (the failing call is reported on stderr).
 */
int main(int argc, char **argv) {
    /* Cap on one copy_file_range() request so the length always fits in size_t. */
    enum { MAX_CHUNK = 1 << 30 };
    struct stat s;
    off_t startpos = 0; // Start at the beginning of the file
    int rc = 1;
    int ifd = -1;
    int ofd = -1;

    if (argc != 3) {
        printf("Usage: %s inputfile outputfile\n", argv[0]);
        return 1;
    }

    ifd = open(argv[1], O_RDONLY);
    if (ifd < 0) {
        perror("open in");
        goto out;
    }

    ofd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (ofd < 0) {
        perror("open out");
        goto out;
    }

    if (fstat(ifd, &s) < 0) {
        perror("fstat");
        goto out;
    }
    printf("Input file size is %lld bytes.\n", (long long)s.st_size);

    while (startpos < s.st_size) {
        // Find the start of the first data chunk at or after the current offset
        startpos = lseek(ifd, startpos, SEEK_DATA);
        if (startpos == (off_t)-1) {
            // ENXIO means only a hole remains up to end-of-file: nothing
            // left to copy, which is a normal way for the scan to finish.
            if (errno == ENXIO) {
                break;
            }
            perror("lseek data");
            goto out;
        }

        // Find the start of the first hole after the current data chunk
        off_t endpos = lseek(ifd, startpos, SEEK_HOLE);
        if (endpos == (off_t)-1) {
            perror("lseek hole");
            goto out;
        }

        // Reset file offset to the start of the data chunk
        if (lseek(ifd, startpos, SEEK_SET) == (off_t)-1) {
            perror("lseek start of block");
            goto out;
        }

        printf("Copying %lld bytes (range %lld to %lld)\n",
               (long long)(endpos - startpos), (long long)startpos, (long long)endpos);

        // copy_file_range() may transfer fewer bytes than requested, so keep
        // going until the whole extent has been copied. Both descriptors
        // advance their own file offsets because the offset pointers are NULL.
        off_t remaining = endpos - startpos;
        while (remaining > 0) {
            size_t want = remaining > MAX_CHUNK ? (size_t)MAX_CHUNK : (size_t)remaining;
            ssize_t copied = copy_file_range(ifd, NULL, ofd, NULL, want, 0);
            if (copied < 0) {
                perror("copy_file_range");
                goto out;
            }
            if (copied == 0) {
                // End of input reached before the extent was fully copied
                // (e.g. the file shrank while we were reading it).
                fprintf(stderr, "copy_file_range: unexpected end of input\n");
                goto out;
            }
            remaining -= copied;
        }

        // Update the location to start looking for the next data chunk at.
        startpos = endpos;
    }
    rc = 0;

out:
    if (ifd >= 0) {
        close(ifd);
    }
    // A failed close() on the output can be the only report of a write error.
    if (ofd >= 0 && close(ofd) < 0) {
        perror("close out");
        rc = 1;
    }
    return rc;
}