如何消除 Linux 文件中的空洞

how to eliminate hole in file on linux

假设我有一个像这样有洞的文件:


+-------------+-----------+--------------+----------+------------+
|****data1****|    hole   |****data2*****|   hole   |****data3***|
+-------------+-----------+--------------+----------+------------+

然后,我想把文件转换成这样:

+-------------+--------------+------------+
|****data1****|****data2*****|****data3***|
+-------------+--------------+------------+

即原地消除文件中的空洞,使数据紧凑连续。

有什么简单高效的方法吗?我对性能有要求。

这是一个小的 C 程序,演示了如何使用 Linux 特有的 lseek() 标志 SEEK_DATA 和 SEEK_HOLE 来查找文件中的空洞,以及如何使用 copy_file_range() 来复制数据段。注意,空洞检测依赖于底层文件系统的支持;支持的文件系统清单请参见 lseek(2) 的文档。

// squashholes.c
// Compile with gcc -o squashholes -O -Wall -Wextra squashholes.c
// Run as ./squashholes file-with-holes new-holeless-file

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Copy every data segment of a sparse input file, back to back, into a
 * freshly created/truncated output file, skipping the holes in between.
 *
 * Usage: squashholes inputfile outputfile
 *
 * Data segments are located with lseek(SEEK_DATA) / lseek(SEEK_HOLE) and
 * transferred with copy_file_range().
 *
 * Returns 0 on success, 1 on a usage error or any failed system call
 * (a diagnostic is printed in that case).
 */
int main(int argc, char **argv) {
  // Upper bound for a single copy_file_range() request, so that the
  // off_t -> size_t conversion can never truncate on any platform.
  enum { MAX_CHUNK = 1 << 30 };

  if (argc != 3) {
    printf("Usage: %s inputfile outputfile\n", argv[0]);
    return 1;
  }

  int rc = 1;   // Pessimistic default; set to 0 only after a full copy
  int ofd = -1;
  struct stat s;

  int ifd = open(argv[1], O_RDONLY);
  if (ifd < 0) {
    perror("open in");
    return 1;
  }

  ofd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644);
  if (ofd < 0) {
    perror("open out");
    goto out;
  }

  if (fstat(ifd, &s) < 0) {
    perror("fstat");
    goto out;
  }

  printf("Input file size is %lld bytes.\n", (long long)s.st_size);

  off_t startpos = 0; // Start at the beginning of the file
  while (startpos < s.st_size) {
    // Find the start of the first data chunk at or after the current offset
    off_t datapos = lseek(ifd, startpos, SEEK_DATA);
    if (datapos == (off_t)-1) {
      if (errno == ENXIO) {
        // Only a hole remains between startpos and end of file: done.
        break;
      }
      perror("lseek data");
      goto out;
    }

    // Find the start of the first hole after the current data chunk
    off_t endpos = lseek(ifd, datapos, SEEK_HOLE);
    if (endpos == (off_t)-1) {
      perror("lseek hole");
      goto out;
    }

    // Reset file offset to the start of the data chunk
    if (lseek(ifd, datapos, SEEK_SET) == (off_t)-1) {
      perror("lseek start of block");
      goto out;
    }

    // And copy it.
    printf("Copying %lld bytes (range %lld to %lld)\n",
           (long long)(endpos - datapos), (long long)datapos, (long long)endpos);

    // copy_file_range() may transfer fewer bytes than requested, so keep
    // going until the whole segment is written. With NULL offsets it
    // advances both file offsets itself.
    off_t remaining = endpos - datapos;
    while (remaining > 0) {
      size_t want = remaining > (off_t)MAX_CHUNK ? (size_t)MAX_CHUNK
                                                 : (size_t)remaining;
      ssize_t copied = copy_file_range(ifd, NULL, ofd, NULL, want, 0);
      if (copied < 0) {
        perror("copy_file_range");
        goto out;
      }
      if (copied == 0) {
        // Input ended early (e.g. truncated while copying); avoid spinning.
        fprintf(stderr, "copy_file_range: unexpected end of input\n");
        goto out;
      }
      remaining -= copied;
    }

    // Update the location to start looking for the next data chunk at.
    startpos = endpos;
  }

  rc = 0;

out:
  close(ifd);
  // A failing close() on the output can report a deferred write error.
  if (ofd >= 0 && close(ofd) < 0) {
    perror("close out");
    rc = 1;
  }

  return rc;
}