文件映射与文件系统同步
file mapping vs file system synchronization
我有一个包含一些数据的文件,它也是 memory-mapped。这样我就有了文件描述符和指向映射页面的指针。大多数情况下,数据仅从映射中读取,但最终也会被修改。
修改包括修改文件中的一些数据(某种 headers 更新),以及附加一些新数据(即写入 post 文件的当前末尾)。
这个数据结构是从不同的线程访问的,为了防止冲突,我同步访问它(互斥锁和朋友)。
在修改过程中,我同时使用了文件映射和文件描述符。 Headers 通过修改映射内存隐式更新,而新数据由适当的 API 写入文件(WriteFile
on windows,write
on posix)。值得注意的是,新数据和 headers 属于不同的页面。
由于修改改变了文件大小,每次修改后内存映射为re-initialized。也就是说,它是未映射的,然后再次映射(使用新大小)。
我意识到对映射内存的写入是 "asynchronous" wrt 文件系统,并且不能保证顺序,但我认为没有问题,因为我明确关闭了文件映射,这应该(恕我直言)采取行动作为一种冲洗点。
现在这在 windows 上没有问题,但在 linux 上(确切地说是 android)最终映射数据 turns-out 暂时不一致(即数据重试时可以)。它似乎没有反映 newly-appended 数据。
我是否必须调用同步 API 来确保正确刷新数据?如果是这样,我应该使用哪一个:sync
、msync
、syncfs
或不同的东西?
提前致谢。
编辑:
这是一个 pseudo-code 来说明我正在处理的场景。
(当然真正的代码更复杂)
struct CompressedGrid
{
mutex m_Lock;
int m_FileHandle;
void* m_pMappedMemory;
Hdr* get_Hdr() { return /* the mapped memory with some offset*/; }
void SaveGridCell(int idx, const Cell& cCompressed)
{
AutoLock scope(m_Lock);
// Write to mapped memory
get_Hdr()->m_pCellOffset[Idx] = /* current end of file */;
// Append the data
lseek64(m_FileHandle, 0, FILE_END);
write(m_FileHandle, cCompressed.pPtr, cCompressed.nSize);
// re-map
munmap(...);
m_pMappedMemory = mmap(...); // specify the new file size of course
}
bool DecodeGridCell(int idx, Cell& cRaw)
{
AutoLock scope(m_Lock);
uint64_t nOffs = get_Hdr()->m_pCellOffset[Idx] = /* ;
if (!nOffs)
return false; // unavail
const uint8_t* p = m_pMappedMemory + nOffs;
cRaw.DecodeFrom(p); // This is where the problem appears!
return true;
}
使用addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE, fd, offset)
映射文件。
如果文件大小发生变化,请在重新映射文件之前使用 newaddr = mremap(addr, len, newlen, MREMAP_MAYMOVE)
to update the mapping to reflect it. To extend the file, use ftruncate(fd, newlen)
。
你可以使用mprotect(addr, len, protflags)
to change the protection (read/write) on any pages in the mapping (both must be aligned on a page boundary). You can also tell the kernel about your future accesses via madvise()
,如果映射太大而不能一次放入内存,但内核似乎非常擅长管理预读等,即使没有这些。
当您更改映射时,请使用 msync(partaddr, partlen, MS_SYNC | MS_INVALIDATE)
或 msync(partaddr, partlen, MS_ASYNC | MS_INVALIDATE)
以确保从 partaddr
向前的 partlen
字符的更改对其他映射和文件可见读者。如果您使用 MS_SYNC
,则仅在更新完成时调用 returns。 MS_ASYNC
调用告诉内核进行更新,但不会等到它完成。如果该文件没有其他内存映射,则 MS_INVALIDATE
不执行任何操作;但如果有,这会告诉内核确保更改也反映在那些更改中。
在自 2.6.19 以来的 Linux 内核中,MS_ASYNC
什么也不做,因为内核无论如何都会正确地跟踪更改(不需要 msync()
,除了可能在 munmap()
).我不知道 Android 内核是否有改变这种行为的补丁;我怀疑不是。为了跨 POSIXy 个系统的可移植性,将它们保留在代码中仍然是一个好主意。
mapped data turns-out to be inconsistent temporarily
好吧,除非你确实使用 msync(partaddr, partlen, MS_SYNC | MS_INVALIDATE)
,内核会在它认为最好的时候进行更新。
因此,如果您需要在继续之前对文件阅读器进行一些更改,请在执行这些更新的过程中使用 msync(areaptr, arealen, MS_SYNC | MS_INVALIDATE)
。
如果您不关心确切的时刻,请使用 msync(areaptr, arealen, MS_ASYNC | MS_INVALIDATE)
。这将是当前 Linux 内核的空操作,但为了可移植性保留它们是个好主意(如果性能需要,可能注释掉)并提醒开发人员(缺乏)同步期望。
正如我对 OP 的评论,我根本无法观察到 Linux 上的同步问题。 (这并不意味着它不会发生在 Android 上,因为 Android 内核是 Linux 内核的 衍生物 ,并不完全相同。)
我相信自 2.6.19 以来 Linux 内核根本不需要 msync()
调用,只要映射使用标志 MAP_SHARED | MAP_NORESERVE
,并且底层文件是未使用 O_DIRECT
标志打开。这种信念的原因是在这种情况下,映射和文件访问都应该使用完全相同的页面缓存页面。
这里有两个测试程序,可以用来在 Linux 上进行探索。一、单进程测试,test-single.c:
#define _POSIX_C_SOURCE 200809L
#define _GNU_SOURCE
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
static inline int read_from(const int fd, void *const to, const size_t len, const off_t offset)
{
char *p = (char *)to;
char *const q = (char *)to + len;
ssize_t n;
if (lseek(fd, offset, SEEK_SET) != offset)
return errno = EIO;
while (p < q) {
n = read(fd, p, (size_t)(q - p));
if (n > 0)
p += n;
else
if (n != -1)
return errno = EIO;
else
if (errno != EINTR)
return errno;
}
return 0;
}
static inline int write_to(const int fd, const void *const from, const size_t len, const off_t offset)
{
const char *const q = (const char *)from + len;
const char *p = (const char *)from;
ssize_t n;
if (lseek(fd, offset, SEEK_SET) != offset)
return errno = EIO;
while (p < q) {
n = write(fd, p, (size_t)(q - p));
if (n > 0)
p += n;
else
if (n != -1)
return errno = EIO;
else
if (errno != EINTR)
return errno;
}
return 0;
}
int main(int argc, char *argv[])
{
unsigned long tests, n, merrs = 0, werrs = 0;
size_t page;
long *map, data[2];
int fd;
char dummy;
if (argc != 3) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s FILENAME COUNT\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "This program will test synchronization between a memory map\n");
fprintf(stderr, "and reading/writing the underlying file, COUNT times.\n");
fprintf(stderr, "\n");
return EXIT_FAILURE;
}
if (sscanf(argv[2], " %lu %c", &tests, &dummy) != 1 || tests < 1) {
fprintf(stderr, "%s: Invalid number of tests to run.\n", argv[2]);
return EXIT_FAILURE;
}
/* Create the file. */
page = sysconf(_SC_PAGESIZE);
fd = open(argv[1], O_RDWR | O_CREAT | O_EXCL, 0644);
if (fd == -1) {
fprintf(stderr, "%s: Cannot create file: %s.\n", argv[1], strerror(errno));
return EXIT_FAILURE;
}
if (ftruncate(fd, page) == -1) {
fprintf(stderr, "%s: Cannot resize file: %s.\n", argv[1], strerror(errno));
unlink(argv[1]);
return EXIT_FAILURE;
}
/* Map it. */
map = mmap(NULL, page, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_NORESERVE, fd, 0);
if (map == MAP_FAILED) {
fprintf(stderr, "%s: Cannot map file: %s.\n", argv[1], strerror(errno));
unlink(argv[1]);
close(fd);
return EXIT_FAILURE;
}
/* Test loop. */
for (n = 0; n < tests; n++) {
/* Update map. */
map[0] = (long)(n + 1);
map[1] = (long)(~n);
/* msync(map, 2 * sizeof map[0], MAP_SYNC | MAP_INVALIDATE); */
/* Check the file contents. */
if (read_from(fd, data, sizeof data, 0)) {
fprintf(stderr, "read_from() failed: %s.\n", strerror(errno));
munmap(map, page);
unlink(argv[1]);
close(fd);
return EXIT_FAILURE;
}
werrs += (data[0] != (long)(n + 1) || data[1] != (long)(~n));
/* Update data. */
data[0] = (long)(n * 386131);
data[1] = (long)(n * -257);
if (write_to(fd, data, sizeof data, 0)) {
fprintf(stderr, "write_to() failed: %s.\n", strerror(errno));
munmap(map, page);
unlink(argv[1]);
close(fd);
return EXIT_FAILURE;
}
merrs += (map[0] != (long)(n * 386131) || map[1] != (long)(n * -257));
}
munmap(map, page);
unlink(argv[1]);
close(fd);
if (!werrs && !merrs)
printf("No errors detected.\n");
else {
if (!werrs)
printf("Detected %lu times (%.3f%%) when file contents were incorrect.\n",
werrs, 100.0 * (double)werrs / (double)tests);
if (!merrs)
printf("Detected %lu times (%.3f%%) when mapping was incorrect.\n",
merrs, 100.0 * (double)merrs / (double)tests);
}
return EXIT_SUCCESS;
}
编译并运行使用例如
gcc -Wall -O2 test-single -o single
./single temp 1000000
测试一百万次,当两个访问在同一进程中完成时,映射和文件内容是否保持同步。请注意,msync()
调用已被注释掉,因为在我的机器上不需要它:即使没有它,我在测试期间也从未看到任何 errors/desynchronization。
我机器上的测试速率大约是每秒 550,000 次测试。请注意,每个测试都是双向的,因此包括读取和写入。我只是无法通过它来检测任何错误。它也被写成对错误非常敏感。
第二个测试程序使用两个子进程和一个POSIX实时信号告诉另一个进程检查内容。 测试-multi.c:
#define _POSIX_C_SOURCE 200809L
#define _GNU_SOURCE
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#define NOTIFY_SIGNAL (SIGRTMIN+0)
int mapper_process(const int fd, const size_t len)
{
long value = 1, count[2] = { 0, 0 };
long *data;
siginfo_t info;
sigset_t sigs;
int signum;
if (fd == -1) {
fprintf(stderr, "mapper_process(): Invalid file descriptor.\n");
return EXIT_FAILURE;
}
data = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE, fd, 0);
if (data == MAP_FAILED) {
fprintf(stderr, "mapper_process(): Cannot map file.\n");
return EXIT_FAILURE;
}
sigemptyset(&sigs);
sigaddset(&sigs, NOTIFY_SIGNAL);
sigaddset(&sigs, SIGINT);
sigaddset(&sigs, SIGHUP);
sigaddset(&sigs, SIGTERM);
while (1) {
/* Wait for the notification. */
signum = sigwaitinfo(&sigs, &info);
if (signum == -1) {
if (errno == EINTR)
continue;
fprintf(stderr, "mapper_process(): sigwaitinfo() failed: %s.\n", strerror(errno));
munmap(data, len);
return EXIT_FAILURE;
}
if (signum != NOTIFY_SIGNAL)
break;
/* A notify signal was received. Check the write counter. */
count[ (data[0] == value) ]++;
/* Update. */
data[0] = value++;
data[1] = -(value++);
/* Synchronize */
/* msync(data, 2 * sizeof (data[0]), MS_SYNC | MS_INVALIDATE); */
/* And let the writer know. */
kill(info.si_pid, NOTIFY_SIGNAL);
}
/* Print statistics. */
printf("mapper_process(): %lu errors out of %lu cycles (%.3f%%)\n",
count[0], count[0] + count[1], 100.0 * (double)count[0] / (double)(count[0] + count[1]));
fflush(stdout);
munmap(data, len);
return EXIT_SUCCESS;
}
static inline int read_from(const int fd, void *const to, const size_t len, const off_t offset)
{
char *p = (char *)to;
char *const q = (char *)to + len;
ssize_t n;
if (lseek(fd, offset, SEEK_SET) != offset)
return errno = EIO;
while (p < q) {
n = read(fd, p, (size_t)(q - p));
if (n > 0)
p += n;
else
if (n != -1)
return errno = EIO;
else
if (errno != EINTR)
return errno;
}
return 0;
}
static inline int write_to(const int fd, const void *const from, const size_t len, const off_t offset)
{
const char *const q = (const char *)from + len;
const char *p = (const char *)from;
ssize_t n;
if (lseek(fd, offset, SEEK_SET) != offset)
return errno = EIO;
while (p < q) {
n = write(fd, p, (size_t)(q - p));
if (n > 0)
p += n;
else
if (n != -1)
return errno = EIO;
else
if (errno != EINTR)
return errno;
}
return 0;
}
int writer_process(const int fd, const size_t len, const pid_t other)
{
long data[2] = { 0, 0 }, count[2] = { 0, 0 };
long value = 0;
siginfo_t info;
sigset_t sigs;
int signum;
sigemptyset(&sigs);
sigaddset(&sigs, NOTIFY_SIGNAL);
sigaddset(&sigs, SIGINT);
sigaddset(&sigs, SIGHUP);
sigaddset(&sigs, SIGTERM);
while (1) {
/* Update. */
data[0] = ++value;
data[1] = -(value++);
/* then write the data. */
if (write_to(fd, data, sizeof data, 0)) {
fprintf(stderr, "writer_process(): write_to() failed: %s.\n", strerror(errno));
return EXIT_FAILURE;
}
/* Let the mapper know. */
kill(other, NOTIFY_SIGNAL);
/* Wait for the notification. */
signum = sigwaitinfo(&sigs, &info);
if (signum == -1) {
if (errno == EINTR)
continue;
fprintf(stderr, "writer_process(): sigwaitinfo() failed: %s.\n", strerror(errno));
return EXIT_FAILURE;
}
if (signum != NOTIFY_SIGNAL || info.si_pid != other)
break;
/* Reread the file. */
if (read_from(fd, data, sizeof data, 0)) {
fprintf(stderr, "writer_process(): read_from() failed: %s.\n", strerror(errno));
return EXIT_FAILURE;
}
/* Check the read counter. */
count[ (data[1] == -value) ]++;
}
/* Print statistics. */
printf("writer_process(): %lu errors out of %lu cycles (%.3f%%)\n",
count[0], count[0] + count[1], 100.0 * (double)count[0] / (double)(count[0] + count[1]));
fflush(stdout);
return EXIT_SUCCESS;
}
int main(int argc, char *argv[])
{
struct timespec duration;
double seconds;
pid_t mapper, writer, p;
size_t page;
siginfo_t info;
sigset_t sigs;
int fd, status;
char dummy;
if (argc != 3) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s FILENAME SECONDS\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "This program will test synchronization between a memory map\n");
fprintf(stderr, "and reading/writing the underlying file.\n");
fprintf(stderr, "The test will run for the specified time, or indefinitely\n");
fprintf(stderr, "if SECONDS is zero, but you can also interrupt it with\n");
fprintf(stderr, "Ctrl+C (INT signal).\n");
fprintf(stderr, "\n");
return EXIT_FAILURE;
}
if (sscanf(argv[2], " %lf %c", &seconds, &dummy) != 1) {
fprintf(stderr, "%s: Invalid number of seconds to run.\n", argv[2]);
return EXIT_FAILURE;
}
if (seconds > 0) {
duration.tv_sec = (time_t)seconds;
duration.tv_nsec = (long)(1000000000 * (seconds - (double)(duration.tv_sec)));
} else {
duration.tv_sec = 0;
duration.tv_nsec = 0;
}
/* Block INT, HUP, CHLD, and the notification signal. */
sigemptyset(&sigs);
sigaddset(&sigs, SIGINT);
sigaddset(&sigs, SIGHUP);
sigaddset(&sigs, SIGCHLD);
sigaddset(&sigs, NOTIFY_SIGNAL);
if (sigprocmask(SIG_BLOCK, &sigs, NULL) == -1) {
fprintf(stderr, "Cannot block the necessary signals: %s.\n", strerror(errno));
return EXIT_FAILURE;
}
/* Create the file. */
page = sysconf(_SC_PAGESIZE);
fd = open(argv[1], O_RDWR | O_CREAT | O_EXCL, 0644);
if (fd == -1) {
fprintf(stderr, "%s: Cannot create file: %s.\n", argv[1], strerror(errno));
return EXIT_FAILURE;
}
if (ftruncate(fd, page) == -1) {
fprintf(stderr, "%s: Cannot resize file: %s.\n", argv[1], strerror(errno));
unlink(argv[1]);
return EXIT_FAILURE;
}
close(fd);
fd = -1;
/* Ensure streams are flushed before forking. They should be, we're just paranoid here. */
fflush(stdout);
fflush(stderr);
/* Fork the mapper child process. */
mapper = fork();
if (mapper == -1) {
fprintf(stderr, "Cannot fork mapper child process: %s.\n", strerror(errno));
unlink(argv[1]);
return EXIT_FAILURE;
}
if (!mapper) {
fd = open(argv[1], O_RDWR);
if (fd == -1) {
fprintf(stderr, "mapper_process(): %s: Cannot open file: %s.\n", argv[1], strerror(errno));
return EXIT_FAILURE;
}
status = mapper_process(fd, page);
close(fd);
return status;
}
/* For the writer child process. (mapper contains the PID of the mapper process.) */
writer = fork();
if (writer == -1) {
fprintf(stderr, "Cannot fork writer child process: %s.\n", strerror(errno));
unlink(argv[1]);
kill(mapper, SIGKILL);
return EXIT_FAILURE;
}
if (!writer) {
fd = open(argv[1], O_RDWR);
if (fd == -1) {
fprintf(stderr, "writer_process(): %s: Cannot open file: %s.\n", argv[1], strerror(errno));
return EXIT_FAILURE;
}
status = writer_process(fd, page, mapper);
close(fd);
return status;
}
/* Wait for a signal. */
if (duration.tv_sec || duration.tv_nsec)
status = sigtimedwait(&sigs, &info, &duration);
else
status = sigwaitinfo(&sigs, &info);
/* Whatever it was, we kill the child processes. */
kill(mapper, SIGHUP);
kill(writer, SIGHUP);
do {
p = waitpid(-1, NULL, 0);
} while (p != -1 || errno == EINTR);
/* Cleanup. */
unlink(argv[1]);
printf("Done.\n");
return EXIT_SUCCESS;
}
注意子进程单独打开临时文件。要编译和 运行,请使用例如
gcc -Wall -O2 test-multi.c -o multi
./multi temp 10
第二个参数是测试的持续时间,以秒为单位。 (您可以使用 SIGINT (Ctrl+C) 或 SIGHUP 安全地中断测试。)
在我的机器上,测试速率大约是每秒 120,000 次测试; msync()
调用在这里也被注释掉了,因为即使没有它我也看不到任何 errors/desynchronization 。 (另外,msync(ptr, len, MS_SYNC)
和 msync(ptr, len, MS_SYNC | MS_INVALIDATE)
非常慢;无论是哪种,我每秒只能进行不到 1000 次测试,结果完全没有差异。这是 100 倍的减速。)
mmap 的 MAP_NORESERVE
标志告诉它在内存压力下使用文件本身作为后备存储,而不是交换。如果您在无法识别该标志的系统上编译代码,则可以忽略它。只要映射没有从 RAM 中逐出,该标志根本不会影响操作。
我有一个包含一些数据的文件,它也是 memory-mapped。这样我就有了文件描述符和指向映射页面的指针。大多数情况下,数据仅从映射中读取,但最终也会被修改。
修改包括修改文件中的一些数据(某种 headers 更新),以及附加一些新数据(即写入 post 文件的当前末尾)。
这个数据结构是从不同的线程访问的,为了防止冲突,我同步访问它(互斥锁和朋友)。
在修改过程中,我同时使用了文件映射和文件描述符。 Headers 通过修改映射内存隐式更新,而新数据由适当的 API 写入文件(WriteFile
on windows,write
on posix)。值得注意的是,新数据和 headers 属于不同的页面。
由于修改改变了文件大小,每次修改后内存映射为re-initialized。也就是说,它是未映射的,然后再次映射(使用新大小)。
我意识到对映射内存的写入是 "asynchronous" wrt 文件系统,并且不能保证顺序,但我认为没有问题,因为我明确关闭了文件映射,这应该(恕我直言)采取行动作为一种冲洗点。
现在这在 windows 上没有问题,但在 linux 上(确切地说是 android)最终映射数据 turns-out 暂时不一致(即数据重试时可以)。它似乎没有反映 newly-appended 数据。
我是否必须调用同步 API 来确保正确刷新数据?如果是这样,我应该使用哪一个:sync
、msync
、syncfs
或不同的东西?
提前致谢。
编辑:
这是一个 pseudo-code 来说明我正在处理的场景。 (当然真正的代码更复杂)
struct CompressedGrid
{
mutex m_Lock;
int m_FileHandle;
void* m_pMappedMemory;
Hdr* get_Hdr() { return /* the mapped memory with some offset*/; }
void SaveGridCell(int idx, const Cell& cCompressed)
{
AutoLock scope(m_Lock);
// Write to mapped memory
get_Hdr()->m_pCellOffset[Idx] = /* current end of file */;
// Append the data
lseek64(m_FileHandle, 0, FILE_END);
write(m_FileHandle, cCompressed.pPtr, cCompressed.nSize);
// re-map
munmap(...);
m_pMappedMemory = mmap(...); // specify the new file size of course
}
bool DecodeGridCell(int idx, Cell& cRaw)
{
AutoLock scope(m_Lock);
uint64_t nOffs = get_Hdr()->m_pCellOffset[Idx] = /* ;
if (!nOffs)
return false; // unavail
const uint8_t* p = m_pMappedMemory + nOffs;
cRaw.DecodeFrom(p); // This is where the problem appears!
return true;
}
使用addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE, fd, offset)
映射文件。
如果文件大小发生变化,请在重新映射文件之前使用 newaddr = mremap(addr, len, newlen, MREMAP_MAYMOVE)
to update the mapping to reflect it. To extend the file, use ftruncate(fd, newlen)
。
你可以使用mprotect(addr, len, protflags)
to change the protection (read/write) on any pages in the mapping (both must be aligned on a page boundary). You can also tell the kernel about your future accesses via madvise()
,如果映射太大而不能一次放入内存,但内核似乎非常擅长管理预读等,即使没有这些。
当您更改映射时,请使用 msync(partaddr, partlen, MS_SYNC | MS_INVALIDATE)
或 msync(partaddr, partlen, MS_ASYNC | MS_INVALIDATE)
以确保从 partaddr
向前的 partlen
字符的更改对其他映射和文件可见读者。如果您使用 MS_SYNC
,则仅在更新完成时调用 returns。 MS_ASYNC
调用告诉内核进行更新,但不会等到它完成。如果该文件没有其他内存映射,则 MS_INVALIDATE
不执行任何操作;但如果有,这会告诉内核确保更改也反映在那些更改中。
在自 2.6.19 以来的 Linux 内核中,MS_ASYNC
什么也不做,因为内核无论如何都会正确地跟踪更改(不需要 msync()
,除了可能在 munmap()
).我不知道 Android 内核是否有改变这种行为的补丁;我怀疑不是。为了跨 POSIXy 个系统的可移植性,将它们保留在代码中仍然是一个好主意。
mapped data turns-out to be inconsistent temporarily
好吧,除非你确实使用 msync(partaddr, partlen, MS_SYNC | MS_INVALIDATE)
,内核会在它认为最好的时候进行更新。
因此,如果您需要在继续之前对文件阅读器进行一些更改,请在执行这些更新的过程中使用 msync(areaptr, arealen, MS_SYNC | MS_INVALIDATE)
。
如果您不关心确切的时刻,请使用 msync(areaptr, arealen, MS_ASYNC | MS_INVALIDATE)
。这将是当前 Linux 内核的空操作,但为了可移植性保留它们是个好主意(如果性能需要,可能注释掉)并提醒开发人员(缺乏)同步期望。
正如我对 OP 的评论,我根本无法观察到 Linux 上的同步问题。 (这并不意味着它不会发生在 Android 上,因为 Android 内核是 Linux 内核的 衍生物 ,并不完全相同。)
我相信自 2.6.19 以来 Linux 内核根本不需要 msync()
调用,只要映射使用标志 MAP_SHARED | MAP_NORESERVE
,并且底层文件是未使用 O_DIRECT
标志打开。这种信念的原因是在这种情况下,映射和文件访问都应该使用完全相同的页面缓存页面。
这里有两个测试程序,可以用来在 Linux 上进行探索。一、单进程测试,test-single.c:
#define _POSIX_C_SOURCE 200809L
#define _GNU_SOURCE
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
static inline int read_from(const int fd, void *const to, const size_t len, const off_t offset)
{
char *p = (char *)to;
char *const q = (char *)to + len;
ssize_t n;
if (lseek(fd, offset, SEEK_SET) != offset)
return errno = EIO;
while (p < q) {
n = read(fd, p, (size_t)(q - p));
if (n > 0)
p += n;
else
if (n != -1)
return errno = EIO;
else
if (errno != EINTR)
return errno;
}
return 0;
}
static inline int write_to(const int fd, const void *const from, const size_t len, const off_t offset)
{
const char *const q = (const char *)from + len;
const char *p = (const char *)from;
ssize_t n;
if (lseek(fd, offset, SEEK_SET) != offset)
return errno = EIO;
while (p < q) {
n = write(fd, p, (size_t)(q - p));
if (n > 0)
p += n;
else
if (n != -1)
return errno = EIO;
else
if (errno != EINTR)
return errno;
}
return 0;
}
int main(int argc, char *argv[])
{
unsigned long tests, n, merrs = 0, werrs = 0;
size_t page;
long *map, data[2];
int fd;
char dummy;
if (argc != 3) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s FILENAME COUNT\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "This program will test synchronization between a memory map\n");
fprintf(stderr, "and reading/writing the underlying file, COUNT times.\n");
fprintf(stderr, "\n");
return EXIT_FAILURE;
}
if (sscanf(argv[2], " %lu %c", &tests, &dummy) != 1 || tests < 1) {
fprintf(stderr, "%s: Invalid number of tests to run.\n", argv[2]);
return EXIT_FAILURE;
}
/* Create the file. */
page = sysconf(_SC_PAGESIZE);
fd = open(argv[1], O_RDWR | O_CREAT | O_EXCL, 0644);
if (fd == -1) {
fprintf(stderr, "%s: Cannot create file: %s.\n", argv[1], strerror(errno));
return EXIT_FAILURE;
}
if (ftruncate(fd, page) == -1) {
fprintf(stderr, "%s: Cannot resize file: %s.\n", argv[1], strerror(errno));
unlink(argv[1]);
return EXIT_FAILURE;
}
/* Map it. */
map = mmap(NULL, page, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_NORESERVE, fd, 0);
if (map == MAP_FAILED) {
fprintf(stderr, "%s: Cannot map file: %s.\n", argv[1], strerror(errno));
unlink(argv[1]);
close(fd);
return EXIT_FAILURE;
}
/* Test loop. */
for (n = 0; n < tests; n++) {
/* Update map. */
map[0] = (long)(n + 1);
map[1] = (long)(~n);
/* msync(map, 2 * sizeof map[0], MAP_SYNC | MAP_INVALIDATE); */
/* Check the file contents. */
if (read_from(fd, data, sizeof data, 0)) {
fprintf(stderr, "read_from() failed: %s.\n", strerror(errno));
munmap(map, page);
unlink(argv[1]);
close(fd);
return EXIT_FAILURE;
}
werrs += (data[0] != (long)(n + 1) || data[1] != (long)(~n));
/* Update data. */
data[0] = (long)(n * 386131);
data[1] = (long)(n * -257);
if (write_to(fd, data, sizeof data, 0)) {
fprintf(stderr, "write_to() failed: %s.\n", strerror(errno));
munmap(map, page);
unlink(argv[1]);
close(fd);
return EXIT_FAILURE;
}
merrs += (map[0] != (long)(n * 386131) || map[1] != (long)(n * -257));
}
munmap(map, page);
unlink(argv[1]);
close(fd);
if (!werrs && !merrs)
printf("No errors detected.\n");
else {
if (!werrs)
printf("Detected %lu times (%.3f%%) when file contents were incorrect.\n",
werrs, 100.0 * (double)werrs / (double)tests);
if (!merrs)
printf("Detected %lu times (%.3f%%) when mapping was incorrect.\n",
merrs, 100.0 * (double)merrs / (double)tests);
}
return EXIT_SUCCESS;
}
编译并运行使用例如
gcc -Wall -O2 test-single -o single
./single temp 1000000
测试一百万次,当两个访问在同一进程中完成时,映射和文件内容是否保持同步。请注意,msync()
调用已被注释掉,因为在我的机器上不需要它:即使没有它,我在测试期间也从未看到任何 errors/desynchronization。
我机器上的测试速率大约是每秒 550,000 次测试。请注意,每个测试都是双向的,因此包括读取和写入。我只是无法通过它来检测任何错误。它也被写成对错误非常敏感。
第二个测试程序使用两个子进程和一个POSIX实时信号告诉另一个进程检查内容。 测试-multi.c:
#define _POSIX_C_SOURCE 200809L
#define _GNU_SOURCE
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#define NOTIFY_SIGNAL (SIGRTMIN+0)
int mapper_process(const int fd, const size_t len)
{
long value = 1, count[2] = { 0, 0 };
long *data;
siginfo_t info;
sigset_t sigs;
int signum;
if (fd == -1) {
fprintf(stderr, "mapper_process(): Invalid file descriptor.\n");
return EXIT_FAILURE;
}
data = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE, fd, 0);
if (data == MAP_FAILED) {
fprintf(stderr, "mapper_process(): Cannot map file.\n");
return EXIT_FAILURE;
}
sigemptyset(&sigs);
sigaddset(&sigs, NOTIFY_SIGNAL);
sigaddset(&sigs, SIGINT);
sigaddset(&sigs, SIGHUP);
sigaddset(&sigs, SIGTERM);
while (1) {
/* Wait for the notification. */
signum = sigwaitinfo(&sigs, &info);
if (signum == -1) {
if (errno == EINTR)
continue;
fprintf(stderr, "mapper_process(): sigwaitinfo() failed: %s.\n", strerror(errno));
munmap(data, len);
return EXIT_FAILURE;
}
if (signum != NOTIFY_SIGNAL)
break;
/* A notify signal was received. Check the write counter. */
count[ (data[0] == value) ]++;
/* Update. */
data[0] = value++;
data[1] = -(value++);
/* Synchronize */
/* msync(data, 2 * sizeof (data[0]), MS_SYNC | MS_INVALIDATE); */
/* And let the writer know. */
kill(info.si_pid, NOTIFY_SIGNAL);
}
/* Print statistics. */
printf("mapper_process(): %lu errors out of %lu cycles (%.3f%%)\n",
count[0], count[0] + count[1], 100.0 * (double)count[0] / (double)(count[0] + count[1]));
fflush(stdout);
munmap(data, len);
return EXIT_SUCCESS;
}
static inline int read_from(const int fd, void *const to, const size_t len, const off_t offset)
{
char *p = (char *)to;
char *const q = (char *)to + len;
ssize_t n;
if (lseek(fd, offset, SEEK_SET) != offset)
return errno = EIO;
while (p < q) {
n = read(fd, p, (size_t)(q - p));
if (n > 0)
p += n;
else
if (n != -1)
return errno = EIO;
else
if (errno != EINTR)
return errno;
}
return 0;
}
static inline int write_to(const int fd, const void *const from, const size_t len, const off_t offset)
{
const char *const q = (const char *)from + len;
const char *p = (const char *)from;
ssize_t n;
if (lseek(fd, offset, SEEK_SET) != offset)
return errno = EIO;
while (p < q) {
n = write(fd, p, (size_t)(q - p));
if (n > 0)
p += n;
else
if (n != -1)
return errno = EIO;
else
if (errno != EINTR)
return errno;
}
return 0;
}
int writer_process(const int fd, const size_t len, const pid_t other)
{
long data[2] = { 0, 0 }, count[2] = { 0, 0 };
long value = 0;
siginfo_t info;
sigset_t sigs;
int signum;
sigemptyset(&sigs);
sigaddset(&sigs, NOTIFY_SIGNAL);
sigaddset(&sigs, SIGINT);
sigaddset(&sigs, SIGHUP);
sigaddset(&sigs, SIGTERM);
while (1) {
/* Update. */
data[0] = ++value;
data[1] = -(value++);
/* then write the data. */
if (write_to(fd, data, sizeof data, 0)) {
fprintf(stderr, "writer_process(): write_to() failed: %s.\n", strerror(errno));
return EXIT_FAILURE;
}
/* Let the mapper know. */
kill(other, NOTIFY_SIGNAL);
/* Wait for the notification. */
signum = sigwaitinfo(&sigs, &info);
if (signum == -1) {
if (errno == EINTR)
continue;
fprintf(stderr, "writer_process(): sigwaitinfo() failed: %s.\n", strerror(errno));
return EXIT_FAILURE;
}
if (signum != NOTIFY_SIGNAL || info.si_pid != other)
break;
/* Reread the file. */
if (read_from(fd, data, sizeof data, 0)) {
fprintf(stderr, "writer_process(): read_from() failed: %s.\n", strerror(errno));
return EXIT_FAILURE;
}
/* Check the read counter. */
count[ (data[1] == -value) ]++;
}
/* Print statistics. */
printf("writer_process(): %lu errors out of %lu cycles (%.3f%%)\n",
count[0], count[0] + count[1], 100.0 * (double)count[0] / (double)(count[0] + count[1]));
fflush(stdout);
return EXIT_SUCCESS;
}
int main(int argc, char *argv[])
{
struct timespec duration;
double seconds;
pid_t mapper, writer, p;
size_t page;
siginfo_t info;
sigset_t sigs;
int fd, status;
char dummy;
if (argc != 3) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s FILENAME SECONDS\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "This program will test synchronization between a memory map\n");
fprintf(stderr, "and reading/writing the underlying file.\n");
fprintf(stderr, "The test will run for the specified time, or indefinitely\n");
fprintf(stderr, "if SECONDS is zero, but you can also interrupt it with\n");
fprintf(stderr, "Ctrl+C (INT signal).\n");
fprintf(stderr, "\n");
return EXIT_FAILURE;
}
if (sscanf(argv[2], " %lf %c", &seconds, &dummy) != 1) {
fprintf(stderr, "%s: Invalid number of seconds to run.\n", argv[2]);
return EXIT_FAILURE;
}
if (seconds > 0) {
duration.tv_sec = (time_t)seconds;
duration.tv_nsec = (long)(1000000000 * (seconds - (double)(duration.tv_sec)));
} else {
duration.tv_sec = 0;
duration.tv_nsec = 0;
}
/* Block INT, HUP, CHLD, and the notification signal. */
sigemptyset(&sigs);
sigaddset(&sigs, SIGINT);
sigaddset(&sigs, SIGHUP);
sigaddset(&sigs, SIGCHLD);
sigaddset(&sigs, NOTIFY_SIGNAL);
if (sigprocmask(SIG_BLOCK, &sigs, NULL) == -1) {
fprintf(stderr, "Cannot block the necessary signals: %s.\n", strerror(errno));
return EXIT_FAILURE;
}
/* Create the file. */
page = sysconf(_SC_PAGESIZE);
fd = open(argv[1], O_RDWR | O_CREAT | O_EXCL, 0644);
if (fd == -1) {
fprintf(stderr, "%s: Cannot create file: %s.\n", argv[1], strerror(errno));
return EXIT_FAILURE;
}
if (ftruncate(fd, page) == -1) {
fprintf(stderr, "%s: Cannot resize file: %s.\n", argv[1], strerror(errno));
unlink(argv[1]);
return EXIT_FAILURE;
}
close(fd);
fd = -1;
/* Ensure streams are flushed before forking. They should be, we're just paranoid here. */
fflush(stdout);
fflush(stderr);
/* Fork the mapper child process. */
mapper = fork();
if (mapper == -1) {
fprintf(stderr, "Cannot fork mapper child process: %s.\n", strerror(errno));
unlink(argv[1]);
return EXIT_FAILURE;
}
if (!mapper) {
fd = open(argv[1], O_RDWR);
if (fd == -1) {
fprintf(stderr, "mapper_process(): %s: Cannot open file: %s.\n", argv[1], strerror(errno));
return EXIT_FAILURE;
}
status = mapper_process(fd, page);
close(fd);
return status;
}
/* For the writer child process. (mapper contains the PID of the mapper process.) */
writer = fork();
if (writer == -1) {
fprintf(stderr, "Cannot fork writer child process: %s.\n", strerror(errno));
unlink(argv[1]);
kill(mapper, SIGKILL);
return EXIT_FAILURE;
}
if (!writer) {
fd = open(argv[1], O_RDWR);
if (fd == -1) {
fprintf(stderr, "writer_process(): %s: Cannot open file: %s.\n", argv[1], strerror(errno));
return EXIT_FAILURE;
}
status = writer_process(fd, page, mapper);
close(fd);
return status;
}
/* Wait for a signal. */
if (duration.tv_sec || duration.tv_nsec)
status = sigtimedwait(&sigs, &info, &duration);
else
status = sigwaitinfo(&sigs, &info);
/* Whatever it was, we kill the child processes. */
kill(mapper, SIGHUP);
kill(writer, SIGHUP);
do {
p = waitpid(-1, NULL, 0);
} while (p != -1 || errno == EINTR);
/* Cleanup. */
unlink(argv[1]);
printf("Done.\n");
return EXIT_SUCCESS;
}
注意子进程单独打开临时文件。要编译和 运行,请使用例如
gcc -Wall -O2 test-multi.c -o multi
./multi temp 10
第二个参数是测试的持续时间,以秒为单位。 (您可以使用 SIGINT (Ctrl+C) 或 SIGHUP 安全地中断测试。)
在我的机器上,测试速率大约是每秒 120,000 次测试; msync()
调用在这里也被注释掉了,因为即使没有它我也看不到任何 errors/desynchronization 。 (另外,msync(ptr, len, MS_SYNC)
和 msync(ptr, len, MS_SYNC | MS_INVALIDATE)
非常慢;无论是哪种,我每秒只能进行不到 1000 次测试,结果完全没有差异。这是 100 倍的减速。)
mmap 的 MAP_NORESERVE
标志告诉它在内存压力下使用文件本身作为后备存储,而不是交换。如果您在无法识别该标志的系统上编译代码,则可以忽略它。只要映射没有从 RAM 中逐出,该标志根本不会影响操作。