FILE 结构在操作系统和体系结构之间是否一致?

Is the FILE struct consistent between operating systems and architectures?

对于以下 C 代码片段,LLVM 将生成以下 IR。

#include <stdio.h>
#include <stdlib.h>

/* Minimal example program: print a greeting, flush output, exit with status 0. */
int main(){
  printf("Hello world\n");
  fflush(NULL);  /* a null stream argument asks fflush to flush all open output streams */
  return 0;
}
; ModuleID = 'a.c'
source_filename = "a.c"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.15.0"

; Annotation: IR layout of struct __sFILE (FILE) for this target. The 20 elements
; follow the member order of the macOS C declaration quoted later in this text:
; _p, _r, _w, _flags, _file, _bf, _lbfsize, _cookie, _close, _read, _seek, _write,
; _ub, _extra, _ur, _ubuf, _nbuf, _lb, _blksize, _offset (fpos_t shows up as i64).
%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
; Opaque: no body is emitted because __sFILEX is only referenced through a pointer.
%struct.__sFILEX = type opaque
; Pointer plus 32-bit integer; embedded three times in %struct.__sFILE.
%struct.__sbuf = type { i8*, i32 }

@str = private unnamed_addr constant [12 x i8] c"Hello world\00", align 1

; Annotation: IR for main() of the C snippet. The printf call appears here as a
; call to @puts on the @str constant, fflush(NULL) is a call to @fflush with a
; null %struct.__sFILE* argument, and the function returns 0.
; Function Attrs: nounwind ssp uwtable
define i32 @main() local_unnamed_addr #0 {
  %1 = tail call i32 @puts(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str, i64 0, i64 0))
  %2 = tail call i32 @fflush(%struct.__sFILE* null)
  ret i32 0
}

; Function Attrs: nounwind
declare i32 @fflush(%struct.__sFILE* nocapture) local_unnamed_addr #1

; Function Attrs: nounwind
declare i32 @puts(i8* nocapture readonly) local_unnamed_addr #2

attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "darwin-stkchk-strong-link" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "probe-stack"="___chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "darwin-stkchk-strong-link" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "probe-stack"="___chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }

!llvm.module.flags = !{!0, !1, !2}
!llvm.ident = !{!3}

!0 = !{i32 2, !"SDK Version", [3 x i32] [i32 10, i32 15, i32 4]}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{i32 7, !"PIC Level", i32 2}
!3 = !{!"Apple clang version 11.0.3 (clang-1103.0.32.59)"}

我可以认为对应的FILE结构的结构声明在不同的操作系统之间是一致的吗?是否有一种编程方式来获取此结构的 LLVM 表示?

这是 macOS 实现:

/*
 * Annotation (not part of the vendor header): FILE as declared on macOS.
 * Members from _p through _write have the same names, types and order as in
 * the second __sFILE listing in this text; the two listings differ at
 * _ub/_extra (here) versus _ext/_up (there).
 */
typedef struct __sFILE {
    unsigned char *_p;  /* current position in (some) buffer */
    int _r;     /* read space left for getc() */
    int _w;     /* write space left for putc() */
    short   _flags;     /* flags, below; this FILE is free if 0 */
    short   _file;      /* fileno, if Unix descriptor, else -1 */
    struct  __sbuf _bf; /* the buffer (at least 1 byte, if !NULL) */
    int _lbfsize;   /* 0 or -_bf._size, for inline putc */

    /* operations */
    void    *_cookie;   /* cookie passed to io functions */
    /* _Nullable annotates the callback pointers; it is absent from the other listing */
    int (* _Nullable _close)(void *);
    int (* _Nullable _read) (void *, char *, int);
    fpos_t  (* _Nullable _seek) (void *, fpos_t, int);
    int (* _Nullable _write)(void *, const char *, int);

    /* separate buffer for long sequences of ungetc() */
    struct  __sbuf _ub; /* ungetc buffer */
    struct __sFILEX *_extra; /* additions to FILE to not break ABI */
    int _ur;        /* saved _r when _r is counting ungetc data */

    /* tricks to meet minimum requirements even when malloc() fails */
    unsigned char _ubuf[3]; /* guarantee an ungetc() buffer */
    unsigned char _nbuf[1]; /* guarantee a getc() buffer */

    /* separate buffer for fgetln() when line crosses buffer boundary */
    struct  __sbuf _lb; /* buffer for fgetln() */

    /* Unix stdio files get aligned to block boundaries on fseek() */
    int _blksize;   /* stat.st_blksize (may be != _bf._size) */
    fpos_t  _offset;    /* current lseek offset (see WARNING) */
} FILE;

这是 Linux 实现:

/*
 * Annotation (not part of the quoted header): presented in the text as the
 * Linux definition of FILE. It differs from the macOS listing at _ext/_up,
 * where macOS has _ub/_extra, and it carries no _Nullable annotations.
 * NOTE(review): glibc names its FILE type struct _IO_FILE, so this __sFILE
 * layout is presumably from a BSD-derived libc (e.g. Android bionic) --
 * confirm the source of this header.
 */
typedef struct __sFILE {
    unsigned char *_p;  /* current position in (some) buffer */
    int _r;     /* read space left for getc() */
    int _w;     /* write space left for putc() */
    short   _flags;     /* flags, below; this FILE is free if 0 */
    short   _file;      /* fileno, if Unix descriptor, else -1 */
    struct  __sbuf _bf; /* the buffer (at least 1 byte, if !NULL) */
    int _lbfsize;   /* 0 or -_bf._size, for inline putc */

    /* operations */
    void    *_cookie;   /* cookie passed to io functions */
    int (*_close)(void *);
    int (*_read)(void *, char *, int);
    fpos_t  (*_seek)(void *, fpos_t, int);
    int (*_write)(void *, const char *, int);

    /* extension data, to avoid further ABI breakage */
    struct  __sbuf _ext;
    /* data for long sequences of ungetc() */
    unsigned char *_up; /* saved _p when _p is doing ungetc data */
    int _ur;        /* saved _r when _r is counting ungetc data */

    /* tricks to meet minimum requirements even when malloc() fails */
    unsigned char _ubuf[3]; /* guarantee an ungetc() buffer */
    unsigned char _nbuf[1]; /* guarantee a getc() buffer */

    /* separate buffer for fgetln() when line crosses buffer boundary */
    struct  __sbuf _lb; /* buffer for fgetln() */

    /* Unix stdio files get aligned to block boundaries on fseek() */
    int _blksize;   /* stat.st_blksize (may be != _bf._size) */
    fpos_t  _offset;    /* current lseek offset */
} FILE;

_Nullable 是 macOS 上的一个特性(指针可空性标注),大概可以忽略。除此之外两者看起来是相同的,因为不论有没有 _Nullable,其余代码都一样。FILE 结构在 Linux 和 Mac 平台上似乎没有变化,只有 unsigned char *_up; 和 struct __sFILEX *_extra; 这两个成员不同。

FILE * 背后的类型结构对作为用户的您来说是不透明的,并且在不同操作系统之间各不相同。

更正式地说,它取决于您所使用的 C(或 C++)编译器及其支持库;不过通常每种硬件与操作系统的组合只有一个库,可能同时支持 32 位和 64 位代码的系统除外。

有些程序(例如 Perl)会在配置阶段探查不同机器上该结构的内部,但它们这样做依靠的是内部知识。并不存在标准的结构,至多是像 GNU C 库这样的库可能在它所支持的各种机器上将其统一。不过,最安全的做法是假设它在每个地方都不同。

为了反驳 Asadefa 在其回答中提出的乐观观点,下面是 AIX 7.2 的结构定义:

/*
 * Annotation (not part of the vendor header): FILE as quoted from AIX 7.2.
 * The preprocessor selects one of two layouts. The first branch (taken when
 * __64BIT__ or __ia64 is defined) orders the members differently from the
 * 32-bit POWER branch, declares _file as int rather than short, and adds the
 * _unused and _unused1[4] members.
 */
#if defined(__64BIT__) || defined(__ia64)
typedef struct {
    unsigned char   *_ptr;
    unsigned char   *_base;
    unsigned char   *_bufendp;
    char    *__newbase;
    void    *_lock;
    int _cnt;
    int _file;
    int __stdioid;
    short   _flag;
    short   _unused;
    long    _unused1[4];
} FILE;
#else /* 32-bit POWER */
typedef struct {
    unsigned char   *_ptr;
    int _cnt;
    unsigned char   *_base;
    unsigned char   *_bufendp;
    short   _flag;
    short   _file;
    int __stdioid;
    char    *__newbase;
    void    *_lock;
} FILE;
#endif /* __64BIT__ || __ia64 */

在同一个操作系统上,32 位和 64 位的变体就已经不同,而且它们与 Linux 和 macOS 上的结构毫无关系。

Solaris 10 上的结构又不一样——它使用了其他系统都没有用到的位域:

/*
 * Annotation (not part of the vendor header): FILE structure as quoted from
 * Solaris 10. _STDIO_REVERSE only swaps the order of the first two members
 * (_ptr and _cnt). The trailing bit-fields add up to 16 bits
 * (2 + 1 + 1 + 1 + 1 + 10).
 */
struct __FILE_TAG   /* needs to be binary-compatible with old versions */
{
#ifdef _STDIO_REVERSE
    unsigned char   *_ptr;  /* next character from/to here in buffer */
    int     _cnt;   /* number of available characters in buffer */
#else
    int     _cnt;   /* number of available characters in buffer */
    unsigned char   *_ptr;  /* next character from/to here in buffer */
#endif
    unsigned char   *_base; /* the buffer */
    unsigned char   _flag;  /* the state of the stream */
    unsigned char   _file; /* Old home of the file descriptor */
                /* Only fileno(3C) can retrieve the value now */
    unsigned    __orientation:2; /* the orientation of the stream */
    unsigned    __ionolock:1;   /* turn off implicit locking */
    unsigned    __seekable:1;   /* is file seekable? */
    unsigned    __extendedfd:1; /* enable extended FILE */
    unsigned    __xf_nocheck:1; /* no extended FILE runtime check */
    unsigned    __filler:10;
};

而且我在HP-UX 11.31上找到的版本又不一样了:

   /*
    * Annotation (not part of the vendor header): FILE as quoted from
    * HP-UX 11.31. The file descriptor is stored as two separate bytes,
    * __fileL (low) and __fileH (high).
    */
   typedef struct {
    int      __cnt;
    unsigned char   *__ptr;
    unsigned char   *__base;
    unsigned short   __flag;
    unsigned char    __fileL;       /* low byte of file desc */
    unsigned char    __fileH;       /* high byte of file desc */
   } FILE;

当然,如果 Solaris、HP-UX、AIX 都不在你的考虑范围之内,你可能会得出不同的结论,但不同系统之间确实存在巨大差异。