在 void * 之上实现一个结构类型

Implement a struct type on top of void *

这是一个简单的合理性检查(sanity check)问题。基本需求是把两个柔性数组成员(flexible array member)放进同一个结构里,以减少调用 malloc 的次数。


/*
 * Sanity-check sketch: lay out the four members of `equivalent` by hand
 * inside a raw malloc'd byte buffer, and confirm that the hand-computed
 * offsets agree with the layout the compiler chose for the real struct.
 *
 * Nothing is stored through the typed pointers; the function only derives
 * them, checks them, and releases both allocations.
 */
void f(void)
{
  typedef struct
  {
    double x;
    char y;
    int32_t foo;
    double z;
  } equivalent;

  static_assert(sizeof(equivalent) == 24,
                "hand-computed offsets below assume a 24-byte struct");

  equivalent *e = malloc(sizeof *e);
  char *memory = malloc(sizeof(equivalent));
  if (e == NULL || memory == NULL)
  {
    /* free(NULL) is a no-op, so both calls are safe on partial failure. */
    free(e);
    free(memory);
    return;
  }

  double  *x   = (double *)  ( 0 + memory);
  char    *y   = (char *)    ( 8 + memory);
  int32_t *foo = (int32_t *) (12 + memory);
  double  *z   = (double *)  (16 + memory);

  /* The magic numbers 0/8/12/16 are only valid if they match the offsets of
   * the corresponding members in a real `equivalent` object. */
  assert((char *)x   - memory == (char *)&e->x   - (char *)e);
  assert((char *)y   - memory == (char *)&e->y   - (char *)e);
  assert((char *)foo - memory == (char *)&e->foo - (char *)e);
  assert((char *)z   - memory == (char *)&e->z   - (char *)e);

  /* Keep the pointers "used" even when NDEBUG removes the asserts. */
  (void)x;
  (void)y;
  (void)foo;
  (void)z;

  free(memory);
  free(e);
}


我已经通读了 C11 阐明的别名规则("effective type" 部分),我认为我没有问题。



编辑:作为对 Jonathan Leffler 的回应,这是我打算如何将几个运行时确定长度的数组放入单个内存块中的快速粗略草图。


/* Helpers defined further down in this file; declared here so g() can use
 * them before their definitions. */
uint64_t  bytes_for_first(uint64_t N_first);
uint64_t  bytes_for_second(uint64_t N_second);
uint64_t* get_N_first(void* vdata);
uint64_t* get_N_second(void* vdata);

/*
 * Allocate one block able to hold two runtime-sized arrays and record their
 * element counts in it.
 *
 * Desired representation of the block:
 *   uint64_t N_first;
 *   int32_t  first[N_first];     (padded so the next field is 8-byte aligned)
 *   uint64_t N_second;
 *   double   second[N_second];
 *
 * The arrays themselves are not populated; only the storage is allocated and
 * the two length fields are written.
 *
 * Returns the block, or NULL if malloc fails. The caller owns the block and
 * releases it with free().
 */
void* g(uint64_t N_first, uint64_t N_second)
{
  uint64_t bytes_for_lengths = 16;  /* the two uint64_t length fields */

  char* bytes = malloc(bytes_for_lengths + bytes_for_first(N_first) +
                       bytes_for_second(N_second));
  if (bytes == NULL)
    return NULL;

  /* N_first must be stored first: the position of N_second is computed
   * from the N_first value already in the block. */
  uint64_t* ptr_N_first = get_N_first(bytes);
  *ptr_N_first = N_first;

  uint64_t* ptr_N_second = get_N_second(bytes);
  *ptr_N_second = N_second;

  return (void*)bytes;
}

// I haven't decided how best to factor out the field access
// and associated functions yet, so this is not optimal

/*
 * Return a pointer to the N_first length field, which sits at byte offset 0
 * of the block.
 * NOTE(review): assumes vdata is suitably aligned for uint64_t (true for a
 * pointer returned by malloc) -- confirm for other callers.
 */
uint64_t* get_N_first(void* vdata)
{
  char* data = (char*)vdata;
  return (uint64_t*)(data + 0);
}
/*
 * Return a pointer to the start of the `first` array, which begins at byte
 * offset 8 of the block, directly after the 8-byte N_first field.
 */
int32_t* get_first(void* vdata)
{
  char * data = (char*)vdata;
  return (int32_t*)(data + 8);
}
/*
 * Number of bytes reserved for the `first` array of N_first int32_t
 * elements: 4 bytes per element, rounded up to a multiple of 8 so that the
 * field following the array stays 8-byte aligned.
 */
uint64_t bytes_for_first(uint64_t N_first)
{
  uint64_t bytes = 4 * N_first;
  if (bytes % 8 != 0)
  {
    /* 4 * N is always a multiple of 4, so the only possible shortfall is 4. */
    bytes += 4;
  }
  return bytes;
}

uint64_t* get_N_second(void* vdata)
  uint64_t n_first = *get_N_first(vdata);
  uint64_t first_bytes = bytes_for_first(n_first);
  char* data = (char*)vdata;
  return (uint64_t*)(data + 8 + first_bytes);
double* get_second(void* vdata)
  char * data = (char*)vdata;
  uint64_t n_first = *get_N_first(vdata);
  uint64_t first_bytes = bytes_for_first(n_first);
  return (double*)(data + 8 + first_bytes + 8);
/*
 * Number of bytes reserved for the `second` array: 8 bytes for each of its
 * N_second double elements.
 */
uint64_t bytes_for_second(uint64_t N_second)
{
  return 8 * N_second;
}


/*
 * Same layout as the hand-numbered version, but the member offsets are taken
 * from the compiler via offsetof() instead of being hard-coded.
 * NOTE(review): this is a statement-level fragment (the malloc initializer
 * is not a constant expression), so it presumably belongs inside a function
 * body where the `equivalent` typedef is visible -- confirm.
 */
size_t offset_of_x = offsetof(equivalent, x);
size_t offset_of_y = offsetof(equivalent, y);
size_t offset_of_foo = offsetof(equivalent, foo);
size_t offset_of_z = offsetof(equivalent, z);

char* memory = malloc(sizeof(equivalent));
/* char * does not convert implicitly to other object pointer types in C,
 * so each derived pointer needs an explicit cast. */
double* x    = (double*)  (offset_of_x   + memory);
char* y      =            (offset_of_y   + memory);
int32_t* foo = (int32_t*) (offset_of_foo + memory);
double* z    = (double*)  (offset_of_z   + memory);




/* Pseudo-code, not valid C: an array member cannot be sized by a sibling
 * member. Shown only to picture the layout being emulated by hand. */
struct fake_your_version {
    uint64_t N_first;
    int32_t first[N_first];
    uint64_t N_second;
    double second[N_second];
};


struct fake_alternative_1 {
    uint64_t size; // max over all num[i]
    uint64_t num[2]; // num[0] being for first, num[1] being for second
    struct {
        int32_t first;
        double second;
    } entry[num];


/* Alternative that keeps one allocation per array: only the element counts
 * and the two data pointers live in the struct itself. */
struct fake_alternative_2 {
    uint64_t num[2];
    void * data[2]; // separate malloc(num[i] * sizeof(whatever));
};


fake_alternative_1 也将节省一个 malloc(如果数组需要不同的大小,则以填充字节和丢失内存为代价)。

在考虑这样做之前,你真的应该问问自己:malloc 真的慢到你必须避免它的程度吗?也许拖慢你的并不是 malloc,而是你正在做的其他事情(而且,为了省掉一次 malloc 所做的努力,反而可能让你的代码变得更慢而不是更快)。

fake_alternative_2 则干脆接受每个数组各自调用一次 malloc;不过我想,给出这个替代方案并没有告诉你任何新东西。

我不禁觉得使用直接结构来实现您的 'double VLA' 结构类型会更简洁。大致是这样的:

// desired representation:
// uint64_t N_first;
// int32_t first[N_first];
// uint64_t N_second;
// double second[N_second];

#include <assert.h>
#include <inttypes.h>
#include <stdalign.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Header for two runtime-sized arrays that live in the same heap allocation
 * as the header itself. `first` and `second` point into the bytes that
 * follow the header; they are not separate allocations.
 */
struct DoubleVLA
{
    uint64_t  N_first;      /* number of elements in first[]  */
    int32_t  *first;
    uint64_t  N_second;     /* number of elements in second[] */
    double   *second;
    //double    align_32[];     // Ensures alignment on 32-bit
};

extern struct DoubleVLA *alloc_DoubleVLA(uint64_t n1, uint64_t n2);

/*
 * Allocate a DoubleVLA header plus storage for n1 int32_t values and n2
 * double values in a single malloc() block.
 *
 * The array whose element type has the stricter alignment is placed directly
 * after the header and the other array after it, so no padding is needed
 * between them. The element values are left uninitialized.
 *
 * Returns NULL if the total size does not fit in size_t or if malloc fails.
 * The caller releases everything with one free() on the returned pointer.
 */
struct DoubleVLA *alloc_DoubleVLA(uint64_t n1, uint64_t n2)
{
    /* The arrays start at (char *)dv + sizeof(*dv), so the header size must
     * keep whichever array comes first correctly aligned. */
    static_assert(sizeof(struct DoubleVLA) % alignof(double) == 0,
                  "header size must preserve double alignment");
    static_assert(sizeof(struct DoubleVLA) % alignof(int32_t) == 0,
                  "header size must preserve int32_t alignment");

    /* Size the arrays by element type (not by the pointer members) and
     * reject counts whose byte total would overflow size_t. */
    if (n1 > (SIZE_MAX - sizeof(struct DoubleVLA)) / sizeof(int32_t))
        return NULL;
    size_t first_bytes = (size_t)n1 * sizeof(int32_t);
    size_t used = sizeof(struct DoubleVLA) + first_bytes;

    if (n2 > (SIZE_MAX - used) / sizeof(double))
        return NULL;
    size_t second_bytes = (size_t)n2 * sizeof(double);

    struct DoubleVLA *dv = malloc(used + second_bytes);
    if (dv == NULL)
        return NULL;

    char *payload = (char *)dv + sizeof(*dv);
    dv->N_first = n1;
    dv->N_second = n2;
    if (alignof(double) >= alignof(int32_t))
    {
        dv->second = (double *)payload;
        dv->first  = (int32_t *)(payload + second_bytes);
    }
    else
    {
        dv->first  = (int32_t *)payload;
        dv->second = (double *)(payload + first_bytes);
    }
    return dv;
}

int main(void)
    struct DoubleVLA *dv = alloc_DoubleVLA(UINT64_C(11), UINT64_C(32));
    for (uint64_t i = 0; i < dv->N_first; i++)
        dv->first[i] = i * 100 + rand() % 100;
    for (uint64_t j = 0; j < dv->N_second; j++)
        dv->second[j] = j * 1000.0 + (rand() % 100000) / 100.0;
    for (uint64_t i = 0; i < dv->N_first; i++)
        printf("%.2" PRIu64 " = %12" PRId32 "\n", i, dv->first[i]);
    for (uint64_t j = 0; j < dv->N_second; j++)
        printf("%.2" PRIu64 " = %12.2f\n", j, dv->second[j]);
    return 0;

即使在 32 位平台上,结构末尾也应该有足够的填充,使结构的大小能让紧随其后的 double 数组以及再之后的 int32_t 数组都正确对齐。不过,把两个长度字段放在结构的前面、把两个指针放在最后,就可以避免不必要的填充;在 64 位平台上这不成问题。可选的 align_32 成员是一个柔性数组成员(flexible array member,并非 VLA),它假定 int32_t 的对齐要求不大于 double 的对齐要求;即使存在某些特殊的对齐限制或要求,它也能确保结构被正确填充。还可以加上静态断言来验证这些约束是否成立。

