在 Ada 中以非均匀字节步长遍历数据缓冲区的惯用方式

Idiomatic way to use data buffer with non-uniform byte skips in Ada

我正在尝试在 Ada 中从字节缓冲区读取数据,例如来自文件或网络连接的缓冲区。消息的大小可变,但都有一个共同的 header;在 C++ 中它看起来像这样:

// Message kinds as they appear on the wire (16-bit underlying value).
enum class MessageType : uint16_t {
    Foo = 0,
    Bar = 1,
};

// Force the layout: with 1-byte packing the compiler inserts no padding, so
// the in-memory structs match the byte layout of the messages in the buffer.
#pragma pack(push,1)

// 4 byte message header, shared by every message.
struct MessageHeader {
    uint16_t checksum;
    uint16_t type;      // numeric message kind (see MessageType)
};

// 4 byte header + 4 byte message
struct FooMessage {
    MessageHeader header;
    uint32_t tomatoes;
};

// 4 byte header + 8 byte message
struct BarMessage {
    MessageHeader header;
    uint32_t oranges;
    uint32_t apples;
};
#pragma pack(pop)

// The reader walks the buffer using these sizes, so enforce them at compile
// time instead of relying on the comments above.
static_assert(sizeof(MessageHeader) == 4, "MessageHeader must be 4 bytes");
static_assert(sizeof(FooMessage) == 8, "FooMessage must be 8 bytes");
static_assert(sizeof(BarMessage) == 12, "BarMessage must be 12 bytes");

// For simplicity, assume the buffer is complete and only holds full messages.
void read(char* buffer, uint32_t bytesLeft) {
    while (bytesLeft > 0) {
        MessageHeader* header = reinterpret_cast<MessageHeader*>(buffer);
        switch (header->type) {
            case FooType: {
                FooMessage* foo = reinterpret_case<FooMessage*>(buffer);
                // process as const FooMessage&
                processFoo(*foo);
            }
            case BarType: {
                BarMessage* bar = reinterpret_cast<BarMessage*>(buffer);
                // process as const BarMessage&
                processBar(*bar);
            }
        }
        const auto size = (header->type == Foo ? sizeof(FooMessage) : sizeof(BarMessage));
        buffer += size;
        bytesLeft -= size;
    }
}

我不确定这样做的惯用方式。请注意,在某些格式中,消息类型也可能不是 header 中的第一个数据成员。应该读写 Character 数组或 Interfaces.C.char_array,还是使用 System.Address 表示的内存地址,或者别的什么?又或者,这里应该使用位于别处的某个数组的地址,还是只用一个带有“Convention => C”的数组,以避免包含前导的长度信息?

这是我目前在 Ada 中拥有的:

-- Message kinds; the representation clauses pin the wire values (0 and 1)
-- and the 16-bit size.
type Message_Type is (Foo, Bar);
for Message_Type use (Foo => 0, Bar => 1);
for Message_Type'Size use 16;

-- The layouts below rely on the compiler's C-convention record layout plus
-- an explicit total size; no per-component bit layout is given.

-- Header shared by every message (32 bits).
type Message_Header is record
    Checksum : Interfaces.Integer_16;
    Msg_Type : Message_Type;
end record;
pragma Convention (C, Message_Header);
for Message_Header'Size use 32;

-- Header followed by one 32-bit value (64 bits in total).
type Foo_Message is record
    Header   : Message_Header;
    Tomatoes : Interfaces.Integer_32;
end record;
pragma Convention (C, Foo_Message);
for Foo_Message'Size use 64;

-- Header followed by two 32-bit values (96 bits in total).
type Bar_Message is record
    Header  : Message_Header;
    Oranges : Interfaces.Integer_32;
    Apples  : Interfaces.Integer_32;
end record;
pragma Convention (C, Bar_Message);
for Bar_Message'Size use 96;

-- Walks the raw buffer starting at Buffer, handing each message to its
-- Process_* procedure in place (no copy) and stepping over it.
--
-- Buffer     : address of the next unread byte; advanced past every message
--              that was processed.
-- Bytes_Left : number of valid bytes at Buffer; reduced by the same amount.
--
-- The loop stops early, leaving Buffer/Bytes_Left at the unprocessed data,
-- when fewer bytes remain than the next header or message needs.
-- Raises Constraint_Error when the header carries a value that is not a
-- valid Message_Type.
procedure Read(
    -- System.Address plus a byte count describes untyped memory handed over
    -- by C code or an I/O layer.
    Buffer     : in out System.Address;
    Bytes_Left : in out Interfaces.Integer_64)
is
    use type Interfaces.Integer_64;

    -- NOTE(review): these conversions assume System.Address is 64 bits wide;
    -- the "+" operator in System.Storage_Elements is the portable alternative.
    function To_Address is new Ada.Unchecked_Conversion (Interfaces.Integer_64, System.Address);
    function To_Integer is new Ada.Unchecked_Conversion (System.Address, Interfaces.Integer_64);

    -- 'Size is expressed in bits; the buffer is walked in storage elements.
    Header_Bytes : constant Interfaces.Integer_64 := Message_Header'Size / System.Storage_Unit;
    Foo_Bytes    : constant Interfaces.Integer_64 := Foo_Message'Size / System.Storage_Unit;
    Bar_Bytes    : constant Interfaces.Integer_64 := Bar_Message'Size / System.Storage_Unit;

    procedure Process_Bar (B : aliased Bar_Message) is null;
    procedure Process_Foo (F : aliased Foo_Message) is null;

    -- Steps over Count bytes that have just been consumed.
    procedure Advance (Count : Interfaces.Integer_64) is
    begin
        Buffer     := To_Address (To_Integer (Buffer) + Count);
        Bytes_Left := Bytes_Left - Count;
    end Advance;
begin
    while Bytes_Left >= Header_Bytes loop
        declare
            -- Overlay: Header is a view of the bytes at Buffer. These record
            -- types have no default initialization, so declaring the overlay
            -- does not write to the buffer.
            Header : Message_Header;
            for Header'Address use Buffer;
        begin
            -- The bytes come from outside the program, so check the
            -- enumeration value before using it in a case statement.
            if not Header.Msg_Type'Valid then
                raise Constraint_Error with "unknown message type in buffer";
            end if;

            case Header.Msg_Type is
                when Foo =>
                    exit when Bytes_Left < Foo_Bytes;  -- truncated message
                    declare
                        -- Overlay declared only once the type is known, so it
                        -- never extends past the end of the buffer.
                        Foo_Msg : aliased Foo_Message;
                        for Foo_Msg'Address use Buffer;
                    begin
                        Process_Foo (Foo_Msg);
                    end;
                    Advance (Foo_Bytes);

                when Bar =>
                    exit when Bytes_Left < Bar_Bytes;  -- truncated message
                    declare
                        Bar_Msg : aliased Bar_Message;
                        for Bar_Msg'Address use Buffer;
                    begin
                        Process_Bar (Bar_Msg);
                    end;
                    Advance (Bar_Bytes);
            end case;
        end;
    end loop;
end Read;

以可变步长在缓冲区的字节间前进并就地读取数据,惯用的做法是什么?

你可以使用带有 Unchecked_Union aspect 的记录类型。

-- Single record type covering both message kinds: Header is common to all
-- variants, and the variant part holds the Foo or the Bar payload.
type Message (Msg_Type : Message_Type) is record
    Header : Message_Header;

    case Msg_Type is
        when Foo =>
            -- NOTE(review): Integer_16 here differs from the 32-bit tomatoes
            -- field of the C++ FooMessage and from Foo_Message.Tomatoes
            -- (Integer_32) -- confirm which width is intended.
            Tomatoes : Interfaces.Integer_16;
        when Bar =>
            Oranges : Interfaces.Integer_32;
            Apples  : Interfaces.Integer_32;
    end case;
end record
    with Unchecked_Union;

请注意,使用 Unchecked_Union 时无法访问判别式。

注意:Tomatoes 在 C 代码和您提供的 Ada 代码中的大小不同。

我会保持简单:只需在给定地址定义一个缓冲区数组:

-- Storage array placed at the address Buffer (not a copy): index 0 is the
-- first byte and the array spans Bytes_Left storage elements.
Buf : System.Storage_Elements.Storage_Array (0 .. Bytes_Left - 1)
        with Address => Buffer;

然后逐条消息地解析缓冲区。下面的例子给出了我解决这个问题的大致思路(免责声明:未经测试)。

message_reader.ads

with System;
with System.Storage_Elements;
with Interfaces;

--  Declares the message layouts and the Read procedure that parses a raw
--  byte buffer message by message.
package Message_Reader is

   package SSE renames System.Storage_Elements;
   
   --  NOTE: Not using an enum type eases the implementation of the parser (I think).
   --        In particular for detecting unknown message types.

   --  Message kind as a plain 16-bit unsigned value, with one named constant
   --  per known kind.
   type Message_Type is new Interfaces.Unsigned_16;   
   Message_Type_Foo : constant Message_Type := 0;
   Message_Type_Bar : constant Message_Type := 1;

   -- Assume these work correctly and I don't need to do bit layout directly.

   --  Header shared by every message (32 bits).
   type Message_Header is record
      Checksum : Interfaces.Integer_16;
      Msg_Type : Message_Type;
   end record
     with Convention => C, Size => 32;

   --  Header followed by one 32-bit value (64 bits in total).
   type Foo_Message is record
      Header   : Message_Header;
      Tomatoes : Interfaces.Integer_32;
   end record
     with Convention => C, Size => 64;

   --  Header followed by two 32-bit values (96 bits in total).
   type Bar_Message is record
      Header  : Message_Header;
      Oranges : Interfaces.Integer_32;
      Apples  : Interfaces.Integer_32;
   end record
     with Convention => C, Size => 96;
   
   --  Raised by Read when a header carries a message type other than
   --  Message_Type_Foo or Message_Type_Bar.
   Unknown_Message_Type : exception;
   
   --  Parses the messages stored in the Bytes_Left storage elements starting
   --  at address Buffer. Bytes_Left is decreased by the size of every
   --  message that was read.
   procedure Read
     (Buffer     : in     System.Address;
      Bytes_Left : in out SSE.Storage_Count);
   
private
   use type SSE.Storage_Count;

   --  The byte-size constants below divide bit sizes by the storage unit;
   --  reject targets where a storage element is not 8 bits.
   pragma Compile_Time_Error 
     (System.Storage_Unit /= 8, "implementation expects a storage unit size of 8");
   
   --  Message sizes in storage elements ('Size is in bits).
   Foo_Msg_Size_Bytes : constant SSE.Storage_Count := 
                          Foo_Message'Size / System.Storage_Unit;
   Bar_Msg_Size_Bytes : constant SSE.Storage_Count := 
                          Bar_Message'Size / System.Storage_Unit;
   
   --  Placeholder handlers: null procedures that do nothing with the message.
   procedure Process_Bar (B : Bar_Message) is null;
   procedure Process_Foo (F : Foo_Message) is null;
   
end Message_Reader;

message_reader.adb

with Ada.Unchecked_Conversion;

package body Message_Reader is

   --  Copies one value of Chunk_Type out of Buffer.
   --
   --  Buffer  : storage array to read from.
   --  Offset  : index in Buffer of the first storage element of the chunk.
   --  Chunk   : receives the value; only assigned when Success is True.
   --  Success : False when the chunk does not lie completely inside Buffer.
   generic
      type Chunk_Type is private;
   procedure Read_Chunk
     (Buffer  : in     SSE.Storage_Array;
      Offset  : in     SSE.Storage_Offset;
      Chunk   :    out Chunk_Type;
      Success :    out Boolean);

   ----------
   -- Read --
   ----------

   --  Reads message after message from the memory at Buffer, passing each
   --  one to Process_Foo or Process_Bar and reducing Bytes_Left by the size
   --  of the message. Stops when the remaining bytes cannot hold another
   --  header or complete message. Raises Unknown_Message_Type for a header
   --  whose type is neither Message_Type_Foo nor Message_Type_Bar.
   procedure Read
     (Buffer     : in     System.Address;
      Bytes_Left : in out SSE.Storage_Count)
   is
      --  View of the caller's memory; sized from the initial Bytes_Left.
      Buf : SSE.Storage_Array (0 .. Bytes_Left - 1)
        with Address => Buffer;

      procedure Read_Header  is new Read_Chunk (Message_Header);
      procedure Read_Foo_Msg is new Read_Chunk (Foo_Message);
      procedure Read_Bar_Msg is new Read_Chunk (Bar_Message);

      Header  : Message_Header;
      Success : Boolean;

      --  Index in Buf of the next unread storage element. The unread bytes
      --  are always the last Bytes_Left elements of Buf, so this is 0 on
      --  entry and Buf'Last + 1 once everything has been consumed.
      function Next_Offset return SSE.Storage_Offset is
        (Buf'Last - Bytes_Left + 1);

   begin
      loop
         Read_Header (Buf, Next_Offset, Header, Success);
         if not Success then
            exit;  --  Not enough data left in buffer.
         end if;

         case Header.Msg_Type is

            when Message_Type_Foo =>
               declare
                  Foo : Foo_Message;
               begin
                  --  Re-read from the same offset: the message includes
                  --  the header that was just inspected.
                  Read_Foo_Msg (Buf, Next_Offset, Foo, Success);
                  if not Success then
                     exit;  --  Not enough data left in buffer.
                  end if;

                  Bytes_Left := Bytes_Left - Foo_Msg_Size_Bytes;
                  Process_Foo (Foo);

               end;

            when Message_Type_Bar =>
               declare
                  Bar : Bar_Message;
               begin
                  Read_Bar_Msg (Buf, Next_Offset, Bar, Success);
                  if not Success then
                     exit;  --  Not enough data left in buffer.
                  end if;

                  Bytes_Left := Bytes_Left - Bar_Msg_Size_Bytes;
                  Process_Bar (Bar);

               end;

            when others =>
               raise Unknown_Message_Type;

         end case;
      end loop;

   end Read;

   ----------------
   -- Read_Chunk --
   ----------------

   procedure Read_Chunk
     (Buffer  : in     SSE.Storage_Array;
      Offset  : in     SSE.Storage_Offset;
      Chunk   :    out Chunk_Type;
      Success :    out Boolean)
   is
      --  Size of the chunk in storage elements ('Size is in bits).
      Chunk_Type_Bytes : constant SSE.Storage_Count :=
                            Chunk_Type'Size / System.Storage_Unit;

      --  Raw storage with exactly the size of Chunk_Type, so that the
      --  unchecked conversion below has matching source and target sizes.
      subtype Chunk_Raw is SSE.Storage_Array (0 .. Chunk_Type_Bytes - 1);

      function To_Chunk is new Ada.Unchecked_Conversion
        (Source => Chunk_Raw, Target => Chunk_Type);

      Slice_First : constant SSE.Storage_Offset := Offset;
      Slice_Last  : constant SSE.Storage_Offset := Offset + Chunk_Type_Bytes - 1;

   begin
      --  Both ends are checked so that a bad offset is reported through
      --  Success instead of failing the slice's index check.
      if Slice_First >= Buffer'First and then Slice_Last <= Buffer'Last then
         Chunk := To_Chunk (Buffer (Slice_First .. Slice_Last));
         Success := True;
      else
         Success := False;
      end if;

   end Read_Chunk;

end Message_Reader;