Boost.Spirit 语法(Grammar)解析问题
Spirit Grammar parse issue
我有以下内容,
/// Trade message built from the fields of a BATS wire message.
///
/// The timestamp and message type are forwarded to BATSMessageBase; the
/// remaining values are stored in this class.
class BATSTradeMsg : public BATSMessageBase
{
public:
    /// Builds a trade message from already-decoded field values.
    ///
    /// The constructor must be public: the grammar's semantic action creates
    /// the object through phi::construct<BATSTradeMsg>(...), which cannot
    /// reach a private constructor.
    BATSTradeMsg(int timestamp, char msgtype, uint64_t orderId, char side, uint32_t shares,
                 std::string const &symbol, uint64_t price, uint64_t execId) :
        BATSMessageBase(timestamp, msgtype),
        m_orderId(orderId),
        m_side(side),
        m_shares(shares),
        m_symbol(symbol),
        m_price(price),
        m_execId(execId)
    {
    }

private:
    uint64_t m_orderId; // Base 36 Numeric values come over the wire in ascii
    char m_side;
    uint32_t m_shares;
    std::string m_symbol;
    uint64_t m_price;
    uint64_t m_execId; // Base 36 Numeric values come over the wire in ascii
};
// Fixed-width numeric field parsers.
// Template arguments: <value type, radix, min digits, max digits>.
// order and execution ids are 12 characters base 36
qi::uint_parser< uint64_t, 36, 12, 12 > p_orderId;
qi::uint_parser< uint64_t, 36, 12, 12 > p_execId;
qi::uint_parser< uint32_t, 10, 6, 6 > p_shares;
// A 10-digit decimal value can exceed the uint32_t range (max 4294967295),
// so the price is parsed into 64 bits, matching the uint64_t price member.
qi::uint_parser< uint64_t, 10, 10, 10 > m_price;
qi::uint_parser< uint32_t, 10, 8, 8 > p_ts;
// The long and short trade layouts differ only in the width of the symbol
// field (8 vs. 6 characters), so one rule expression serves both. Any other
// message type leaves m_wire_msg as it was.
if (msgtype == BATSTradeMsg::longMsgCode || msgtype == BATSTradeMsg::shortMsgCode)
{
    int const symbolWidth = (msgtype == BATSTradeMsg::longMsgCode) ? 8 : 6;

    m_wire_msg = ( p_ts
                   >> qi::char_(msgtype)
                   >> p_orderId
                   >> qi::char_(BATSTradeMsg::sideFlag)
                   >> p_shares
                   >> qi::as_string[qi::repeat(symbolWidth)[qi::char_]]
                   >> m_price
                   >> p_execId )
                 // Hand the eight parsed attributes to the BATSTradeMsg constructor.
                 [qi::_val = phi::construct<BATSTradeMsg>(
                      qi::_1, qi::_2, qi::_3, qi::_4, qi::_5, qi::_6, qi::_7, qi::_8)];
}
基本上有两种消息类型,长消息和短消息,唯一的区别是第6个字段可以是6或8个字符的字符串。
但是,我意识到我不能做类似的事情,
// Attempt at a single rule covering both layouts. The alternative is ordered:
// the 6-character branch is tried first and succeeds whenever six characters
// are available, so the 8-character branch is not tried for a long message;
// the fields that follow then no longer line up and the whole rule fails.
m_wire_msg = ( p_ts >> qi::char_(msgtype)
>> p_orderId
>> qi::char_(BATSTradeMsg::sideFlag)
>> p_shares
>> ( qi::as_string[qi::repeat(6)[qi::char_]] | qi::as_string[qi::repeat(8)[qi::char_]])
>> m_price
>> p_execId )
[qi::_val = phi::construct<BATSTradeMsg>(
qi::_1, qi::_2, qi::_3, qi::_4, qi::_5, qi::_6, qi::_7, qi::_8)];
并让它正确解析这两条消息,
"28800168P1K27GA00000YB000300AAPL 00018319001K27GA00000Z"
"28800168r1K27GA00000YB000300AAPLSPOT00018319001K27GA00000Z"
这是我的建议:
数据类型
namespace BATS {
    /// Message type code; each enumerator's value is the character it is
    /// matched against in the wire message.
    enum class MessageCode : char { Long = 'r', Short = 'P' };

    /// Leading fields of a message, inherited by TradeMsg.
    ///
    /// Members carry default initializers so that a default-constructed
    /// message never holds indeterminate values (e.g. after a failed parse).
    struct MessageBase {
        int         timestamp{};
        MessageCode msgtype{};  // value 0: neither Long nor Short until assigned
    };

    /// Trade message. Member order matches the order in which the fields
    /// are parsed.
    struct TradeMsg : MessageBase {
        uint64_t    orderId{}; // Base 36 Numeric values come over the wire in ascii
        char        side{};
        uint32_t    shares{};
        std::string symbol;
        uint64_t    price{};
        uint64_t    execId{};  // Base 36 Numeric values come over the wire in ascii
    };
}
然后使用简单的 Fusion 适配(adaptation)来代替语义动作(semantic actions)¹:
// Adapt TradeMsg as a Boost.Fusion sequence. The members are listed in the
// order the grammar rules parse them; timestamp and msgtype are the members
// inherited from MessageBase.
BOOST_FUSION_ADAPT_STRUCT(BATS::TradeMsg, timestamp, msgtype, orderId, side, shares, symbol, price, execId)
解析器
然后解析器基本上变为:
I assumed "side" could be "B" or "S" (for Buy or Sell).
/// Qi grammar that parses one trade message, in either the long or the
/// short layout, into a BATS::TradeMsg.
template <typename It>
struct Parser : qi::grammar<It, BATS::TradeMsg()> {
    Parser() : Parser::base_type(r_wire_msg) {
        // see below (definitions of r_long_wire_msg and r_short_wire_msg)

        // The two layouts require different message codes, so at most one
        // alternative can match a given input.
        r_wire_msg
            = r_long_wire_msg
            | r_short_wire_msg
            ;

        BOOST_SPIRIT_DEBUG_NODES((r_wire_msg)(r_short_wire_msg)(r_long_wire_msg))
    }

  private:
    // Fixed-width numeric field parsers.
    // Template arguments: <value type, radix, min digits, max digits>.
    // order and execution ids are 12 characters base 36
    qi::uint_parser<uint64_t, 36, 12, 12> p_orderId;
    qi::uint_parser<uint64_t, 36, 12, 12> p_execId;
    qi::uint_parser<uint32_t, 10, 6, 6>   p_shares;
    // A 10-digit decimal value can exceed the uint32_t range (max 4294967295);
    // parse into 64 bits, which is also the type of TradeMsg::price.
    qi::uint_parser<uint64_t, 10, 10, 10> p_price;
    qi::uint_parser<uint32_t, 10, 8, 8>   p_ts;

    qi::rule<It, BATS::TradeMsg()> r_wire_msg, r_long_wire_msg, r_short_wire_msg;
};
当然,这两个子规则(sub-rules)非常相似:
// Long layout: message code Long and a symbol field of exactly 8 characters.
// Everything else is identical to the short layout below.
r_long_wire_msg
= p_ts
>> qi::char_(BATS::MessageCode::Long)
>> p_orderId
>> qi::char_("BS")
>> p_shares
>> qi::as_string[qi::repeat(8)[qi::char_]]
>> p_price
>> p_execId
;
// Short layout: message code Short and a symbol field of exactly 6 characters.
r_short_wire_msg
= p_ts
>> qi::char_(BATS::MessageCode::Short)
>> p_orderId
>> qi::char_("BS")
>> p_shares
>> qi::as_string[qi::repeat(6)[qi::char_]]
>> p_price
>> p_execId
;
演示程序
这里剖析了 3 个测试用例:
- 问题(错误)中的 "short" 示例
- 我尝试修复 "short" 示例
- 问题中的 "long" 示例
int main() {
using It = std::string::const_iterator;
Parser<It> const parser;
for (std::string const input : {
"28800168P1K27GA00000YB000300AAPL 00018319001K27GA00000Z",
"28800168r1K27GA00000YB000300AAPLSPOT00018319001K27GA00000Z" })
{
std::cout << "Input: " << std::quoted(input) << "\n";
It f = begin(input), l = end(input);
BATS::TradeMsg msg;
if (parse(f, l, parser, msg)) {
std::cout << "Parsed\n";
} else {
std::cout << "Parse failed\n";
}
if (f!=l)
std::cout << "Remaining data: " << std::quoted(std::string(f,l), '\'') << "\n";
}
}
输出
Input: "28800168P1K27GA00000YB000300AAPL 00018319001K27GA00000Z"
Parsed
Input: "28800168r1K27GA00000YB000300AAPLSPOT00018319001K27GA00000Z"
Parsed
¹ Boost Spirit: "Semantic actions are evil"? <-- 注意,这是问题的标题
我有以下内容,
/// Trade message built from the fields of a BATS wire message.
///
/// The timestamp and message type are forwarded to BATSMessageBase; the
/// remaining values are stored in this class.
class BATSTradeMsg : public BATSMessageBase
{
public:
    /// Builds a trade message from already-decoded field values.
    ///
    /// The constructor must be public: the grammar's semantic action creates
    /// the object through phi::construct<BATSTradeMsg>(...), which cannot
    /// reach a private constructor.
    BATSTradeMsg(int timestamp, char msgtype, uint64_t orderId, char side, uint32_t shares,
                 std::string const &symbol, uint64_t price, uint64_t execId) :
        BATSMessageBase(timestamp, msgtype),
        m_orderId(orderId),
        m_side(side),
        m_shares(shares),
        m_symbol(symbol),
        m_price(price),
        m_execId(execId)
    {
    }

private:
    uint64_t m_orderId; // Base 36 Numeric values come over the wire in ascii
    char m_side;
    uint32_t m_shares;
    std::string m_symbol;
    uint64_t m_price;
    uint64_t m_execId; // Base 36 Numeric values come over the wire in ascii
};
// Fixed-width numeric field parsers.
// Template arguments: <value type, radix, min digits, max digits>.
// order and execution ids are 12 characters base 36
qi::uint_parser< uint64_t, 36, 12, 12 > p_orderId;
qi::uint_parser< uint64_t, 36, 12, 12 > p_execId;
qi::uint_parser< uint32_t, 10, 6, 6 > p_shares;
// A 10-digit decimal value can exceed the uint32_t range (max 4294967295),
// so the price is parsed into 64 bits, matching the uint64_t price member.
qi::uint_parser< uint64_t, 10, 10, 10 > m_price;
qi::uint_parser< uint32_t, 10, 8, 8 > p_ts;
// The long and short trade layouts differ only in the width of the symbol
// field (8 vs. 6 characters), so one rule expression serves both. Any other
// message type leaves m_wire_msg as it was.
if (msgtype == BATSTradeMsg::longMsgCode || msgtype == BATSTradeMsg::shortMsgCode)
{
    int const symbolWidth = (msgtype == BATSTradeMsg::longMsgCode) ? 8 : 6;

    m_wire_msg = ( p_ts
                   >> qi::char_(msgtype)
                   >> p_orderId
                   >> qi::char_(BATSTradeMsg::sideFlag)
                   >> p_shares
                   >> qi::as_string[qi::repeat(symbolWidth)[qi::char_]]
                   >> m_price
                   >> p_execId )
                 // Hand the eight parsed attributes to the BATSTradeMsg constructor.
                 [qi::_val = phi::construct<BATSTradeMsg>(
                      qi::_1, qi::_2, qi::_3, qi::_4, qi::_5, qi::_6, qi::_7, qi::_8)];
}
基本上有两种消息类型,长消息和短消息,唯一的区别是第6个字段可以是6或8个字符的字符串。
但是,我意识到我不能做类似的事情,
// Attempt at a single rule covering both layouts. The alternative is ordered:
// the 6-character branch is tried first and succeeds whenever six characters
// are available, so the 8-character branch is not tried for a long message;
// the fields that follow then no longer line up and the whole rule fails.
m_wire_msg = ( p_ts >> qi::char_(msgtype)
>> p_orderId
>> qi::char_(BATSTradeMsg::sideFlag)
>> p_shares
>> ( qi::as_string[qi::repeat(6)[qi::char_]] | qi::as_string[qi::repeat(8)[qi::char_]])
>> m_price
>> p_execId )
[qi::_val = phi::construct<BATSTradeMsg>(
qi::_1, qi::_2, qi::_3, qi::_4, qi::_5, qi::_6, qi::_7, qi::_8)];
并让它正确解析这两条消息,
"28800168P1K27GA00000YB000300AAPL 00018319001K27GA00000Z"
"28800168r1K27GA00000YB000300AAPLSPOT00018319001K27GA00000Z"
这是我的建议:
数据类型
namespace BATS {
    /// Message type code; each enumerator's value is the character it is
    /// matched against in the wire message.
    enum class MessageCode : char { Long = 'r', Short = 'P' };

    /// Leading fields of a message, inherited by TradeMsg.
    ///
    /// Members carry default initializers so that a default-constructed
    /// message never holds indeterminate values (e.g. after a failed parse).
    struct MessageBase {
        int         timestamp{};
        MessageCode msgtype{};  // value 0: neither Long nor Short until assigned
    };

    /// Trade message. Member order matches the order in which the fields
    /// are parsed.
    struct TradeMsg : MessageBase {
        uint64_t    orderId{}; // Base 36 Numeric values come over the wire in ascii
        char        side{};
        uint32_t    shares{};
        std::string symbol;
        uint64_t    price{};
        uint64_t    execId{};  // Base 36 Numeric values come over the wire in ascii
    };
}
然后使用简单的 Fusion 适配(adaptation)来代替语义动作(semantic actions)¹:
// Adapt TradeMsg as a Boost.Fusion sequence. The members are listed in the
// order the grammar rules parse them; timestamp and msgtype are the members
// inherited from MessageBase.
BOOST_FUSION_ADAPT_STRUCT(BATS::TradeMsg, timestamp, msgtype, orderId, side, shares, symbol, price, execId)
解析器
然后解析器基本上变为:
I assumed "side" could be "B" or "S" (for Buy or Sell).
/// Qi grammar that parses one trade message, in either the long or the
/// short layout, into a BATS::TradeMsg.
template <typename It>
struct Parser : qi::grammar<It, BATS::TradeMsg()> {
    Parser() : Parser::base_type(r_wire_msg) {
        // see below (definitions of r_long_wire_msg and r_short_wire_msg)

        // The two layouts require different message codes, so at most one
        // alternative can match a given input.
        r_wire_msg
            = r_long_wire_msg
            | r_short_wire_msg
            ;

        BOOST_SPIRIT_DEBUG_NODES((r_wire_msg)(r_short_wire_msg)(r_long_wire_msg))
    }

  private:
    // Fixed-width numeric field parsers.
    // Template arguments: <value type, radix, min digits, max digits>.
    // order and execution ids are 12 characters base 36
    qi::uint_parser<uint64_t, 36, 12, 12> p_orderId;
    qi::uint_parser<uint64_t, 36, 12, 12> p_execId;
    qi::uint_parser<uint32_t, 10, 6, 6>   p_shares;
    // A 10-digit decimal value can exceed the uint32_t range (max 4294967295);
    // parse into 64 bits, which is also the type of TradeMsg::price.
    qi::uint_parser<uint64_t, 10, 10, 10> p_price;
    qi::uint_parser<uint32_t, 10, 8, 8>   p_ts;

    qi::rule<It, BATS::TradeMsg()> r_wire_msg, r_long_wire_msg, r_short_wire_msg;
};
当然,这两个子规则(sub-rules)非常相似:
// Long layout: message code Long and a symbol field of exactly 8 characters.
// Everything else is identical to the short layout below.
r_long_wire_msg
= p_ts
>> qi::char_(BATS::MessageCode::Long)
>> p_orderId
>> qi::char_("BS")
>> p_shares
>> qi::as_string[qi::repeat(8)[qi::char_]]
>> p_price
>> p_execId
;
// Short layout: message code Short and a symbol field of exactly 6 characters.
r_short_wire_msg
= p_ts
>> qi::char_(BATS::MessageCode::Short)
>> p_orderId
>> qi::char_("BS")
>> p_shares
>> qi::as_string[qi::repeat(6)[qi::char_]]
>> p_price
>> p_execId
;
演示程序
这里剖析了 3 个测试用例:
- 问题(错误)中的 "short" 示例
- 我尝试修复 "short" 示例
- 问题中的 "long" 示例
int main() {
using It = std::string::const_iterator;
Parser<It> const parser;
for (std::string const input : {
"28800168P1K27GA00000YB000300AAPL 00018319001K27GA00000Z",
"28800168r1K27GA00000YB000300AAPLSPOT00018319001K27GA00000Z" })
{
std::cout << "Input: " << std::quoted(input) << "\n";
It f = begin(input), l = end(input);
BATS::TradeMsg msg;
if (parse(f, l, parser, msg)) {
std::cout << "Parsed\n";
} else {
std::cout << "Parse failed\n";
}
if (f!=l)
std::cout << "Remaining data: " << std::quoted(std::string(f,l), '\'') << "\n";
}
}
输出
Input: "28800168P1K27GA00000YB000300AAPL 00018319001K27GA00000Z"
Parsed
Input: "28800168r1K27GA00000YB000300AAPLSPOT00018319001K27GA00000Z"
Parsed
¹ Boost Spirit: "Semantic actions are evil"? <-- 注意,这是问题的标题