如何用boost qi解析rtsp url?
How to parse rtsp url with boost qi?
我正在尝试像这样解析 RTSP-url :
// Shorthand for the Spirit.Qi namespace used by everything below.
namespace qi = boost::spirit::qi;
// Numeric parser for exactly two hexadecimal digits (radix 16, min 2, max 2 digits);
// used for the value that follows '%' in a percent-escape.
qi::int_parser<unsigned char, 16, 2, 2> hex_byte;
// Spirit.Qi grammar that splits a URL into the fields of UrlStruct:
// optional protocol, optional credentials, host, optional port, path.
struct UrlParser : qi::grammar<stringIterator_t, UrlStruct()>
{
// Wires up all rules; `start` is the grammar's entry rule.
UrlParser() : UrlParser::base_type(start)
{
using boost::spirit::int_;
namespace ascii = boost::spirit::ascii;
using boost::spirit::ascii::char_;
// Parses http://, rtsp://, ftp://....
protocol %= +(char_ - ':') >> "://";
// Parsers for url decoding
// A literal "+" is decoded to a space.
encodeChar.add("+", ' ');
// Password: decoded characters up to (not including) the first '@'.
// Because every '@' is excluded, an unescaped '@' inside the password ends it early.
passwordRule = *((encodeChar | "%" >> hex_byte | char_) - '@');
// Login: decoded characters up to (not including) the first ':'.
loginRule = *((encodeChar | "%" >> hex_byte | char_) - ':');
// Parses user:pass@, user:@, :pass@
credentials %= loginRule >> ':' >> passwordRule >> '@';
// Parses host name or ip address in 192.168.0.1:80 192.168.0.1/script.cgi
// Consumes one character at a time while the negative lookahead does not see
// a '/' or a ":<digits>" that is followed by '/' or the end of input.
host %= +(!(char_("/") | (char_(":")) >> +ascii::digit >> (char_("/") | boost::spirit::eoi)) >> char_);
// Parses port number in ":80", string
port %= ':' >> int_;
// Parses script path in "/video.cgi?resolution=1" string.
// Takes every remaining character up to an end-of-line.
path %= *(char_ - boost::spirit::eol);
// Full URL: the sequence order here is the order in which UrlStruct's fields are filled.
start %= -protocol
>> -credentials
>> host
>> -port
>> path
;
}
// Rules exposed publicly, one per URL component.
qi::rule<stringIterator_t, UrlStruct()> start;
qi::rule<stringIterator_t, std::string()> protocol;
qi::rule<stringIterator_t, UrlStruct::stringPair_t()> credentials;
qi::rule<stringIterator_t, std::string()> host;
qi::rule<stringIterator_t, int()> port;
qi::rule<stringIterator_t, std::string()> path;
private:
// Helper rules and the decoding table used only inside the grammar.
qi::rule<stringIterator_t, std::string()> loginRule;
qi::rule<stringIterator_t, std::string()> passwordRule;
qi::symbols<char const, char const> encodeChar;
};
UrlStruct 看起来像这样:
/// Result of parsing a URL; protocol, credentials and port are optional.
struct UrlStruct
{
    using optString_t   = boost::optional<std::string>;
    using port_t        = boost::optional<int>;
    /// Two strings (filled from the login and password rules) as a fusion vector.
    using stringPair_t  = boost::fusion::vector<std::string, std::string>;
    using credentials_t = boost::optional<stringPair_t>;

    // Member order is kept as-is: the grammar fills the fields in this order.
    optString_t   protocol;
    credentials_t credentials;
    std::string   host;
    port_t        port;
    std::string   path;
};
当我有这样的 url 时,这是有效的:
rtsp://admin:admin@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov
但是当我的密码中有“@”时,这是行不通的。 URL 例如:
rtsp://admin:adm@in@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov
我该如何解决这个问题?
相对明显的解决方法是对 @ 进行 URL 转义(写成 %40):
// Runs three sample URLs through the grammar: a plain one, one with the '@' in
// the password escaped as %40, and one with a raw '@' in the password.
// Prints the parsed structure on success and "Failed" otherwise.
UrlParser p;
for (std::string const input : {
        "rtsp://admin:admin@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
        "rtsp://admin:adm%40in@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
        "rtsp://admin:adm@in@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
    })
{
    // f is advanced by parse(); l marks the end of the input.
    stringIterator_t f = begin(input), l = end(input);
    Ast::UrlStruct u;
    if (parse(f, l, p, u)) {
        std::cout << "Parsed: " << u << "\n";
    } else {
        std::cout << "Failed\n";
    }
} // end of the for body (this closing brace was missing from the excerpt)
输出:
Parsed: ( rtsp (admin admin) wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)
Parsed: ( rtsp (admin adm@in) wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)
Parsed: ( rtsp (admin adm) in@wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)
如您所见,第二个(用 %40 代替 @)解析得很好。
以灵活的方式修复它
您的主机规则已经有一些否定的先行断言:
// Original host rule: consume one character at a time while the negative
// lookahead !(...) does not see a '/' or a ":<digits>" followed by '/' or end of input.
host = +(
!(char_("/") | (char_(":")) >> +ascii::digit >> (char_("/") | boost::spirit::eoi))
>> char_);
我将其简化为等价物:
// Simplified form: any character except '/', unless a port followed by '/' or
// end of input starts at this position.
host = +(~char_("/") - (port >> ('/' | qi::eoi)));
As an aside, I think the assumption that ":nnnn" is not a port unless it is at the very end of the hostname might be wrong and potentially unsafe. I suppose you have it there so you can still accept IPv6 addresses? See also "What is the nicest way to parse this in C++?"
同样,您的 password 规则使用了一个差集(减法)解析器:
// Original password rule: decoded characters, with every '@' excluded.
password = *((encodeChar | "%" >> hex_byte | char_) - '@');
您已经很接近了:您并不是要禁止 '@' 本身,而是只在它后面跟着一个有效的 host 产生式时才禁止:
// Login: decoded characters up to (not including) the first ':'.
login = *((encodeChar | "%" >> hex_byte | char_) - ':');
// Password: an '@' is excluded only when `host` followed by `eoh` matches right
// after it; any other '@' stays part of the password.
// NOTE(review): there is no optional port between host and eoh in this lookahead,
// so for "...@host:port/..." the check does not match at that '@' — confirm
// whether URLs with both credentials and a port must be supported.
password = *((encodeChar | "%" >> hex_byte | char_) -
('@' >> host >> eoh));
我为 end-of-host(主机结尾)引入了一个简洁的简写:
// eoh ("end of host"): matches a '/' or the end of the input.
auto eoh = qi::copy('/' | qi::eoi);
这是为了消除与 host 规则的重复。请注意,现在我们还要确保 @ 不会出现在主机部分,才能达到目的(根据 RFC,可能还有更多字符需要排除,但这是让您的测试通过的最低要求):
// Host: any character except '@' and '/', stopping before a port spec that is
// followed by eoh.
host = +(~char_("@/") - (portSpec >> eoh));
演示
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace Ast {
    /// Parsed pieces of a URL; protocol, credentials and port are optional.
    struct UrlStruct {
        /// Login/password pair as a named struct (more natural, and easier
        /// to print when debugging).
        struct Credentials {
            std::string login;
            std::string password;
        };

        using optString_t   = boost::optional<std::string>;
        using port_t        = boost::optional<int>;
        using credentials_t = boost::optional<Credentials>;

        optString_t   protocol;
        credentials_t credentials;
        std::string   host;
        port_t        port;
        std::string   path;
    };

    // Bring Boost.Fusion's stream insertion operator into this namespace.
    using boost::fusion::operator<<;
} // namespace Ast
// Adapt both AST structs as Fusion sequences. Members are listed in the same
// order in which the grammar's sequences produce them.
BOOST_FUSION_ADAPT_STRUCT(Ast::UrlStruct::Credentials, login, password)
BOOST_FUSION_ADAPT_STRUCT(Ast::UrlStruct, protocol, credentials, host, port, path)
// Iterator type that the grammar and all of its rules are instantiated with.
using stringIterator_t = std::string::const_iterator;
/// Spirit.Qi grammar that splits a URL of the shape
/// [protocol://][login:password@]host[:port]path into an Ast::UrlStruct.
///
/// The password may contain unescaped '@' characters: an '@' only terminates
/// the credentials when a complete host (optionally followed by a port) and the
/// end of the host part come right after it.
struct UrlParser : qi::grammar<stringIterator_t, Ast::UrlStruct()>
{
    UrlParser() : UrlParser::base_type(start)
    {
        using boost::spirit::ascii::char_;

        // Parses http://, rtsp://, ftp://....
        protocol = +(char_ - ':') >> "://";

        // Parsers for url decoding: "+" decodes to a space, "%XX" to the byte XX.
        encodeChar.add("+", ' ');

        // End of the host[:port] part: a '/' or the end of the input.
        auto eoh = qi::copy('/' | qi::eoi);

        // Login: decoded characters up to (not including) the first ':'.
        login = *((encodeChar | "%" >> hex_byte | char_) - ':');

        // Password: decoded characters, stopping at the '@' that introduces the
        // host. The lookahead must allow an optional port: `host` itself stops
        // before ":port", so without `-portSpec` the check would fail for
        // "...@host:554/..." and the credentials would never be terminated.
        password = *((encodeChar | "%" >> hex_byte | char_) -
                     ('@' >> host >> -portSpec >> eoh));

        // Parses user:pass@, user:@, :pass@
        credentials = login >> ':' >> password >> '@';

        // Parses host name or ip address in 192.168.0.1:80 192.168.0.1/script.cgi
        // Any character except '@' and '/', stopping before a port spec that is
        // followed by the end of the host part.
        host = +(~char_("@/") - (portSpec >> eoh));

        // Parses port number in ":80", string
        portSpec = ':' >> port_number;

        // Parses script path in "/video.cgi?resolution=1" string:
        // every remaining character up to an end-of-line.
        path = *(char_ - qi::eol);

        // The sequence order matches the member order of Ast::UrlStruct.
        start = -protocol
            >> -credentials
            >> host
            >> -portSpec
            >> path
            ;

        // Names the rules for tracing; only active when BOOST_SPIRIT_DEBUG is defined.
        BOOST_SPIRIT_DEBUG_NODES((start)(protocol)(credentials)(host)(portSpec)(
            path)(login)(password))
    }

  private:
    qi::rule<stringIterator_t, Ast::UrlStruct()> start;
    qi::rule<stringIterator_t, std::string()> protocol;
    qi::rule<stringIterator_t, Ast::UrlStruct::Credentials()> credentials;
    qi::rule<stringIterator_t, std::string()> host;
    qi::rule<stringIterator_t, int()> portSpec;
    qi::rule<stringIterator_t, std::string()> path;
    qi::rule<stringIterator_t, std::string()> login;
    qi::rule<stringIterator_t, std::string()> password;

    // Literal-to-character decoding table ("+" -> ' ').
    qi::symbols<char const, char const> encodeChar;
    // Exactly two hex digits, yielding one byte.
    qi::uint_parser<uint8_t, 16, 2, 2> hex_byte;
    // Unsigned decimal port number limited to the 16-bit range.
    qi::uint_parser<uint16_t> port_number;
};
int main() {
UrlParser p;
for (std::string const input : {
"rtsp://admin:admin@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
"rtsp://admin:adm%40in@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
"rtsp://admin:adm@in@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
})
{
stringIterator_t f = begin(input), l = end(input);
Ast::UrlStruct u;
if (parse(f, l, p, u)) {
std::cout << "Yay: " << u << "\n";
} else {
std::cout << "Nay\n";
}
if (f != l) {
std::cout << "Remaining: " << std::quoted(std::string(f, l)) << "\n";
}
}
}
现在打印:
Yay: ( rtsp (admin admin) wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)
Yay: ( rtsp (admin adm@in) wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)
Yay: ( rtsp (admin adm@in) wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)
其他旁白
端口确实应该限制为 16 位范围内的正整数。
使用BOOST_SPIRIT_DEBUG更轻松地调试语法
不要使用实现细节 (fusion::vector)。可以改用 std::pair,或者干脆像我这样使用另一个经过适配(adapted)的结构体。这样实现 operator<< 来做快速调试就不那么痛苦了。
您在输入结束的检测上存在轻微的不一致。大多数地方您使用 eoi,但在 path 中您一直接受到 eol 为止。您可能想让这些地方保持一致,或者明确规定从 eol 开始的剩余输入意味着什么。
现实中的 URI 有时只包含登录名而不包含密码(也没有 :@)。我认为您的语法可能也需要支持这种情况?
解析器目前的一个结构性缺陷是效率。这些否定前瞻(negative lookahead)可能导致比预期更多的回溯。- (':' >> port) 应该没问题,但 - ('@' >> host) 可能会向前扫描很多字符。实际上,我认为这不会成为问题(当然,除非您经常使用包含许多 @ 符号的很长的密码)。
但说真的,我认为这说明解析器试图在同一个抽象层次上做太多事情。如果分而治之,事情会容易得多:先解析 schema://HOSTSTUFF/PATHSTUFF,然后再按我前面链接的那个回答的方式解析 HOSTSTUFF。path/query 部分也是如此:那里有无数有趣的细节,而在刚开始拆分 URI 的顶层结构时,您完全不必操心它们。
我正在尝试像这样解析 RTSP-url :
// Shorthand for the Spirit.Qi namespace used by everything below.
namespace qi = boost::spirit::qi;
// Numeric parser for exactly two hexadecimal digits (radix 16, min 2, max 2 digits);
// used for the value that follows '%' in a percent-escape.
qi::int_parser<unsigned char, 16, 2, 2> hex_byte;
// Spirit.Qi grammar that splits a URL into the fields of UrlStruct:
// optional protocol, optional credentials, host, optional port, path.
struct UrlParser : qi::grammar<stringIterator_t, UrlStruct()>
{
// Wires up all rules; `start` is the grammar's entry rule.
UrlParser() : UrlParser::base_type(start)
{
using boost::spirit::int_;
namespace ascii = boost::spirit::ascii;
using boost::spirit::ascii::char_;
// Parses http://, rtsp://, ftp://....
protocol %= +(char_ - ':') >> "://";
// Parsers for url decoding
// A literal "+" is decoded to a space.
encodeChar.add("+", ' ');
// Password: decoded characters up to (not including) the first '@'.
// Because every '@' is excluded, an unescaped '@' inside the password ends it early.
passwordRule = *((encodeChar | "%" >> hex_byte | char_) - '@');
// Login: decoded characters up to (not including) the first ':'.
loginRule = *((encodeChar | "%" >> hex_byte | char_) - ':');
// Parses user:pass@, user:@, :pass@
credentials %= loginRule >> ':' >> passwordRule >> '@';
// Parses host name or ip address in 192.168.0.1:80 192.168.0.1/script.cgi
// Consumes one character at a time while the negative lookahead does not see
// a '/' or a ":<digits>" that is followed by '/' or the end of input.
host %= +(!(char_("/") | (char_(":")) >> +ascii::digit >> (char_("/") | boost::spirit::eoi)) >> char_);
// Parses port number in ":80", string
port %= ':' >> int_;
// Parses script path in "/video.cgi?resolution=1" string.
// Takes every remaining character up to an end-of-line.
path %= *(char_ - boost::spirit::eol);
// Full URL: the sequence order here is the order in which UrlStruct's fields are filled.
start %= -protocol
>> -credentials
>> host
>> -port
>> path
;
}
// Rules exposed publicly, one per URL component.
qi::rule<stringIterator_t, UrlStruct()> start;
qi::rule<stringIterator_t, std::string()> protocol;
qi::rule<stringIterator_t, UrlStruct::stringPair_t()> credentials;
qi::rule<stringIterator_t, std::string()> host;
qi::rule<stringIterator_t, int()> port;
qi::rule<stringIterator_t, std::string()> path;
private:
// Helper rules and the decoding table used only inside the grammar.
qi::rule<stringIterator_t, std::string()> loginRule;
qi::rule<stringIterator_t, std::string()> passwordRule;
qi::symbols<char const, char const> encodeChar;
};
UrlStruct 看起来像这样:
/// Result of parsing a URL; protocol, credentials and port are optional.
struct UrlStruct
{
    using optString_t   = boost::optional<std::string>;
    using port_t        = boost::optional<int>;
    /// Two strings (filled from the login and password rules) as a fusion vector.
    using stringPair_t  = boost::fusion::vector<std::string, std::string>;
    using credentials_t = boost::optional<stringPair_t>;

    // Member order is kept as-is: the grammar fills the fields in this order.
    optString_t   protocol;
    credentials_t credentials;
    std::string   host;
    port_t        port;
    std::string   path;
};
当我有这样的 url 时,这是有效的:
rtsp://admin:admin@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov
但是当我的密码中有“@”时,这是行不通的。 URL 例如:
rtsp://admin:adm@in@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov
我该如何解决这个问题?
相对明显的解决方法是对 @ 进行 URL 转义(写成 %40):
// Runs three sample URLs through the grammar: a plain one, one with the '@' in
// the password escaped as %40, and one with a raw '@' in the password.
// Prints the parsed structure on success and "Failed" otherwise.
UrlParser p;
for (std::string const input : {
        "rtsp://admin:admin@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
        "rtsp://admin:adm%40in@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
        "rtsp://admin:adm@in@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
    })
{
    // f is advanced by parse(); l marks the end of the input.
    stringIterator_t f = begin(input), l = end(input);
    Ast::UrlStruct u;
    if (parse(f, l, p, u)) {
        std::cout << "Parsed: " << u << "\n";
    } else {
        std::cout << "Failed\n";
    }
} // end of the for body (this closing brace was missing from the excerpt)
输出:
Parsed: ( rtsp (admin admin) wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)
Parsed: ( rtsp (admin adm@in) wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)
Parsed: ( rtsp (admin adm) in@wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)
如您所见,第二个(用 %40 代替 @)解析得很好。
以灵活的方式修复它
您的主机规则已经有一些否定的先行断言:
// Original host rule: consume one character at a time while the negative
// lookahead !(...) does not see a '/' or a ":<digits>" followed by '/' or end of input.
host = +(
!(char_("/") | (char_(":")) >> +ascii::digit >> (char_("/") | boost::spirit::eoi))
>> char_);
我将其简化为等价物:
// Simplified form: any character except '/', unless a port followed by '/' or
// end of input starts at this position.
host = +(~char_("/") - (port >> ('/' | qi::eoi)));
As an aside, I think the assumption that ":nnnn" is not a port unless it is at the very end of the hostname might be wrong and potentially unsafe. I suppose you have it there so you can still accept IPv6 addresses? See also "What is the nicest way to parse this in C++?"
同样,您的 password 规则使用了一个差集(减法)解析器:
// Original password rule: decoded characters, with every '@' excluded.
password = *((encodeChar | "%" >> hex_byte | char_) - '@');
您已经很接近了:您并不是要禁止 '@' 本身,而是只在它后面跟着一个有效的 host 产生式时才禁止:
// Login: decoded characters up to (not including) the first ':'.
login = *((encodeChar | "%" >> hex_byte | char_) - ':');
// Password: an '@' is excluded only when `host` followed by `eoh` matches right
// after it; any other '@' stays part of the password.
// NOTE(review): there is no optional port between host and eoh in this lookahead,
// so for "...@host:port/..." the check does not match at that '@' — confirm
// whether URLs with both credentials and a port must be supported.
password = *((encodeChar | "%" >> hex_byte | char_) -
('@' >> host >> eoh));
我为 end-of-host(主机结尾)引入了一个简洁的简写:
// eoh ("end of host"): matches a '/' or the end of the input.
auto eoh = qi::copy('/' | qi::eoi);
这是为了消除与 host 规则的重复。请注意,现在我们还要确保 @ 不会出现在主机部分,才能达到目的(根据 RFC,可能还有更多字符需要排除,但这是让您的测试通过的最低要求):
// Host: any character except '@' and '/', stopping before a port spec that is
// followed by eoh.
host = +(~char_("@/") - (portSpec >> eoh));
演示
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace Ast {
    /// Parsed pieces of a URL; protocol, credentials and port are optional.
    struct UrlStruct {
        /// Login/password pair as a named struct (more natural, and easier
        /// to print when debugging).
        struct Credentials {
            std::string login;
            std::string password;
        };

        using optString_t   = boost::optional<std::string>;
        using port_t        = boost::optional<int>;
        using credentials_t = boost::optional<Credentials>;

        optString_t   protocol;
        credentials_t credentials;
        std::string   host;
        port_t        port;
        std::string   path;
    };

    // Bring Boost.Fusion's stream insertion operator into this namespace.
    using boost::fusion::operator<<;
} // namespace Ast
// Adapt both AST structs as Fusion sequences. Members are listed in the same
// order in which the grammar's sequences produce them.
BOOST_FUSION_ADAPT_STRUCT(Ast::UrlStruct::Credentials, login, password)
BOOST_FUSION_ADAPT_STRUCT(Ast::UrlStruct, protocol, credentials, host, port, path)
// Iterator type that the grammar and all of its rules are instantiated with.
using stringIterator_t = std::string::const_iterator;
/// Spirit.Qi grammar that splits a URL of the shape
/// [protocol://][login:password@]host[:port]path into an Ast::UrlStruct.
///
/// The password may contain unescaped '@' characters: an '@' only terminates
/// the credentials when a complete host (optionally followed by a port) and the
/// end of the host part come right after it.
struct UrlParser : qi::grammar<stringIterator_t, Ast::UrlStruct()>
{
    UrlParser() : UrlParser::base_type(start)
    {
        using boost::spirit::ascii::char_;

        // Parses http://, rtsp://, ftp://....
        protocol = +(char_ - ':') >> "://";

        // Parsers for url decoding: "+" decodes to a space, "%XX" to the byte XX.
        encodeChar.add("+", ' ');

        // End of the host[:port] part: a '/' or the end of the input.
        auto eoh = qi::copy('/' | qi::eoi);

        // Login: decoded characters up to (not including) the first ':'.
        login = *((encodeChar | "%" >> hex_byte | char_) - ':');

        // Password: decoded characters, stopping at the '@' that introduces the
        // host. The lookahead must allow an optional port: `host` itself stops
        // before ":port", so without `-portSpec` the check would fail for
        // "...@host:554/..." and the credentials would never be terminated.
        password = *((encodeChar | "%" >> hex_byte | char_) -
                     ('@' >> host >> -portSpec >> eoh));

        // Parses user:pass@, user:@, :pass@
        credentials = login >> ':' >> password >> '@';

        // Parses host name or ip address in 192.168.0.1:80 192.168.0.1/script.cgi
        // Any character except '@' and '/', stopping before a port spec that is
        // followed by the end of the host part.
        host = +(~char_("@/") - (portSpec >> eoh));

        // Parses port number in ":80", string
        portSpec = ':' >> port_number;

        // Parses script path in "/video.cgi?resolution=1" string:
        // every remaining character up to an end-of-line.
        path = *(char_ - qi::eol);

        // The sequence order matches the member order of Ast::UrlStruct.
        start = -protocol
            >> -credentials
            >> host
            >> -portSpec
            >> path
            ;

        // Names the rules for tracing; only active when BOOST_SPIRIT_DEBUG is defined.
        BOOST_SPIRIT_DEBUG_NODES((start)(protocol)(credentials)(host)(portSpec)(
            path)(login)(password))
    }

  private:
    qi::rule<stringIterator_t, Ast::UrlStruct()> start;
    qi::rule<stringIterator_t, std::string()> protocol;
    qi::rule<stringIterator_t, Ast::UrlStruct::Credentials()> credentials;
    qi::rule<stringIterator_t, std::string()> host;
    qi::rule<stringIterator_t, int()> portSpec;
    qi::rule<stringIterator_t, std::string()> path;
    qi::rule<stringIterator_t, std::string()> login;
    qi::rule<stringIterator_t, std::string()> password;

    // Literal-to-character decoding table ("+" -> ' ').
    qi::symbols<char const, char const> encodeChar;
    // Exactly two hex digits, yielding one byte.
    qi::uint_parser<uint8_t, 16, 2, 2> hex_byte;
    // Unsigned decimal port number limited to the 16-bit range.
    qi::uint_parser<uint16_t> port_number;
};
int main() {
UrlParser p;
for (std::string const input : {
"rtsp://admin:admin@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
"rtsp://admin:adm%40in@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
"rtsp://admin:adm@in@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
})
{
stringIterator_t f = begin(input), l = end(input);
Ast::UrlStruct u;
if (parse(f, l, p, u)) {
std::cout << "Yay: " << u << "\n";
} else {
std::cout << "Nay\n";
}
if (f != l) {
std::cout << "Remaining: " << std::quoted(std::string(f, l)) << "\n";
}
}
}
现在打印:
Yay: ( rtsp (admin admin) wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)
Yay: ( rtsp (admin adm@in) wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)
Yay: ( rtsp (admin adm@in) wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)
其他旁白
端口确实应该限制为 16 位范围内的正整数。
使用BOOST_SPIRIT_DEBUG更轻松地调试语法
不要使用实现细节 (fusion::vector)。可以改用 std::pair,或者干脆像我这样使用另一个经过适配(adapted)的结构体。这样实现 operator<< 来做快速调试就不那么痛苦了。
您在输入结束的检测上存在轻微的不一致。大多数地方您使用 eoi,但在 path 中您一直接受到 eol 为止。您可能想让这些地方保持一致,或者明确规定从 eol 开始的剩余输入意味着什么。
现实中的 URI 有时只包含登录名而不包含密码(也没有 :@)。我认为您的语法可能也需要支持这种情况?
解析器目前的一个结构性缺陷是效率。这些否定前瞻(negative lookahead)可能导致比预期更多的回溯。- (':' >> port) 应该没问题,但 - ('@' >> host) 可能会向前扫描很多字符。实际上,我认为这不会成为问题(当然,除非您经常使用包含许多 @ 符号的很长的密码)。
但说真的,我认为这说明解析器试图在同一个抽象层次上做太多事情。如果分而治之,事情会容易得多:先解析 schema://HOSTSTUFF/PATHSTUFF,然后再按我前面链接的那个回答的方式解析 HOSTSTUFF。path/query 部分也是如此:那里有无数有趣的细节,而在刚开始拆分 URI 的顶层结构时,您完全不必操心它们。