使用 C++ 和 BOOST 读取 JSON 文件

Reading JSON file with C++ and BOOST

HTTP 服务器向我发送一个 JSON 响应(字符串),如下所示:

{
    "folders" :
    [{
            "id" : 109,
            "parent_id" : 110,
            "path" : "\/1\/105\/110\/"
        },
        {
            "id" : 110,
            "parent_id" : 105,
            "path" : "\/1\/105\/"
        }
    ],

    "files" :
    [{
            "id" : 26,
            "parent_id" : 105,
            "name" : "picture.png",
            "hash" : "md5_hash",
            "path" : "\/1\/105\/"
        },
        {
            "id" : 25,
            "parent_id" : 110,
            "name" : "another_picture.jpg",
            "hash" : "md5_hash",
            "path" : "\/1\/105\/110\/"
        }
    ]
}

我想将此 "tree of a remote folder" 与本地文件夹树(例如包含本地文件位置的字符串向量)进行比较,因此我想将此 JSON 转换为 ( string, vector ( map(string, string) ) ) (我不知道这是否可能)。

我正在开发一个在本地和远程文件夹之间同步文件的工具,所以我使用 boost 来列出本地文件夹,我想比较本地列表和远程列表(JSON 响应)生成操作(下载本地文件夹中不存在的缺失文件,上传远程文件夹中不存在的文件)。

我在另一个问题上发现了这个功能:

void print(boost::property_tree::ptree const& pt)
{
    using boost::property_tree::ptree;
    ptree::const_iterator end = pt.end();
    for (ptree::const_iterator it = pt.begin(); it != end; ++it)
    {
        std::cout << it->first << ": " << it->second.get_value<std::string>() << std::endl;
        print(it->second);
    }
}

我成功打印了这样的东西:

folders:
:
id: 109
parent_id: 110
name: 2011_pictures
:
id: 110
parent_id: 105
name: Aminos
files:
id: 26
parent_id: 105
name: logo.png
:
id: 5
parent_id: 109
name: me.jpg

我想知道是否可以用这个结果生成一个 map<string, vector <map<string,string> > >,它将有 2 个键:"folders" 和 "files" 我们可以使用这两个键访问映射类型的向量,包含每个对象(文件或文件夹)的信息。如果可行,将降低任务的复杂性(比较两个文件夹列表)

示例:T["folder"][0]["id"] 将 return "109" ; T["files"][0]["name"] 会 return "logo.png"

更新:这个问题很老,但我想给出一个建议:只要你想在 C++ 下处理 Json,就使用 RAPIDJSON。

Disclaimer: The sample below is not a full blown JSON parser. Consider using a library that supports your needs. You can see a more evolved JSON parser here https://github.com/sehe/spirit-v2-json

快速而简单的 Spirit 语法(假设您不需要太多一致性)是:

    text_   = '"' >> raw [*('\' >> char_ | ~char_('"'))] >> '"'; // ¹
    value_  = null | bool | text_ | double_ | object_ | array_; // ²
    member_ = text_ >> ':' >> value_;
    object_ = '{' >> -(member_ % ',') >> '}';
    array_  = '[' >> -(value_  % ',') >> ']';

    // ¹ as a bonus I added utf8 escape decoding in the full sample
    // ² as another bonus I threw in the missing `null` and `bool` types

无需进一步使用 AST 即可转换为 C++ 类型,例如:

using text   = std::string;
using value  = boost::make_recursive_variant<
        null,
        bool,
        text,                                      // "string" (roughly!)
        double,                                    // number
        std::map<text, boost::recursive_variant_>, // object
        std::vector<boost::recursive_variant_>     // array
    >::type;
using member = std::pair<text, value>;
using object = std::map<text, value>;
using array  = std::vector<value>;

如果你有两个 qd_json::value 对象,你可以直接比较它们:

qd_json::value local_tree, remote_tree;
if (local_tree == remote_tree)
{
    std::cout << "the tree is unchanged\n";
}

这是一个演示程序:

更新了演示

演示已更新,向您展示如何获得您在编辑问题时建议的 "user-friendly" 数据结构:

int main() {
    auto json = qd_json::parse(sample);

    // extract into user friendly datastructure from the question:
    auto extracted = Data::extract_from(json);

    for (auto& e : extracted.folders) std::cout << "folder:\t" << e.id << "\t" << e.path << "\n";
    for (auto& e : extracted.files)   std::cout << "file:\t"   << e.id << "\t" << e.path << "\t" << e.name << "\n";
}

Live On Coliru

#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <map>

namespace qi = boost::spirit::qi;

static std::string const sample = R"(
    {
        "folders" :
        [{
                "id" : 109,
                "parent_id" : 110,
                "path" : "\/1\/105\/110\/"
            },
            {
                "id" : 110,
                "parent_id" : 105,
                "path" : "\/1\/105\/"
            }
        ],

        "files" :
        [{
                "id" : 26,
                "parent_id" : 105,
                "name" : "picture.png",
                "hash" : "md5_hash",
                "path" : "\/1\/105\/"
            },
            {
                "id" : 25,
                "parent_id" : 110,
                "name" : "another_picture.jpg",
                "hash" : "md5_hash",
                "path" : "\/1\/105\/110\/"
            }
        ]
    })";

namespace qd_json { // quick and dirty JSON handling
    struct null {
        bool operator==(null) const { return true; }
    };

    inline static std::ostream& operator<<(std::ostream& os, null) { return os << "null"; }

    using text   = std::string;
    using value  = boost::make_recursive_variant<
            null,
            text,                                      // "string" (roughly!)
            double,                                    // number
            std::map<text, boost::recursive_variant_>, // object
            std::vector<boost::recursive_variant_>,    // array
            bool
        >::type;
    using member = std::pair<text, value>;
    using object = std::map<text, value>;
    using array  = std::vector<value>;

    template <typename It, typename Skipper = qi::space_type>
    struct grammar : qi::grammar<It, value(), Skipper>
    {
        grammar() : grammar::base_type(value_) {
            using namespace qi;

            text_   = '"' >> raw [*('\' >> char_ | ~char_('"'))] >> '"';
            null_   = "null" >> attr(null{});
            bool_   = "true" >> attr(true) | "false" >> attr(false);
            value_  = null_ | bool_ | text_ | double_ | object_ | array_;
            member_ = text_ >> ':' >> value_;
            object_ = '{' >> -(member_ % ',') >> '}';
            array_  = '[' >> -(value_  % ',') >> ']';

            ////////////////////////////////////////
            // Bonus: properly decoding the string:
            text_   = lexeme [ '"' >> *ch_ >> '"' ];

            ch_ = +(
                    ~char_("\"\")) [ _val += _1 ] |
                       qi::lit("\x5C") >> (               // \ (reverse solidus)
                       qi::lit("\x22") [ _val += '"'  ] | // "    quotation mark  U+0022
                       qi::lit("\x5C") [ _val += '\' ] | // \    reverse solidus U+005C
                       qi::lit("\x2F") [ _val += '/'  ] | // /    solidus         U+002F
                       qi::lit("\x62") [ _val += '\b' ] | // b    backspace       U+0008
                       qi::lit("\x66") [ _val += '\f' ] | // f    form feed       U+000C
                       qi::lit("\x6E") [ _val += '\n' ] | // n    line feed       U+000A
                       qi::lit("\x72") [ _val += '\r' ] | // r    carriage return U+000D
                       qi::lit("\x74") [ _val += '\t' ] | // t    tab             U+0009
                       qi::lit("\x75")                    // uXXXX                U+XXXX
                            >> _4HEXDIG [ append_utf8(qi::_val, qi::_1) ]
                    );

            BOOST_SPIRIT_DEBUG_NODES((text_)(value_)(member_)(object_)(array_)(null_)(bool_))
        }
    private:
        qi::rule<It, text()>            text_, ch_;
        qi::rule<It, null()>            null_;
        qi::rule<It, bool()>            bool_;
        qi::rule<It, value(),  Skipper> value_;
        qi::rule<It, member(), Skipper> member_;
        qi::rule<It, object(), Skipper> object_;
        qi::rule<It, array(),  Skipper> array_;

        struct append_utf8_f {
            template <typename...> struct result { typedef void type; };
            template <typename String, typename Codepoint>
            void operator()(String& to, Codepoint codepoint) const {
                auto out = std::back_inserter(to);
                boost::utf8_output_iterator<decltype(out)> convert(out);
                *convert++ = codepoint;
            }
        };
        boost::phoenix::function<append_utf8_f> append_utf8;
        qi::uint_parser<uint32_t, 16, 4, 4> _4HEXDIG;
    };

    template <typename Range, typename It = typename boost::range_iterator<Range const>::type>
    value parse(Range const& input) {
        grammar<It> g;

        It first(boost::begin(input)), last(boost::end(input));
        value parsed;
        bool ok = qi::phrase_parse(first, last, g, qi::space, parsed);

        if (ok && (first == last))
            return parsed;

        throw std::runtime_error("Remaining unparsed: '" + std::string(first, last) + "'");
    }

}

#include <boost/range/algorithm.hpp>
#include <boost/range/adaptors.hpp>
#include <boost/range/algorithm_ext/push_back.hpp>

struct Data {
    struct Folder { int id, parent_id; std::string path; };
    struct File   { int id, parent_id; std::string path, name, md5_hash; };

    using Folders = std::vector<Folder>;
    using Files   = std::vector<File>;

    Folders folders;
    Files   files;

    static Data extract_from(qd_json::value const& json) {
        using namespace boost::adaptors;

        return {
            boost::copy_range<Folders>(arr(obj(json).at("folders")) | transformed(obj) | transformed(&Data::extract_folder)),
            boost::copy_range<Files>  (arr(obj(json).at("files"))   | transformed(obj) | transformed(&Data::extract_file)),
        };
    }
 private:
    static Folder extract_folder(qd_json::object const& obj) {
        return {
            id   (obj.at("id")),
            id   (obj.at("parent_id")),
            text (obj.at("path"))
        };
    }
    static File extract_file(qd_json::object const& obj) {
        return {
            id   (obj.at("id")),
            id   (obj.at("parent_id")),
            text (obj.at("path")),
            text (obj.at("name")),
            text (obj.at("hash")),
        };
    }

    static int             id  (qd_json::value const&v) { return boost::get<double>(v); };
    static std::string     text(qd_json::value const&v) { return boost::get<qd_json::text>(v); };
    static qd_json::array  arr (qd_json::value const&v) { return boost::get<qd_json::array>(v); };
    static qd_json::object obj (qd_json::value const&v) { return boost::get<qd_json::object>(v); };
};

int main()
{
    auto json = qd_json::parse(sample);

    // compare json documents
    qd_json::value clone = json;
    assert(json == clone);

    // extract into user friendly datastructure from the question:
    auto extracted = Data::extract_from(json);

    for (auto& e : extracted.folders) std::cout << "folder:\t" << e.id << "\t" << e.path << "\n";
    for (auto& e : extracted.files)   std::cout << "file:\t"   << e.id << "\t" << e.path << "\t" << e.name << "\n";
}

输出:

folder: 109 /1/105/110/
folder: 110 /1/105/
file:   26  /1/105/ picture.png
file:   25  /1/105/110/ another_picture.jpg

使用 Boost 的内置 json 解析器到 属性 树:

http://www.boost.org/doc/libs/1_57_0/doc/html/boost_propertytree/parsers.html#boost_propertytree.parsers.json_parser

因为其他答案中的数据结构 and the target data structure was suggested to be:

struct Data {
    struct Folder { int id, parent_id; std::string path; };
    struct File   { int id, parent_id; std::string path, name, md5_hash; };

    using Folders = std::vector<Folder>;
    using Files   = std::vector<File>;

    Folders folders;
    Files   files;
};

我最终写了一个从通用 "JSON" 到该数据结构的转换(参见另一个答案:)。

但是,如果我们 "skip the middle man" 并将 JSON 专门解析为显示的 Data 结构,OP 可能会更高兴。 "simplifies" 仅针对此类文档的语法:

start    = '{' >> 
           (folders_ >> commasep) ^
           (files_ >> commasep)
         >> '}';

folders_ = prop_key(+"folders") >> '[' >> -(folder_ % ',') >> ']';
files_   = prop_key(+"files")   >> '[' >> -(file_   % ',') >> ']';

folder_  = '{' >> (
                (prop_key(+"id")        >> int_  >> commasep) ^
                (prop_key(+"parent_id") >> int_  >> commasep) ^
                (prop_key(+"path")      >> text_ >> commasep)
            ) >> '}';
file_    = '{' >> (
                (prop_key(+"id")        >> int_  >> commasep) ^
                (prop_key(+"parent_id") >> int_  >> commasep) ^
                (prop_key(+"path")      >> text_ >> commasep) ^
                (prop_key(+"name")      >> text_ >> commasep) ^
                (prop_key(+"hash")      >> text_ >> commasep)
            ) >> '}';

prop_key = lexeme ['"' >> lazy(_r1) >> '"'] >> ':';
commasep = &char_('}') | ',';

这个语法允许

  • 无意义的空格,
  • 对象内属性的重新排序
  • 并省略了对象属性

好处:

  • 属性 值类型的早期检查
  • 更短的编译时间
  • 代码确实更少:LoC 减少 37(不计算示例 JSON 行,即 ~22%)

That last benefit has a flip side: if ever you want to read slightly different JSON, now you need to muck with the grammar instead of just writing a different extraction/transform. At 37 lines of code, my preference is with the but I'll leave it to you to decide.

下面是直接使用此语法的同一个演示程序:

Live On Coliru

//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>

namespace qi = boost::spirit::qi;

static std::string const sample = R"(
    {
        "folders" :
        [{
                "id" : 109,
                "parent_id" : 110,
                "path" : "\/1\/105\/110\/"
            },
            {
                "id" : 110,
                "parent_id" : 105,
                "path" : "\/1\/105\/"
            }
        ],

        "files" :
        [{
                "id" : 26,
                "parent_id" : 105,
                "name" : "picture.png",
                "hash" : "md5_hash",
                "path" : "\/1\/105\/"
            },
            {
                "id" : 25,
                "parent_id" : 110,
                "name" : "another_picture.jpg",
                "hash" : "md5_hash",
                "path" : "\/1\/105\/110\/"
            }
        ]
    })";

struct Data {
    struct Folder { int id, parent_id; std::string path; };
    struct File   { int id, parent_id; std::string path, name, md5_hash; };

    using Folders = std::vector<Folder>;
    using Files   = std::vector<File>;

    Folders folders;
    Files   files;
};

BOOST_FUSION_ADAPT_STRUCT(Data::Folder, (int,id)(int,parent_id)(std::string,path))
BOOST_FUSION_ADAPT_STRUCT(Data::File,   (int,id)(int,parent_id)(std::string,path)(std::string,name)(std::string,md5_hash))
BOOST_FUSION_ADAPT_STRUCT(Data,         (Data::Folders,folders)(Data::Files,files))

namespace folder_info { // adhoc JSON parser

    template <typename It, typename Skipper = qi::space_type>
    struct grammar : qi::grammar<It, Data(), Skipper>
    {
        grammar() : grammar::base_type(start) {
            using namespace qi;

            start    = '{' >> 
                       (folders_ >> commasep) ^
                       (files_ >> commasep)
                     >> '}';

            folders_ = prop_key(+"folders") >> '[' >> -(folder_ % ',') >> ']';
            files_   = prop_key(+"files")   >> '[' >> -(file_   % ',') >> ']';

            folder_  = '{' >> (
                            (prop_key(+"id")        >> int_  >> commasep) ^
                            (prop_key(+"parent_id") >> int_  >> commasep) ^
                            (prop_key(+"path")      >> text_ >> commasep)
                        ) >> '}';
            file_    = '{' >> (
                            (prop_key(+"id")        >> int_  >> commasep) ^
                            (prop_key(+"parent_id") >> int_  >> commasep) ^
                            (prop_key(+"path")      >> text_ >> commasep) ^
                            (prop_key(+"name")      >> text_ >> commasep) ^
                            (prop_key(+"hash")      >> text_ >> commasep)
                        ) >> '}';

            prop_key = lexeme ['"' >> lazy(_r1) >> '"'] >> ':';
            commasep = &char_('}') | ',';

            ////////////////////////////////////////
            // Bonus: properly decoding the string:
            text_   = '"' >> *ch_ >> '"';

            ch_ = +(
                    ~char_("\"\")) [ _val += _1 ] |
                       qi::lit("\x5C") >> (               // \ (reverse solidus)
                       qi::lit("\x22") [ _val += '"'  ] | // "    quotation mark  U+0022
                       qi::lit("\x5C") [ _val += '\' ] | // \    reverse solidus U+005C
                       qi::lit("\x2F") [ _val += '/'  ] | // /    solidus         U+002F
                       qi::lit("\x62") [ _val += '\b' ] | // b    backspace       U+0008
                       qi::lit("\x66") [ _val += '\f' ] | // f    form feed       U+000C
                       qi::lit("\x6E") [ _val += '\n' ] | // n    line feed       U+000A
                       qi::lit("\x72") [ _val += '\r' ] | // r    carriage return U+000D
                       qi::lit("\x74") [ _val += '\t' ] | // t    tab             U+0009
                       qi::lit("\x75")                    // uXXXX                U+XXXX
                            >> _4HEXDIG [ append_utf8(qi::_val, qi::_1) ]
                    );

            BOOST_SPIRIT_DEBUG_NODES((files_)(folders_)(file_)(folder_)(start)(text_))
        }
    private:
        qi::rule<It, Data(),            Skipper> start;
        qi::rule<It, Data::Files(),     Skipper> files_;
        qi::rule<It, Data::Folders(),   Skipper> folders_;
        qi::rule<It, Data::File(),      Skipper> file_;
        qi::rule<It, Data::Folder(),    Skipper> folder_;
        qi::rule<It, void(const char*), Skipper> prop_key;

        qi::rule<It, std::string()> text_, ch_;
        qi::rule<It> commasep;

        struct append_utf8_f {
            template <typename...> struct result { typedef void type; };
            template <typename String, typename Codepoint>
            void operator()(String& to, Codepoint codepoint) const {
                auto out = std::back_inserter(to);
                boost::utf8_output_iterator<decltype(out)> convert(out);
                *convert++ = codepoint;
            }
        };
        boost::phoenix::function<append_utf8_f> append_utf8;
        qi::uint_parser<uint32_t, 16, 4, 4> _4HEXDIG;
    };

    template <typename Range, typename It = typename boost::range_iterator<Range const>::type>
    Data parse(Range const& input) {
        grammar<It> g;

        It first(boost::begin(input)), last(boost::end(input));
        Data parsed;
        bool ok = qi::phrase_parse(first, last, g, qi::space, parsed);

        if (ok && (first == last))
            return parsed;

        throw std::runtime_error("Remaining unparsed: '" + std::string(first, last) + "'");
    }
}

int main()
{
    auto parsed = folder_info::parse(sample);

    for (auto& e : parsed.folders) 
        std::cout << "folder:\t" << e.id << "\t" << e.path << "\n";
    for (auto& e : parsed.files) 
        std::cout << "file:\t"   << e.id << "\t" << e.path << "\t" << e.name << "\n";
}

输出:

folder: 109 /1/105/110/
folder: 110 /1/105/
file:   26  /1/105/ picture.png
file:   25  /1/105/110/ another_picture.jpg