类似元数据格式的配置文件解析器
Metadata Like Config file parser
我正在尝试解析这个配置文件...
MODEL: "modelname1" { FILEPATH = "FILEPATH1"; TEXTUREPATH = "TEXTUREPATH1"; NORMALPATH = "NORMALPATH1"; }
MODEL:"modelname2"{FILEPATH = "FILEPATH2";TEXTUREPATH = "TEXTUREPATH2";NORMALPATH = "NORMALPATH2";}
这是我的尝试:
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>
using namespace std;
// One MODEL record read from the configuration file.
struct ModelData
{
    std::string tagName;      // name that follows "MODEL:", quotes removed
    std::string filePath;     // value of the FILEPATH entry
    std::string texturePath;  // value of the TEXTUREPATH entry
    std::string normalPath;   // value of the NORMALPATH entry
};
// Every MODEL record parsed by main(), in the order the lines were read.
std::vector<ModelData> g_modelData;
// Extracts the value of a `TAG=value` field when its tag matches.
//
// _source     one field with whitespace already removed, e.g. FILEPATH="a.obj"
// _tagName    tag the caller is looking for, e.g. "FILEPATH"
// _outStrVal  receives the value with every '"' removed; left untouched when
//             _source has no '=' or when its tag differs from _tagName
void GetStringValue( const std::string& _source, const std::string& _tagName, std::string& _outStrVal )
{
    const std::size_t equalPos = _source.find('=');
    if (equalPos == std::string::npos)
    {
        // No '=' means no value. Without this guard `equalPos + 1` wraps to 0
        // and the tag text itself would be handed back as the value.
        return;
    }

    std::string tagName = _source.substr(0, equalPos);
    tagName.erase(std::remove(tagName.begin(), tagName.end(), '"'), tagName.end());
    if (tagName != _tagName)
    {
        return;
    }

    // Build the result in a local first so the call stays correct even if the
    // caller passes the same string as input and output.
    std::string value = _source.substr(equalPos + 1);
    value.erase(std::remove(value.begin(), value.end(), '"'), value.end());
    _outStrVal = value;
}
// Loads "modeldata.txt" and appends one ModelData to g_modelData for every
// line shaped like
//   MODEL: "name" { FILEPATH = "..."; TEXTUREPATH = "..."; NORMALPATH = "..."; }
// Parsing is line-oriented: a record has to sit on a single line. Spaces and
// tabs are removed from the whole line, including inside quoted values.
// Returns 1 when the file cannot be opened, 0 otherwise.
int main()
{
    std::ifstream infile("modeldata.txt", std::ios::in);
    if (!infile.good())
    {
        std::cout << "Error opening file!" << std::endl;
        system("pause");
        return 1; // report the failure instead of carrying on with a dead stream
    }

    std::string line;
    // std::getline sizes `line` to exactly what was read. Reading into the
    // string's own buffer left stale bytes behind the terminator, and every
    // erase shrank the buffer so a later, longer line made the read fail.
    while (std::getline(infile, line))
    {
        line.erase(std::remove(line.begin(), line.end(), ' '), line.end());
        line.erase(std::remove(line.begin(), line.end(), '\t'), line.end());
        line.erase(std::remove(line.begin(), line.end(), '\r'), line.end()); // CRLF input

        const std::size_t colonPos = line.find(':');
        if (colonPos == std::string::npos || line.compare(0, colonPos, "MODEL") != 0)
        {
            continue; // not a MODEL record
        }
        const std::size_t openPos = line.find('{', colonPos + 1);
        if (openPos == std::string::npos)
        {
            continue; // record without a field block on this line
        }

        ModelData md;
        md.tagName = line.substr(colonPos + 1, openPos - colonPos - 1);
        md.tagName.erase(std::remove(md.tagName.begin(), md.tagName.end(), '"'), md.tagName.end());

        // The field list ends at the first '}' or, if it is missing, at the
        // end of the line.
        const std::size_t closePos = line.find('}', openPos + 1);
        const std::string fields = (closePos == std::string::npos)
            ? line.substr(openPos + 1)
            : line.substr(openPos + 1, closePos - openPos - 1);

        // Walk the ';'-separated fields with find/substr. Each field is offered
        // to all three tags, so field order does not matter and an absent field
        // simply leaves its member empty. The strtok_s version returned nullptr
        // for an absent field, and assigning nullptr to a std::string is
        // undefined behaviour.
        std::size_t begin = 0;
        while (begin < fields.size())
        {
            std::size_t end = fields.find(';', begin);
            if (end == std::string::npos)
            {
                end = fields.size();
            }
            if (end > begin) // skip empty fields such as ";;"
            {
                const std::string field = fields.substr(begin, end - begin);
                GetStringValue(field, "FILEPATH", md.filePath);
                GetStringValue(field, "TEXTUREPATH", md.texturePath);
                GetStringValue(field, "NORMALPATH", md.normalPath);
            }
            begin = end + 1;
        }
        g_modelData.push_back(md);
    }
    infile.close();
    system("pause");
    return 0;
}
但是,如果我把配置文件格式化成下面这样,会怎么样:
MODEL: "modelname1"
{
FILEPATH = "FILEPATH1";
TEXTUREPATH = "TEXTUREPATH1";
NORMALPATH = "NORMALPATH1";
}
MODEL:"modelname2"
{
FILEPATH = "FILEPATH2";
TEXTUREPATH = "TEXTUREPATH2";
NORMALPATH = "NORMALPATH2";
}
那么按行读取的 getline 就行不通了,我只能逐个字符地解析……
所以我想问,有什么办法可以更快地支持上述格式变化?我想在程序中提取这些数据,并用它们来加载资源。
如果重新设计这个配置文件的格式能让流程更顺畅,我也可以接受。
查询更新:
想了解更多关于如何解析嵌套块和大括号的信息,但在google 搜索中没有找到太多信息。我的配置文件应该看起来像...
MODEL: "modelname1"
{
FILEPATH = "FILEPATH1";
TEXTUREPATH = "TEXTUREPATH1";
NORMALPATH = "NORMALPATH1";
PLACEHOLDER =
{
DATA0 = true//1
DATA1 = 1.0f, 1.0f, 1.0f;
DATA2 = 1.0f, 1.0f, 1.0f;
DATA3 = 1.0f, 1.0f, 1.0f;
}
}
应该读入
// Desired in-memory form of one MODEL record once the nested PLACEHOLDER
// block is parsed too. Vec3 is not declared anywhere in this snippet.
struct ModelData
{
    std::string tagName;
    std::string filePath;
    std::string texturePath;
    std::string normalPath;
    bool Data0;  // presumably the DATA0 entry of the PLACEHOLDER block - confirm
    Vec3 Data1;  // presumably the three floats of DATA1 - confirm
    Vec3 Data2;  // presumably the three floats of DATA2 - confirm
    Vec3 Data3;  // presumably the three floats of DATA3 - confirm
};
您可以使用正则表达式来匹配字符串并从中提取数据。
正则表达式是标准库的一部分,就像 std::string 和 std::vector 一样。
这是一个例子:
#include <iostream>
#include <regex>
#include <string>
#include <sstream>
// One model record as filled in by load_from_stram().
struct ModelData
{
    std::string tagName;      // text captured after "MODEL = "
    std::string filePath;     // text captured after "FILEPATH = "
    std::string texturePath;  // text captured after "TEXTUREPATH = "
    std::string normalPath;   // text captured after "NORMALPATH = "
};
// Stream inserter for ModelData: writes the tag name, then the three paths
// (one per line, each prefixed with a space), then an empty line, and flushes
// the stream. Returns the stream so calls can be chained.
std::ostream& operator<<(std::ostream& os, const ModelData& data)
{
    os << "Model = " << data.tagName << "\n"
       << " filePath = " << data.filePath << "\n"
       << " texturePath = " << data.texturePath << "\n"
       << " normalPath = " << data.normalPath << "\n"
       << std::endl;
    return os;
}
// Parses model records from any input stream, so the same code works for a
// file stream and for the string stream used in the example.
//
// A record starts on a line containing "MODEL = <name>". The following lines
// (at most three) are scanned for "FILEPATH = ", "TEXTUREPATH = " and
// "NORMALPATH = " in any order. A field that does not appear stays empty.
// Returns the records in the order they were found.
//
// Regex tutorial: https://regexone.com/ - online tester: https://regex101.com/
auto load_from_stram(std::istream& ifile)
{
    // The part between ( ) is a capture group and holds the value.
    // .+ will match one or more of any character.
    static const std::regex model_rx{ "MODEL = (.+)" };
    static const std::regex filepath_rx{ "FILEPATH = (.+)" };
    static const std::regex texturepath_rx{ "TEXTUREPATH = (.+)" };
    static const std::regex normalpath_rx{ "NORMALPATH = (.+)" };

    std::vector<ModelData> models;
    std::string line;
    std::smatch match;

    // Read until end of input.
    while (std::getline(ifile, line))
    {
        if (!std::regex_search(line, match, model_rx))
        {
            continue; // not the start of a record
        }

        // A fresh record per MODEL line: with one shared object, a field that
        // is missing here would silently keep the previous model's value.
        ModelData data;
        // match[0] is the full match, match[1] the first capture group.
        data.tagName = match[1];

        // Stop as soon as the input runs out; otherwise the stale contents of
        // `line` would be scanned again.
        for (std::size_t n = 0; n < 3 && std::getline(ifile, line); ++n)
        {
            if (std::regex_search(line, match, filepath_rx)) data.filePath = match[1];
            if (std::regex_search(line, match, texturepath_rx)) data.texturePath = match[1];
            if (std::regex_search(line, match, normalpath_rx)) data.normalPath = match[1];
        }
        models.push_back(data);
    }
    return models;
}
// Demo driver: runs two hard-coded records through load_from_stram() and
// prints every parsed model to standard output.
int main()
{
    // Adjacent literals are joined by the compiler, giving the same text as a
    // single literal continued with a trailing backslash.
    std::istringstream ifile{
        "MODEL = modelname1\nFILEPATH = FILEPATH1\nTEXTUREPATH = TEXTUREPATH1\nNORMALPATH = NORMALPATH1\n "
        "MODEL = modelname2\nFILEPATH = FILEPATH2\nTEXTUREPATH = TEXTUREPATH2\nNORMALPATH = NORMALPATH2\n" };

    const auto models = load_from_stram(ifile);
    for (auto it = models.begin(); it != models.end(); ++it)
    {
        std::cout << *it;
    }
}
既然你想学习基础知识,我就只给出提纲式的回答。请注意,如果你想在配置语言中支持块嵌套,下面两种方法都行不通;那种情况下,你需要使用 boost::spirit 之类的语法解析库。
使用 getline 一次提取一个块:
// Read one block: the text before '{' is the header (for example
// MODEL: "modelname1"), the text up to the next '}' is the block body.
std::string model_name;
std::string block_contents;
std::getline(infile, model_name, '{');
std::getline(infile, block_contents, '}');
使用正则表达式做同样的事情:
// Matches one whole block: group 1 is the quoted model name, group 2 the text
// between the braces. Nested braces are not supported.
// - The raw string needs a custom delimiter (rx) because the pattern itself
//   contains the sequence )" which would otherwise end the literal early.
// - The body is [^}]* rather than .*? because '.' does not match newlines, so
//   a block spread over several lines would never match.
// - Declared as std::regex (needs <regex>) so it can be passed directly to
//   std::regex_search / std::regex_iterator.
const std::regex block_regex{ R"rx(MODEL\s*:\s*"(.*?)"\s*\{([^}]*)\})rx" };
然后可以用 std::regex_iterator 按顺序取出每个块。
解析块内容
将 std::regex_iterator 与正则表达式 (\S+)\s*=\s*(".*?"); 结合使用,并把结果存入 std::map。
我刚刚偶然发现了一个关于解析配置文件的很棒的教程,作者使用了分词器(tokenizer),逐个字符地分析输入并生成 token。
我想知道生成 token 和编写分词器背后的思路是什么。
我正在尝试解析这个配置文件...
MODEL: "modelname1" { FILEPATH = "FILEPATH1"; TEXTUREPATH = "TEXTUREPATH1"; NORMALPATH = "NORMALPATH1"; }
MODEL:"modelname2"{FILEPATH = "FILEPATH2";TEXTUREPATH = "TEXTUREPATH2";NORMALPATH = "NORMALPATH2";}
这是我的尝试:
#include <iostream>
#include <map>
#include <fstream>
#include <vector>
#include <string>
using namespace std;
// One MODEL record read from the configuration file.
struct ModelData
{
    std::string tagName;      // name that follows "MODEL:", quotes removed
    std::string filePath;     // value of the FILEPATH entry
    std::string texturePath;  // value of the TEXTUREPATH entry
    std::string normalPath;   // value of the NORMALPATH entry
};
// Every MODEL record parsed by main(), in the order the lines were read.
std::vector<ModelData> g_modelData;
// Extracts the value of a `TAG=value` field when its tag matches.
//
// _source     one field with whitespace already removed, e.g. FILEPATH="a.obj"
// _tagName    tag the caller is looking for, e.g. "FILEPATH"
// _outStrVal  receives the value with every '"' removed; left untouched when
//             _source has no '=' or when its tag differs from _tagName
void GetStringValue( const std::string& _source, const std::string& _tagName, std::string& _outStrVal )
{
    const std::size_t equalPos = _source.find('=');
    if (equalPos == std::string::npos)
    {
        // No '=' means no value. Without this guard `equalPos + 1` wraps to 0
        // and the tag text itself would be handed back as the value.
        return;
    }

    std::string tagName = _source.substr(0, equalPos);
    tagName.erase(std::remove(tagName.begin(), tagName.end(), '"'), tagName.end());
    if (tagName != _tagName)
    {
        return;
    }

    // Build the result in a local first so the call stays correct even if the
    // caller passes the same string as input and output.
    std::string value = _source.substr(equalPos + 1);
    value.erase(std::remove(value.begin(), value.end(), '"'), value.end());
    _outStrVal = value;
}
// Loads "modeldata.txt" and appends one ModelData to g_modelData for every
// line shaped like
//   MODEL: "name" { FILEPATH = "..."; TEXTUREPATH = "..."; NORMALPATH = "..."; }
// Parsing is line-oriented: a record has to sit on a single line. Spaces and
// tabs are removed from the whole line, including inside quoted values.
// Returns 1 when the file cannot be opened, 0 otherwise.
int main()
{
    std::ifstream infile("modeldata.txt", std::ios::in);
    if (!infile.good())
    {
        std::cout << "Error opening file!" << std::endl;
        system("pause");
        return 1; // report the failure instead of carrying on with a dead stream
    }

    std::string line;
    // std::getline sizes `line` to exactly what was read. Reading into the
    // string's own buffer left stale bytes behind the terminator, and every
    // erase shrank the buffer so a later, longer line made the read fail.
    while (std::getline(infile, line))
    {
        line.erase(std::remove(line.begin(), line.end(), ' '), line.end());
        line.erase(std::remove(line.begin(), line.end(), '\t'), line.end());
        line.erase(std::remove(line.begin(), line.end(), '\r'), line.end()); // CRLF input

        const std::size_t colonPos = line.find(':');
        if (colonPos == std::string::npos || line.compare(0, colonPos, "MODEL") != 0)
        {
            continue; // not a MODEL record
        }
        const std::size_t openPos = line.find('{', colonPos + 1);
        if (openPos == std::string::npos)
        {
            continue; // record without a field block on this line
        }

        ModelData md;
        md.tagName = line.substr(colonPos + 1, openPos - colonPos - 1);
        md.tagName.erase(std::remove(md.tagName.begin(), md.tagName.end(), '"'), md.tagName.end());

        // The field list ends at the first '}' or, if it is missing, at the
        // end of the line.
        const std::size_t closePos = line.find('}', openPos + 1);
        const std::string fields = (closePos == std::string::npos)
            ? line.substr(openPos + 1)
            : line.substr(openPos + 1, closePos - openPos - 1);

        // Walk the ';'-separated fields with find/substr. Each field is offered
        // to all three tags, so field order does not matter and an absent field
        // simply leaves its member empty. The strtok_s version returned nullptr
        // for an absent field, and assigning nullptr to a std::string is
        // undefined behaviour.
        std::size_t begin = 0;
        while (begin < fields.size())
        {
            std::size_t end = fields.find(';', begin);
            if (end == std::string::npos)
            {
                end = fields.size();
            }
            if (end > begin) // skip empty fields such as ";;"
            {
                const std::string field = fields.substr(begin, end - begin);
                GetStringValue(field, "FILEPATH", md.filePath);
                GetStringValue(field, "TEXTUREPATH", md.texturePath);
                GetStringValue(field, "NORMALPATH", md.normalPath);
            }
            begin = end + 1;
        }
        g_modelData.push_back(md);
    }
    infile.close();
    system("pause");
    return 0;
}
但是,如果我把配置文件格式化成下面这样,会怎么样:
MODEL: "modelname1"
{
FILEPATH = "FILEPATH1";
TEXTUREPATH = "TEXTUREPATH1";
NORMALPATH = "NORMALPATH1";
}
MODEL:"modelname2"
{
FILEPATH = "FILEPATH2";
TEXTUREPATH = "TEXTUREPATH2";
NORMALPATH = "NORMALPATH2";
}
那么按行读取的 getline 就行不通了,我只能逐个字符地解析……
所以我想问,有什么办法可以更快地支持上述格式变化?我想在程序中提取这些数据,并用它们来加载资源。
如果重新设计这个配置文件的格式能让流程更顺畅,我也可以接受。
查询更新:
想了解更多关于如何解析嵌套块和大括号的信息,但在google 搜索中没有找到太多信息。我的配置文件应该看起来像...
MODEL: "modelname1"
{
FILEPATH = "FILEPATH1";
TEXTUREPATH = "TEXTUREPATH1";
NORMALPATH = "NORMALPATH1";
PLACEHOLDER =
{
DATA0 = true//1
DATA1 = 1.0f, 1.0f, 1.0f;
DATA2 = 1.0f, 1.0f, 1.0f;
DATA3 = 1.0f, 1.0f, 1.0f;
}
}
应该读入
// Desired in-memory form of one MODEL record once the nested PLACEHOLDER
// block is parsed too. Vec3 is not declared anywhere in this snippet.
struct ModelData
{
    std::string tagName;
    std::string filePath;
    std::string texturePath;
    std::string normalPath;
    bool Data0;  // presumably the DATA0 entry of the PLACEHOLDER block - confirm
    Vec3 Data1;  // presumably the three floats of DATA1 - confirm
    Vec3 Data2;  // presumably the three floats of DATA2 - confirm
    Vec3 Data3;  // presumably the three floats of DATA3 - confirm
};
您可以使用正则表达式来匹配字符串并从中提取数据。 正则表达式是标准库的一部分,就像 std::string 和 std::vector 一样。 这是一个例子:
#include <iostream>
#include <regex>
#include <string>
#include <sstream>
// One model record as filled in by load_from_stram().
struct ModelData
{
    std::string tagName;      // text captured after "MODEL = "
    std::string filePath;     // text captured after "FILEPATH = "
    std::string texturePath;  // text captured after "TEXTUREPATH = "
    std::string normalPath;   // text captured after "NORMALPATH = "
};
// Stream inserter for ModelData: writes the tag name, then the three paths
// (one per line, each prefixed with a space), then an empty line, and flushes
// the stream. Returns the stream so calls can be chained.
std::ostream& operator<<(std::ostream& os, const ModelData& data)
{
    os << "Model = " << data.tagName << "\n"
       << " filePath = " << data.filePath << "\n"
       << " texturePath = " << data.texturePath << "\n"
       << " normalPath = " << data.normalPath << "\n"
       << std::endl;
    return os;
}
// Parses model records from any input stream, so the same code works for a
// file stream and for the string stream used in the example.
//
// A record starts on a line containing "MODEL = <name>". The following lines
// (at most three) are scanned for "FILEPATH = ", "TEXTUREPATH = " and
// "NORMALPATH = " in any order. A field that does not appear stays empty.
// Returns the records in the order they were found.
//
// Regex tutorial: https://regexone.com/ - online tester: https://regex101.com/
auto load_from_stram(std::istream& ifile)
{
    // The part between ( ) is a capture group and holds the value.
    // .+ will match one or more of any character.
    static const std::regex model_rx{ "MODEL = (.+)" };
    static const std::regex filepath_rx{ "FILEPATH = (.+)" };
    static const std::regex texturepath_rx{ "TEXTUREPATH = (.+)" };
    static const std::regex normalpath_rx{ "NORMALPATH = (.+)" };

    std::vector<ModelData> models;
    std::string line;
    std::smatch match;

    // Read until end of input.
    while (std::getline(ifile, line))
    {
        if (!std::regex_search(line, match, model_rx))
        {
            continue; // not the start of a record
        }

        // A fresh record per MODEL line: with one shared object, a field that
        // is missing here would silently keep the previous model's value.
        ModelData data;
        // match[0] is the full match, match[1] the first capture group.
        data.tagName = match[1];

        // Stop as soon as the input runs out; otherwise the stale contents of
        // `line` would be scanned again.
        for (std::size_t n = 0; n < 3 && std::getline(ifile, line); ++n)
        {
            if (std::regex_search(line, match, filepath_rx)) data.filePath = match[1];
            if (std::regex_search(line, match, texturepath_rx)) data.texturePath = match[1];
            if (std::regex_search(line, match, normalpath_rx)) data.normalPath = match[1];
        }
        models.push_back(data);
    }
    return models;
}
// Demo driver: runs two hard-coded records through load_from_stram() and
// prints every parsed model to standard output.
int main()
{
    // Adjacent literals are joined by the compiler, giving the same text as a
    // single literal continued with a trailing backslash.
    std::istringstream ifile{
        "MODEL = modelname1\nFILEPATH = FILEPATH1\nTEXTUREPATH = TEXTUREPATH1\nNORMALPATH = NORMALPATH1\n "
        "MODEL = modelname2\nFILEPATH = FILEPATH2\nTEXTUREPATH = TEXTUREPATH2\nNORMALPATH = NORMALPATH2\n" };

    const auto models = load_from_stram(ifile);
    for (auto it = models.begin(); it != models.end(); ++it)
    {
        std::cout << *it;
    }
}
既然你想学习基础知识,我就只给出提纲式的回答。请注意,如果你想在配置语言中支持块嵌套,下面两种方法都行不通;那种情况下,你需要使用 boost::spirit 之类的语法解析库。
使用 getline 一次提取一个块:
// Read one block: the text before '{' is the header (for example
// MODEL: "modelname1"), the text up to the next '}' is the block body.
std::string model_name;
std::string block_contents;
std::getline(infile, model_name, '{');
std::getline(infile, block_contents, '}');
使用正则表达式做同样的事情:
// Matches one whole block: group 1 is the quoted model name, group 2 the text
// between the braces. Nested braces are not supported.
// - The raw string needs a custom delimiter (rx) because the pattern itself
//   contains the sequence )" which would otherwise end the literal early.
// - The body is [^}]* rather than .*? because '.' does not match newlines, so
//   a block spread over several lines would never match.
// - Declared as std::regex (needs <regex>) so it can be passed directly to
//   std::regex_search / std::regex_iterator.
const std::regex block_regex{ R"rx(MODEL\s*:\s*"(.*?)"\s*\{([^}]*)\})rx" };
然后可以用 std::regex_iterator 按顺序取出每个块。
解析块内容
将 std::regex_iterator 与正则表达式 (\S+)\s*=\s*(".*?"); 结合使用,并把结果存入 std::map。
我刚刚偶然发现了一个关于解析配置文件的很棒的教程,作者使用了分词器(tokenizer),逐个字符地分析输入并生成 token。
我想知道生成 token 和编写分词器背后的思路是什么。