使用 boost 文件系统在 C++ 中搜索部分文件名
Search partial filenames in C++ using boost filesystem
问题很简单,我想在一个目录中找到一个文件路径,但我只有部分文件名,所以这里是这个任务的函数
void getfiles(const fs::path& root, const string& ext, vector<fs::path>& ret)
{
if(!fs::exists(root) || !fs::is_directory(root)) return;
fs::recursive_directory_iterator it(root);
fs::recursive_directory_iterator endit;
while(it != endit)
{
if(fs::is_regular_file(*it)&&it->path().extension()==ext) ret.push_back(it->path());//
++it;
}
}
bool find_file(const filesystem::path& dir_path, const filesystem::path file_name, filesystem::path& path_found) {
const fs::recursive_directory_iterator end;
const auto it = find_if(fs::recursive_directory_iterator(dir_path), end,
[file_name](fs::path e) {
cerr<<boost::algorithm::icontains(e.filename().native() ,file_name.native())<<endl;
return boost::algorithm::icontains(e.filename().native() ,file_name.native());//
});
if (it == end) {
return false;
} else {
path_found = it->path();
return true;
}
}
int main (int argc, char* argv[])
{
vector<fs::path> inputClass ;
fs::path textFiles,datasetPath,imgpath;
textFiles=argv[1];
datasetPath=argv[2];
getfiles(textFiles,".txt",inputClass);
for (int i=0;i<inputClass.size();i++)
{
ifstream lblFile(inputClass[i].string().c_str());
string line;
fs::path classname=inputClass[i].parent_path()/inputClass[i].stem().string();
cerr<<classname.stem()<<endl;
while (getline(lblFile,line))
{
bool find=find_file(datasetPath,line,imgpath);
if (find)
{
while(!fs::exists(classname))
fs::create_directories (classname);
fs::copy(imgpath,classname/imgpath.filename());
cerr<<"Found\n";
}
else
cerr<<"Not Found \n";
}
lblFile.close();
}
}
控制台输出:
"490"
vfv343434.jpeg||E9408000EC0
0
fsdfdsfdfsf.jpeg||E9408000EC0
0
1200E9408000EC0.jpeg||E9408000EC0
0
Not Found
但是当我手动设置搜索字符串时它工作正常!我尝试了其他搜索字符串的方法,如 std::find 但所有方法都找不到子字符串,输入字符串(行)似乎有问题我打印了所有字符但没有特殊字符或任何东西。
如果我手动设置搜索字符串,它会按预期工作
string search="E9408000EC0";
cerr<<e.filename().native()<<"||"<<search<<endl;
cerr<<boost::algorithm::icontains(e.filename().native() ,search)<<endl;
以上变化的结果就像
"490"
vfv343434.jpeg||E9408000EC0
0
fsdfdsfdfsf.jpeg||E9408000EC0
0
1200E9408000EC0.jpeg||E9408000EC0
1
Found
我无法复制这个。
我唯一的预感是,在您的平台上,也许 string()
访问器不是 returning 纯字符串,而是例如引用的路径。那会破坏搜索。考虑改用 native()
访问器。
(事实上,由于file_name
不是路径,而是字符串模式,建议passing the argument as std::string__view
or similar instead。)
#include <boost/filesystem.hpp>
#include <boost/algorithm/string.hpp>
#include <iostream>
namespace fs = boost::filesystem;
template <typename Out>
void find_file(const fs::path& dir_path, const fs::path file_name, Out out) {
fs::recursive_directory_iterator it(dir_path), end;
std::copy_if(it, end, out, [file_name](fs::path e) {
return boost::algorithm::icontains(e.filename().native(),
file_name.native());
});
}
int main() {
fs::path d = "a/b/c/e";
fs::create_directories(d);
{
std::ofstream ofs(d / "1200E9408000EC0.jpeg");
}
std::cout << fs::path("000EC0").native() << "\n";
std::vector<fs::path> found;
find_file(".", "000EC0", back_inserter(found));
for (auto &f : found)
{
std::cout << "Found: " << f << "\n";
}
}
版画
000EC0
Found: "./a/b/c/e/1200E9408000EC0.jpeg"
更新:代码审查
对于更新后的问题,提出了一个稍微改进的测试器,它可以与 boost::filesystem
和 std::filesystem
一起使用。
有许多小改进(删除重复、显式转换、使用可选到 return 可选匹配等
还添加了一个空格 trim
以避免在输入行中出现多余的空格:
#include <boost/algorithm/string.hpp>
#include <fstream>
#include <iostream>
using boost::algorithm::icontains;
using boost::algorithm::trim;
#if defined(USE_BOOST_FS)
#include <boost/filesystem.hpp>
namespace fs = boost::filesystem;
using boost::system::error_code;
#else
#include <filesystem>
namespace fs = std::filesystem;
using std::error_code;
#endif
void getfiles(
const fs::path& root, const std::string& ext, std::vector<fs::path>& ret)
{
if (!exists(root) || !is_directory(root))
return;
for (fs::recursive_directory_iterator it(root), endit; it != endit; ++it) {
if (is_regular_file(*it) && it->path().extension() == ext)
ret.push_back(it->path()); //
}
}
std::optional<fs::path> find_file(const fs::path& dir_path, fs::path partial)
{
fs::recursive_directory_iterator end,
it = fs::recursive_directory_iterator(dir_path);
it = std::find_if(it, end, [partial](fs::path e) {
auto search = partial.native();
//std::cerr << e.filename().native() << "||" << search << std::endl;
auto matches = icontains(e.filename().native(), search);
std::cerr << e << " Matches: " << std::boolalpha << matches
<< std::endl;
return matches;
});
return (it != end)
? std::make_optional(it->path())
: std::nullopt;
}
auto readInputClass(fs::path const& textFiles)
{
std::vector<fs::path> found;
getfiles(textFiles, ".txt", found);
return found;
}
int main(int argc, char** argv)
{
std::vector<std::string> const args(argv, argv + argc);
auto const textFiles = readInputClass(args.at(1));
std::string const datasetPath = args.at(2);
for (fs::path classname : textFiles) {
// open the text file
std::ifstream lblFile(classname);
// use base without extension as output directory
classname.replace_extension();
if (!fs::exists(classname)) {
if (fs::create_directories(classname))
std::cerr << classname << " created" << std::endl;
}
for (std::string line; getline(lblFile, line);) {
trim(line);
if (auto found = find_file(datasetPath, line)) {
auto dest = classname / found->filename();
error_code ec;
copy(*found, dest, ec);
std::cerr << dest << " (" << ec.message() << ")\n";
} else {
std::cerr << "Not Found \n";
}
}
}
}
使用
从头开始测试
mkdir -pv textfiles dataset
touch dataset/{vfv343434,fsdfdsfdfsf,1200E9408000EC0}.jpeg
echo 'E9408000EC0 ' > textfiles/490.txt
运行
./a.out textfiles/ dataset/
版画
"textfiles/490" created
"dataset/1200E9408000EC0.jpeg" Matches: true
"textfiles/490/1200E9408000EC0.jpeg" (Success)
或随后 运行
"dataset/fsdfdsfdfsf.jpeg" Matches: false
"dataset/1200E9408000EC0.jpeg" Matches: true
"textfiles/490/1200E9408000EC0.jpeg" (File exists)
奖金
进行更多诊断并避免为每个模式重复遍历文件系统。现在的主程序是:
int main(int argc, char** argv)
{
std::vector<std::string> const args(argv, argv + argc);
Paths const classes = getfiles(args.at(1), ".txt");
Mappings map = readClassMappings(classes);
std::cout << "Procesing " << map.size() << " patterns from "
<< classes.size() << " classes" << std::endl;
processDatasetDir(args.at(2), map);
}
其余功能实现为:
// be smart about case insenstiive patterns
struct Pattern : std::string {
using std::string::string;
using std::string::operator=;
#ifdef __cpp_lib_three_way_comparison
std::weak_ordering operator<=>(Pattern const& other) const {
if (boost::ilexicographical_compare(*this, other)) {
return std::weak_ordering::less;
} else if (boost::ilexicographical_compare(other, *this)) {
return std::weak_ordering::less;
}
return std::weak_ordering::equivalent;
}
#else
bool operator<(Pattern const& other) const {
return boost::ilexicographical_compare(*this, other);
}
#endif
};
using Paths = std::vector<fs::path>;
using Mapping = std::pair<Pattern, fs::path>;
using Patterns = std::set<Pattern>;
using Mappings = std::set<Mapping>;
Mappings readClassMappings(Paths const& classes)
{
Mappings mappings;
for (fs::path classname : classes) {
std::ifstream lblFile(classname);
classname.replace_extension();
for (Pattern pattern; getline(lblFile, pattern);) {
trim(pattern);
if (auto [it, ok] = mappings.emplace(pattern, classname); !ok) {
std::cerr << "WARNING: " << std::quoted(pattern)
<< " duplicates " << std::quoted(it->first)
<< std::endl;
}
}
}
return mappings;
}
size_t processDatasetDir(const fs::path& datasetPath, Mappings const& patterns)
{
size_t copied = 0, failed = 0;
Patterns found;
using It = fs::recursive_directory_iterator;
for (It it = It(datasetPath), end; it != end; ++it) {
if (!it->is_regular_file())
continue;
fs::path const& entry = *it;
for (auto& [pattern, location]: patterns) {
if (icontains(it->path().filename().native(), pattern)) {
found.emplace(pattern);
if (!exists(location) && fs::create_directories(location))
std::cerr << location << " created" << std::endl;
auto dest = location / entry.filename();
error_code ec;
copy(entry, dest, ec);
std::cerr << dest << " (" << ec.message() << ") from "
<< std::quoted(pattern) << "\n";
(ec? failed : copied) += 1;
}
}
}
std::cout << "Copied:" << copied
<< ", missing:" << patterns.size() - found.size()
<< ", failed: " << failed << std::endl;
return copied;
}
加上一些更“随机”的测试数据:
mkdir -pv textfiles dataset
touch dataset/{vfv343434,fsdfdsfdfsf,1200E9408000EC0}.jpeg
echo .jPeg > textfiles/all_of_them.txt
echo $'E9408000EC0 \n e9408000ec0\nE9408\nbOgUs' > textfiles/490.txt
运行 为
./a.out textfiles/ dataset/
打印:
WARNING: "e9408000ec0" duplicates "E9408000EC0"
Procesing 4 patterns from 2 classes
"textfiles/all_of_them" created
"textfiles/all_of_them/1200E9408000EC0.jpeg" (Success) from ".jPeg"
"textfiles/490" created
"textfiles/490/1200E9408000EC0.jpeg" (Success) from "E9408"
"textfiles/490/1200E9408000EC0.jpeg" (File exists) from "E9408000EC0"
"textfiles/all_of_them/vfv343434.jpeg" (Success) from ".jPeg"
"textfiles/all_of_them/fsdfdsfdfsf.jpeg" (Success) from ".jPeg"
Copied:4, missing:1, failed: 1
问题很简单,我想在一个目录中找到一个文件路径,但我只有部分文件名,所以这里是这个任务的函数
void getfiles(const fs::path& root, const string& ext, vector<fs::path>& ret)
{
if(!fs::exists(root) || !fs::is_directory(root)) return;
fs::recursive_directory_iterator it(root);
fs::recursive_directory_iterator endit;
while(it != endit)
{
if(fs::is_regular_file(*it)&&it->path().extension()==ext) ret.push_back(it->path());//
++it;
}
}
bool find_file(const filesystem::path& dir_path, const filesystem::path file_name, filesystem::path& path_found) {
const fs::recursive_directory_iterator end;
const auto it = find_if(fs::recursive_directory_iterator(dir_path), end,
[file_name](fs::path e) {
cerr<<boost::algorithm::icontains(e.filename().native() ,file_name.native())<<endl;
return boost::algorithm::icontains(e.filename().native() ,file_name.native());//
});
if (it == end) {
return false;
} else {
path_found = it->path();
return true;
}
}
int main (int argc, char* argv[])
{
vector<fs::path> inputClass ;
fs::path textFiles,datasetPath,imgpath;
textFiles=argv[1];
datasetPath=argv[2];
getfiles(textFiles,".txt",inputClass);
for (int i=0;i<inputClass.size();i++)
{
ifstream lblFile(inputClass[i].string().c_str());
string line;
fs::path classname=inputClass[i].parent_path()/inputClass[i].stem().string();
cerr<<classname.stem()<<endl;
while (getline(lblFile,line))
{
bool find=find_file(datasetPath,line,imgpath);
if (find)
{
while(!fs::exists(classname))
fs::create_directories (classname);
fs::copy(imgpath,classname/imgpath.filename());
cerr<<"Found\n";
}
else
cerr<<"Not Found \n";
}
lblFile.close();
}
}
控制台输出:
"490"
vfv343434.jpeg||E9408000EC0
0
fsdfdsfdfsf.jpeg||E9408000EC0
0
1200E9408000EC0.jpeg||E9408000EC0
0
Not Found
但是当我手动设置搜索字符串时它工作正常!我尝试了其他搜索字符串的方法,如 std::find 但所有方法都找不到子字符串,输入字符串(行)似乎有问题我打印了所有字符但没有特殊字符或任何东西。 如果我手动设置搜索字符串,它会按预期工作
string search="E9408000EC0";
cerr<<e.filename().native()<<"||"<<search<<endl;
cerr<<boost::algorithm::icontains(e.filename().native() ,search)<<endl;
以上变化的结果就像
"490"
vfv343434.jpeg||E9408000EC0
0
fsdfdsfdfsf.jpeg||E9408000EC0
0
1200E9408000EC0.jpeg||E9408000EC0
1
Found
我无法复制这个。
我唯一的预感是,在您的平台上,也许 string()
访问器不是 returning 纯字符串,而是例如引用的路径。那会破坏搜索。考虑改用 native()
访问器。
(事实上,由于file_name
不是路径,而是字符串模式,建议passing the argument as std::string__view
or similar instead。)
#include <boost/filesystem.hpp>
#include <boost/algorithm/string.hpp>
#include <iostream>
namespace fs = boost::filesystem;
template <typename Out>
void find_file(const fs::path& dir_path, const fs::path file_name, Out out) {
fs::recursive_directory_iterator it(dir_path), end;
std::copy_if(it, end, out, [file_name](fs::path e) {
return boost::algorithm::icontains(e.filename().native(),
file_name.native());
});
}
int main() {
fs::path d = "a/b/c/e";
fs::create_directories(d);
{
std::ofstream ofs(d / "1200E9408000EC0.jpeg");
}
std::cout << fs::path("000EC0").native() << "\n";
std::vector<fs::path> found;
find_file(".", "000EC0", back_inserter(found));
for (auto &f : found)
{
std::cout << "Found: " << f << "\n";
}
}
版画
000EC0
Found: "./a/b/c/e/1200E9408000EC0.jpeg"
更新:代码审查
对于更新后的问题,提出了一个稍微改进的测试器,它可以与 boost::filesystem
和 std::filesystem
一起使用。
有许多小改进(删除重复、显式转换、使用可选到 return 可选匹配等
还添加了一个空格 trim
以避免在输入行中出现多余的空格:
#include <boost/algorithm/string.hpp>
#include <fstream>
#include <iostream>
using boost::algorithm::icontains;
using boost::algorithm::trim;
#if defined(USE_BOOST_FS)
#include <boost/filesystem.hpp>
namespace fs = boost::filesystem;
using boost::system::error_code;
#else
#include <filesystem>
namespace fs = std::filesystem;
using std::error_code;
#endif
void getfiles(
const fs::path& root, const std::string& ext, std::vector<fs::path>& ret)
{
if (!exists(root) || !is_directory(root))
return;
for (fs::recursive_directory_iterator it(root), endit; it != endit; ++it) {
if (is_regular_file(*it) && it->path().extension() == ext)
ret.push_back(it->path()); //
}
}
std::optional<fs::path> find_file(const fs::path& dir_path, fs::path partial)
{
fs::recursive_directory_iterator end,
it = fs::recursive_directory_iterator(dir_path);
it = std::find_if(it, end, [partial](fs::path e) {
auto search = partial.native();
//std::cerr << e.filename().native() << "||" << search << std::endl;
auto matches = icontains(e.filename().native(), search);
std::cerr << e << " Matches: " << std::boolalpha << matches
<< std::endl;
return matches;
});
return (it != end)
? std::make_optional(it->path())
: std::nullopt;
}
auto readInputClass(fs::path const& textFiles)
{
std::vector<fs::path> found;
getfiles(textFiles, ".txt", found);
return found;
}
int main(int argc, char** argv)
{
std::vector<std::string> const args(argv, argv + argc);
auto const textFiles = readInputClass(args.at(1));
std::string const datasetPath = args.at(2);
for (fs::path classname : textFiles) {
// open the text file
std::ifstream lblFile(classname);
// use base without extension as output directory
classname.replace_extension();
if (!fs::exists(classname)) {
if (fs::create_directories(classname))
std::cerr << classname << " created" << std::endl;
}
for (std::string line; getline(lblFile, line);) {
trim(line);
if (auto found = find_file(datasetPath, line)) {
auto dest = classname / found->filename();
error_code ec;
copy(*found, dest, ec);
std::cerr << dest << " (" << ec.message() << ")\n";
} else {
std::cerr << "Not Found \n";
}
}
}
}
使用
从头开始测试mkdir -pv textfiles dataset
touch dataset/{vfv343434,fsdfdsfdfsf,1200E9408000EC0}.jpeg
echo 'E9408000EC0 ' > textfiles/490.txt
运行
./a.out textfiles/ dataset/
版画
"textfiles/490" created
"dataset/1200E9408000EC0.jpeg" Matches: true
"textfiles/490/1200E9408000EC0.jpeg" (Success)
或随后 运行
"dataset/fsdfdsfdfsf.jpeg" Matches: false
"dataset/1200E9408000EC0.jpeg" Matches: true
"textfiles/490/1200E9408000EC0.jpeg" (File exists)
奖金
进行更多诊断并避免为每个模式重复遍历文件系统。现在的主程序是:
int main(int argc, char** argv)
{
std::vector<std::string> const args(argv, argv + argc);
Paths const classes = getfiles(args.at(1), ".txt");
Mappings map = readClassMappings(classes);
std::cout << "Procesing " << map.size() << " patterns from "
<< classes.size() << " classes" << std::endl;
processDatasetDir(args.at(2), map);
}
其余功能实现为:
// be smart about case insenstiive patterns
struct Pattern : std::string {
using std::string::string;
using std::string::operator=;
#ifdef __cpp_lib_three_way_comparison
std::weak_ordering operator<=>(Pattern const& other) const {
if (boost::ilexicographical_compare(*this, other)) {
return std::weak_ordering::less;
} else if (boost::ilexicographical_compare(other, *this)) {
return std::weak_ordering::less;
}
return std::weak_ordering::equivalent;
}
#else
bool operator<(Pattern const& other) const {
return boost::ilexicographical_compare(*this, other);
}
#endif
};
using Paths = std::vector<fs::path>;
using Mapping = std::pair<Pattern, fs::path>;
using Patterns = std::set<Pattern>;
using Mappings = std::set<Mapping>;
Mappings readClassMappings(Paths const& classes)
{
Mappings mappings;
for (fs::path classname : classes) {
std::ifstream lblFile(classname);
classname.replace_extension();
for (Pattern pattern; getline(lblFile, pattern);) {
trim(pattern);
if (auto [it, ok] = mappings.emplace(pattern, classname); !ok) {
std::cerr << "WARNING: " << std::quoted(pattern)
<< " duplicates " << std::quoted(it->first)
<< std::endl;
}
}
}
return mappings;
}
size_t processDatasetDir(const fs::path& datasetPath, Mappings const& patterns)
{
size_t copied = 0, failed = 0;
Patterns found;
using It = fs::recursive_directory_iterator;
for (It it = It(datasetPath), end; it != end; ++it) {
if (!it->is_regular_file())
continue;
fs::path const& entry = *it;
for (auto& [pattern, location]: patterns) {
if (icontains(it->path().filename().native(), pattern)) {
found.emplace(pattern);
if (!exists(location) && fs::create_directories(location))
std::cerr << location << " created" << std::endl;
auto dest = location / entry.filename();
error_code ec;
copy(entry, dest, ec);
std::cerr << dest << " (" << ec.message() << ") from "
<< std::quoted(pattern) << "\n";
(ec? failed : copied) += 1;
}
}
}
std::cout << "Copied:" << copied
<< ", missing:" << patterns.size() - found.size()
<< ", failed: " << failed << std::endl;
return copied;
}
加上一些更“随机”的测试数据:
mkdir -pv textfiles dataset
touch dataset/{vfv343434,fsdfdsfdfsf,1200E9408000EC0}.jpeg
echo .jPeg > textfiles/all_of_them.txt
echo $'E9408000EC0 \n e9408000ec0\nE9408\nbOgUs' > textfiles/490.txt
运行 为
./a.out textfiles/ dataset/
打印:
WARNING: "e9408000ec0" duplicates "E9408000EC0"
Procesing 4 patterns from 2 classes
"textfiles/all_of_them" created
"textfiles/all_of_them/1200E9408000EC0.jpeg" (Success) from ".jPeg"
"textfiles/490" created
"textfiles/490/1200E9408000EC0.jpeg" (Success) from "E9408"
"textfiles/490/1200E9408000EC0.jpeg" (File exists) from "E9408000EC0"
"textfiles/all_of_them/vfv343434.jpeg" (Success) from ".jPeg"
"textfiles/all_of_them/fsdfdsfdfsf.jpeg" (Success) from ".jPeg"
Copied:4, missing:1, failed: 1