c++:有没有更快的方法来获取 map/unordered_map 的交集?
c++ : Is there a faster way to get the intersection of map/unordered_map?
在 c++ 中是否有更快的方法,以便我可以胜过 python 的实现?
- 获取两个 map/unordered_map 键的交集
- 对于这些相交的键,计算它们各自的元素之间的成对差异 set/unordered_set
一些可能有用的信息:
- hash_DICT1 有大约 O(10000) 个键,每个集合中大约有 O(10) 个元素。
- hash_DICT2 有大约 O(1000) 个键,每个集合中大约有 O(1) 个元素。
例如:
map <int,set<int>> hash_DICT1;
hash_DICT1[1] = {1,2,3};
hash_DICT1[2] = {4,5,6};
map <int,set<int>> hash_DICT2;
hash_DICT2[1] = {11,12,13};
hash_DICT2[3] = {4,5,6};
vector<int> output_vector
= GetPairDiff(hash_DICT1, hash_DICT2)
= [11-1,12-1,13-1,
11-2,12-2,13-2,
11-3,12-3,13-3] // only hashkey=1 is intersect, so only compute pairwise difference of the respective set elements.
= [10, 11, 12,
9, 10, 11,
8, 9, 10] // Note that i do want to keep duplicates, if any. Order does not matter.
GetPairDiff
函数。
vector<int> GetPairDiff(
unordered_map <int, set<int>> &hash_DICT1,
unordered_map <int, set<int>> &hash_DICT2) {
// Init
vector<int> output_vector;
int curr_key;
set<int> curr_set1, curr_set2;
// Get intersection
for (const auto &KEY_SET:hash_DICT2) {
curr_key = KEY_SET.first;
// Find pairwise difference
if (hash_DICT1.count(curr_key) > 0){
curr_set1 = hash_DICT1[curr_key];
curr_set2 = hash_DICT2[curr_key];
for (auto it1=curr_set1.begin(); it1 != curr_set1.end(); ++it1) {
for (auto it2=curr_set2.begin(); it2 != curr_set2.end(); ++it2) {
output_vector.push_back(*it2 - *it1);
}
}
}
}
}
主要运行
int main (int argc, char ** argv) {
// Using unordered_map
unordered_map <int,set<int>> hash_DICT_1;
hash_DICT_1[1] = {1,2,3};
hash_DICT_1[2] = {4,5,6};
unordered <int,set<int>> hash_DICT_2;
hash_DICT_2[1] = {11,12,13};
hash_DICT_2[3] = {4,5,6};
GetPairDiff(hash_DICT_1, hash_DICT_1);
}
这样编译
g++ -o ./CompareRunTime.out -Ofast -Wall -Wextra -std=c++11
欢迎使用其他数据结构,例如map
或unordered_set
。
但是我确实尝试了所有 4 种排列,发现 GetPairDiff
运行 给出的排列最快,但远不及 python 的实现:
hash_DICT1 = { 1 : {1,2,3}, 2 : {4,5,6} }
hash_DICT2 = { 1 : {11,12,13}, 3 : {4,5,6} }
def GetPairDiff(hash_DICT1, hash_DICT2):
vector = []
for element in hash_DICT1.keys() & hash_DICT2.keys():
vector.extend(
[db_t-qry_t
for qry_t in hash_DICT2[element]
for db_t in hash_DICT1[element] ])
return vector
output_vector = GetPairDiff(hash_DICT1, hash_DICT2)
性能比较:
python : 0.00824 s
c++ : 0.04286 s
用c++实现大约是所用时间的5倍!!!
- 你在应该使用的地方做了很多复制
const&
。
- 您不保存搜索结果。您应该使用
find
而不是 count
然后使用结果。
push_back
到 vector
可以通过 reserve()
如果您事先知道需要存储的元素数量来加快速度。
解决这些问题可能会导致类似这样的结果(需要 C++17):
#include <iostream>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
using container = std::unordered_map<int, std::unordered_set<int>>;
std::vector<int> GetPairDiff(const container& hash_DICT1,
const container& hash_DICT2) {
// Init
std::vector<int> output_vector;
// Get intersection
for(auto& [curr_key2, curr_set2] : hash_DICT2) {
// use find() instead of count()
if(auto it1 = hash_DICT1.find(curr_key2); it1 != hash_DICT1.end()) {
auto& curr_set1 = it1->second;
// Reserve the space you know you'll need for this iteration. Note:
// This might be a pessimizing optimization so try with and without it.
output_vector.reserve(curr_set1.size() * curr_set2.size() +
output_vector.size());
// Calculate pairwise difference
for(auto& s1v : curr_set1) {
for(auto& s2v : curr_set2) {
output_vector.emplace_back(s2v - s1v);
}
}
}
}
return output_vector;
}
int main() {
container hash_DICT1{{1, {1, 2, 3}},
{2, {4, 5, 6}}};
container hash_DICT2{{1, {11, 12, 13}},
{3, {4, 5, 6}}};
auto result = GetPairDiff(hash_DICT1, hash_DICT2);
for(int v : result) {
std::cout << v << '\n';
}
}
对于我计算机上使用 g++ -std=c++17 -O3
编译的这些容器,这是 python 版本的 8 倍多。
这是同一程序的 C++11 版本:
#include <iostream>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
using container = std::unordered_map<int, std::unordered_set<int>>;
std::vector<int> GetPairDiff(const container& hash_DICT1,
const container& hash_DICT2) {
// Init
std::vector<int> output_vector;
// Get intersection
for(auto& curr_pair2 : hash_DICT2) {
auto& curr_key2 = curr_pair2.first;
auto& curr_set2 = curr_pair2.second;
// use find() instead of count()
auto it1 = hash_DICT1.find(curr_key2);
if(it1 != hash_DICT1.end()) {
auto& curr_set1 = it1->second;
// Reserve the space you know you'll need for this iteration. Note:
// This might be a pessimizing optimization so try with and without it.
output_vector.reserve(curr_set1.size() * curr_set2.size() +
output_vector.size());
// Calculate pairwise difference
for(auto& s1v : curr_set1) {
for(auto& s2v : curr_set2) {
output_vector.emplace_back(s2v - s1v);
}
}
}
}
return output_vector;
}
int main() {
container hash_DICT1{{1, {1, 2, 3}},
{2, {4, 5, 6}}};
container hash_DICT2{{1, {11, 12, 13}},
{3, {4, 5, 6}}};
auto result = GetPairDiff(hash_DICT1, hash_DICT2);
for(int v : result) {
std::cout << v << '\n';
}
}
在 c++ 中是否有更快的方法,以便我可以胜过 python 的实现?
- 获取两个 map/unordered_map 键的交集
- 对于这些相交的键,计算它们各自的元素之间的成对差异 set/unordered_set 一些可能有用的信息:
- hash_DICT1 有大约 O(10000) 个键,每个集合中大约有 O(10) 个元素。
- hash_DICT2 有大约 O(1000) 个键,每个集合中大约有 O(1) 个元素。
例如:
map <int,set<int>> hash_DICT1;
hash_DICT1[1] = {1,2,3};
hash_DICT1[2] = {4,5,6};
map <int,set<int>> hash_DICT2;
hash_DICT2[1] = {11,12,13};
hash_DICT2[3] = {4,5,6};
vector<int> output_vector
= GetPairDiff(hash_DICT1, hash_DICT2)
= [11-1,12-1,13-1,
11-2,12-2,13-2,
11-3,12-3,13-3] // only hashkey=1 is intersect, so only compute pairwise difference of the respective set elements.
= [10, 11, 12,
9, 10, 11,
8, 9, 10] // Note that i do want to keep duplicates, if any. Order does not matter.
GetPairDiff
函数。
vector<int> GetPairDiff(
unordered_map <int, set<int>> &hash_DICT1,
unordered_map <int, set<int>> &hash_DICT2) {
// Init
vector<int> output_vector;
int curr_key;
set<int> curr_set1, curr_set2;
// Get intersection
for (const auto &KEY_SET:hash_DICT2) {
curr_key = KEY_SET.first;
// Find pairwise difference
if (hash_DICT1.count(curr_key) > 0){
curr_set1 = hash_DICT1[curr_key];
curr_set2 = hash_DICT2[curr_key];
for (auto it1=curr_set1.begin(); it1 != curr_set1.end(); ++it1) {
for (auto it2=curr_set2.begin(); it2 != curr_set2.end(); ++it2) {
output_vector.push_back(*it2 - *it1);
}
}
}
}
}
主要运行
int main (int argc, char ** argv) {
// Using unordered_map
unordered_map <int,set<int>> hash_DICT_1;
hash_DICT_1[1] = {1,2,3};
hash_DICT_1[2] = {4,5,6};
unordered <int,set<int>> hash_DICT_2;
hash_DICT_2[1] = {11,12,13};
hash_DICT_2[3] = {4,5,6};
GetPairDiff(hash_DICT_1, hash_DICT_1);
}
这样编译
g++ -o ./CompareRunTime.out -Ofast -Wall -Wextra -std=c++11
欢迎使用其他数据结构,例如map
或unordered_set
。
但是我确实尝试了所有 4 种排列,发现 GetPairDiff
运行 给出的排列最快,但远不及 python 的实现:
hash_DICT1 = { 1 : {1,2,3}, 2 : {4,5,6} }
hash_DICT2 = { 1 : {11,12,13}, 3 : {4,5,6} }
def GetPairDiff(hash_DICT1, hash_DICT2):
vector = []
for element in hash_DICT1.keys() & hash_DICT2.keys():
vector.extend(
[db_t-qry_t
for qry_t in hash_DICT2[element]
for db_t in hash_DICT1[element] ])
return vector
output_vector = GetPairDiff(hash_DICT1, hash_DICT2)
性能比较:
python : 0.00824 s
c++ : 0.04286 s
用c++实现大约是所用时间的5倍!!!
- 你在应该使用的地方做了很多复制
const&
。 - 您不保存搜索结果。您应该使用
find
而不是count
然后使用结果。 push_back
到vector
可以通过reserve()
如果您事先知道需要存储的元素数量来加快速度。
解决这些问题可能会导致类似这样的结果(需要 C++17):
#include <iostream>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
using container = std::unordered_map<int, std::unordered_set<int>>;
std::vector<int> GetPairDiff(const container& hash_DICT1,
const container& hash_DICT2) {
// Init
std::vector<int> output_vector;
// Get intersection
for(auto& [curr_key2, curr_set2] : hash_DICT2) {
// use find() instead of count()
if(auto it1 = hash_DICT1.find(curr_key2); it1 != hash_DICT1.end()) {
auto& curr_set1 = it1->second;
// Reserve the space you know you'll need for this iteration. Note:
// This might be a pessimizing optimization so try with and without it.
output_vector.reserve(curr_set1.size() * curr_set2.size() +
output_vector.size());
// Calculate pairwise difference
for(auto& s1v : curr_set1) {
for(auto& s2v : curr_set2) {
output_vector.emplace_back(s2v - s1v);
}
}
}
}
return output_vector;
}
int main() {
container hash_DICT1{{1, {1, 2, 3}},
{2, {4, 5, 6}}};
container hash_DICT2{{1, {11, 12, 13}},
{3, {4, 5, 6}}};
auto result = GetPairDiff(hash_DICT1, hash_DICT2);
for(int v : result) {
std::cout << v << '\n';
}
}
对于我计算机上使用 g++ -std=c++17 -O3
编译的这些容器,这是 python 版本的 8 倍多。
这是同一程序的 C++11 版本:
#include <iostream>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
using container = std::unordered_map<int, std::unordered_set<int>>;
std::vector<int> GetPairDiff(const container& hash_DICT1,
const container& hash_DICT2) {
// Init
std::vector<int> output_vector;
// Get intersection
for(auto& curr_pair2 : hash_DICT2) {
auto& curr_key2 = curr_pair2.first;
auto& curr_set2 = curr_pair2.second;
// use find() instead of count()
auto it1 = hash_DICT1.find(curr_key2);
if(it1 != hash_DICT1.end()) {
auto& curr_set1 = it1->second;
// Reserve the space you know you'll need for this iteration. Note:
// This might be a pessimizing optimization so try with and without it.
output_vector.reserve(curr_set1.size() * curr_set2.size() +
output_vector.size());
// Calculate pairwise difference
for(auto& s1v : curr_set1) {
for(auto& s2v : curr_set2) {
output_vector.emplace_back(s2v - s1v);
}
}
}
}
return output_vector;
}
int main() {
container hash_DICT1{{1, {1, 2, 3}},
{2, {4, 5, 6}}};
container hash_DICT2{{1, {11, 12, 13}},
{3, {4, 5, 6}}};
auto result = GetPairDiff(hash_DICT1, hash_DICT2);
for(int v : result) {
std::cout << v << '\n';
}
}