在多个查询的子数组中查找不同(唯一)值的数量
Finding number of distinct (unique) values in a sub-array for multiple queries
我有一个数组(可以有 2X10^5 个值)。我想对该数组执行大量查询。每个查询都是 [L,R] 类型,这个查询的结果应该是子数组中从索引 L 开始到索引 R 结束的唯一值的数量。
我知道这可以使用 Mo 的算法在 O(nrootn) 时间内完成。然而问题在于 Mo 的算法是一种离线算法。我正在寻找的是一个在线算法,因为上一个查询的结果决定了我的下一个查询。
我试着用它来形成一个线段树,其中的节点将存储范围内的所有不同元素。然而,这对我的目的来说太慢了。这种方法的预处理花费了太多时间。
这是我的 C++ 尝试解决方案(也发布了 here) using a Wavelet tree, implemented with code adapted from https://www.geeksforgeeks.org/wavelet-trees-introduction. The idea to reformulate the problem (as Photon 一个 link 到)是首先构造一个数组,列出原始数组中每个对应的单元格,下一个单元格的索引右边的重复元素。那么问题就变成了找到区间中有多少单元格具有超出当前区间的"next-index"(那些显然在区间内没有重复),可以用装饰小波树查询。请参阅底部的(非零基)查询示例。
// Adapted from https://www.geeksforgeeks.org/wavelet-trees-introduction
#include <iostream>
#include <vector>
#include <map>
#include <algorithm>
#include <climits>
using namespace std;
// wavelet tree class
class wavelet_tree {
public:
// Range to elements
int low, high;
// Left and Right child
wavelet_tree* l, *r;
std::vector<int> freq;
// Default constructor
// Array is in range [x, y]
// Indices are in range [from, to]
wavelet_tree(int* from, int* to, int x, int y)
{
// Initialising low and high
low = x, high = y;
// Array is of 0 length
if (from >= to)
return;
// Array is homogenous
// Example : 1 1 1 1 1
if (high == low) {
// Assigning storage to freq array
freq.reserve(to - from + 1);
// Initialising the Freq array
freq.push_back(0);
// Assigning values
for (auto it = from; it != to; it++)
// freq will be increasing as there'll
// be no further sub-tree
freq.push_back(freq.back() + 1);
return;
}
// Computing mid
int mid = (low + high) / 2;
// Lambda function to check if a number
// is less than or equal to mid
auto lessThanMid = [mid](int x) {
return x <= mid;
};
// Assigning storage to freq array
freq.reserve(to - from + 1);
// Initialising the freq array
freq.push_back(0);
// Assigning value to freq array
for (auto it = from; it != to; it++)
// If lessThanMid returns 1(true), we add
// 1 to previous entry. Otherwise, we add 0
// (element goes to right sub-tree)
freq.push_back(freq.back() + lessThanMid(*it));
// std::stable_partition partitions the array w.r.t Mid
auto pivot = std::stable_partition(from, to, lessThanMid);
// Left sub-tree's object
l = new wavelet_tree(from, pivot, low, mid);
// Right sub-tree's object
r = new wavelet_tree(pivot, to, mid + 1, high);
}
// Count of numbers in range[L..R] less than
// or equal to k
int kOrLess(int l, int r, int k)
{
// No elements int range is less than k
if (l > r or k < low)
return 0;
// All elements in the range are less than k
if (high <= k)
return r - l + 1;
// Computing LtCount and RtCount
int LtCount = freq[l - 1];
int RtCount = freq[r];
// Answer is (no. of element <= k) in
// left + (those <= k) in right
return (this->l->kOrLess(LtCount + 1, RtCount, k) +
this->r->kOrLess(l - LtCount, r - RtCount, k));
}
// Count of numbers in range[L..R] greater than
// or equal to k
int kOrMore(int l, int r, int k)
{
// No elements int range are greater than k
if (l > r or k > high)
return 0;
// All elements in the range are greater than k
if (low >= k)
return r - l + 1;
// Computing LtCount and RtCount
int LtCount = freq[l - 1];
int RtCount = freq[r];
// Answer is (no. of element <= k) in
// left + (those <= k) in right
return (this->l->kOrMore(LtCount + 1, RtCount, k) +
this->r->kOrMore(l - LtCount, r - RtCount, k));
}
};
int main()
{
int size = 7, high = INT_MIN;
int arr[] = {1, 2, 3, 2, 4, 3, 1};
int next[size];
std::map<int, int> next_idx;
for (int i=size-1; i>=0; i--){
if (next_idx.find(arr[i]) == next_idx.end())
next[i] = size + 1;
else
next[i] = next_idx[arr[i]];
next_idx[arr[i]] = i + 1;
high = max(high, next[i]);
}
// Object of class wavelet tree
wavelet_tree obj(next, next + size, 1, high);
// Queries are NON-zero-based
//
// 1 2 3 4 5 6 7
// {1, 2, 3, 2, 4, 3, 1};
// query([3, 6]) = 3;
cout << obj.kOrMore(3, 6, 7) << '\n';
// query([1, 4]) = 3;
cout << obj.kOrMore(1, 4, 5) << '\n';
// query([1, 7]) = 4;
cout << obj.kOrMore(1, 7, 8) << '\n';
return 0;
}
我有一个数组(可以有 2X10^5 个值)。我想对该数组执行大量查询。每个查询都是 [L,R] 类型,这个查询的结果应该是子数组中从索引 L 开始到索引 R 结束的唯一值的数量。
我知道这可以使用 Mo 的算法在 O(nrootn) 时间内完成。然而问题在于 Mo 的算法是一种离线算法。我正在寻找的是一个在线算法,因为上一个查询的结果决定了我的下一个查询。
我试着用它来形成一个线段树,其中的节点将存储范围内的所有不同元素。然而,这对我的目的来说太慢了。这种方法的预处理花费了太多时间。
这是我的 C++ 尝试解决方案(也发布了 here) using a Wavelet tree, implemented with code adapted from https://www.geeksforgeeks.org/wavelet-trees-introduction. The idea to reformulate the problem (as Photon
// Adapted from https://www.geeksforgeeks.org/wavelet-trees-introduction
#include <iostream>
#include <vector>
#include <map>
#include <algorithm>
#include <climits>
using namespace std;
// wavelet tree class
class wavelet_tree {
public:
// Range to elements
int low, high;
// Left and Right child
wavelet_tree* l, *r;
std::vector<int> freq;
// Default constructor
// Array is in range [x, y]
// Indices are in range [from, to]
wavelet_tree(int* from, int* to, int x, int y)
{
// Initialising low and high
low = x, high = y;
// Array is of 0 length
if (from >= to)
return;
// Array is homogenous
// Example : 1 1 1 1 1
if (high == low) {
// Assigning storage to freq array
freq.reserve(to - from + 1);
// Initialising the Freq array
freq.push_back(0);
// Assigning values
for (auto it = from; it != to; it++)
// freq will be increasing as there'll
// be no further sub-tree
freq.push_back(freq.back() + 1);
return;
}
// Computing mid
int mid = (low + high) / 2;
// Lambda function to check if a number
// is less than or equal to mid
auto lessThanMid = [mid](int x) {
return x <= mid;
};
// Assigning storage to freq array
freq.reserve(to - from + 1);
// Initialising the freq array
freq.push_back(0);
// Assigning value to freq array
for (auto it = from; it != to; it++)
// If lessThanMid returns 1(true), we add
// 1 to previous entry. Otherwise, we add 0
// (element goes to right sub-tree)
freq.push_back(freq.back() + lessThanMid(*it));
// std::stable_partition partitions the array w.r.t Mid
auto pivot = std::stable_partition(from, to, lessThanMid);
// Left sub-tree's object
l = new wavelet_tree(from, pivot, low, mid);
// Right sub-tree's object
r = new wavelet_tree(pivot, to, mid + 1, high);
}
// Count of numbers in range[L..R] less than
// or equal to k
int kOrLess(int l, int r, int k)
{
// No elements int range is less than k
if (l > r or k < low)
return 0;
// All elements in the range are less than k
if (high <= k)
return r - l + 1;
// Computing LtCount and RtCount
int LtCount = freq[l - 1];
int RtCount = freq[r];
// Answer is (no. of element <= k) in
// left + (those <= k) in right
return (this->l->kOrLess(LtCount + 1, RtCount, k) +
this->r->kOrLess(l - LtCount, r - RtCount, k));
}
// Count of numbers in range[L..R] greater than
// or equal to k
int kOrMore(int l, int r, int k)
{
// No elements int range are greater than k
if (l > r or k > high)
return 0;
// All elements in the range are greater than k
if (low >= k)
return r - l + 1;
// Computing LtCount and RtCount
int LtCount = freq[l - 1];
int RtCount = freq[r];
// Answer is (no. of element <= k) in
// left + (those <= k) in right
return (this->l->kOrMore(LtCount + 1, RtCount, k) +
this->r->kOrMore(l - LtCount, r - RtCount, k));
}
};
int main()
{
int size = 7, high = INT_MIN;
int arr[] = {1, 2, 3, 2, 4, 3, 1};
int next[size];
std::map<int, int> next_idx;
for (int i=size-1; i>=0; i--){
if (next_idx.find(arr[i]) == next_idx.end())
next[i] = size + 1;
else
next[i] = next_idx[arr[i]];
next_idx[arr[i]] = i + 1;
high = max(high, next[i]);
}
// Object of class wavelet tree
wavelet_tree obj(next, next + size, 1, high);
// Queries are NON-zero-based
//
// 1 2 3 4 5 6 7
// {1, 2, 3, 2, 4, 3, 1};
// query([3, 6]) = 3;
cout << obj.kOrMore(3, 6, 7) << '\n';
// query([1, 4]) = 3;
cout << obj.kOrMore(1, 4, 5) << '\n';
// query([1, 7]) = 4;
cout << obj.kOrMore(1, 7, 8) << '\n';
return 0;
}