查找 N 个数字 K 个数字 xor 等于 0
Find of N numbers K numbers xor equalling 0
我的问题是在例如一个 Vector/Array 和 n 个数字,等于用 xor 0 计算的。我已经尝试过强制所有组合而不重复,但是 n=100 和 k=10 的时间复杂度太高了。 (大约 1.7*10^13 种可能性)
有没有人有任何想法来加快在具有 n 个数字的数组中查找 k 个数字的过程?
我认为我的方法不是最聪明的,有这么多的可能性,但我真的不知道如何让我的程序更快。
对于解决方案的建议,我将不胜感激。
我的程序是用 C++ 编写的。
SOLUTION comb(int N, int K, const std::vector<KEY>& bit_codes)
{
std::vector<KEY> chosen_keys;
std::vector<KEY> failed{KEY(100000, "Error")};
std::string bitmask(K, 1);
bitmask.resize(N, 0); // N-K trailing 0's
std::vector<int> comb;
// print integers and permute bitmask
do {
comb.clear();
for (int i = 0; i < N; ++i) // [0..N-1] integers
{
if (bitmask[i]){
comb.emplace_back(i);
}
}
//do all code till next permutation
//reserve the size
for (auto & i : comb) {
chosen_keys.emplace_back(bit_codes[i]);
}
std::string string = chosen_keys[0].bits;
std::string helper;
for (size_t i = 1; i < chosen_keys.size(); ++i) {
helper = to_string(to_bitset(string) ^ to_bitset(chosen_keys[i].bits));
string = helper;
}
for(auto & item: bit_codes){
if (item.bits == string){
return {chosen_keys, item};
}
}
chosen_keys.clear();
} while (std::prev_permutation(bitmask.begin(), bitmask.end()));
return {failed, KEY(100000, "Error")};
}
bitCodes = 这些是 Vector 中的数字,我在其中搜索 k 个数字,其中它们的异或等于 0(KEY 是一个 class,其中保存了位)
我使用这些二进制数:
所有位代码:
1. 001001101100101001000100101
2. 101111001100011110101010000
3. 001111010101110001101001100
4. 111111100010110100010000001
5. 000100000011001010001110000
6. 110101111110101111011011111
7. 110111011001000010010100110
8. 111000111001000110101001011
9. 111001100011011111000110010
10. 101011001111110110101000111
11. 100000110010100011010111011
12. 101110000110011100001010101
13. 011011110010010111010001101
14. 100011101111000110100100001
15. 100001101110001110010111011
16. 011001011101011000010011110
17. 101100000110110001011110100
18. 000111111010001100010001111
19. 001111101111011100010000010
20. 001001000111111111011011001
有k=20个,我搜索了n=5个号码。
输出为:
3. 001111010101110001101001100
4. 111111100010110100010000001
6. 110101111110101111011011111
10. 101011001111110110101000111
12. 101110000110011100001010101
...因为它们等于 0.
对于 15504 种可能性,代码在大约 100 毫秒后完成执行。
现在你可以自己算算n=100和k=10需要多长时间了。
对于 k = 10,n = 100,强力方法是不可行的,因为有超过 17 万亿种组合:
comb(100,10) = 17310309456440
由于两个相等值的异或为零,我的第一个想法是将问题简化为 k = 5,n = 100,超过 7500 万种组合:
comb(100,5) = 75287520
将每个组合的异或和、一个索引计数 (5) 和 5 个索引值存储到一个包含 75287520 个结构的数组中。对于奇数 k,例如 k = 9,将 k = 4,n = 100 的 3921225 个组合存储到数组中(索引计数 = 4),然后是 k = 5,n = 100 的 75287520 个组合(索引计数 = 5) ) 到数组中。要保存 space,请将索引数和索引值存储为 8 位无符号整数。
创建结构数组后,根据异或和对其进行排序。扫描排序数组以寻找相等的异或和(因为两个相等值的异或为零),并且两组索引值之间没有重复项,这将在合并两组索引值以创建单个向量时检测到 k指标值。如果目标是找到任何一个有效的集合,就到此为止。如果目标是找到所有唯一集,请使用 std::map,使用 k 个索引值的向量作为键和一个 8 位整数作为值(未使用该值,但 std::map 需要) .注意 - std::map 可能会消耗大量 space 和时间,如果有很多异或为零的唯一集合。
使用伪随机数生成器创建包含 100 个整数的数组,对于 k = 10,n = 100,我的测试代码找到了 4099 个异或为零的唯一集合。在我的笔记本电脑上(Intel Core i7-10510U,Win 10 Pro 64 位,Visual Studio 2019),使用基数排序,总时间大约需要 1.5 秒:生成 ~= 0.37 秒,排序 ~=0.88 秒,扫描 ~ = 0.25 秒,使用 1.7 GB 内存。使用快速排序,大约需要 6.5 秒,排序 ~= 5.88 秒,使用 0.9 GB 内存
#include <algorithm>
#include <ctime>
#include <iostream>
#include <iomanip>
#include <map>
#include <vector>
typedef unsigned char uint8_t;
typedef unsigned short unit16_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
// N numbers xored K at a time
#define K 10
#define N 100
#define K0 (K/2)
#define K1 (K - K0)
typedef struct _SUMIDX {
uint32_t sum;
uint8_t cnt;
uint8_t idx[K1];
}SUMIDX;
clock_t ctTimeBeg; // clock values
clock_t ctTimeMid;
clock_t ctTimeEnd;
//----------------------------------------------------------------------//
// Comb(n,k) //
//----------------------------------------------------------------------//
static int Comb(int n, int k)
{
uint64_t dvnd = 1;
uint64_t dvsr = 1;
uint64_t quot;
for(int i = n-k+1; i <= n; i++)
dvnd *= i;
for(int i = 1; i <= k; i++)
dvsr *= i;
quot = dvnd/dvsr;
return (int) quot;
}
//----------------------------------------------------------------------//
// InitCombination - init combination //
//----------------------------------------------------------------------//
void InitCombination(std::vector<uint8_t> &x, uint8_t n, uint8_t k) {
for(uint8_t i = 0; i < k; i++)
x[i] = i;
--x[k-1];
}
//----------------------------------------------------------------------//
// NextCombination - generate next combination //
//----------------------------------------------------------------------//
int NextCombination(std::vector<uint8_t> &x, uint8_t n, uint8_t k) {
uint8_t j = k - 1;
while (j != (uint8_t)(-1) && x[j] == n - k + j)
--j;
if (j == (uint8_t)(-1))
return 0;
++x[j];
for (uint8_t i = j + 1; i < k; ++i)
x[i] = x[j] + i - j;
return 1;
}
//----------------------------------------------------------------------//
// RadixSort //
//----------------------------------------------------------------------//
// sort a bin by 3 least significant bytes
void RadixSort3(std::vector<SUMIDX> &sumidx, std::vector<SUMIDX> &rdxsrt,
uint32_t beg, uint32_t end)
{
uint32_t mIndex[3][256] = {0}; // count / index matrix
uint32_t i,j,m,n;
SUMIDX u;
std::swap(sumidx, rdxsrt); // swap vectors
for(i = beg; i < end; i++){ // generate histograms
u.sum = sumidx[i].sum;
for(j = 0; j < 3; j++){
mIndex[j][(size_t)(u.sum & 0xff)]++;
u.sum >>= 8;
}
}
for(j = 0; j < 3; j++){ // convert to indices
m = beg;
for(i = 0; i < 256; i++){
n = mIndex[j][i];
mIndex[j][i] = m;
m += n;
}
}
for(j = 0; j < 3; j++){ // radix sort
for(i = beg; i < end; i++){ // sort by current lsb
u = sumidx[i];
m = (u.sum>>(j<<3))&0xff;
rdxsrt[mIndex[j][m]++] = u;
}
std::swap(sumidx, rdxsrt); // swap vectors
}
}
// split vector into 256 bins according to most significant byte
void RadixSort(std::vector<SUMIDX> &sumidx, std::vector<SUMIDX> &rdxsrt)
{
uint32_t aIndex[260] = {0}; // count / array
uint32_t cnt = (uint32_t)sumidx.size();
uint32_t i;
for(i = 0; i < cnt; i++) // generate histogram
aIndex[1+(sumidx[i].sum >> 24)]++;
for(i = 2; i < 257; i++) // convert to indices
aIndex[i] += aIndex[i-1];
for(i = 0; i < cnt; i++) // sort by msb
rdxsrt[aIndex[sumidx[i].sum>>24]++] = sumidx[i];
for(i = 256; i; i--) // restore aIndex
aIndex[i] = aIndex[i-1];
aIndex[0] = 0;
for(i = 0; i < 256; i++) // radix sort the 256 bins
RadixSort3(sumidx, rdxsrt, aIndex[i], aIndex[i+1]);
}
//----------------------------------------------------------------------//
// QuickSort //
//----------------------------------------------------------------------//
#define ISZ (24) // use insertion sort for <= ISZ elements
void InsertionSort(std::vector<SUMIDX> &sumidx, int lo, int hi)
{
if(lo >= hi)
return;
SUMIDX t;
int i, j;
lo--;
for (j = lo + 2; j <= hi; j++) {
t = sumidx[j];
i = j-1;
while(i != lo && sumidx[i].sum > t.sum){
sumidx[i+1] = sumidx[i];
i--;
}
sumidx[i+1] = t;
}
}
void QuickSort(std::vector<SUMIDX> &sumidx, int lo, int hi)
{
while (hi - lo >= ISZ){
uint32_t p = sumidx[lo + (hi - lo) / 2].sum;
int i = lo - 1;
int j = hi + 1;
while (1){
while (sumidx[++i].sum < p);
while (sumidx[--j].sum > p);
if (i >= j)
break;
std::swap(sumidx[i], sumidx[j]);
}
if(j - lo < hi - j){
QuickSort(sumidx, lo, j);
lo = j+1;
} else {
QuickSort(sumidx, j+1, hi);
hi = j;
}
}
InsertionSort(sumidx, lo, hi);
}
void QuickSort(std::vector<SUMIDX> &sumidx)
{
QuickSort(sumidx, 0, (int)sumidx.size());
}
//----------------------------------------------------------------------//
// Rnd32 - return 32 bit random number //
//----------------------------------------------------------------------//
int Rnd32()
{
static unsigned int r = 0;
r = r*1664525 + 1013904223;
return r;
}
#define RDXSRT 1
int main(int argc, char**argv)
{
int c, c0, c1; // combinations
int sxi = 0; // index to sumidx
c0 = Comb(N, K0); // generate # of combinations
c1 = Comb(N, K1);
c = c0;
if (K0 != K1)
c = c0 + c1;
std::vector<SUMIDX> sumidx(c); // vector of sums and indexes
#if RDXSRT
std::vector<SUMIDX> rdxsrt(c); // vector for radix sort
#endif
std::vector<int> v(N); // vector of numbers
std::vector<uint8_t> x(K); // vector of indexes
std::map<std::vector<uint8_t>, uint8_t> m; // map of sets of K indexes
std::map<std::vector<uint8_t>, uint8_t>::iterator mi; // iterator for m
for(int i = 0; i < N; i++)
v[i] = Rnd32();
ctTimeBeg = clock();
// generate vector of sums and indexes
InitCombination(x, N, K0);
while (NextCombination(x, N, K0)) {
int sum = 0;
for (int i = 0; i < K0; i++)
sum ^= v[x[i]];
sumidx[sxi].sum = sum;
sumidx[sxi].cnt = K0;
for (int i = 0; i < K0; i++)
sumidx[sxi].idx[i] = x[i];
sxi++;
}
#if (K0 != K1)
InitCombination(x, N, K1);
while (NextCombination(x, N, K1)) {
int sum = 0;
for (int i = 0; i < K1; i++)
sum ^= v[x[i]];
sumidx[sxi].sum = sum;
sumidx[sxi].cnt = K1;
for (int i = 0; i < K1; i++)
sumidx[sxi].idx[i] = x[i];
sxi++;
}
#endif
ctTimeMid = clock();
std::cout << "# of ticks " << ctTimeMid - ctTimeBeg << std::endl;
// sort the vector according to the sums
#if RDXSRT
RadixSort(sumidx, rdxsrt);
#else
QuickSort(sumidx);
#endif
#if 0
std::sort(sumidx.begin(), sumidx.end(),
[](SUMIDX s0, SUMIDX s1)
{return s0.sum < s1.sum;});
#endif
ctTimeEnd = clock();
std::cout << "# of ticks " << ctTimeEnd - ctTimeMid << std::endl;
ctTimeMid = ctTimeEnd;
// scan the sorted vector for equal sums
for (int i = 0; i < c - 1; i++) {
for (int j = i + 1; j < c; j++) {
if (sumidx[i].sum != sumidx[j].sum)
break;
#if (K0 != K1)
if (K != (sumidx[i].cnt + sumidx[j].cnt))
continue;
#endif
// merge indexes
int ii = 0, jj = 0, im = 0;
while (1) {
// if duplicate indexes skip
if (sumidx[i].idx[ii] == sumidx[j].idx[jj])
break;
if (sumidx[i].idx[ii] < sumidx[j].idx[jj]) {
x[im++] = sumidx[i].idx[ii++];
if (ii >= sumidx[i].cnt) {
do
x[im++] = sumidx[j].idx[jj++];
while (jj < sumidx[j].cnt);
break;
}
}
else {
x[im++] = sumidx[j].idx[jj++];
if (jj >= sumidx[j].cnt) {
do
x[im++] = sumidx[i].idx[ii++];
while (ii < sumidx[i].cnt);
break;
}
}
}
if (im != K) // if duplicate indexes skip
continue;
m[x]; // insert if unique set
}
}
ctTimeEnd = clock();
std::cout << "# of ticks " << ctTimeEnd - ctTimeMid << std::endl;
std::cout << "# of ticks " << ctTimeEnd - ctTimeBeg << std::endl;
std::cout << "number of combinations found " << m.size() << std::endl;
#if 1 // show and check what was found
for (mi = m.begin(); mi != m.end(); mi++) {
x = mi->first;
for (int i = 0; i < K; i++)
std::cout << std::setw(3) << (uint32_t) x[i] << " ";
std::cout << std::endl;
int sum = 0;
for (int i = 0; i < K; i++)
sum ^= v[x[i]];
if (sum != 0)
std::cout << "error" << std::endl;
}
#endif
return(0);
}
以上代码适用于 64GB 系统上的 128 位密钥,N=111,K=11。为了减少使用的内存量,下面的代码仅存储 N=111、K=5 的条目,并计算 xor 并对 N=111、K=6 的组合进行二分查找。它可以 运行 在具有 16GB 内存的系统上。它比上面的代码慢得多,在我的笔记本电脑上用了不到 4 分钟。二分搜索被修改为搜索第一个匹配条目。我做了一些简单的测试来确认它是否正常工作,但还没有进行详尽的测试以确保没有任何问题。
#include <algorithm>
#include <ctime>
#include <iostream>
#include <iomanip>
typedef unsigned char uint8_t;
typedef unsigned short unit16_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
// N numbers xored K at a time
#define K 11
#define N 111
#define K0 (K/2)
#define K1 (K - K0)
typedef struct _SUMIDX {
uint64_t sum[2];
uint8_t idx[K0];
}SUMIDX;
clock_t ctTimeBeg; // clock values
clock_t ctTimeMid;
clock_t ctTimeEnd;
//----------------------------------------------------------------------//
// Comb(n,k) //
//----------------------------------------------------------------------//
static uint32_t Comb(uint32_t n, uint32_t k)
{
uint64_t dvnd = 1;
uint64_t dvsr = 1;
uint64_t quot;
for(uint32_t i = n-k+1; i <= n; i++)
dvnd *= i;
for(uint32_t i = 1; i <= k; i++)
dvsr *= i;
quot = dvnd/dvsr;
return (uint32_t) quot;
}
//----------------------------------------------------------------------//
// InitCombination - init combination //
//----------------------------------------------------------------------//
void InitCombination(uint8_t x[], uint8_t n, uint8_t k) {
for(uint8_t i = 0; i < k; i++)
x[i] = i;
--x[k-1];
}
//----------------------------------------------------------------------//
// NextCombination - generate next combination //
//----------------------------------------------------------------------//
uint32_t NextCombination(uint8_t x[], uint8_t n, uint8_t k) {
uint8_t j = k - 1;
while (j != (uint8_t)(-1) && x[j] == n - k + j)
--j;
if (j == (uint8_t)(-1))
return 0;
++x[j];
for (uint8_t i = j + 1; i < k; ++i)
x[i] = x[j] + i - j;
return 1;
}
//----------------------------------------------------------------------//
// RadixSort //
//----------------------------------------------------------------------//
// sort a bin by 15 least significant bytes
void RadixSort7(SUMIDX sumidx[], SUMIDX rdxsrt[],
uint32_t beg, uint32_t end)
{
uint32_t mIndex[15][256] = {0}; // count / index matrix
uint32_t i,j,m,n;
SUMIDX u;
std::swap(sumidx, rdxsrt); // swap array pointers
for(i = beg; i < end; i++){ // generate histograms
u.sum[0] = sumidx[i].sum[0];
for(j = 0; j < 8; j++){
mIndex[j][(size_t)(u.sum[0] & 0xff)]++;
u.sum[0] >>= 8;
}
u.sum[1] = sumidx[i].sum[1];
for(j = 8; j < 15; j++){
mIndex[j][(size_t)(u.sum[1] & 0xff)]++;
u.sum[1] >>= 8;
}
}
for(j = 0; j < 15; j++){ // convert to indices
m = beg;
for(i = 0; i < 256; i++){
n = mIndex[j][i];
mIndex[j][i] = m;
m += n;
}
}
for(j = 0; j < 8; j++){ // radix sort
for(i = beg; i < end; i++){ // sort by current lsb
u = sumidx[i];
m = (u.sum[0]>>(j<<3))&0xff;
rdxsrt[mIndex[j][m]++] = u;
}
std::swap(sumidx, rdxsrt); // swap array pointers
}
for (j = 0; j < 7; j++) { // radix sort
for(i = beg; i < end; i++){ // sort by current lsb
u = sumidx[i];
m = (u.sum[1]>>(j<<3))&0xff;
rdxsrt[mIndex[8+j][m]++] = u;
}
std::swap(sumidx, rdxsrt); // swap array pointers
}
}
// split vector into 256 bins according to most significant byte
void RadixSort(SUMIDX sumidx[], SUMIDX rdxsrt[], uint32_t cnt)
{
uint32_t aIndex[260] = {0}; // count / array
uint32_t i;
for(i = 0; i < cnt; i++) // generate histogram
aIndex[1+(sumidx[i].sum[1] >> 56)]++;
for(i = 2; i < 257; i++) // convert to indices
aIndex[i] += aIndex[i-1];
for(i = 0; i < cnt; i++) // sort by msb
rdxsrt[aIndex[sumidx[i].sum[1] >>56]++] = sumidx[i];
for(i = 256; i; i--) // restore aIndex
aIndex[i] = aIndex[i-1];
aIndex[0] = 0;
for(i = 0; i < 256; i++) // radix sort the 256 bins
RadixSort7(sumidx, rdxsrt, aIndex[i], aIndex[i+1]);
}
//----------------------------------------------------------------------//
// BinSrc return index to first match //
//----------------------------------------------------------------------//
uint32_t BinSrc(SUMIDX sumidx[], uint64_t x[2], uint32_t cnt)
{
uint32_t lo = 0;
uint32_t hi = cnt;
uint32_t i;
while((hi - lo) > 1){ // find a match
i = (lo + hi)/2;
if((x[1] < sumidx[i].sum[1]) ||
(x[1] == sumidx[i].sum[1]) &&
(x[0] < sumidx[i].sum[0])){
hi = i;
continue;
}
if((x[1] > sumidx[i].sum[1]) ||
(x[1] == sumidx[i].sum[1]) &&
(x[0] > sumidx[i].sum[0])){
lo = i;
continue;
}
break;
}
hi = i; // find first match
while (hi != lo) {
i = (lo + hi) / 2;
if ((x[1] == sumidx[i].sum[1]) &&
(x[0] == sumidx[i].sum[0])) {
hi = i;
continue;
}
if ((x[1] > sumidx[i].sum[1]) ||
(x[1] == sumidx[i].sum[1]) &&
(x[0] > sumidx[i].sum[0])) {
lo = i+1;
continue;
}
break;
}
if((x[1] == sumidx[lo].sum[1]) &&
(x[0] == sumidx[lo].sum[0]))
return lo;
return uint32_t(0-1);
}
//----------------------------------------------------------------------//
// Rnd64 - return 64 bit random number //
//----------------------------------------------------------------------//
uint64_t Rnd64() // random 64 bit integer
{
static uint64_t r = 1ull;
r = r * 6364136223846793005ull + 1442695040888963407ull;
return r;
}
//----------------------------------------------------------------------//
// Rnd32 - return 32 bit random number //
//----------------------------------------------------------------------//
uint32_t Rnd32()
{
static uint32_t r = 0;
r = r*1664525 + 1013904223;
return r;
}
int main(int argc, char**argv)
{
uint64_t sum[2]; // 128 bit key
uint32_t c0; // combinations(N, K0)
uint32_t xi; // index to sumidx
uint32_t i, ii, jj, im;
c0 = Comb(N, K0); // generate # of combinations
SUMIDX *sumidx = new SUMIDX[c0]; // array of xors + indexes
SUMIDX *rdxsrt = new SUMIDX[c0]; // array for radix sort
uint64_t v[N][2];
uint8_t x[K1];
uint8_t m[K];
for(i = 0; i < N; i++){
v[i][0] = Rnd64()&0x000000000000ffffull;
v[i][1] = Rnd64()&0xffffffff00000000ull;
}
ctTimeBeg = clock();
// generate array of xors and indexes
xi = 0;
InitCombination(x, N, K0);
while (NextCombination(x, N, K0)) {
sum[1] = sum[0] = 0;
for (i = 0; i < K0; i++){
sum[0] ^= v[x[i]][0];
sum[1] ^= v[x[i]][1];
}
sumidx[xi].sum[0] = sum[0];
sumidx[xi].sum[1] = sum[1];
for (i = 0; i < K0; i++)
sumidx[xi].idx[i] = x[i];
xi++;
}
ctTimeMid = clock();
std::cout << "# of ticks " << ctTimeMid - ctTimeBeg << std::endl;
// sort the vector according to the sums
RadixSort(sumidx, rdxsrt, c0);
ctTimeEnd = clock();
std::cout << "# of ticks " << ctTimeEnd - ctTimeMid << std::endl;
ctTimeMid = ctTimeEnd;
// scan the sorted vector for equal xors
InitCombination(x, N, K1);
#if 0
uint32_t z = K1;
#endif
while (NextCombination(x, N, K1)) {
#if 0
if (z != x[1]) {
z = x[1];
for (i = 0; i < K1; i++)
std::cout << std::setw(3) << (uint32_t)x[i] << " ";
std::cout << std::endl;
}
#endif
sum[1] = sum[0] = 0;
for (i = 0; i < K1; i++){
sum[0] ^= v[x[i]][0];
sum[1] ^= v[x[i]][1];
}
i = BinSrc(sumidx, sum, c0);
if(i == uint32_t(0-1))
continue;
while((sum[0] == sumidx[i].sum[0]) &&
(sum[1] == sumidx[i].sum[1])){
// merge indexes
ii = jj = im = 0;
while (1) {
// if duplicate indexes skip
if (sumidx[i].idx[ii] == x[jj])
break;
if (sumidx[i].idx[ii] < x[jj]) {
m[im++] = sumidx[i].idx[ii++];
if (ii >= K0) {
do
m[im++] =x[jj++];
while (jj < K1);
break;
}
}
else {
m[im++] = x[jj++];
if (jj >= K1) {
do
m[im++] = sumidx[i].idx[ii++];
while (ii < K0);
break;
}
}
}
if (im == K) // if not duplicate indexes stop
goto found0;
i++;
}
}
found0:
ctTimeEnd = clock();
std::cout << "# of ticks " << ctTimeEnd - ctTimeMid << std::endl;
std::cout << "# of ticks " << ctTimeEnd - ctTimeBeg << std::endl;
if(im != K){
std::cout << "not found" << std::endl;
goto exit0;
}
for (i = 0; i < K; i++)
std::cout << std::setw(3) << (uint32_t)m[i] << " ";
std::cout << std::endl;
sum[1] = sum[0] = 0;
for (i = 0; i < K; i++){
sum[0] ^= v[m[i]][0];
sum[1] ^= v[m[i]][1];
}
if (sum[0] != 0 || sum[1] != 0)
std::cout << "error" << std::endl;
exit0:
delete rdxsrt;
delete sumidx;
return(0);
}
我的问题是在例如一个 Vector/Array 和 n 个数字,等于用 xor 0 计算的。我已经尝试过强制所有组合而不重复,但是 n=100 和 k=10 的时间复杂度太高了。 (大约 1.7*10^13 种可能性)
有没有人有任何想法来加快在具有 n 个数字的数组中查找 k 个数字的过程?
我认为我的方法不是最聪明的,有这么多的可能性,但我真的不知道如何让我的程序更快。
对于解决方案的建议,我将不胜感激。
我的程序是用 C++ 编写的。
SOLUTION comb(int N, int K, const std::vector<KEY>& bit_codes)
{
std::vector<KEY> chosen_keys;
std::vector<KEY> failed{KEY(100000, "Error")};
std::string bitmask(K, 1);
bitmask.resize(N, 0); // N-K trailing 0's
std::vector<int> comb;
// print integers and permute bitmask
do {
comb.clear();
for (int i = 0; i < N; ++i) // [0..N-1] integers
{
if (bitmask[i]){
comb.emplace_back(i);
}
}
//do all code till next permutation
//reserve the size
for (auto & i : comb) {
chosen_keys.emplace_back(bit_codes[i]);
}
std::string string = chosen_keys[0].bits;
std::string helper;
for (size_t i = 1; i < chosen_keys.size(); ++i) {
helper = to_string(to_bitset(string) ^ to_bitset(chosen_keys[i].bits));
string = helper;
}
for(auto & item: bit_codes){
if (item.bits == string){
return {chosen_keys, item};
}
}
chosen_keys.clear();
} while (std::prev_permutation(bitmask.begin(), bitmask.end()));
return {failed, KEY(100000, "Error")};
}
bitCodes = 这些是 Vector 中的数字,我在其中搜索 k 个数字,其中它们的异或等于 0(KEY 是一个 class,其中保存了位)
我使用这些二进制数: 所有位代码:
1. 001001101100101001000100101
2. 101111001100011110101010000
3. 001111010101110001101001100
4. 111111100010110100010000001
5. 000100000011001010001110000
6. 110101111110101111011011111
7. 110111011001000010010100110
8. 111000111001000110101001011
9. 111001100011011111000110010
10. 101011001111110110101000111
11. 100000110010100011010111011
12. 101110000110011100001010101
13. 011011110010010111010001101
14. 100011101111000110100100001
15. 100001101110001110010111011
16. 011001011101011000010011110
17. 101100000110110001011110100
18. 000111111010001100010001111
19. 001111101111011100010000010
20. 001001000111111111011011001
有k=20个,我搜索了n=5个号码。
输出为:
3. 001111010101110001101001100
4. 111111100010110100010000001
6. 110101111110101111011011111
10. 101011001111110110101000111
12. 101110000110011100001010101
...因为它们等于 0.
对于 15504 种可能性,代码在大约 100 毫秒后完成执行。
现在你可以自己算算n=100和k=10需要多长时间了。
对于 k = 10,n = 100,强力方法是不可行的,因为有超过 17 万亿种组合:
comb(100,10) = 17310309456440
由于两个相等值的异或为零,我的第一个想法是将问题简化为 k = 5,n = 100,超过 7500 万种组合:
comb(100,5) = 75287520
将每个组合的异或和、一个索引计数 (5) 和 5 个索引值存储到一个包含 75287520 个结构的数组中。对于奇数 k,例如 k = 9,将 k = 4,n = 100 的 3921225 个组合存储到数组中(索引计数 = 4),然后是 k = 5,n = 100 的 75287520 个组合(索引计数 = 5) ) 到数组中。要保存 space,请将索引数和索引值存储为 8 位无符号整数。
创建结构数组后,根据异或和对其进行排序。扫描排序数组以寻找相等的异或和(因为两个相等值的异或为零),并且两组索引值之间没有重复项,这将在合并两组索引值以创建单个向量时检测到 k指标值。如果目标是找到任何一个有效的集合,就到此为止。如果目标是找到所有唯一集,请使用 std::map,使用 k 个索引值的向量作为键和一个 8 位整数作为值(未使用该值,但 std::map 需要) .注意 - std::map 可能会消耗大量 space 和时间,如果有很多异或为零的唯一集合。
使用伪随机数生成器创建包含 100 个整数的数组,对于 k = 10,n = 100,我的测试代码找到了 4099 个异或为零的唯一集合。在我的笔记本电脑上(Intel Core i7-10510U,Win 10 Pro 64 位,Visual Studio 2019),使用基数排序,总时间大约需要 1.5 秒:生成 ~= 0.37 秒,排序 ~=0.88 秒,扫描 ~ = 0.25 秒,使用 1.7 GB 内存。使用快速排序,大约需要 6.5 秒,排序 ~= 5.88 秒,使用 0.9 GB 内存
#include <algorithm>
#include <ctime>
#include <iostream>
#include <iomanip>
#include <map>
#include <vector>
typedef unsigned char uint8_t;
typedef unsigned short unit16_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
// N numbers xored K at a time
#define K 10
#define N 100
#define K0 (K/2)
#define K1 (K - K0)
typedef struct _SUMIDX {
uint32_t sum;
uint8_t cnt;
uint8_t idx[K1];
}SUMIDX;
clock_t ctTimeBeg; // clock values
clock_t ctTimeMid;
clock_t ctTimeEnd;
//----------------------------------------------------------------------//
// Comb(n,k) //
//----------------------------------------------------------------------//
static int Comb(int n, int k)
{
uint64_t dvnd = 1;
uint64_t dvsr = 1;
uint64_t quot;
for(int i = n-k+1; i <= n; i++)
dvnd *= i;
for(int i = 1; i <= k; i++)
dvsr *= i;
quot = dvnd/dvsr;
return (int) quot;
}
//----------------------------------------------------------------------//
// InitCombination - init combination //
//----------------------------------------------------------------------//
void InitCombination(std::vector<uint8_t> &x, uint8_t n, uint8_t k) {
for(uint8_t i = 0; i < k; i++)
x[i] = i;
--x[k-1];
}
//----------------------------------------------------------------------//
// NextCombination - generate next combination //
//----------------------------------------------------------------------//
int NextCombination(std::vector<uint8_t> &x, uint8_t n, uint8_t k) {
uint8_t j = k - 1;
while (j != (uint8_t)(-1) && x[j] == n - k + j)
--j;
if (j == (uint8_t)(-1))
return 0;
++x[j];
for (uint8_t i = j + 1; i < k; ++i)
x[i] = x[j] + i - j;
return 1;
}
//----------------------------------------------------------------------//
// RadixSort //
//----------------------------------------------------------------------//
// sort a bin by 3 least significant bytes
void RadixSort3(std::vector<SUMIDX> &sumidx, std::vector<SUMIDX> &rdxsrt,
uint32_t beg, uint32_t end)
{
uint32_t mIndex[3][256] = {0}; // count / index matrix
uint32_t i,j,m,n;
SUMIDX u;
std::swap(sumidx, rdxsrt); // swap vectors
for(i = beg; i < end; i++){ // generate histograms
u.sum = sumidx[i].sum;
for(j = 0; j < 3; j++){
mIndex[j][(size_t)(u.sum & 0xff)]++;
u.sum >>= 8;
}
}
for(j = 0; j < 3; j++){ // convert to indices
m = beg;
for(i = 0; i < 256; i++){
n = mIndex[j][i];
mIndex[j][i] = m;
m += n;
}
}
for(j = 0; j < 3; j++){ // radix sort
for(i = beg; i < end; i++){ // sort by current lsb
u = sumidx[i];
m = (u.sum>>(j<<3))&0xff;
rdxsrt[mIndex[j][m]++] = u;
}
std::swap(sumidx, rdxsrt); // swap vectors
}
}
// split vector into 256 bins according to most significant byte
void RadixSort(std::vector<SUMIDX> &sumidx, std::vector<SUMIDX> &rdxsrt)
{
uint32_t aIndex[260] = {0}; // count / array
uint32_t cnt = (uint32_t)sumidx.size();
uint32_t i;
for(i = 0; i < cnt; i++) // generate histogram
aIndex[1+(sumidx[i].sum >> 24)]++;
for(i = 2; i < 257; i++) // convert to indices
aIndex[i] += aIndex[i-1];
for(i = 0; i < cnt; i++) // sort by msb
rdxsrt[aIndex[sumidx[i].sum>>24]++] = sumidx[i];
for(i = 256; i; i--) // restore aIndex
aIndex[i] = aIndex[i-1];
aIndex[0] = 0;
for(i = 0; i < 256; i++) // radix sort the 256 bins
RadixSort3(sumidx, rdxsrt, aIndex[i], aIndex[i+1]);
}
//----------------------------------------------------------------------//
// QuickSort //
//----------------------------------------------------------------------//
#define ISZ (24) // use insertion sort for <= ISZ elements
void InsertionSort(std::vector<SUMIDX> &sumidx, int lo, int hi)
{
if(lo >= hi)
return;
SUMIDX t;
int i, j;
lo--;
for (j = lo + 2; j <= hi; j++) {
t = sumidx[j];
i = j-1;
while(i != lo && sumidx[i].sum > t.sum){
sumidx[i+1] = sumidx[i];
i--;
}
sumidx[i+1] = t;
}
}
void QuickSort(std::vector<SUMIDX> &sumidx, int lo, int hi)
{
while (hi - lo >= ISZ){
uint32_t p = sumidx[lo + (hi - lo) / 2].sum;
int i = lo - 1;
int j = hi + 1;
while (1){
while (sumidx[++i].sum < p);
while (sumidx[--j].sum > p);
if (i >= j)
break;
std::swap(sumidx[i], sumidx[j]);
}
if(j - lo < hi - j){
QuickSort(sumidx, lo, j);
lo = j+1;
} else {
QuickSort(sumidx, j+1, hi);
hi = j;
}
}
InsertionSort(sumidx, lo, hi);
}
void QuickSort(std::vector<SUMIDX> &sumidx)
{
QuickSort(sumidx, 0, (int)sumidx.size());
}
//----------------------------------------------------------------------//
// Rnd32 - return 32 bit random number //
//----------------------------------------------------------------------//
int Rnd32()
{
static unsigned int r = 0;
r = r*1664525 + 1013904223;
return r;
}
#define RDXSRT 1
int main(int argc, char**argv)
{
int c, c0, c1; // combinations
int sxi = 0; // index to sumidx
c0 = Comb(N, K0); // generate # of combinations
c1 = Comb(N, K1);
c = c0;
if (K0 != K1)
c = c0 + c1;
std::vector<SUMIDX> sumidx(c); // vector of sums and indexes
#if RDXSRT
std::vector<SUMIDX> rdxsrt(c); // vector for radix sort
#endif
std::vector<int> v(N); // vector of numbers
std::vector<uint8_t> x(K); // vector of indexes
std::map<std::vector<uint8_t>, uint8_t> m; // map of sets of K indexes
std::map<std::vector<uint8_t>, uint8_t>::iterator mi; // iterator for m
for(int i = 0; i < N; i++)
v[i] = Rnd32();
ctTimeBeg = clock();
// generate vector of sums and indexes
InitCombination(x, N, K0);
while (NextCombination(x, N, K0)) {
int sum = 0;
for (int i = 0; i < K0; i++)
sum ^= v[x[i]];
sumidx[sxi].sum = sum;
sumidx[sxi].cnt = K0;
for (int i = 0; i < K0; i++)
sumidx[sxi].idx[i] = x[i];
sxi++;
}
#if (K0 != K1)
InitCombination(x, N, K1);
while (NextCombination(x, N, K1)) {
int sum = 0;
for (int i = 0; i < K1; i++)
sum ^= v[x[i]];
sumidx[sxi].sum = sum;
sumidx[sxi].cnt = K1;
for (int i = 0; i < K1; i++)
sumidx[sxi].idx[i] = x[i];
sxi++;
}
#endif
ctTimeMid = clock();
std::cout << "# of ticks " << ctTimeMid - ctTimeBeg << std::endl;
// sort the vector according to the sums
#if RDXSRT
RadixSort(sumidx, rdxsrt);
#else
QuickSort(sumidx);
#endif
#if 0
std::sort(sumidx.begin(), sumidx.end(),
[](SUMIDX s0, SUMIDX s1)
{return s0.sum < s1.sum;});
#endif
ctTimeEnd = clock();
std::cout << "# of ticks " << ctTimeEnd - ctTimeMid << std::endl;
ctTimeMid = ctTimeEnd;
// scan the sorted vector for equal sums
for (int i = 0; i < c - 1; i++) {
for (int j = i + 1; j < c; j++) {
if (sumidx[i].sum != sumidx[j].sum)
break;
#if (K0 != K1)
if (K != (sumidx[i].cnt + sumidx[j].cnt))
continue;
#endif
// merge indexes
int ii = 0, jj = 0, im = 0;
while (1) {
// if duplicate indexes skip
if (sumidx[i].idx[ii] == sumidx[j].idx[jj])
break;
if (sumidx[i].idx[ii] < sumidx[j].idx[jj]) {
x[im++] = sumidx[i].idx[ii++];
if (ii >= sumidx[i].cnt) {
do
x[im++] = sumidx[j].idx[jj++];
while (jj < sumidx[j].cnt);
break;
}
}
else {
x[im++] = sumidx[j].idx[jj++];
if (jj >= sumidx[j].cnt) {
do
x[im++] = sumidx[i].idx[ii++];
while (ii < sumidx[i].cnt);
break;
}
}
}
if (im != K) // if duplicate indexes skip
continue;
m[x]; // insert if unique set
}
}
ctTimeEnd = clock();
std::cout << "# of ticks " << ctTimeEnd - ctTimeMid << std::endl;
std::cout << "# of ticks " << ctTimeEnd - ctTimeBeg << std::endl;
std::cout << "number of combinations found " << m.size() << std::endl;
#if 1 // show and check what was found
for (mi = m.begin(); mi != m.end(); mi++) {
x = mi->first;
for (int i = 0; i < K; i++)
std::cout << std::setw(3) << (uint32_t) x[i] << " ";
std::cout << std::endl;
int sum = 0;
for (int i = 0; i < K; i++)
sum ^= v[x[i]];
if (sum != 0)
std::cout << "error" << std::endl;
}
#endif
return(0);
}
以上代码适用于 64GB 系统上的 128 位密钥,N=111,K=11。为了减少使用的内存量,下面的代码仅存储 N=111、K=5 的条目,并计算 xor 并对 N=111、K=6 的组合进行二分查找。它可以 运行 在具有 16GB 内存的系统上。它比上面的代码慢得多,在我的笔记本电脑上用了不到 4 分钟。二分搜索被修改为搜索第一个匹配条目。我做了一些简单的测试来确认它是否正常工作,但还没有进行详尽的测试以确保没有任何问题。
#include <algorithm>
#include <ctime>
#include <iostream>
#include <iomanip>
typedef unsigned char uint8_t;
typedef unsigned short unit16_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
// N numbers xored K at a time
#define K 11
#define N 111
#define K0 (K/2)
#define K1 (K - K0)
typedef struct _SUMIDX {
uint64_t sum[2];
uint8_t idx[K0];
}SUMIDX;
clock_t ctTimeBeg; // clock values
clock_t ctTimeMid;
clock_t ctTimeEnd;
//----------------------------------------------------------------------//
// Comb(n,k) //
//----------------------------------------------------------------------//
static uint32_t Comb(uint32_t n, uint32_t k)
{
uint64_t dvnd = 1;
uint64_t dvsr = 1;
uint64_t quot;
for(uint32_t i = n-k+1; i <= n; i++)
dvnd *= i;
for(uint32_t i = 1; i <= k; i++)
dvsr *= i;
quot = dvnd/dvsr;
return (uint32_t) quot;
}
//----------------------------------------------------------------------//
// InitCombination - init combination //
//----------------------------------------------------------------------//
void InitCombination(uint8_t x[], uint8_t n, uint8_t k) {
for(uint8_t i = 0; i < k; i++)
x[i] = i;
--x[k-1];
}
//----------------------------------------------------------------------//
// NextCombination - generate next combination //
//----------------------------------------------------------------------//
uint32_t NextCombination(uint8_t x[], uint8_t n, uint8_t k) {
uint8_t j = k - 1;
while (j != (uint8_t)(-1) && x[j] == n - k + j)
--j;
if (j == (uint8_t)(-1))
return 0;
++x[j];
for (uint8_t i = j + 1; i < k; ++i)
x[i] = x[j] + i - j;
return 1;
}
//----------------------------------------------------------------------//
// RadixSort //
//----------------------------------------------------------------------//
// sort a bin by 15 least significant bytes
void RadixSort7(SUMIDX sumidx[], SUMIDX rdxsrt[],
uint32_t beg, uint32_t end)
{
uint32_t mIndex[15][256] = {0}; // count / index matrix
uint32_t i,j,m,n;
SUMIDX u;
std::swap(sumidx, rdxsrt); // swap array pointers
for(i = beg; i < end; i++){ // generate histograms
u.sum[0] = sumidx[i].sum[0];
for(j = 0; j < 8; j++){
mIndex[j][(size_t)(u.sum[0] & 0xff)]++;
u.sum[0] >>= 8;
}
u.sum[1] = sumidx[i].sum[1];
for(j = 8; j < 15; j++){
mIndex[j][(size_t)(u.sum[1] & 0xff)]++;
u.sum[1] >>= 8;
}
}
for(j = 0; j < 15; j++){ // convert to indices
m = beg;
for(i = 0; i < 256; i++){
n = mIndex[j][i];
mIndex[j][i] = m;
m += n;
}
}
for(j = 0; j < 8; j++){ // radix sort
for(i = beg; i < end; i++){ // sort by current lsb
u = sumidx[i];
m = (u.sum[0]>>(j<<3))&0xff;
rdxsrt[mIndex[j][m]++] = u;
}
std::swap(sumidx, rdxsrt); // swap array pointers
}
for (j = 0; j < 7; j++) { // radix sort
for(i = beg; i < end; i++){ // sort by current lsb
u = sumidx[i];
m = (u.sum[1]>>(j<<3))&0xff;
rdxsrt[mIndex[8+j][m]++] = u;
}
std::swap(sumidx, rdxsrt); // swap array pointers
}
}
// split vector into 256 bins according to most significant byte
void RadixSort(SUMIDX sumidx[], SUMIDX rdxsrt[], uint32_t cnt)
{
uint32_t aIndex[260] = {0}; // count / array
uint32_t i;
for(i = 0; i < cnt; i++) // generate histogram
aIndex[1+(sumidx[i].sum[1] >> 56)]++;
for(i = 2; i < 257; i++) // convert to indices
aIndex[i] += aIndex[i-1];
for(i = 0; i < cnt; i++) // sort by msb
rdxsrt[aIndex[sumidx[i].sum[1] >>56]++] = sumidx[i];
for(i = 256; i; i--) // restore aIndex
aIndex[i] = aIndex[i-1];
aIndex[0] = 0;
for(i = 0; i < 256; i++) // radix sort the 256 bins
RadixSort7(sumidx, rdxsrt, aIndex[i], aIndex[i+1]);
}
//----------------------------------------------------------------------//
// BinSrc return index to first match //
//----------------------------------------------------------------------//
uint32_t BinSrc(SUMIDX sumidx[], uint64_t x[2], uint32_t cnt)
{
uint32_t lo = 0;
uint32_t hi = cnt;
uint32_t i;
while((hi - lo) > 1){ // find a match
i = (lo + hi)/2;
if((x[1] < sumidx[i].sum[1]) ||
(x[1] == sumidx[i].sum[1]) &&
(x[0] < sumidx[i].sum[0])){
hi = i;
continue;
}
if((x[1] > sumidx[i].sum[1]) ||
(x[1] == sumidx[i].sum[1]) &&
(x[0] > sumidx[i].sum[0])){
lo = i;
continue;
}
break;
}
hi = i; // find first match
while (hi != lo) {
i = (lo + hi) / 2;
if ((x[1] == sumidx[i].sum[1]) &&
(x[0] == sumidx[i].sum[0])) {
hi = i;
continue;
}
if ((x[1] > sumidx[i].sum[1]) ||
(x[1] == sumidx[i].sum[1]) &&
(x[0] > sumidx[i].sum[0])) {
lo = i+1;
continue;
}
break;
}
if((x[1] == sumidx[lo].sum[1]) &&
(x[0] == sumidx[lo].sum[0]))
return lo;
return uint32_t(0-1);
}
//----------------------------------------------------------------------//
// Rnd64 - return 64 bit random number //
//----------------------------------------------------------------------//
uint64_t Rnd64() // random 64 bit integer
{
static uint64_t r = 1ull;
r = r * 6364136223846793005ull + 1442695040888963407ull;
return r;
}
//----------------------------------------------------------------------//
// Rnd32 - return 32 bit random number //
//----------------------------------------------------------------------//
uint32_t Rnd32()
{
static uint32_t r = 0;
r = r*1664525 + 1013904223;
return r;
}
int main(int argc, char**argv)
{
uint64_t sum[2]; // 128 bit key
uint32_t c0; // combinations(N, K0)
uint32_t xi; // index to sumidx
uint32_t i, ii, jj, im;
c0 = Comb(N, K0); // generate # of combinations
SUMIDX *sumidx = new SUMIDX[c0]; // array of xors + indexes
SUMIDX *rdxsrt = new SUMIDX[c0]; // array for radix sort
uint64_t v[N][2];
uint8_t x[K1];
uint8_t m[K];
for(i = 0; i < N; i++){
v[i][0] = Rnd64()&0x000000000000ffffull;
v[i][1] = Rnd64()&0xffffffff00000000ull;
}
ctTimeBeg = clock();
// generate array of xors and indexes
xi = 0;
InitCombination(x, N, K0);
while (NextCombination(x, N, K0)) {
sum[1] = sum[0] = 0;
for (i = 0; i < K0; i++){
sum[0] ^= v[x[i]][0];
sum[1] ^= v[x[i]][1];
}
sumidx[xi].sum[0] = sum[0];
sumidx[xi].sum[1] = sum[1];
for (i = 0; i < K0; i++)
sumidx[xi].idx[i] = x[i];
xi++;
}
ctTimeMid = clock();
std::cout << "# of ticks " << ctTimeMid - ctTimeBeg << std::endl;
// sort the vector according to the sums
RadixSort(sumidx, rdxsrt, c0);
ctTimeEnd = clock();
std::cout << "# of ticks " << ctTimeEnd - ctTimeMid << std::endl;
ctTimeMid = ctTimeEnd;
// scan the sorted vector for equal xors
InitCombination(x, N, K1);
#if 0
uint32_t z = K1;
#endif
while (NextCombination(x, N, K1)) {
#if 0
if (z != x[1]) {
z = x[1];
for (i = 0; i < K1; i++)
std::cout << std::setw(3) << (uint32_t)x[i] << " ";
std::cout << std::endl;
}
#endif
sum[1] = sum[0] = 0;
for (i = 0; i < K1; i++){
sum[0] ^= v[x[i]][0];
sum[1] ^= v[x[i]][1];
}
i = BinSrc(sumidx, sum, c0);
if(i == uint32_t(0-1))
continue;
while((sum[0] == sumidx[i].sum[0]) &&
(sum[1] == sumidx[i].sum[1])){
// merge indexes
ii = jj = im = 0;
while (1) {
// if duplicate indexes skip
if (sumidx[i].idx[ii] == x[jj])
break;
if (sumidx[i].idx[ii] < x[jj]) {
m[im++] = sumidx[i].idx[ii++];
if (ii >= K0) {
do
m[im++] =x[jj++];
while (jj < K1);
break;
}
}
else {
m[im++] = x[jj++];
if (jj >= K1) {
do
m[im++] = sumidx[i].idx[ii++];
while (ii < K0);
break;
}
}
}
if (im == K) // if not duplicate indexes stop
goto found0;
i++;
}
}
found0:
ctTimeEnd = clock();
std::cout << "# of ticks " << ctTimeEnd - ctTimeMid << std::endl;
std::cout << "# of ticks " << ctTimeEnd - ctTimeBeg << std::endl;
if(im != K){
std::cout << "not found" << std::endl;
goto exit0;
}
for (i = 0; i < K; i++)
std::cout << std::setw(3) << (uint32_t)m[i] << " ";
std::cout << std::endl;
sum[1] = sum[0] = 0;
for (i = 0; i < K; i++){
sum[0] ^= v[m[i]][0];
sum[1] ^= v[m[i]][1];
}
if (sum[0] != 0 || sum[1] != 0)
std::cout << "error" << std::endl;
exit0:
delete rdxsrt;
delete sumidx;
return(0);
}