计算也是平方数的第 N 个三角数
Computing Nth triangular number that is also a square number
练习赛中出现了这个问题:
计算第 N 个也是平方数的三角数,对 10006699 取模。(1 ≤ N ≤ 10^18)最多有 10^5 个测试用例。
我发现我可以很容易地用递归关系计算它 Ti = 6Ti-1 - Ti-2 + 2, T0 = 0 和 T1 = 1.
我使用矩阵求幂来获得每个测试用例的大约 O(log N) 性能,但它显然太慢了,因为有 10^5 个测试用例。事实上,即使约束只有 (1 ≤ N ≤ 10^6),这段代码也太慢了,我可以只进行 O(N) 预处理和 O(1)查询。
我应该改变解决问题的方法,还是只优化部分代码?
#include <ios>
#include <iostream>
#include <vector>
#define MOD 10006699
/*
Transformation Matrix:
0 1 0 t[i] t[i+1]
-1 6 1 * t[i+1] = t[i+2]
0 0 1 2 2
*/
std::vector<std::vector<long long int> > multi(std::vector<std::vector<long long int> > a, std::vector<std::vector<long long int> > b)
{
std::vector<std::vector<long long int> > c(3, std::vector<long long int>(3));
for (int i = 0; i < 3; i++)
{
for (int j = 0; j < 3; j++)
{
for (int k = 0; k < 3; k++)
{
c[i][j] += (a[i][k] * b[k][j]) % MOD;
c[i][j] %= MOD;
}
}
}
return c;
}
std::vector<std::vector<long long int> > power(std::vector<std::vector<long long int> > vec, long long int p)
{
if (p == 1) return vec;
else if (p % 2 == 1) return multi(vec, power(vec, p-1));
else
{
std::vector<std::vector<long long int> > x = power(vec, p/2);
return multi(x, x);
}
}
int main()
{
std::ios_base::sync_with_stdio(false);
long long int n;
while (std::cin >> n)
{
if (n == 0) break;
else
{
std::vector<std::vector<long long int> > trans;
long long int ans;
trans.resize(3);
trans[0].push_back(0);
trans[0].push_back(1);
trans[0].push_back(0);
trans[1].push_back(-1);
trans[1].push_back(6);
trans[1].push_back(1);
trans[2].push_back(0);
trans[2].push_back(0);
trans[2].push_back(1);
trans = power(trans, n);
ans = (trans[0][1]%MOD + (2*trans[0][2])%MOD)%MOD;
if (ans < 0) ans += MOD;
std::cout << ans << std::endl;
}
}
}
注意:我删除了我的旧答案,这个更有用
您似乎不太可能为该问题创建比 O(log N) 更好的渐近算法。但是,可以对您当前的代码进行一些修改,这不会改善渐近时间但会提高性能
下面是对您的代码的修改,它产生了相同的答案:
#include <ctime>
#include <ios>
#include <iostream>
#include <vector>
#define MOD 10006699
void power(std::vector<std::vector<long long int> >& vec, long long int p)
{
if (p == 1)
return;
else if (p & 1)
{
std::vector<std::vector<long long int> > copy1 = vec;
power(copy1, p-1);
std::vector<std::vector<long long int> > copy2(3, std::vector<long long int>(3));
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++)
{
for (int k = 0; k < 3; k++)
copy2[i][j] += (vec[i][k] * copy1[k][j]) % MOD;
copy2[i][j] %= MOD;
}
vec = copy2;
return;
}
else
{
power(vec, p/2);
std::vector<std::vector<long long int> > copy(3, std::vector<long long int>(3));
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++)
{
for (int k = 0; k < 3; k++)
copy[i][j] += (vec[i][k] * vec[k][j]) % MOD;
copy[i][j] %= MOD;
}
vec = copy;
return;
}
}
int main()
{
std::ios_base::sync_with_stdio(false);
long long int n;
while (std::cin >> n)
{
std::clock_t start = std::clock();
if (n == 0) break;
std::vector<std::vector<long long int> > trans;
long long int ans;
trans.resize(3);
trans[0].push_back(0);
trans[0].push_back(1);
trans[0].push_back(0);
trans[1].push_back(-1);
trans[1].push_back(6);
trans[1].push_back(1);
trans[2].push_back(0);
trans[2].push_back(0);
trans[2].push_back(1);
power(trans, n);
ans = (trans[0][1]%MOD + (2*trans[0][2])%MOD)%MOD;
if (ans < 0) ans += MOD;
std::cout << "Answer: " << ans << std::endl;
std::cout << "Time: " << (std::clock() - start) / (double)(CLOCKS_PER_SEC / 1000) << " ms" << std::endl;
}
}
区别主要有:
c[i][j] %= MOD;
的代码移动要在 k
循环之外
- 通过引用传递向量
- 更少的函数调用
如果我在您的 main
的 while 循环中放置与我的代码中相同的计时代码,请将您的文件命名为 "before.cpp",将我的文件命名为 "after.cpp",然后 运行 连续 10 次完全优化然后这些是我的结果:
Alexanders-MBP:Desktop alexandersimes$ g++ before.cpp -O3 -o before
Alexanders-MBP:Desktop alexandersimes$ ./before
1000000000000000000
Answer: 6635296
Time: 0.708 ms
1000000000000000000
Answer: 6635296
Time: 0.542 ms
1000000000000000000
Answer: 6635296
Time: 0.688 ms
1000000000000000000
Answer: 6635296
Time: 0.634 ms
1000000000000000000
Answer: 6635296
Time: 0.626 ms
1000000000000000000
Answer: 6635296
Time: 0.629 ms
1000000000000000000
Answer: 6635296
Time: 0.629 ms
1000000000000000000
Answer: 6635296
Time: 0.629 ms
1000000000000000000
Answer: 6635296
Time: 0.632 ms
1000000000000000000
Answer: 6635296
Time: 0.695 ms
Alexanders-MBP:Desktop alexandersimes$ g++ after.cpp -O3 -o after
Alexanders-MBP:Desktop alexandersimes$ ./after
1000000000000000000
Answer: 6635296
Time: 0.283 ms
1000000000000000000
Answer: 6635296
Time: 0.287 ms
1000000000000000000
Answer: 6635296
Time: 0.27 ms
1000000000000000000
Answer: 6635296
Time: 0.27 ms
1000000000000000000
Answer: 6635296
Time: 0.266 ms
1000000000000000000
Answer: 6635296
Time: 0.265 ms
1000000000000000000
Answer: 6635296
Time: 0.266 ms
1000000000000000000
Answer: 6635296
Time: 0.267 ms
1000000000000000000
Answer: 6635296
Time: 0.21 ms
1000000000000000000
Answer: 6635296
Time: 0.208 ms
练习赛中出现了这个问题:
计算第 N 个也是平方数的三角数,对 10006699 取模。(1 ≤ N ≤ 10^18)最多有 10^5 个测试用例。
我发现我可以很容易地用递归关系计算它 Ti = 6Ti-1 - Ti-2 + 2, T0 = 0 和 T1 = 1.
我使用矩阵求幂来获得每个测试用例的大约 O(log N) 性能,但它显然太慢了,因为有 10^5 个测试用例。事实上,即使约束只有 (1 ≤ N ≤ 10^6),这段代码也太慢了,我可以只进行 O(N) 预处理和 O(1)查询。
我应该改变解决问题的方法,还是只优化部分代码?
#include <ios>
#include <iostream>
#include <vector>
#define MOD 10006699
/*
Transformation Matrix:
0 1 0 t[i] t[i+1]
-1 6 1 * t[i+1] = t[i+2]
0 0 1 2 2
*/
std::vector<std::vector<long long int> > multi(std::vector<std::vector<long long int> > a, std::vector<std::vector<long long int> > b)
{
std::vector<std::vector<long long int> > c(3, std::vector<long long int>(3));
for (int i = 0; i < 3; i++)
{
for (int j = 0; j < 3; j++)
{
for (int k = 0; k < 3; k++)
{
c[i][j] += (a[i][k] * b[k][j]) % MOD;
c[i][j] %= MOD;
}
}
}
return c;
}
std::vector<std::vector<long long int> > power(std::vector<std::vector<long long int> > vec, long long int p)
{
if (p == 1) return vec;
else if (p % 2 == 1) return multi(vec, power(vec, p-1));
else
{
std::vector<std::vector<long long int> > x = power(vec, p/2);
return multi(x, x);
}
}
int main()
{
std::ios_base::sync_with_stdio(false);
long long int n;
while (std::cin >> n)
{
if (n == 0) break;
else
{
std::vector<std::vector<long long int> > trans;
long long int ans;
trans.resize(3);
trans[0].push_back(0);
trans[0].push_back(1);
trans[0].push_back(0);
trans[1].push_back(-1);
trans[1].push_back(6);
trans[1].push_back(1);
trans[2].push_back(0);
trans[2].push_back(0);
trans[2].push_back(1);
trans = power(trans, n);
ans = (trans[0][1]%MOD + (2*trans[0][2])%MOD)%MOD;
if (ans < 0) ans += MOD;
std::cout << ans << std::endl;
}
}
}
注意:我删除了我的旧答案,这个更有用
您似乎不太可能为该问题创建比 O(log N) 更好的渐近算法。但是,可以对您当前的代码进行一些修改,这不会改善渐近时间但会提高性能
下面是对您的代码的修改,它产生了相同的答案:
#include <ctime>
#include <ios>
#include <iostream>
#include <vector>
#define MOD 10006699
void power(std::vector<std::vector<long long int> >& vec, long long int p)
{
if (p == 1)
return;
else if (p & 1)
{
std::vector<std::vector<long long int> > copy1 = vec;
power(copy1, p-1);
std::vector<std::vector<long long int> > copy2(3, std::vector<long long int>(3));
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++)
{
for (int k = 0; k < 3; k++)
copy2[i][j] += (vec[i][k] * copy1[k][j]) % MOD;
copy2[i][j] %= MOD;
}
vec = copy2;
return;
}
else
{
power(vec, p/2);
std::vector<std::vector<long long int> > copy(3, std::vector<long long int>(3));
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++)
{
for (int k = 0; k < 3; k++)
copy[i][j] += (vec[i][k] * vec[k][j]) % MOD;
copy[i][j] %= MOD;
}
vec = copy;
return;
}
}
int main()
{
std::ios_base::sync_with_stdio(false);
long long int n;
while (std::cin >> n)
{
std::clock_t start = std::clock();
if (n == 0) break;
std::vector<std::vector<long long int> > trans;
long long int ans;
trans.resize(3);
trans[0].push_back(0);
trans[0].push_back(1);
trans[0].push_back(0);
trans[1].push_back(-1);
trans[1].push_back(6);
trans[1].push_back(1);
trans[2].push_back(0);
trans[2].push_back(0);
trans[2].push_back(1);
power(trans, n);
ans = (trans[0][1]%MOD + (2*trans[0][2])%MOD)%MOD;
if (ans < 0) ans += MOD;
std::cout << "Answer: " << ans << std::endl;
std::cout << "Time: " << (std::clock() - start) / (double)(CLOCKS_PER_SEC / 1000) << " ms" << std::endl;
}
}
区别主要有:
c[i][j] %= MOD;
的代码移动要在k
循环之外- 通过引用传递向量
- 更少的函数调用
如果我在您的 main
的 while 循环中放置与我的代码中相同的计时代码,请将您的文件命名为 "before.cpp",将我的文件命名为 "after.cpp",然后 运行 连续 10 次完全优化然后这些是我的结果:
Alexanders-MBP:Desktop alexandersimes$ g++ before.cpp -O3 -o before
Alexanders-MBP:Desktop alexandersimes$ ./before
1000000000000000000
Answer: 6635296
Time: 0.708 ms
1000000000000000000
Answer: 6635296
Time: 0.542 ms
1000000000000000000
Answer: 6635296
Time: 0.688 ms
1000000000000000000
Answer: 6635296
Time: 0.634 ms
1000000000000000000
Answer: 6635296
Time: 0.626 ms
1000000000000000000
Answer: 6635296
Time: 0.629 ms
1000000000000000000
Answer: 6635296
Time: 0.629 ms
1000000000000000000
Answer: 6635296
Time: 0.629 ms
1000000000000000000
Answer: 6635296
Time: 0.632 ms
1000000000000000000
Answer: 6635296
Time: 0.695 ms
Alexanders-MBP:Desktop alexandersimes$ g++ after.cpp -O3 -o after
Alexanders-MBP:Desktop alexandersimes$ ./after
1000000000000000000
Answer: 6635296
Time: 0.283 ms
1000000000000000000
Answer: 6635296
Time: 0.287 ms
1000000000000000000
Answer: 6635296
Time: 0.27 ms
1000000000000000000
Answer: 6635296
Time: 0.27 ms
1000000000000000000
Answer: 6635296
Time: 0.266 ms
1000000000000000000
Answer: 6635296
Time: 0.265 ms
1000000000000000000
Answer: 6635296
Time: 0.266 ms
1000000000000000000
Answer: 6635296
Time: 0.267 ms
1000000000000000000
Answer: 6635296
Time: 0.21 ms
1000000000000000000
Answer: 6635296
Time: 0.208 ms