参数化和 "function template partial specialization is not allowed"
Parameterization and "function template partial specialization is not allowed"
这是原始问题 What is the function parameter equivalent of constexpr? 的延续。我们正在尝试加速一些在 Clang 和 VC++ 下执行移位和旋转的代码。Clang 和 VC++ 没有很好地优化这些代码,因为它们把移位/旋转的位数视为变量(即不是 constexpr)。
当我尝试参数化移位量和字长时,结果为:
$ g++ -std=c++11 -march=native test.cxx -o test.exe
test.cxx:13:10: error: function template partial specialization is not allowed
uint32_t LeftRotate<uint32_t, unsigned int>(uint32_t v)
^ ~~~~~~~~~~~~~~~~~~~~~~~~
test.cxx:21:10: error: function template partial specialization is not allowed
uint64_t LeftRotate<uint64_t, unsigned int>(uint64_t v)
^ ~~~~~~~~~~~~~~~~~~~~~~~~
2 errors generated.
这是测试程序。它比实际需要的稍大一些,以便大家看到我们需要同时处理 uint32_t 和 uint64_t(更不用说 uint8_t、uint16_t 和其他类型)。
$ cat test.cxx
#include <iostream>
#include <stdint.h>
/// Rotates the bits of `v` left by the compile-time constant `R`.
///
/// Portable fallback written with plain shifts, for any unsigned integer
/// type `T` whose bit width (sizeof(T)*8) is a power of two.
///
/// @tparam T unsigned integer word type of the value being rotated.
/// @tparam R rotate distance in bits; it is reduced modulo the width of `T`.
/// @param v value to rotate. It is taken as `T` (not `unsigned int`) so that
///          words wider than `unsigned int` are not truncated before the rotate.
/// @return `v` rotated left by `R` bits, converted back to `T`.
template<typename T, unsigned int R>
inline T LeftRotate(T v)
{
    static const unsigned int THIS_SIZE = sizeof(T)*8;
    static const unsigned int MASK = THIS_SIZE-1;
    // (-R & MASK) is the complementary right-shift distance, (THIS_SIZE - R) mod THIS_SIZE.
    // Masking both distances keeps every shift strictly below the word width,
    // including the R == 0 and R == THIS_SIZE cases.
    return T((v<<(R&MASK))|(v>>(-R&MASK)));
}
// NOTE: this definition is ill-formed; it is the one the compiler rejects with
// "function template partial specialization is not allowed" in the output shown above.
// The first template parameter is an unnamed non-type parameter of type uint32_t, and the
// <uint32_t, unsigned int> list after the function name attempts to partially specialize
// the primary LeftRotate function template.
template<uint32_t, unsigned int R>
uint32_t LeftRotate<uint32_t, unsigned int>(uint32_t v)
{
// GCC-style extended asm: x86 "roll" rotates v in place (v is a read-write operand).
// R is passed as an immediate through the "I" constraint (per GCC's x86 constraints: an integer constant in 0..31).
__asm__ ("roll %1, %0" : "+mq" (v) : "I" ((unsigned char)R));
return v;
}
#if __x86_64__
// NOTE: this definition is ill-formed; it is the second one the compiler rejects with
// "function template partial specialization is not allowed" in the output shown above.
// The first template parameter is an unnamed non-type parameter of type uint64_t, and the
// <uint64_t, unsigned int> list after the function name attempts to partially specialize
// the primary LeftRotate function template.
template<uint64_t, unsigned int R>
uint64_t LeftRotate<uint64_t, unsigned int>(uint64_t v)
{
// GCC-style extended asm: x86-64 "rolq" rotates v in place (v is a read-write operand).
// R is passed as an immediate through the "J" constraint (per GCC's x86 constraints: an integer constant in 0..63).
__asm__ ("rolq %1, %0" : "+mq" (v) : "J" ((unsigned char)R));
return v;
}
#endif
// Test driver: rotates the argument count left by two bits as a 32-bit word and prints it.
// argc is used as the input so the value is not known at compile time.
int main(int argc, char* argv[])
{
    const uint32_t input = (uint32_t)argc;
    const uint32_t rotated = LeftRotate<uint32_t, 2>(input);

    std::cout << "Rotated: " << rotated << std::endl;
    return 0;
}
随着我尝试用不同的方式实现旋转,我遇到过好几种不同的错误消息。其他错误消息包括 no function template matches function template specialization...。使用 template <> 似乎产生了最难以理解的结果。
我如何参数化偏移量以希望 Clang 和 VC++ 将按预期优化函数调用?
使用类模板,而不是函数模板(类模板允许偏特化,函数模板不允许):
#include <iostream>
#include <stdint.h>
// Primary class template: portable left-rotate of a T by the compile-time distance R.
// Being a class template, it can be partially specialized per word type.
template<typename T, unsigned int R>
struct LeftRotate {
    // Returns v rotated left by R bits, converted back to T.
    static inline T compute(T v)
    {
        static const unsigned int WORD_BITS = sizeof(T)*8;
        static const unsigned int SHIFT_MASK = WORD_BITS-1;
        // Complementary right-shift distance: (WORD_BITS - R) mod WORD_BITS.
        static const unsigned int RIGHT_SHIFT = -R & SHIFT_MASK;
        return T((v << R) | (v >> RIGHT_SHIFT));
    }
};
// Partial specialization of the LeftRotate class template for T = uint32_t:
// the rotate distance R stays a template parameter, the word type is fixed.
template<unsigned int R>
struct LeftRotate<uint32_t, R> {
// Returns v rotated by the x86 "roll" instruction with R as the count.
static inline uint32_t compute(uint32_t v)
{
// GCC-style extended asm: v is a read-write operand updated in place.
// R is passed as an immediate through the "I" constraint (per GCC's x86 constraints: an integer constant in 0..31).
__asm__ ("roll %1, %0" : "+mq" (v) : "I" ((unsigned char)R));
return v;
}
};
#if __x86_64__
// Partial specialization of the LeftRotate class template for T = uint64_t:
// the rotate distance R stays a template parameter, the word type is fixed.
// The surrounding #if compiles it only when __x86_64__ is defined.
template<unsigned int R>
struct LeftRotate<uint64_t, R> {
// Returns v rotated by the x86-64 "rolq" instruction with R as the count.
static inline uint64_t compute(uint64_t v)
{
// GCC-style extended asm: v is a read-write operand updated in place.
// R is passed as an immediate through the "J" constraint (per GCC's x86 constraints: an integer constant in 0..63).
__asm__ ("rolq %1, %0" : "+mq" (v) : "J" ((unsigned char)R));
return v;
}
};
#endif
// Demo driver: rotates the argument count left by two bits through the
// class-template form and prints the result.
int main(int argc, char* argv[])
{
    const uint32_t input = (uint32_t)argc;
    const uint32_t rotated = LeftRotate<uint32_t, 2>::compute(input);

    std::cout << "Rotated: " << rotated << std::endl;
    return 0;
}
另一种方法是将模板常量转换为编译器可以优化掉的常量参数。
第 1 步:定义 rotate_distance 这一概念:
// Tag type whose template argument R is the rotate distance, so the distance
// can be passed as the type of an ordinary function argument.
template<unsigned int R>
using rotate_distance = std::integral_constant<unsigned int, R>;
第 2 步:根据采用此类参数的函数的重载定义旋转函数:
template<unsigned int R>
uint32_t LeftRotate(uint32_t v, rotate_distance<R>)
现在,如果愿意,我们可以直接调用 LeftRotate(x, rotate_distance<y>()),这似乎很好地表达了意图;
或者,我们也可以基于这种形式,重新定义带两个模板参数的形式:
template<unsigned int Dist, class T>
T LeftRotate(T t)
{
return LeftRotate(t, rotate_distance<Dist>());
}
完整演示:
#include <iostream>
#include <stdint.h>
#include <utility>
// Tag type whose template argument R is the rotate distance, so the distance
// can be passed as the type of an ordinary function argument.
template<unsigned int R> using rotate_distance = std::integral_constant<unsigned int, R>;

/// Generic, portable left-rotate selected when no overload for a specific word type exists.
///
/// The value parameter is `T v` so that `T` is deduced from the argument. With
/// `unsigned int v`, `T` could not be deduced and this overload could never be
/// chosen by a call of the form `LeftRotate(x, rotate_distance<R>())`.
/// Overloads that name a concrete word type are more specialized and still win
/// overload resolution for that type.
///
/// @tparam T unsigned integer word type whose bit width (sizeof(T)*8) is a power of two.
/// @tparam R rotate distance in bits; it is reduced modulo the width of `T`.
/// @param v value to rotate.
/// @return `v` rotated left by `R` bits, converted back to `T`.
template<typename T, unsigned int R>
inline T LeftRotate(T v, rotate_distance<R>)
{
    static const unsigned int THIS_SIZE = sizeof(T)*8;
    static const unsigned int MASK = THIS_SIZE-1;
    // (-R & MASK) is the complementary right-shift distance, (THIS_SIZE - R) mod THIS_SIZE.
    // Masking both distances keeps every shift strictly below the word width.
    return T((v<<(R&MASK))|(v>>(-R&MASK)));
}
// Overload for 32-bit words: the rotate distance arrives as the type of the
// unnamed rotate_distance<R> tag argument, so R is a compile-time constant.
template<unsigned int R>
uint32_t LeftRotate(uint32_t v, rotate_distance<R>)
{
// GCC-style extended asm: x86 "roll" rotates v in place (v is a read-write operand).
// R is passed as an immediate through the "I" constraint (per GCC's x86 constraints: an integer constant in 0..31).
__asm__ ("roll %1, %0" : "+mq" (v) : "I" ((unsigned char)R));
return v;
}
#if __x86_64__
// Overload for 64-bit words: the rotate distance arrives as the type of the
// unnamed rotate_distance<R> tag argument, so R is a compile-time constant.
// The surrounding #if compiles it only when __x86_64__ is defined.
template<unsigned int R>
uint64_t LeftRotate(uint64_t v, rotate_distance<R>)
{
// GCC-style extended asm: x86-64 "rolq" rotates v in place (v is a read-write operand).
// R is passed as an immediate through the "J" constraint (per GCC's x86 constraints: an integer constant in 0..63).
__asm__ ("rolq %1, %0" : "+mq" (v) : "J" ((unsigned char)R));
return v;
}
#endif
template<unsigned int Dist, class T>
T LeftRotate(T t)
{
return LeftRotate(t, rotate_distance<Dist>());
}
// Demo driver: rotates the argument count left by two bits through each call
// form (tagged 32-bit, tagged 64-bit, and the LeftRotate<2>(x) shorthand).
int main(int argc, char* argv[])
{
    const uint32_t narrow = (uint32_t)argc;
    const uint64_t wide = (uint64_t)argc;

    const uint32_t tagged32 = LeftRotate(narrow, rotate_distance<2>());
    std::cout << "Rotated: " << tagged32 << std::endl;

    const uint64_t tagged64 = LeftRotate(wide, rotate_distance<2>());
    std::cout << "Rotated: " << tagged64 << std::endl;

    const uint64_t shorthand64 = LeftRotate<2>(wide);
    std::cout << "Rotated: " << shorthand64 << std::endl;

    return 0;
}
C++11 之前的编译器
在 c++11 之前我们没有 std::integral_constant,所以我们必须制作自己的版本。
为了我们的目的,这就足够了:
// Pre-C++11 stand-in for the integral_constant-based tag: an empty type that
// is distinct for every R and carries the rotate distance in its type only.
template<unsigned int R>
struct rotate_distance
{
};
完整证明 - 注意优化的效果:
这是原始问题 What is the function parameter equivalent of constexpr? 的延续。我们正在尝试加速一些在 Clang 和 VC++ 下执行移位和旋转的代码。Clang 和 VC++ 没有很好地优化这些代码,因为它们把移位/旋转的位数视为变量(即不是 constexpr)。
当我尝试参数化移位量和字长时,结果为:
$ g++ -std=c++11 -march=native test.cxx -o test.exe
test.cxx:13:10: error: function template partial specialization is not allowed
uint32_t LeftRotate<uint32_t, unsigned int>(uint32_t v)
^ ~~~~~~~~~~~~~~~~~~~~~~~~
test.cxx:21:10: error: function template partial specialization is not allowed
uint64_t LeftRotate<uint64_t, unsigned int>(uint64_t v)
^ ~~~~~~~~~~~~~~~~~~~~~~~~
2 errors generated.
这是测试程序。它比实际需要的稍大一些,以便大家看到我们需要同时处理 uint32_t 和 uint64_t(更不用说 uint8_t、uint16_t 和其他类型)。
$ cat test.cxx
#include <iostream>
#include <stdint.h>
/// Rotates the bits of `v` left by the compile-time constant `R`.
///
/// Portable fallback written with plain shifts, for any unsigned integer
/// type `T` whose bit width (sizeof(T)*8) is a power of two.
///
/// @tparam T unsigned integer word type of the value being rotated.
/// @tparam R rotate distance in bits; it is reduced modulo the width of `T`.
/// @param v value to rotate. It is taken as `T` (not `unsigned int`) so that
///          words wider than `unsigned int` are not truncated before the rotate.
/// @return `v` rotated left by `R` bits, converted back to `T`.
template<typename T, unsigned int R>
inline T LeftRotate(T v)
{
    static const unsigned int THIS_SIZE = sizeof(T)*8;
    static const unsigned int MASK = THIS_SIZE-1;
    // (-R & MASK) is the complementary right-shift distance, (THIS_SIZE - R) mod THIS_SIZE.
    // Masking both distances keeps every shift strictly below the word width,
    // including the R == 0 and R == THIS_SIZE cases.
    return T((v<<(R&MASK))|(v>>(-R&MASK)));
}
// NOTE: this definition is ill-formed; it is the one the compiler rejects with
// "function template partial specialization is not allowed" in the output shown above.
// The first template parameter is an unnamed non-type parameter of type uint32_t, and the
// <uint32_t, unsigned int> list after the function name attempts to partially specialize
// the primary LeftRotate function template.
template<uint32_t, unsigned int R>
uint32_t LeftRotate<uint32_t, unsigned int>(uint32_t v)
{
// GCC-style extended asm: x86 "roll" rotates v in place (v is a read-write operand).
// R is passed as an immediate through the "I" constraint (per GCC's x86 constraints: an integer constant in 0..31).
__asm__ ("roll %1, %0" : "+mq" (v) : "I" ((unsigned char)R));
return v;
}
#if __x86_64__
// NOTE: this definition is ill-formed; it is the second one the compiler rejects with
// "function template partial specialization is not allowed" in the output shown above.
// The first template parameter is an unnamed non-type parameter of type uint64_t, and the
// <uint64_t, unsigned int> list after the function name attempts to partially specialize
// the primary LeftRotate function template.
template<uint64_t, unsigned int R>
uint64_t LeftRotate<uint64_t, unsigned int>(uint64_t v)
{
// GCC-style extended asm: x86-64 "rolq" rotates v in place (v is a read-write operand).
// R is passed as an immediate through the "J" constraint (per GCC's x86 constraints: an integer constant in 0..63).
__asm__ ("rolq %1, %0" : "+mq" (v) : "J" ((unsigned char)R));
return v;
}
#endif
// Test driver: rotates the argument count left by two bits as a 32-bit word and prints it.
// argc is used as the input so the value is not known at compile time.
int main(int argc, char* argv[])
{
    const uint32_t input = (uint32_t)argc;
    const uint32_t rotated = LeftRotate<uint32_t, 2>(input);

    std::cout << "Rotated: " << rotated << std::endl;
    return 0;
}
随着我尝试用不同的方式实现旋转,我遇到过好几种不同的错误消息。其他错误消息包括 no function template matches function template specialization...。使用 template <> 似乎产生了最难以理解的结果。
我如何参数化偏移量以希望 Clang 和 VC++ 将按预期优化函数调用?
使用类模板,而不是函数模板(类模板允许偏特化,函数模板不允许):
#include <iostream>
#include <stdint.h>
// Primary class template: portable left-rotate of a T by the compile-time distance R.
// Being a class template, it can be partially specialized per word type.
template<typename T, unsigned int R>
struct LeftRotate {
    // Returns v rotated left by R bits, converted back to T.
    static inline T compute(T v)
    {
        static const unsigned int WORD_BITS = sizeof(T)*8;
        static const unsigned int SHIFT_MASK = WORD_BITS-1;
        // Complementary right-shift distance: (WORD_BITS - R) mod WORD_BITS.
        static const unsigned int RIGHT_SHIFT = -R & SHIFT_MASK;
        return T((v << R) | (v >> RIGHT_SHIFT));
    }
};
// Partial specialization of the LeftRotate class template for T = uint32_t:
// the rotate distance R stays a template parameter, the word type is fixed.
template<unsigned int R>
struct LeftRotate<uint32_t, R> {
// Returns v rotated by the x86 "roll" instruction with R as the count.
static inline uint32_t compute(uint32_t v)
{
// GCC-style extended asm: v is a read-write operand updated in place.
// R is passed as an immediate through the "I" constraint (per GCC's x86 constraints: an integer constant in 0..31).
__asm__ ("roll %1, %0" : "+mq" (v) : "I" ((unsigned char)R));
return v;
}
};
#if __x86_64__
// Partial specialization of the LeftRotate class template for T = uint64_t:
// the rotate distance R stays a template parameter, the word type is fixed.
// The surrounding #if compiles it only when __x86_64__ is defined.
template<unsigned int R>
struct LeftRotate<uint64_t, R> {
// Returns v rotated by the x86-64 "rolq" instruction with R as the count.
static inline uint64_t compute(uint64_t v)
{
// GCC-style extended asm: v is a read-write operand updated in place.
// R is passed as an immediate through the "J" constraint (per GCC's x86 constraints: an integer constant in 0..63).
__asm__ ("rolq %1, %0" : "+mq" (v) : "J" ((unsigned char)R));
return v;
}
};
#endif
// Demo driver: rotates the argument count left by two bits through the
// class-template form and prints the result.
int main(int argc, char* argv[])
{
    const uint32_t input = (uint32_t)argc;
    const uint32_t rotated = LeftRotate<uint32_t, 2>::compute(input);

    std::cout << "Rotated: " << rotated << std::endl;
    return 0;
}
另一种方法是将模板常量转换为编译器可以优化掉的常量参数。
第 1 步:定义 rotate_distance 这一概念:
// Tag type whose template argument R is the rotate distance, so the distance
// can be passed as the type of an ordinary function argument.
template<unsigned int R>
using rotate_distance = std::integral_constant<unsigned int, R>;
第 2 步:根据采用此类参数的函数的重载定义旋转函数:
template<unsigned int R>
uint32_t LeftRotate(uint32_t v, rotate_distance<R>)
现在,如果愿意,我们可以直接调用 LeftRotate(x, rotate_distance<y>()),这似乎很好地表达了意图;
或者,我们也可以基于这种形式,重新定义带两个模板参数的形式:
template<unsigned int Dist, class T>
T LeftRotate(T t)
{
return LeftRotate(t, rotate_distance<Dist>());
}
完整演示:
#include <iostream>
#include <stdint.h>
#include <utility>
// Tag type whose template argument R is the rotate distance, so the distance
// can be passed as the type of an ordinary function argument.
template<unsigned int R> using rotate_distance = std::integral_constant<unsigned int, R>;

/// Generic, portable left-rotate selected when no overload for a specific word type exists.
///
/// The value parameter is `T v` so that `T` is deduced from the argument. With
/// `unsigned int v`, `T` could not be deduced and this overload could never be
/// chosen by a call of the form `LeftRotate(x, rotate_distance<R>())`.
/// Overloads that name a concrete word type are more specialized and still win
/// overload resolution for that type.
///
/// @tparam T unsigned integer word type whose bit width (sizeof(T)*8) is a power of two.
/// @tparam R rotate distance in bits; it is reduced modulo the width of `T`.
/// @param v value to rotate.
/// @return `v` rotated left by `R` bits, converted back to `T`.
template<typename T, unsigned int R>
inline T LeftRotate(T v, rotate_distance<R>)
{
    static const unsigned int THIS_SIZE = sizeof(T)*8;
    static const unsigned int MASK = THIS_SIZE-1;
    // (-R & MASK) is the complementary right-shift distance, (THIS_SIZE - R) mod THIS_SIZE.
    // Masking both distances keeps every shift strictly below the word width.
    return T((v<<(R&MASK))|(v>>(-R&MASK)));
}
// Overload for 32-bit words: the rotate distance arrives as the type of the
// unnamed rotate_distance<R> tag argument, so R is a compile-time constant.
template<unsigned int R>
uint32_t LeftRotate(uint32_t v, rotate_distance<R>)
{
// GCC-style extended asm: x86 "roll" rotates v in place (v is a read-write operand).
// R is passed as an immediate through the "I" constraint (per GCC's x86 constraints: an integer constant in 0..31).
__asm__ ("roll %1, %0" : "+mq" (v) : "I" ((unsigned char)R));
return v;
}
#if __x86_64__
// Overload for 64-bit words: the rotate distance arrives as the type of the
// unnamed rotate_distance<R> tag argument, so R is a compile-time constant.
// The surrounding #if compiles it only when __x86_64__ is defined.
template<unsigned int R>
uint64_t LeftRotate(uint64_t v, rotate_distance<R>)
{
// GCC-style extended asm: x86-64 "rolq" rotates v in place (v is a read-write operand).
// R is passed as an immediate through the "J" constraint (per GCC's x86 constraints: an integer constant in 0..63).
__asm__ ("rolq %1, %0" : "+mq" (v) : "J" ((unsigned char)R));
return v;
}
#endif
template<unsigned int Dist, class T>
T LeftRotate(T t)
{
return LeftRotate(t, rotate_distance<Dist>());
}
// Demo driver: rotates the argument count left by two bits through each call
// form (tagged 32-bit, tagged 64-bit, and the LeftRotate<2>(x) shorthand).
int main(int argc, char* argv[])
{
    const uint32_t narrow = (uint32_t)argc;
    const uint64_t wide = (uint64_t)argc;

    const uint32_t tagged32 = LeftRotate(narrow, rotate_distance<2>());
    std::cout << "Rotated: " << tagged32 << std::endl;

    const uint64_t tagged64 = LeftRotate(wide, rotate_distance<2>());
    std::cout << "Rotated: " << tagged64 << std::endl;

    const uint64_t shorthand64 = LeftRotate<2>(wide);
    std::cout << "Rotated: " << shorthand64 << std::endl;

    return 0;
}
C++11 之前的编译器
在 c++11 之前我们没有 std::integral_constant,所以我们必须制作自己的版本。
为了我们的目的,这就足够了:
// Pre-C++11 stand-in for the integral_constant-based tag: an empty type that
// is distinct for every R and carries the rotate distance in its type only.
template<unsigned int R>
struct rotate_distance
{
};
完整证明 - 注意优化的效果: