为什么函数指针比虚方法执行得更好

Question

我用下面这段代码做了一些性能分析：

#include "Timer.h"
#include <iostream>

// Selects which graphics API a render backend should be set up for.
// The explicit values below are the same ones the compiler would assign implicitly.
enum class BackendAPI {
    B_API_NONE = 0,
    B_API_VULKAN = 1,
    B_API_DIRECTX_12 = 2,
    B_API_WEB_GPU = 3,
};

// "Functional" flavour of the render backend: a plain table of function pointers
// that is filled in once for the selected API.
namespace Functional
{
    // Untyped handle returned by CreateVertexBuffer.
    using VertexBufferHandle = void*;

    namespace Vulkan
    {
        struct VulkanVertexBuffer {};

        // Stub implementation: the requested size is ignored and a null handle is returned.
        VertexBufferHandle CreateVertexBuffer(size_t /*size*/)
        {
            return nullptr;
        }

        // Empty placeholder entry points.
        // Declared with standard `inline` instead of the MSVC-only `__forceinline`:
        // RenderBackend stores their addresses and calls them through function
        // pointers, so a forced-inline request on the definition gains nothing and
        // only prevents the code from compiling on other toolchains.
        inline void Hello() {}
        inline void Bello() {}
        inline void Mello() {}
    }

    // Dispatch table for backend entry points.
    // Every pointer starts out as nullptr and stays that way until
    // SetupBackendMethods() is called with an API that has an implementation.
    class RenderBackend {
    public:
        RenderBackend() = default;
        ~RenderBackend() = default;

        // Points the function-pointer members at the implementation for `api`.
        // Only B_API_VULKAN has an implementation here; for every other value the
        // pointers are left exactly as they were (nullptr on a fresh object), so
        // callers must not invoke them in that case.
        void SetupBackendMethods(BackendAPI api)
        {
            switch (api)
            {
            case BackendAPI::B_API_VULKAN:
                CreateVertexBuffer = Vulkan::CreateVertexBuffer;
                Hello = Vulkan::Hello;
                Bello = Vulkan::Bello;
                Mello = Vulkan::Mello;
                break;
            case BackendAPI::B_API_DIRECTX_12:
            case BackendAPI::B_API_WEB_GPU:
            default:
                // No implementation available: leave the table untouched.
                break;
            }
        }

        VertexBufferHandle (*CreateVertexBuffer)(size_t size) = nullptr;
        void (*Hello)() = nullptr;
        void (*Bello)() = nullptr;
        void (*Mello)() = nullptr;
    };
}

// "Object-oriented" flavour of the render backend: an abstract interface with one
// concrete implementation, selected at run time by CreateBackend().
namespace ObjectOriented
{
    struct VertexBuffer {};

    // Abstract backend interface; every operation is dispatched virtually.
    class RenderBackend {
    public:
        RenderBackend() = default;
        virtual ~RenderBackend() = default;

        virtual VertexBuffer* CreateVertexBuffer(size_t size) = 0;
        virtual void Hello() = 0;
        virtual void Bello() = 0;
        virtual void Mello() = 0;
    };

    // Vulkan implementation of RenderBackend. All operations are empty stubs.
    // The overrides carry no `__forceinline`: that keyword is MSVC-only, and member
    // functions defined inside the class body are implicitly inline already.
    class VulkanBackend final : public RenderBackend {
        struct VulkanVertexBuffer : public VertexBuffer {};

    public:
        VulkanBackend() = default;
        ~VulkanBackend() override = default;

        // Stub: the requested size is ignored and nullptr is returned.
        VertexBuffer* CreateVertexBuffer(size_t /*size*/) override
        {
            return nullptr;
        }

        void Hello() override {}
        void Bello() override {}
        void Mello() override {}
    };

    // Creates the backend for `api`.
    // Returns a heap-allocated object that the caller owns and must `delete`, or
    // nullptr when `api` has no implementation (everything except B_API_VULKAN).
    RenderBackend* CreateBackend(BackendAPI api)
    {
        switch (api)
        {
        case BackendAPI::B_API_VULKAN:
            return new VulkanBackend;
        case BackendAPI::B_API_DIRECTX_12:
        case BackendAPI::B_API_WEB_GPU:
        default:
            break;
        }

        return nullptr;
    }
}

int main()
{
    constexpr int maxItr = 1000000;

    for (int i = 0; i < 100; i++)
    {
        int counter = maxItr;
        Timer t;

        auto pBackend = ObjectOriented::CreateBackend(BackendAPI::B_API_VULKAN);
        while (counter--)
        {
            pBackend->Hello();
            pBackend->Bello();
            pBackend->Mello();

            auto pRef = pBackend->CreateVertexBuffer(100);
        }

        delete pBackend;
    }

    std::cout << "\n";

    for (int i = 0; i < 100; i++)
    {
        int counter = maxItr;
        Timer t;

        {
            Functional::RenderBackend backend;
            backend.SetupBackendMethods(BackendAPI::B_API_VULKAN);
            while (counter--)
            {
                backend.Hello();
                backend.Bello();
                backend.Mello();

                auto pRef = backend.CreateVertexBuffer(100);
            }
        }
    }
}

其中 `Timer.h` 的内容如下：

#pragma once
#include <chrono>
#include <cstdio>

/**
 * Timer class.
 * Measures the time from the creation of the object until its destruction and
 * prints the result, in microseconds, to standard output.
 */
class Timer {
public:
    /**
     * Default constructor.
     * Records the time point at which the object is created.
     */
    Timer() : startPoint(std::chrono::steady_clock::now()) {}

    /**
     * Destructor.
     * Prints the number of microseconds elapsed since construction.
     */
    ~Timer()
    {
        // steady_clock is monotonic, so the interval cannot be distorted by
        // adjustments to the system clock.
        const auto endPoint = std::chrono::steady_clock::now();

        // Subtract first, then truncate once to whole microseconds.
        const auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(endPoint - startPoint);

        // %lld with long long is standard; the former %I64d / __int64 pair only
        // works with Microsoft's toolchain.
        std::printf("Time taken: %15lld\n", static_cast<long long>(elapsed.count()));
    }

private:
    std::chrono::steady_clock::time_point startPoint; // The start time point.
};

将输出结果绘制成图表后（使用 Visual Studio 2019 的 Release 配置编译），结果如下：

注意：上面的代码用于分析在构建大型库时，函数式方法与面向对象方法之间的性能差异。性能分析共运行应用程序 5 次，每次运行前都重新编译源代码，每次运行包含 100 次迭代。测试按两种顺序进行（先面向对象、后函数式，以及反过来），但性能结果大致相同。

我知道继承会慢一些，因为它必须在运行时从虚函数表（V-Table）中解析出函数指针。但我不明白的是：如果我没理解错，函数指针同样是在运行时解析的，也就是说程序在执行之前也需要先获取函数代码。

所以我的问题是，

为什么函数指针的性能比虚方法好一些？
为什么虚方法的性能在某些时候会下降，而函数指针却相对稳定？

谢谢！

Answer 1

每次调用方法时（基本上）都需要访问虚方法查找表，这会给每次调用额外增加一层间接寻址。

当你初始化一个后端并把函数指针保存下来时，实际上就去掉了这层额外的间接寻址：它在一开始就被一次性预先计算好了。

因此，直接函数指针带来的性能提升不足为奇。

为什么函数指针比虚方法执行得更好

Why do function pointers perform better than virtual methods

c++

performance

profiling

virtual-functions

function-pointers