如何使用模拟来比较真实结果

How to use mocking to compare real results

我有一个示例 class,它读取保存的 Tensorflow 模型并运行预测

class Sample:
    """Wrap a model object and attach its predictions to the input.

    The wrapped object only needs to expose a ``predict(x)`` method; all
    this class does is create a new ``tf_predictions`` entry on the input.
    """

    def __init__(self, tf_model):
        """Keep a reference to ``tf_model`` for later ``predict`` calls."""
        self.tf_model = tf_model

    def tf_process(self, x):
        """Store ``self.tf_model.predict(x)`` under ``x["tf_predictions"]``.

        ``x`` is modified in place and the very same object is returned.
        """
        # some other preprocessing
        model_output = self.tf_model.predict(x)
        x["tf_predictions"] = model_output
        return x

    def predict(self, x):
        """Return whatever ``tf_process(x)`` returns."""
        return self.tf_process(x)

无需加载模型的单元测试代码:

import unittest
import pandas as pd
from unittest import TestCase, mock
from my_package.sample_model import Sample

class TestSample(unittest.TestCase):
    """Unit test for ``Sample.predict`` that never loads a real model."""

    def test_predict(self):
        """``predict`` must hand back exactly what ``tf_process`` produced."""
        # A bare string is not valid DataFrame data, so the hard-coded value
        # is wrapped in a column.
        expected = pd.DataFrame({"tf_predictions": ["hardcoded_value"]})
        # patch.object replaces the attribute on the imported class itself;
        # a string target would have to be a full importable dotted path,
        # which "Sample.tf_process" is not.
        with mock.patch.object(Sample, "tf_process", return_value=expected) as process:
            # The model is never touched because tf_process is mocked out.
            sample = Sample(tf_model=None)
            actual = sample.predict(pd.DataFrame({"feature": [1]}))
        # to check: process.return_value == output of Sample.predict()
        process.assert_called_once()
        self.assertIs(actual, expected)
         

目标:

我想将 process.return_value 与 Sample 中 predict 方法的输出进行比较,但为此我仍然必须加载模型。我不明白这里的 mock 有什么用,因为无论如何我都必须调用 predict 方法,才能将其结果与 process.return_value 进行比较。任何建议都会有所帮助。

我认为在你的情况下最好使用 Mock()。你可以不借助 patch() 就写出简洁清晰的测试,只需准备好初始化所需的全部模拟实例即可。

from unittest.mock import Mock


class TestSample(TestCase):
    """Test ``Sample.predict`` with a ``Mock`` standing in for the model."""

    def test_predict(self):
        """The mocked predictions end up in the ``tf_predictions`` column."""
        # let's say predict() will return something... just an example
        tf = Mock(predict=Mock(return_value=(10, 20, 30)))
        df = pd.DataFrame({'test_col': (1, 2, 3)})
        df = Sample(tf).predict(df)
        # check column; assertIn reports the missing member and the container
        # on failure instead of a bare "False is not true"
        self.assertIn('tf_predictions', df.columns)
        # or check records
        self.assertEqual(
            df.to_dict('records'),
            [
                {'test_col': 1, 'tf_predictions': 10},
                {'test_col': 2, 'tf_predictions': 20},
                {'test_col': 3, 'tf_predictions': 30},
            ],
        )

当您需要对复杂服务进行测试时,它也很有帮助。举个例子:

class ClusterService:
    """Produce a one-line text summary of a cluster from injected services."""

    def __init__(self, service_a, service_b, service_c) -> None:
        """Keep references to the three collaborating services."""
        self._service_a, self._service_b, self._service_c = (
            service_a,
            service_b,
            service_c,
        )
        # service_d, ... etc

    def get_cluster_info(self, name: str):
        """Notify service A about ``name`` and return a summary string.

        The summary lists the cluster name, the ``cpu`` and ``ram`` entries
        from ``service_b.get_free_resources()`` and the ``usd`` entry from
        ``service_c.get_price(name)`` rounded to two decimals. Fields are
        joined with ``' ,'`` (space before the comma).
        """
        self._service_a.send_something_to_somewhere(name)
        # Both lookups happen up front, before any field is formatted.
        free_resources = self._service_b.get_free_resources()
        current_price = self._service_c.get_price(name)

        parts = [': '.join(('Cluster name', name))]
        parts.append('CPU: ' + str(free_resources['cpu']))
        parts.append('RAM: ' + str(free_resources['ram']))
        parts.append('Price: {} $'.format(round(current_price['usd'], 2)))
        return ' ,'.join(parts)


class TestClusterService(TestCase):
    """Exercise ``ClusterService`` with every collaborator replaced by a Mock."""

    def test_get_cluster_info(self):
        """The summary string reflects the values the mocked services return."""
        resources = Mock()
        resources.get_free_resources.return_value = {'cpu': 100, 'ram': 200}
        pricing = Mock()
        pricing.get_price.return_value = {'usd': 101.4999}

        cluster = ClusterService(
            service_a=Mock(),
            service_b=resources,
            service_c=pricing,
        )
        summary = cluster.get_cluster_info('best name')

        expected = 'Cluster name: best name ,CPU: 100 ,RAM: 200 ,Price: 101.5 $'
        self.assertEqual(summary, expected)