如何使用模拟来比较真实结果
How to use mocking to compare real results
我有一个示例类,它接收一个已保存的 TensorFlow 模型并用它运行预测:
class Sample():
    """Wrap a model object and add its predictions to the input as a new column.

    The only thing this class requires from ``tf_model`` is a ``predict``
    method that accepts the same object later passed to :meth:`predict`.
    """

    ## all it does is creates a new column with predictions
    def __init__(self, tf_model):
        """Keep a reference to ``tf_model``; nothing is called on it here."""
        self.tf_model = tf_model

    def tf_process(self, x):
        """Store ``self.tf_model.predict(x)`` under ``x["tf_predictions"]``.

        ``x`` is modified in place and the very same object is returned, so
        the caller's object gains the new column as well.
        """
        ##some other preprocessing
        x["tf_predictions"] = self.tf_model.predict(x)
        return x

    def predict(self, x):
        """Return ``x`` after :meth:`tf_process` has added the predictions."""
        return self.tf_process(x)
无需加载模型的单元测试代码:
import unittest
import pandas as pd
from unittest import TestCase, mock
from my_package.sample_model import Sample
class TestSample(unittest.TestCase):
    """Unit test for ``Sample.predict`` that never loads a real model."""

    def test_predict(self):
        """``predict`` must hand back exactly what ``tf_process`` produced."""
        # A bare string is not valid DataFrame input (pandas raises
        # ValueError), so the hard-coded value is wrapped in a column.
        expected = pd.DataFrame({"tf_predictions": ["hardcoded_value"]})
        # mock.patch() needs an importable dotted path such as
        # "my_package.sample_model.Sample.tf_process"; "Sample.tf_process"
        # would be resolved as a module named "Sample". patch.object on the
        # already-imported class avoids spelling the path at all.
        with mock.patch.object(Sample, "tf_process", return_value=expected) as process:
            # tf_model is never touched because tf_process is replaced.
            result = Sample(tf_model=None).predict(pd.DataFrame())
        # to check: process.return_value == output of Sample.predict()
        process.assert_called_once()
        self.assertIs(result, expected)
目标:
我想将 process.return_value 与 Sample 中 predict 方法的输出进行比较,但为此我仍然必须加载模型。我不明白这里的 mock 有什么用,因为无论如何我都必须调用 predict 方法,才能把它的输出与 process.return_value 进行比较。任何建议都会有所帮助。
我认为在你的情况下最好使用 Mock()。你可以不借助 patch() 就写出简洁清晰的测试:只需准备好初始化所需的全部模拟实例即可。
from unittest.mock import Mock
class TestSample(TestCase):
    """Exercise ``Sample.predict`` with a ``Mock`` standing in for the model."""

    def test_predict(self):
        """The returned frame carries the values produced by the fake model."""
        # let's say predict() will return something... just an example
        tf = Mock(predict=Mock(return_value=(10, 20, 30)))
        df = pd.DataFrame({'test_col': (1, 2, 3)})
        df = Sample(tf).predict(df)
        # check column (assertIn reports the container when it fails)
        self.assertIn('tf_predictions', df.columns)
        # or check records
        self.assertEqual(
            df.to_dict('records'),
            [
                {'test_col': 1, 'tf_predictions': 10},
                {'test_col': 2, 'tf_predictions': 20},
                {'test_col': 3, 'tf_predictions': 30},
            ]
        )
当您需要对复杂服务进行测试时,它也很有帮助。举个例子:
class ClusterService:
    """Produce a one-line text summary of a cluster from three collaborators."""

    def __init__(self, service_a, service_b, service_c) -> None:
        """Store the collaborating services; none of them is called here."""
        self._service_a = service_a
        self._service_b = service_b
        self._service_c = service_c
        # service_d, ... etc

    def get_cluster_info(self, name: str) -> str:
        """Return a summary line with the cluster name, CPU, RAM and price.

        Calls, in order: ``service_a.send_something_to_somewhere(name)``,
        ``service_b.get_free_resources()`` (read via its ``'cpu'`` and
        ``'ram'`` keys) and ``service_c.get_price(name)`` (read via its
        ``'usd'`` key, rounded to two decimals).
        """
        self._service_a.send_something_to_somewhere(name)
        free_resources = self._service_b.get_free_resources()
        current_price = self._service_c.get_price(name)
        fields = [
            ('Cluster name', name),
            ('CPU', str(free_resources['cpu'])),
            ('RAM', str(free_resources['ram'])),
            ('Price', '{} $'.format(round(current_price['usd'], 2))),
        ]
        # The separator really is ' ,' (space before the comma); it is kept
        # byte-for-byte because existing callers may compare the exact text.
        return ' ,'.join(': '.join(pair) for pair in fields)
class TestClusterService(TestCase):
    """Test ``ClusterService`` with every collaborator replaced by a ``Mock``."""

    def test_get_cluster_info(self):
        """The summary line is built from the values the mocks return."""
        # Kept in a local so the outgoing call can be verified afterwards.
        service_a = Mock()
        cluster = ClusterService(
            service_a=service_a,
            service_b=Mock(get_free_resources=Mock(return_value={'cpu': 100, 'ram': 200})),
            service_c=Mock(get_price=Mock(return_value={'usd': 101.4999})),
        )
        self.assertEqual(
            cluster.get_cluster_info('best name'),
            'Cluster name: best name ,CPU: 100 ,RAM: 200 ,Price: 101.5 $'
        )
        # The service must also have notified service_a about this cluster.
        service_a.send_something_to_somewhere.assert_called_once_with('best name')
我有一个示例类,它接收一个已保存的 TensorFlow 模型并用它运行预测:
class Sample():
    """Wrap a model object and add its predictions to the input as a new column.

    The only thing this class requires from ``tf_model`` is a ``predict``
    method that accepts the same object later passed to :meth:`predict`.
    """

    ## all it does is creates a new column with predictions
    def __init__(self, tf_model):
        """Keep a reference to ``tf_model``; nothing is called on it here."""
        self.tf_model = tf_model

    def tf_process(self, x):
        """Store ``self.tf_model.predict(x)`` under ``x["tf_predictions"]``.

        ``x`` is modified in place and the very same object is returned, so
        the caller's object gains the new column as well.
        """
        ##some other preprocessing
        x["tf_predictions"] = self.tf_model.predict(x)
        return x

    def predict(self, x):
        """Return ``x`` after :meth:`tf_process` has added the predictions."""
        return self.tf_process(x)
无需加载模型的单元测试代码:
import unittest
import pandas as pd
from unittest import TestCase, mock
from my_package.sample_model import Sample
class TestSample(unittest.TestCase):
    """Unit test for ``Sample.predict`` that never loads a real model."""

    def test_predict(self):
        """``predict`` must hand back exactly what ``tf_process`` produced."""
        # A bare string is not valid DataFrame input (pandas raises
        # ValueError), so the hard-coded value is wrapped in a column.
        expected = pd.DataFrame({"tf_predictions": ["hardcoded_value"]})
        # mock.patch() needs an importable dotted path such as
        # "my_package.sample_model.Sample.tf_process"; "Sample.tf_process"
        # would be resolved as a module named "Sample". patch.object on the
        # already-imported class avoids spelling the path at all.
        with mock.patch.object(Sample, "tf_process", return_value=expected) as process:
            # tf_model is never touched because tf_process is replaced.
            result = Sample(tf_model=None).predict(pd.DataFrame())
        # to check: process.return_value == output of Sample.predict()
        process.assert_called_once()
        self.assertIs(result, expected)
目标:
我想将 process.return_value 与 Sample 中 predict 方法的输出进行比较,但为此我仍然必须加载模型。我不明白这里的 mock 有什么用,因为无论如何我都必须调用 predict 方法,才能把它的输出与 process.return_value 进行比较。任何建议都会有所帮助。
我认为在你的情况下最好使用 Mock()。你可以不借助 patch() 就写出简洁清晰的测试:只需准备好初始化所需的全部模拟实例即可。
from unittest.mock import Mock
class TestSample(TestCase):
    """Exercise ``Sample.predict`` with a ``Mock`` standing in for the model."""

    def test_predict(self):
        """The returned frame carries the values produced by the fake model."""
        # let's say predict() will return something... just an example
        tf = Mock(predict=Mock(return_value=(10, 20, 30)))
        df = pd.DataFrame({'test_col': (1, 2, 3)})
        df = Sample(tf).predict(df)
        # check column (assertIn reports the container when it fails)
        self.assertIn('tf_predictions', df.columns)
        # or check records
        self.assertEqual(
            df.to_dict('records'),
            [
                {'test_col': 1, 'tf_predictions': 10},
                {'test_col': 2, 'tf_predictions': 20},
                {'test_col': 3, 'tf_predictions': 30},
            ]
        )
当您需要对复杂服务进行测试时,它也很有帮助。举个例子:
class ClusterService:
    """Produce a one-line text summary of a cluster from three collaborators."""

    def __init__(self, service_a, service_b, service_c) -> None:
        """Store the collaborating services; none of them is called here."""
        self._service_a = service_a
        self._service_b = service_b
        self._service_c = service_c
        # service_d, ... etc

    def get_cluster_info(self, name: str) -> str:
        """Return a summary line with the cluster name, CPU, RAM and price.

        Calls, in order: ``service_a.send_something_to_somewhere(name)``,
        ``service_b.get_free_resources()`` (read via its ``'cpu'`` and
        ``'ram'`` keys) and ``service_c.get_price(name)`` (read via its
        ``'usd'`` key, rounded to two decimals).
        """
        self._service_a.send_something_to_somewhere(name)
        free_resources = self._service_b.get_free_resources()
        current_price = self._service_c.get_price(name)
        fields = [
            ('Cluster name', name),
            ('CPU', str(free_resources['cpu'])),
            ('RAM', str(free_resources['ram'])),
            ('Price', '{} $'.format(round(current_price['usd'], 2))),
        ]
        # The separator really is ' ,' (space before the comma); it is kept
        # byte-for-byte because existing callers may compare the exact text.
        return ' ,'.join(': '.join(pair) for pair in fields)
class TestClusterService(TestCase):
    """Test ``ClusterService`` with every collaborator replaced by a ``Mock``."""

    def test_get_cluster_info(self):
        """The summary line is built from the values the mocks return."""
        # Kept in a local so the outgoing call can be verified afterwards.
        service_a = Mock()
        cluster = ClusterService(
            service_a=service_a,
            service_b=Mock(get_free_resources=Mock(return_value={'cpu': 100, 'ram': 200})),
            service_c=Mock(get_price=Mock(return_value={'usd': 101.4999})),
        )
        self.assertEqual(
            cluster.get_cluster_info('best name'),
            'Cluster name: best name ,CPU: 100 ,RAM: 200 ,Price: 101.5 $'
        )
        # The service must also have notified service_a about this cluster.
        service_a.send_something_to_somewhere.assert_called_once_with('best name')