使用 AIF360 计算群体公平性指标
Calculate group fairness metrics with AIF360
我想使用 AIF360 计算群体公平性指标(group fairness metrics)。下面是一个示例数据集和模型,其中性别是受保护属性,收入是目标变量。
import pandas as pd
from sklearn.svm import SVC
from aif360.sklearn import metrics
# Toy data: 12 rows; 'gender' is the protected attribute and 'income' the target.
df = pd.DataFrame({'gender': [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
'experience': [0, 0.1, 0.2, 0.4, 0.5, 0.6, 0, 0.1, 0.2, 0.4, 0.5, 0.6],
'income': [0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1]})
# Fit an SVC on gender + experience, then predict on the same rows.
clf = SVC(random_state=0).fit(df[['gender', 'experience']], df['income'])
y_pred = clf.predict(df[['gender', 'experience']])
# This is the call that fails with the TypeError shown in the traceback below.
metrics.statistical_parity_difference(y_true=df['income'], y_pred=y_pred, prot_attr='gender', priv_group=1, pos_label=1)
它抛出如下错误:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-7-609692e52b2a> in <module>
11 y_pred = clf.predict(X)
12
---> 13 metrics.statistical_parity_difference(y_true=df['income'], y_pred=y_pred, prot_attr='gender', priv_group=1, pos_label=1)
TypeError: statistical_parity_difference() got an unexpected keyword argument 'y_true'
调用 disparate_impact_ratio 时也会出现类似的错误。看来需要以不同的方式传入数据,但我一直没弄清楚该怎么做。
删除函数调用中的 y_true= 和 y_pred= 字样,然后重试。正如文档所示,函数原型中的 *y 表示可以接受任意数量的位置参数(参见这篇帖子)。所以这是最合乎逻辑的猜测。
换句话说,y_true 和 y_pred 不是关键字参数,所以不能按名称传递。任意数量的关键字参数在函数原型中表示为 **kwargs。
这可以通过将数据转换为 StandardDataset,然后调用下面的 fair_metrics 函数来完成:
from aif360.datasets import StandardDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
# Wrap the DataFrame in an AIF360 StandardDataset: 'income' is the label
# (1 is the favorable class) and 'gender' is the protected attribute
# (1 is the privileged class).
dataset = StandardDataset(
    df,
    label_name='income',
    favorable_classes=[1],
    protected_attribute_names=['gender'],
    privileged_classes=[[1]],
)
def fair_metrics(dataset, y_pred):
    """Compute group fairness metrics for predictions on an AIF360 dataset.

    Only the first protected attribute of ``dataset`` is used to define the
    privileged and unprivileged groups.

    Args:
        dataset: AIF360 dataset holding the ground-truth labels.
        y_pred: Predicted labels, one per row of ``dataset``.

    Returns:
        dict with the keys 'statistical_parity_difference',
        'disparate_impact' and 'equal_opportunity_difference'.
    """
    # Work on a copy so that `dataset` keeps the ground-truth labels while
    # the copy carries the predictions.
    dataset_pred = dataset.copy()
    dataset_pred.labels = y_pred

    # The first protected attribute sits at position 0 of the per-attribute
    # lists, so no separate index lookup is needed.
    attr = dataset_pred.protected_attribute_names[0]
    privileged_groups = [
        {attr: dataset_pred.privileged_protected_attributes[0][0]}
    ]
    unprivileged_groups = [
        {attr: dataset_pred.unprivileged_protected_attributes[0][0]}
    ]

    # ClassificationMetric compares ground truth against predictions;
    # BinaryLabelDatasetMetric looks at the predicted labels only.
    classified_metric = ClassificationMetric(
        dataset,
        dataset_pred,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
    )
    metric_pred = BinaryLabelDatasetMetric(
        dataset_pred,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
    )

    result = {
        'statistical_parity_difference': metric_pred.statistical_parity_difference(),
        'disparate_impact': metric_pred.disparate_impact(),
        'equal_opportunity_difference': classified_metric.equal_opportunity_difference(),
    }
    return result
# Compute the fairness metrics for the predictions held in y_pred.
fair_metrics(dataset, y_pred)
它返回了正确的结果(参见图片):
{'statistical_parity_difference': -0.6666666666666667,
'disparate_impact': 0.3333333333333333,
'equal_opportunity_difference': 0.0}
我想使用 AIF360 计算群体公平性指标(group fairness metrics)。下面是一个示例数据集和模型,其中性别是受保护属性,收入是目标变量。
import pandas as pd
from sklearn.svm import SVC
from aif360.sklearn import metrics
# Toy data: 12 rows; 'gender' is the protected attribute and 'income' the target.
df = pd.DataFrame({'gender': [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
'experience': [0, 0.1, 0.2, 0.4, 0.5, 0.6, 0, 0.1, 0.2, 0.4, 0.5, 0.6],
'income': [0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1]})
# Fit an SVC on gender + experience, then predict on the same rows.
clf = SVC(random_state=0).fit(df[['gender', 'experience']], df['income'])
y_pred = clf.predict(df[['gender', 'experience']])
# This is the call that fails with the TypeError shown in the traceback below.
metrics.statistical_parity_difference(y_true=df['income'], y_pred=y_pred, prot_attr='gender', priv_group=1, pos_label=1)
它抛出如下错误:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-7-609692e52b2a> in <module>
11 y_pred = clf.predict(X)
12
---> 13 metrics.statistical_parity_difference(y_true=df['income'], y_pred=y_pred, prot_attr='gender', priv_group=1, pos_label=1)
TypeError: statistical_parity_difference() got an unexpected keyword argument 'y_true'
调用 disparate_impact_ratio 时也会出现类似的错误。看来需要以不同的方式传入数据,但我一直没弄清楚该怎么做。
删除函数调用中的 y_true= 和 y_pred= 字样,然后重试。正如文档所示,函数原型中的 *y 表示可以接受任意数量的位置参数(参见这篇帖子)。所以这是最合乎逻辑的猜测。
换句话说,y_true 和 y_pred 不是关键字参数,所以不能按名称传递。任意数量的关键字参数在函数原型中表示为 **kwargs。
这可以通过将数据转换为 StandardDataset,然后调用下面的 fair_metrics 函数来完成:
from aif360.datasets import StandardDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
# Wrap the DataFrame in an AIF360 StandardDataset: 'income' is the label
# (1 is the favorable class) and 'gender' is the protected attribute
# (1 is the privileged class).
dataset = StandardDataset(
    df,
    label_name='income',
    favorable_classes=[1],
    protected_attribute_names=['gender'],
    privileged_classes=[[1]],
)
def fair_metrics(dataset, y_pred):
    """Compute group fairness metrics for predictions on an AIF360 dataset.

    Only the first protected attribute of ``dataset`` is used to define the
    privileged and unprivileged groups.

    Args:
        dataset: AIF360 dataset holding the ground-truth labels.
        y_pred: Predicted labels, one per row of ``dataset``.

    Returns:
        dict with the keys 'statistical_parity_difference',
        'disparate_impact' and 'equal_opportunity_difference'.
    """
    # Work on a copy so that `dataset` keeps the ground-truth labels while
    # the copy carries the predictions.
    dataset_pred = dataset.copy()
    dataset_pred.labels = y_pred

    # The first protected attribute sits at position 0 of the per-attribute
    # lists, so no separate index lookup is needed.
    attr = dataset_pred.protected_attribute_names[0]
    privileged_groups = [
        {attr: dataset_pred.privileged_protected_attributes[0][0]}
    ]
    unprivileged_groups = [
        {attr: dataset_pred.unprivileged_protected_attributes[0][0]}
    ]

    # ClassificationMetric compares ground truth against predictions;
    # BinaryLabelDatasetMetric looks at the predicted labels only.
    classified_metric = ClassificationMetric(
        dataset,
        dataset_pred,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
    )
    metric_pred = BinaryLabelDatasetMetric(
        dataset_pred,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
    )

    result = {
        'statistical_parity_difference': metric_pred.statistical_parity_difference(),
        'disparate_impact': metric_pred.disparate_impact(),
        'equal_opportunity_difference': classified_metric.equal_opportunity_difference(),
    }
    return result
# Compute the fairness metrics for the predictions held in y_pred.
fair_metrics(dataset, y_pred)
它返回了正确的结果(参见图片):
{'statistical_parity_difference': -0.6666666666666667,
'disparate_impact': 0.3333333333333333,
'equal_opportunity_difference': 0.0}