TFF: train_test_client_split 对每个客户端数据进行分区
TFF: train_test_client_split to partition each client data
我正在构建联邦学习模型。
我写了下面的代码，但一直收到错误，而且错误信息所描述的情况并不属实。
请告诉我如何正确使用 train_test_client_split 函数？
@tf.function
def create_tf_dataset_for_client_fn(dataset_path):
    """Build one client's tf.data pipeline from a CSV file path.

    NOTE(review): relies on a module-level `record_defaults` defined
    elsewhere — confirm it matches the CSV's column count and dtypes.
    """
    return tf.data.experimental.CsvDataset(
        dataset_path, record_defaults=record_defaults, header=True )
source = tff.simulation.datasets.FilePerUserClientData(
dataset_paths, create_tf_dataset_for_client_fn)
print(source.client_ids)
>> ['client_0', 'client_1', 'client_2']
@classmethod
def from_clients_and_fn():
client_ids: Iterable[str]
create_tf_dataset_for_client_fn: Callable[[str], tf.data.Dataset]
Splitting=source.from_clients_and_tf_fn(['client_0', 'client_1', 'client_2'],create_tf_dataset_for_client_fn)
source.train_test_client_split(client_data=Splitting,
num_test_clients=1)
NotFoundError: client_1; No such file or directory [Op:IteratorGetNext]
文件在那里,路径也正确,但我不知道这里有什么问题?
你只需要正确的数据结构。试试下面的方法。
创建虚拟数据
import tensorflow as tf
import tensorflow_federated as tff
import pandas as pd
from collections import OrderedDict
# Dummy data
samples = 5

def _rand_int_column():
    # `samples` random ints drawn uniformly from [0, 50), as a Python list.
    return tf.random.uniform((samples,), maxval=50, dtype=tf.int32).numpy().tolist()

def _rand_float_column():
    # `samples` standard-normal floats, as a Python list.
    return tf.random.normal((samples,)).numpy().tolist()

# Column layout: 4 int columns, then 6 float columns, then 2 int columns.
data = [
    [_rand_int_column() for _ in range(4)]
    + [_rand_float_column() for _ in range(6)]
    + [_rand_int_column() for _ in range(2)]
]
df = pd.DataFrame(data)
# Each cell holds a length-`samples` list; explode turns them into rows.
df = df.explode(list(df.columns))
# Write the same dummy table out for both simulated clients.
df.to_csv('client1.csv', index=False)
df.to_csv('client2.csv', index=False)
加载、处理和拆分数据:
# One default value per CSV column: 4 ints, 6 floats, 2 ints.
record_defaults = [int()] * 4 + [float()] * 6 + [int()] * 2

@tf.function
def create_tf_dataset_for_client_fn(dataset_path):
    """Return a CsvDataset for one client's CSV file (header row skipped)."""
    return tf.data.experimental.CsvDataset(
        dataset_path, record_defaults=record_defaults, header=True)
@tf.function
def add_parsing(dataset):
    """Map each CSV record tuple into an OrderedDict of label/features.

    NOTE(review): 'label' is bound to x[:-1] (every field except the last)
    and 'features' to x[1:-1] — if the label is meant to be the final
    column only, this should probably be x[-1]; confirm against the
    model's expected input spec.
    """
    def _to_ordered_pairs(*record):
        return OrderedDict(
            [('label', record[:-1]), ('features', record[1:-1])]
        )

    return dataset.map(_to_ordered_pairs, num_parallel_calls=tf.data.AUTOTUNE)
# Map each client id to its on-disk CSV file.
dataset_paths = {'client1': '/content/client1.csv', 'client2': '/content/client2.csv'}
source = tff.simulation.datasets.FilePerUserClientData(
    dataset_paths, create_tf_dataset_for_client_fn)
# Make sure the client ids are tensor strings when splitting data.
# NOTE(review): this pokes a private attribute (_client_ids) and may break
# across TFF versions — check whether a public API covers this instead.
source._client_ids = [tf.cast(c, tf.string) for c in source.client_ids]
source = source.preprocess(add_parsing)
# train_test_client_split is a classmethod: pass the ClientData instance
# plus the number of clients to hold out for test (1 of the 2 clients).
train, test = source.train_test_client_split(source, 1)
我正在构建联邦学习模型。
我写了下面的代码，但一直收到错误，而且错误信息所描述的情况并不属实。
请告诉我如何正确使用 train_test_client_split 函数？
@tf.function
def create_tf_dataset_for_client_fn(dataset_path):
    """Build one client's tf.data pipeline from a CSV file path.

    NOTE(review): relies on a module-level `record_defaults` defined
    elsewhere — confirm it matches the CSV's column count and dtypes.
    """
    return tf.data.experimental.CsvDataset(
        dataset_path, record_defaults=record_defaults, header=True )
source = tff.simulation.datasets.FilePerUserClientData(
dataset_paths, create_tf_dataset_for_client_fn)
print(source.client_ids)
>> ['client_0', 'client_1', 'client_2']
@classmethod
def from_clients_and_fn():
client_ids: Iterable[str]
create_tf_dataset_for_client_fn: Callable[[str], tf.data.Dataset]
Splitting=source.from_clients_and_tf_fn(['client_0', 'client_1', 'client_2'],create_tf_dataset_for_client_fn)
source.train_test_client_split(client_data=Splitting,
num_test_clients=1)
NotFoundError: client_1; No such file or directory [Op:IteratorGetNext]
文件在那里,路径也正确,但我不知道这里有什么问题?
你只需要正确的数据结构。试试下面的方法。
创建虚拟数据
import tensorflow as tf
import tensorflow_federated as tff
import pandas as pd
from collections import OrderedDict
# Dummy data
samples = 5

def _rand_int_column():
    # `samples` random ints drawn uniformly from [0, 50), as a Python list.
    return tf.random.uniform((samples,), maxval=50, dtype=tf.int32).numpy().tolist()

def _rand_float_column():
    # `samples` standard-normal floats, as a Python list.
    return tf.random.normal((samples,)).numpy().tolist()

# Column layout: 4 int columns, then 6 float columns, then 2 int columns.
data = [
    [_rand_int_column() for _ in range(4)]
    + [_rand_float_column() for _ in range(6)]
    + [_rand_int_column() for _ in range(2)]
]
df = pd.DataFrame(data)
# Each cell holds a length-`samples` list; explode turns them into rows.
df = df.explode(list(df.columns))
# Write the same dummy table out for both simulated clients.
df.to_csv('client1.csv', index=False)
df.to_csv('client2.csv', index=False)
加载、处理和拆分数据:
# One default value per CSV column: 4 ints, 6 floats, 2 ints.
record_defaults = [int()] * 4 + [float()] * 6 + [int()] * 2

@tf.function
def create_tf_dataset_for_client_fn(dataset_path):
    """Return a CsvDataset for one client's CSV file (header row skipped)."""
    return tf.data.experimental.CsvDataset(
        dataset_path, record_defaults=record_defaults, header=True)
@tf.function
def add_parsing(dataset):
    """Map each CSV record tuple into an OrderedDict of label/features.

    NOTE(review): 'label' is bound to x[:-1] (every field except the last)
    and 'features' to x[1:-1] — if the label is meant to be the final
    column only, this should probably be x[-1]; confirm against the
    model's expected input spec.
    """
    def _to_ordered_pairs(*record):
        return OrderedDict(
            [('label', record[:-1]), ('features', record[1:-1])]
        )

    return dataset.map(_to_ordered_pairs, num_parallel_calls=tf.data.AUTOTUNE)
# Map each client id to its on-disk CSV file.
dataset_paths = {'client1': '/content/client1.csv', 'client2': '/content/client2.csv'}
source = tff.simulation.datasets.FilePerUserClientData(
    dataset_paths, create_tf_dataset_for_client_fn)
# Make sure the client ids are tensor strings when splitting data.
# NOTE(review): this pokes a private attribute (_client_ids) and may break
# across TFF versions — check whether a public API covers this instead.
source._client_ids = [tf.cast(c, tf.string) for c in source.client_ids]
source = source.preprocess(add_parsing)
# train_test_client_split is a classmethod: pass the ClientData instance
# plus the number of clients to hold out for test (1 of the 2 clients).
train, test = source.train_test_client_split(source, 1)