Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Secure Aggregation: Task Runner #1264

Draft
wants to merge 11 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions openfl-workspace/keras_cnn_mnist_secagg/.workspace
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
current_plan_name: default

5 changes: 5 additions & 0 deletions openfl-workspace/keras_cnn_mnist_secagg/plan/cols.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Copyright (C) 2020-2021 Intel Corporation
# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you.

collaborators:

7 changes: 7 additions & 0 deletions openfl-workspace/keras_cnn_mnist_secagg/plan/data.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Copyright (C) 2020-2021 Intel Corporation
# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you.

# collaborator_name,data_directory_path
one,1


2 changes: 2 additions & 0 deletions openfl-workspace/keras_cnn_mnist_secagg/plan/defaults
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
../../workspace/plan/defaults

63 changes: 63 additions & 0 deletions openfl-workspace/keras_cnn_mnist_secagg/plan/plan.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright (C) 2020-2021 Intel Corporation
# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you.

aggregator :
defaults : plan/defaults/aggregator.yaml
template : openfl.component.Aggregator
settings :
init_state_path : save/init.pbuf
best_state_path : save/best.pbuf
last_state_path : save/last.pbuf
rounds_to_train : 10

collaborator :
defaults : plan/defaults/collaborator.yaml
template : openfl.component.Collaborator
settings :
delta_updates : false
opt_treatment : RESET

data_loader :
defaults : plan/defaults/data_loader.yaml
template : src.dataloader.KerasMNISTInMemory
settings :
collaborator_count : 2
data_group_name : mnist
batch_size : 256

task_runner :
defaults : plan/defaults/task_runner.yaml
template : src.taskrunner.KerasCNN

network :
defaults : plan/defaults/network.yaml

assigner :
defaults : plan/defaults/assigner.yaml
template : openfl.component.RandomGroupedAssigner
settings :
task_groups :
- name : secagg_setup
percentage : 1.0
tasks :
- generate_keys
- generate_ciphertexts
- decrypt_ciphertexts

tasks:
generate_keys:
function: generate_keys
kwargs: {}
generate_ciphertexts:
function: generate_ciphertexts
kwargs: {}
decrypt_ciphertexts:
function: decrypt_ciphertexts
kwargs: {}

compression_pipeline :
defaults : plan/defaults/compression_pipeline.yaml
# To use a different compression pipeline, uncomment the following lines
# template : openfl.pipelines.KCPipeline
# settings :
# n_clusters : 6
3 changes: 3 additions & 0 deletions openfl-workspace/keras_cnn_mnist_secagg/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
keras==3.6.0
tensorflow==2.18.0

3 changes: 3 additions & 0 deletions openfl-workspace/keras_cnn_mnist_secagg/src/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Copyright (C) 2020-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""You may copy this file as the starting point of your own model."""
47 changes: 47 additions & 0 deletions openfl-workspace/keras_cnn_mnist_secagg/src/dataloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright (C) 2020-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

"""You may copy this file as the starting point of your own model."""

from openfl.federated import KerasDataLoader
from .mnist_utils import load_mnist_shard


class KerasMNISTInMemory(KerasDataLoader):
    """Data Loader for MNIST Dataset.

    Loads a single MNIST shard through ``load_mnist_shard`` and keeps the
    resulting train/validation arrays as attributes of the loader.
    """

    def __init__(self, data_path, batch_size, **kwargs):
        """
        Initialize.

        Args:
            data_path: Shard number used by this collaborator. Must be
                convertible with ``int()`` (e.g. ``"1"`` or ``1``).
            batch_size (int): The batch size for the data loader
            **kwargs: Additional arguments, passed to super init and load_mnist_shard

        Raises:
            ValueError: If ``data_path`` cannot be represented as an ``int``.
        """
        super().__init__(batch_size, **kwargs)

        # TODO: We should be downloading the dataset shard into a directory
        # TODO: There needs to be a method to ask how many collaborators and
        #  what index/rank is this collaborator.
        # Then we have a way to automatically shard based on rank and size of
        # collaborator list.
        try:
            shard_num = int(data_path)
        except (TypeError, ValueError, OverflowError) as err:
            # Only conversion failures are translated; anything else
            # (e.g. KeyboardInterrupt) must propagate untouched.
            raise ValueError(
                f"Expected `{data_path}` to be representable as `int`, as it refers to "
                "the data shard number used by the collaborator."
            ) from err

        _, num_classes, X_train, y_train, X_valid, y_valid = load_mnist_shard(
            shard_num=shard_num, **kwargs
        )

        self.X_train = X_train
        self.y_train = y_train
        self.X_valid = X_valid
        self.y_valid = y_valid

        self.num_classes = num_classes
118 changes: 118 additions & 0 deletions openfl-workspace/keras_cnn_mnist_secagg/src/mnist_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Copyright (C) 2020-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

"""You may copy this file as the starting point of your own model."""

from logging import getLogger

import numpy as np
from tensorflow.python.keras.utils.data_utils import get_file

logger = getLogger(__name__)


def one_hot(labels, classes):
    """
    Convert integer class labels into one-hot rows.

    Args:
        labels (list): Integer labels to encode
        classes (int): Total number of categorical classes

    Returns:
        np.array: One row per label, taken from the `classes` x `classes`
            identity matrix
    """
    # Row i of the identity matrix is exactly the one-hot vector for class i.
    identity = np.eye(classes)
    return identity[labels]


def _load_raw_datashards(shard_num, collaborator_count):
    """
    Fetch MNIST and return the slice belonging to one shard.

    The archive is obtained through `get_file` and read with `np.load`; the
    shard is then selected with a strided slice over the full arrays.

    Args:
        shard_num (int): The shard number to use
        collaborator_count (int): The number of collaborators in the federation

    Returns:
        2 tuples: (image, label) of the training, validation dataset
    """
    base_url = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/'
    archive_path = get_file(
        'mnist.npz',
        origin=base_url + 'mnist.npz',
        file_hash='731c5ac602752760c8e48fbffcf8c3b850d9dc2a2aedcf2cc48468fc17b673d1',
    )

    # Pull the complete dataset out of the archive before it is closed.
    with np.load(archive_path) as archive:
        train_images, train_labels = archive['x_train'], archive['y_train']
        valid_images, valid_labels = archive['x_test'], archive['y_test']

    # Start at the shard index and step by the number of collaborators.
    shard = slice(int(shard_num), None, collaborator_count)

    train_split = (train_images[shard], train_labels[shard])
    valid_split = (valid_images[shard], valid_labels[shard])
    return train_split, valid_split


def load_mnist_shard(shard_num, collaborator_count, categorical=True,
                     channels_last=True, **kwargs):
    """
    Load one shard of MNIST, reshaped and scaled for training.

    Images are reshaped to carry a single channel axis, cast to float32 and
    divided by 255. Labels are optionally one-hot encoded.

    Args:
        shard_num (int): The shard to use from the dataset
        collaborator_count (int): The number of collaborators in the federation
        categorical (bool): True = convert the labels to one-hot encoded
            vectors (Default = True)
        channels_last (bool): True = The input images have the channels
            last (Default = True)
        **kwargs: Additional parameters; accepted but not used here

    Returns:
        list: The input shape
        int: The number of classes
        numpy.ndarray: The training data
        numpy.ndarray: The training labels
        numpy.ndarray: The validation data
        numpy.ndarray: The validation labels
    """
    num_classes = 10
    img_rows, img_cols = 28, 28

    train_split, valid_split = _load_raw_datashards(shard_num, collaborator_count)
    X_train, y_train = train_split
    X_valid, y_valid = valid_split

    # Only the position of the single channel axis depends on the layout.
    if channels_last:
        input_shape = (img_rows, img_cols, 1)
    else:
        input_shape = (1, img_rows, img_cols)
    X_train = X_train.reshape(X_train.shape[0], *input_shape)
    X_valid = X_valid.reshape(X_valid.shape[0], *input_shape)

    # Cast to float32 and scale pixel values by 1/255.
    X_train = X_train.astype('float32') / 255
    X_valid = X_valid.astype('float32') / 255

    # Shapes are logged before the labels are (optionally) one-hot encoded.
    logger.info(f'MNIST > X_train Shape : {X_train.shape}')
    logger.info(f'MNIST > y_train Shape : {y_train.shape}')
    logger.info(f'MNIST > Train Samples : {X_train.shape[0]}')
    logger.info(f'MNIST > Valid Samples : {X_valid.shape[0]}')

    if categorical:
        # convert class vectors to binary class matrices
        y_train, y_valid = (
            one_hot(label_vector, num_classes) for label_vector in (y_train, y_valid)
        )

    return input_shape, num_classes, X_train, y_train, X_valid, y_valid
26 changes: 26 additions & 0 deletions openfl-workspace/keras_cnn_mnist_secagg/src/taskrunner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (C) 2020-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

"""You may copy this file as the starting point of your own model."""

from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import Dense
from keras.layers import Flatten

from openfl.federated.task.runner_sa import SATaskRunner


class KerasCNN(SATaskRunner):
    """Task runner for the workspace, built on top of `SATaskRunner`.

    NOTE(review): named after a basic convolutional neural network model, but
    no layers are assembled in this class itself — confirm where the model is
    built.
    """

    def __init__(self, **kwargs):
        """
        Set up the runner.

        Args:
            **kwargs: Forwarded unchanged to the `SATaskRunner` initializer
        """
        super().__init__(**kwargs)

        # Defined outside this class (presumably on the base runner); must run
        # after the base initializer has completed.
        self.initialize_tensorkeys_for_functions()
Loading