From 34f54bda234ed94c56b04aab1370225bec69a4da Mon Sep 17 00:00:00 2001
From: rayrayraykk <18007356109@163.com>
Date: Wed, 15 Feb 2023 20:07:30 +0800
Subject: [PATCH 1/3] support hpo in distributed

---
 .../distributed_xgb_client_1_hpo.yaml         | 63 +++++++++++++++++++
 .../distributed_xgb_client_2_hpo.yaml         | 63 +++++++++++++++++++
 .../baseline/distributed_xgb_server_hpo.yaml  | 61 ++++++++++++++++++
 .../autotune/baseline/fedhpo_vfl.yaml         |  1 +
 federatedscope/autotune/utils.py              | 13 ++++
 .../core/auxiliaries/worker_builder.py        | 46 +++++++-------
 federatedscope/core/configs/cfg_hpo.py        |  1 +
 federatedscope/core/configs/config.py         |  6 +-
 .../feature/vfl/preprocess/instance_norm.py   |  9 ++-
 .../vfl/selection/correlation_filter.py       |  9 ++-
 .../core/feature/vfl/selection/iv_filter.py   |  9 ++-
 federatedscope/core/fed_runner.py             | 21 ++++++-
 federatedscope/core/workers/client.py         |  2 +
 federatedscope/core/workers/server.py         | 11 ++--
 .../core/workers/wrapper/__init__.py          |  4 +-
 .../core/workers/wrapper/autotune.py          | 59 +++++++++++++++++
 federatedscope/hpo.py                         | 46 --------------
 federatedscope/main.py                        | 42 ++++++++-----
 .../vertical_fl/worker/vertical_server.py     |  2 +-
 scripts/example_configs/cora/sha.yaml         |  1 +
 .../example_configs/cora/sha_wrap_fedex.yaml  |  1 +
 .../cora/sha_wrap_fedex_arm.yaml              |  1 +
 scripts/example_configs/fed_node_cls.yaml     |  1 +
 .../example_configs/femnist/avg/bo_gp.yaml    |  1 +
 .../femnist/avg/bo_gp_wrap.yaml               |  1 +
 .../example_configs/femnist/avg/bo_kde.yaml   |  1 +
 .../femnist/avg/bo_kde_wrap.yaml              |  1 +
 .../example_configs/femnist/avg/bo_rf.yaml    |  1 +
 .../femnist/avg/bo_rf_wrap.yaml               |  1 +
 scripts/example_configs/femnist/avg/bohb.yaml |  1 +
 .../femnist/avg/bohb_wrap.yaml                |  1 +
 scripts/example_configs/femnist/avg/hb.yaml   |  1 +
 .../example_configs/femnist/avg/hb_wrap.yaml  |  1 +
 scripts/example_configs/femnist/avg/rs.yaml   |  1 +
 .../example_configs/femnist/avg/rs_wrap.yaml  |  1 +
 scripts/example_configs/femnist/avg/sha.yaml  |  1 +
 .../example_configs/femnist/avg/sha_wrap.yaml |  1 +
 scripts/example_configs/femnist/sha.yaml      |  1 +
 .../femnist/sha_wrap_fedex.yaml               |  1 +
 scripts/example_configs/sha_wrap_fedex.yaml   |  1 +
 .../example_configs/sha_wrap_fedex_arm.yaml   |  1 +
 scripts/example_configs/toy_rs.yaml           |  1 +
 scripts/example_configs/toy_sha.yaml          |  1 +
 .../fedentsgd_on_cifar10.yaml                 |  1 +
 44 files changed, 383 insertions(+), 110 deletions(-)
 create mode 100644 federatedscope/autotune/baseline/distributed_xgb_client_1_hpo.yaml
 create mode 100644 federatedscope/autotune/baseline/distributed_xgb_client_2_hpo.yaml
 create mode 100644 federatedscope/autotune/baseline/distributed_xgb_server_hpo.yaml
 create mode 100644 federatedscope/core/workers/wrapper/autotune.py
 delete mode 100644 federatedscope/hpo.py

diff --git a/federatedscope/autotune/baseline/distributed_xgb_client_1_hpo.yaml b/federatedscope/autotune/baseline/distributed_xgb_client_1_hpo.yaml
new file mode 100644
index 000000000..ab4d99936
--- /dev/null
+++ b/federatedscope/autotune/baseline/distributed_xgb_client_1_hpo.yaml
@@ -0,0 +1,63 @@
+use_gpu: True
+device: 0
+early_stop:
+  patience: 5
+seed: 12345
+federate:
+  client_num: 2
+  mode: 'distributed'
+  make_global_eval: False
+  online_aggr: False
+  total_round_num: 20
+distribute:
+  use: True
+  server_host: '127.0.0.1'
+  server_port: 50051
+  client_host: '127.0.0.1'
+  client_port: 50052
+  role: 'client'
+  data_idx: 1
+model:
+  type: xgb_tree
+  lambda_: 0.1
+  gamma: 0
+  num_of_trees: 10
+train:
+  optimizer:
+    lr: 0.5
+    # learning rate for xgb model
+    eta: 0.5
+data:
+  root: data/
+  type: adult
+  splits: [1.0, 0.0]
+  args: [{normalization: False, standardization: True}]
+feat_engr:
+  scenario: vfl
+dataloader:
+  type: raw
+  batch_size: 50
+criterion:
+  type: CrossEntropyLoss
+trainer:
+  type: verticaltrainer
+vertical:
+  use: True
+  key_size: 256
+  dims: [7, 14]
+  algo: 'xgb'
+eval:
+  freq: 5
+  best_res_update_round_wise_key: test_loss
+hpo:
+  use: True
+  scheduler: sha
+  num_workers: 0
+  init_cand_num: 9
+  ss: 'federatedscope/autotune/baseline/vfl_ss.yaml'
+  sha:
+    budgets: [ 3, 9 ]
+    elim_rate: 3
+    iter: 1
+  metric: 'server_global_eval.test_loss'
+  working_folder: sha
\ No newline at end of file
diff --git a/federatedscope/autotune/baseline/distributed_xgb_client_2_hpo.yaml b/federatedscope/autotune/baseline/distributed_xgb_client_2_hpo.yaml
new file mode 100644
index 000000000..698b4a438
--- /dev/null
+++ b/federatedscope/autotune/baseline/distributed_xgb_client_2_hpo.yaml
@@ -0,0 +1,63 @@
+use_gpu: True
+device: 0
+early_stop:
+  patience: 5
+seed: 12345
+federate:
+  client_num: 2
+  mode: 'distributed'
+  make_global_eval: False
+  online_aggr: False
+  total_round_num: 20
+distribute:
+  use: True
+  server_host: '127.0.0.1'
+  server_port: 50051
+  client_host: '127.0.0.1'
+  client_port: 50053
+  role: 'client'
+  data_idx: 2
+model:
+  type: xgb_tree
+  lambda_: 0.1
+  gamma: 0
+  num_of_trees: 10
+train:
+  optimizer:
+    lr: 0.5
+    # learning rate for xgb model
+    eta: 0.5
+data:
+  root: data/
+  type: adult
+  splits: [1.0, 0.0]
+  args: [{normalization: False, standardization: True}]
+feat_engr:
+  scenario: vfl
+dataloader:
+  type: raw
+  batch_size: 50
+criterion:
+  type: CrossEntropyLoss
+trainer:
+  type: verticaltrainer
+vertical:
+  use: True
+  key_size: 256
+  dims: [7, 14]
+  algo: 'xgb'
+eval:
+  freq: 5
+  best_res_update_round_wise_key: test_loss
+hpo:
+  use: True
+  scheduler: sha
+  num_workers: 0
+  init_cand_num: 9
+  ss: 'federatedscope/autotune/baseline/vfl_ss.yaml'
+  sha:
+    budgets: [ 3, 9 ]
+    elim_rate: 3
+    iter: 1
+  metric: 'server_global_eval.test_loss'
+  working_folder: sha
\ No newline at end of file
diff --git a/federatedscope/autotune/baseline/distributed_xgb_server_hpo.yaml b/federatedscope/autotune/baseline/distributed_xgb_server_hpo.yaml
new file mode 100644
index 000000000..423501cc1
--- /dev/null
+++ b/federatedscope/autotune/baseline/distributed_xgb_server_hpo.yaml
@@ -0,0 +1,61 @@
+use_gpu: True
+device: 0
+early_stop:
+  patience: 5
+seed: 12345
+federate:
+  client_num: 2
+  mode: 'distributed'
+  make_global_eval: False
+  online_aggr: False
+  total_round_num: 20
+distribute:
+  use: True
+  server_host: '127.0.0.1'
+  server_port: 50051
+  role: 'server'
+  data_idx: 0
+model:
+  type: xgb_tree
+  lambda_: 0.1
+  gamma: 0
+  num_of_trees: 10
+train:
+  optimizer:
+    lr: 0.5
+    # learning rate for xgb model
+    eta: 0.5
+data:
+  root: data/
+  type: adult
+  splits: [1.0, 0.0]
+  args: [{normalization: False, standardization: True}]
+feat_engr:
+  scenario: vfl
+dataloader:
+  type: raw
+  batch_size: 50
+criterion:
+  type: CrossEntropyLoss
+trainer:
+  type: verticaltrainer
+vertical:
+  use: True
+  key_size: 256
+  dims: [7, 14]
+  algo: 'xgb'
+eval:
+  freq: 5
+  best_res_update_round_wise_key: test_loss
+hpo:
+  use: True
+  scheduler: sha
+  num_workers: 0
+  init_cand_num: 9
+  ss: 'federatedscope/autotune/baseline/vfl_ss.yaml'
+  sha:
+    budgets: [ 3, 9 ]
+    elim_rate: 3
+    iter: 1
+  metric: 'server_global_eval.test_loss'
+  working_folder: sha
\ No newline at end of file
diff --git a/federatedscope/autotune/baseline/fedhpo_vfl.yaml b/federatedscope/autotune/baseline/fedhpo_vfl.yaml
index 57f44275e..dd7750f24 100644
--- a/federatedscope/autotune/baseline/fedhpo_vfl.yaml
+++ b/federatedscope/autotune/baseline/fedhpo_vfl.yaml
@@ -39,6 +39,7 @@ eval:
   freq: 5
   best_res_update_round_wise_key: test_loss
 hpo:
+  use: True
   scheduler: sha
   num_workers: 0
   init_cand_num: 9
diff --git a/federatedscope/autotune/utils.py b/federatedscope/autotune/utils.py
index 2d67b04dd..4c95bca08 100644
--- a/federatedscope/autotune/utils.py
+++ b/federatedscope/autotune/utils.py
@@ -1,3 +1,5 @@
+from typing import MutableMapping
+
 import yaml
 import logging
 import pandas as pd
@@ -312,3 +314,14 @@ def log2wandb(trial, config, results, trial_cfg):
                 trial_cfg.hpo.metric: results[key1][key2],
             }
         wandb.log(log_res)
+
+
+def flatten_dict(d, parent_key='', sep='.'):
+    items = []
+    for key, value in d.items():
+        new_key = parent_key + sep + key if parent_key else key
+        if isinstance(value, MutableMapping):
+            items.extend(flatten_dict(value, new_key, sep=sep).items())
+        else:
+            items.append((new_key, value))
+    return dict(items)
diff --git a/federatedscope/core/auxiliaries/worker_builder.py b/federatedscope/core/auxiliaries/worker_builder.py
index 92d2d3353..aa2866165 100644
--- a/federatedscope/core/auxiliaries/worker_builder.py
+++ b/federatedscope/core/auxiliaries/worker_builder.py
@@ -45,16 +45,16 @@ def get_client_cls(cfg):
         worker_class = func(cfg.federate.method.lower())
         if worker_class is not None:
             return worker_class['client']
-
-    if cfg.hpo.fedex.use:
-        from federatedscope.autotune.fedex import FedExClient
-        return FedExClient
-    if cfg.hpo.fts.use:
-        from federatedscope.autotune.fts import FTSClient
-        return FTSClient
-    if cfg.hpo.pfedhpo.use:
-        from federatedscope.autotune.pfedhpo import pFedHPOClient
-        return pFedHPOClient
+    if cfg.hpo.use:
+        if cfg.hpo.fedex.use:
+            from federatedscope.autotune.fedex import FedExClient
+            return FedExClient
+        if cfg.hpo.fts.use:
+            from federatedscope.autotune.fts import FTSClient
+            return FTSClient
+        if cfg.hpo.pfedhpo.use:
+            from federatedscope.autotune.pfedhpo import pFedHPOClient
+            return pFedHPOClient
 
     if cfg.vertical.use:
         if cfg.vertical.algo == 'lr':
@@ -141,19 +141,19 @@ def get_server_cls(cfg):
         if worker_class is not None:
             return worker_class['server']
 
-    if cfg.hpo.fedex.use:
-        from federatedscope.autotune.fedex import FedExServer
-        return FedExServer
-
-    if cfg.hpo.fts.use:
-        from federatedscope.autotune.fts import FTSServer
-        return FTSServer
-    if cfg.hpo.pfedhpo.use and not cfg.hpo.pfedhpo.train_fl:
-        from federatedscope.autotune.pfedhpo import pFedHPOServer
-        return pFedHPOServer
-    if cfg.hpo.pfedhpo.use and cfg.hpo.pfedhpo.train_fl:
-        from federatedscope.autotune.pfedhpo import pFedHPOFLServer
-        return pFedHPOFLServer
+    if cfg.hpo.use:
+        if cfg.hpo.fedex.use:
+            from federatedscope.autotune.fedex import FedExServer
+            return FedExServer
+        if cfg.hpo.fts.use:
+            from federatedscope.autotune.fts import FTSServer
+            return FTSServer
+        if cfg.hpo.pfedhpo.use and not cfg.hpo.pfedhpo.train_fl:
+            from federatedscope.autotune.pfedhpo import pFedHPOServer
+            return pFedHPOServer
+        if cfg.hpo.pfedhpo.use and cfg.hpo.pfedhpo.train_fl:
+            from federatedscope.autotune.pfedhpo import pFedHPOFLServer
+            return pFedHPOFLServer
 
     if cfg.attack.attack_method.lower() in ['dlg', 'ig']:
         from federatedscope.attack.worker_as_attacker.server_attacker import\
diff --git a/federatedscope/core/configs/cfg_hpo.py b/federatedscope/core/configs/cfg_hpo.py
index d802052f3..20ae596e4 100644
--- a/federatedscope/core/configs/cfg_hpo.py
+++ b/federatedscope/core/configs/cfg_hpo.py
@@ -13,6 +13,7 @@ def extend_hpo_cfg(cfg):
     # hpo related options
     # ---------------------------------------------------------------------- #
     cfg.hpo = CN()
+    cfg.hpo.use = False
     cfg.hpo.trial_index = 0
     cfg.hpo.working_folder = 'hpo'
     cfg.hpo.ss = ''
diff --git a/federatedscope/core/configs/config.py b/federatedscope/core/configs/config.py
index 348cf66cb..75134d0ad 100644
--- a/federatedscope/core/configs/config.py
+++ b/federatedscope/core/configs/config.py
@@ -158,12 +158,12 @@ def assert_cfg(self, check_cfg=True):
     def clean_unused_sub_cfgs(self):
         """
         Clean the un-used secondary-level CfgNode, whose ``.use`` \
-        attribute is ``True``
+        attribute is ``False``, except `hpo`
         """
-        for v in self.values():
+        for key, v in self.items():
             if isinstance(v, CfgNode) or isinstance(v, CN):
                 # sub-config
-                if hasattr(v, "use") and v.use is False:
+                if hasattr(v, "use") and v.use is False and key != 'hpo':
                     for k in copy.deepcopy(v).keys():
                         # delete the un-used attributes
                         if k == "use":
diff --git a/federatedscope/core/feature/vfl/preprocess/instance_norm.py b/federatedscope/core/feature/vfl/preprocess/instance_norm.py
index 05870f97e..7cf2fb5a1 100644
--- a/federatedscope/core/feature/vfl/preprocess/instance_norm.py
+++ b/federatedscope/core/feature/vfl/preprocess/instance_norm.py
@@ -17,9 +17,9 @@ def wrap_instance_norm_server(worker):
     Returns:
         Wrap vfl server with instance norm.
     """
-    def trigger_for_feat_engr(self,
-                              trigger_train_func,
-                              kwargs_for_trigger_train_func={}):
+    def trigger_for_train(self,
+                          trigger_train_func,
+                          kwargs_for_trigger_train_func={}):
         # broadcast_model_para_func after feature engineering finishing
         self.trigger_train_func = trigger_train_func
         self.kwargs_for_trigger_train_func = \
@@ -108,8 +108,7 @@ def callback_func_for_ss_instance_sum_norm_square(self, message: Message):
         self.trigger_train_func(**self.kwargs_for_trigger_train_func)
 
     # Bind method to instance
-    worker.trigger_for_feat_engr = types.MethodType(trigger_for_feat_engr,
-                                                    worker)
+    worker.trigger_for_train = types.MethodType(trigger_for_train, worker)
     worker.callback_func_for_ss_instance_sum = types.MethodType(
         callback_func_for_ss_instance_sum, worker)
     worker.callback_func_for_ss_instance_sum_norm_square = types.MethodType(
diff --git a/federatedscope/core/feature/vfl/selection/correlation_filter.py b/federatedscope/core/feature/vfl/selection/correlation_filter.py
index f89e0655e..e06d20e88 100644
--- a/federatedscope/core/feature/vfl/selection/correlation_filter.py
+++ b/federatedscope/core/feature/vfl/selection/correlation_filter.py
@@ -18,9 +18,9 @@ def wrap_correlation_filter_server(worker):
     Returns:
         Wrap vfl server with correlation_filter.
""" - def trigger_for_feat_engr(self, - trigger_train_func, - kwargs_for_trigger_train_func={}): + def trigger_for_train(self, + trigger_train_func, + kwargs_for_trigger_train_func={}): logger.info('Start to execute correlation_filter, which requires FHE.') self.msg_buffer['feat_dim'] = {} @@ -96,8 +96,7 @@ def callbacks_funcs_for_feat_dim(self, message: Message): self.trigger_train_func(**self.kwargs_for_trigger_train_func) # Bind method to instance - worker.trigger_for_feat_engr = types.MethodType(trigger_for_feat_engr, - worker) + worker.trigger_for_train = types.MethodType(trigger_for_train, worker) worker.callback_funcs_for_en_feat_corrcoef = types.MethodType( callback_funcs_for_en_feat_corrcoef, worker) worker.callbacks_funcs_for_feat_dim = types.MethodType( diff --git a/federatedscope/core/feature/vfl/selection/iv_filter.py b/federatedscope/core/feature/vfl/selection/iv_filter.py index 58d935e57..6b484f631 100644 --- a/federatedscope/core/feature/vfl/selection/iv_filter.py +++ b/federatedscope/core/feature/vfl/selection/iv_filter.py @@ -19,9 +19,9 @@ def wrap_iv_filter_server(worker): Returns: Wrap vfl server with iv_filter. """ - def trigger_for_feat_engr(self, - trigger_train_func, - kwargs_for_trigger_train_func={}): + def trigger_for_train(self, + trigger_train_func, + kwargs_for_trigger_train_func={}): logger.info('Start to execute woe_filter, which requires HE.') self.trigger_train_func = trigger_train_func self.kwargs_for_trigger_train_func = \ @@ -78,8 +78,7 @@ def callbacks_funcs_for_feat_dim(self, message: Message): self.trigger_train_func(**self.kwargs_for_trigger_train_func) # Bind method to instance - worker.trigger_for_feat_engr = types.MethodType(trigger_for_feat_engr, - worker) + worker.trigger_for_train = types.MethodType(trigger_for_train, worker) worker.callbacks_funcs_for_feat_dim = types.MethodType( callbacks_funcs_for_feat_dim, worker) diff --git a/federatedscope/core/fed_runner.py b/federatedscope/core/fed_runner.py index 76668277e..7e7114f1c 100644 --- a/federatedscope/core/fed_runner.py +++ b/federatedscope/core/fed_runner.py @@ -170,7 +170,16 @@ def _setup_server(self, resource_info=None, client_resource_info=None): from federatedscope.core.workers.wrapper import wrap_swa_server server = wrap_swa_server(server) logger.info('Server has been set up ... ') - return self.feat_engr_wrapper_server(server) + + if self.cfg.feat_engr.type: + server = self.feat_engr_wrapper_server(server) + + if self.cfg.federate.mode == 'distributed' and self.cfg.hpo.use: + from federatedscope.core.workers.wrapper import \ + wrap_autotune_server + server = wrap_autotune_server(server) + + return server def _setup_client(self, client_id=-1, @@ -223,7 +232,15 @@ def _setup_client(self, else: logger.info(f'Client {client_id} has been set up ... 
-        return self.feat_engr_wrapper_client(client)
+        if self.cfg.feat_engr.type:
+            client = self.feat_engr_wrapper_client(client)
+
+        if self.cfg.federate.mode == 'distributed' and self.cfg.hpo.use:
+            from federatedscope.core.workers.wrapper import \
+                wrap_autotune_client
+            client = wrap_autotune_client(client)
+
+        return client
 
     def check(self):
         """
diff --git a/federatedscope/core/workers/client.py b/federatedscope/core/workers/client.py
index 0a407b788..e14164321 100644
--- a/federatedscope/core/workers/client.py
+++ b/federatedscope/core/workers/client.py
@@ -71,6 +71,8 @@ def __init__(self,
         if config is None:
             return
 
+        self.args, self.kwargs = args, kwargs
+
         # the unseen_client indicates that whether this client contributes to
         # FL process by training on its local data and uploading the local
         # model update, which is useful for check the participation
diff --git a/federatedscope/core/workers/server.py b/federatedscope/core/workers/server.py
index 3986eac15..36f5c9636 100644
--- a/federatedscope/core/workers/server.py
+++ b/federatedscope/core/workers/server.py
@@ -70,6 +70,7 @@ def __init__(self,
                  device='cpu',
                  strategy=None,
                  unseen_clients_id=None,
+                 *args,
                  **kwargs):
         super(Server, self).__init__(ID, state, config, model, strategy)
         # Register message handlers
@@ -79,6 +80,8 @@
         if config is None:
             return
 
+        self.args, self.kwargs = args, kwargs
+
         self.data = data
         self.device = device
         self.best_results = dict()
@@ -795,7 +798,7 @@ def trigger_for_start(self):
                     self._cfg.asyn.time_budget
 
             # start feature engineering
-            self.trigger_for_feat_engr(
+            self.trigger_for_train(
                 self.broadcast_model_para, {
                     'msg_type': 'model_para',
                     'sample_client_num': self.sample_client_num
                 })
             logger.info(
                 '----------- Starting training (Round #{:d}) -------------'.
                 format(self.state))
 
-    def trigger_for_feat_engr(self,
-                              trigger_train_func,
-                              kwargs_for_trigger_train_func={}):
+    def trigger_for_train(self,
+                          trigger_train_func,
+                          kwargs_for_trigger_train_func={}):
         """
         Interface for feature engineering, the default operation is none
         """
diff --git a/federatedscope/core/workers/wrapper/__init__.py b/federatedscope/core/workers/wrapper/__init__.py
index 678056669..2cc75f561 100644
--- a/federatedscope/core/workers/wrapper/__init__.py
+++ b/federatedscope/core/workers/wrapper/__init__.py
@@ -1,3 +1,5 @@
 from federatedscope.core.workers.wrapper.fedswa import wrap_swa_server
+from federatedscope.core.workers.wrapper.autotune import \
+    wrap_autotune_server, wrap_autotune_client
 
-__all__ = ['wrap_swa_server']
+__all__ = ['wrap_swa_server', 'wrap_autotune_server', 'wrap_autotune_client']
diff --git a/federatedscope/core/workers/wrapper/autotune.py b/federatedscope/core/workers/wrapper/autotune.py
new file mode 100644
index 000000000..48ce52b27
--- /dev/null
+++ b/federatedscope/core/workers/wrapper/autotune.py
@@ -0,0 +1,59 @@
+import copy
+import types
+import logging
+
+from federatedscope.core.message import Message
+from federatedscope.autotune.utils import flatten_dict, config2cmdargs
+
+logger = logging.getLogger(__name__)
+
+
+def wrap_autotune_server(server):
+    tmp_trigger_for_train = server.trigger_for_train
+
+    def trigger_for_train(self,
+                          trigger_train_func,
+                          kwargs_for_trigger_train_func={}):
+        cfg = copy.deepcopy(self._cfg)
+        cfg.defrost()
+        cfg.clear_aux_info()
+        del cfg['distribute']
+        cfg = config2cmdargs(flatten_dict(cfg))
+
+        # broadcast cfg
+        self.comm_manager.send(
+            Message(msg_type='cfg',
+                    sender=self.ID,
+                    receiver=list(self.comm_manager.neighbors.keys()),
+                    state=self.state,
+                    timestamp=self.cur_timestamp,
+                    content=cfg))
+        tmp_trigger_for_train(trigger_train_func,
+                              kwargs_for_trigger_train_func)
+
+    # Bind method to instance
+    server.trigger_for_train = types.MethodType(trigger_for_train, server)
+
+    return server
+
+
+def wrap_autotune_client(client):
+    def callback_funcs_for_cfg(self, message: Message):
+        sender = message.sender
+        new_cfg = message.content
+
+        if sender == self.server_id and self._cfg.hpo.use:
+            logger.info("Receive a new `cfg`, and start to reinitialize.")
+            self._cfg.defrost()
+            # TODO: Some var might remain unchanged
+            self._cfg.merge_from_list(new_cfg)
+            self._cfg.freeze()
+
+    # Bind method to instance
+    client.callback_funcs_for_cfg = types.MethodType(callback_funcs_for_cfg,
+                                                     client)
+
+    # Register handlers functions
+    client.register_handlers('cfg', client.callback_funcs_for_cfg)
+
+    return client
diff --git a/federatedscope/hpo.py b/federatedscope/hpo.py
deleted file mode 100644
index 789c897e7..000000000
--- a/federatedscope/hpo.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import os
-import sys
-
-DEV_MODE = False  # simplify the federatedscope re-setup everytime we change
-# the source codes of federatedscope
-if DEV_MODE:
-    file_dir = os.path.join(os.path.dirname(__file__), '..')
-    sys.path.append(file_dir)
-
-from federatedscope.core.auxiliaries.utils import setup_seed
-from federatedscope.core.auxiliaries.logging import update_logger
-from federatedscope.core.cmd_args import parse_args, parse_client_cfg
-from federatedscope.core.configs.config import global_cfg, CfgNode
-from federatedscope.autotune import get_scheduler, run_scheduler
-
-if os.environ.get('https_proxy'):
-    del os.environ['https_proxy']
-if os.environ.get('http_proxy'):
-    del os.environ['http_proxy']
-
-if __name__ == '__main__':
-    init_cfg = global_cfg.clone()
-    args = parse_args()
-    if args.cfg_file:
-        init_cfg.merge_from_file(args.cfg_file)
-    cfg_opt, client_cfg_opt = parse_client_cfg(args.opts)
-    init_cfg.merge_from_list(cfg_opt)
-
-    # Update Exp_name for hpo
-    if init_cfg.expname == '':
-        from federatedscope.autotune.utils import generate_hpo_exp_name
-        init_cfg.expname = generate_hpo_exp_name(init_cfg)
-
-    update_logger(init_cfg, clear_before_add=True)
-    setup_seed(init_cfg.seed)
-
-    # load clients' cfg file
-    if args.client_cfg_file:
-        client_cfgs = CfgNode.load_cfg(open(args.client_cfg_file, 'r'))
-        # client_cfgs.set_new_allowed(True)
-        client_cfgs.merge_from_list(client_cfg_opt)
-    else:
-        client_cfgs = None
-
-    scheduler = get_scheduler(init_cfg, client_cfgs)
-    run_scheduler(scheduler, init_cfg, client_cfgs)
diff --git a/federatedscope/main.py b/federatedscope/main.py
index d63ec8444..f5e2b45e9 100644
--- a/federatedscope/main.py
+++ b/federatedscope/main.py
@@ -29,6 +29,13 @@
     cfg_opt, client_cfg_opt = parse_client_cfg(args.opts)
     init_cfg.merge_from_list(cfg_opt)
 
+    if init_cfg.hpo.use:
+        # TODO: fix hpo yaml file
+        # Update Exp_name for hpo
+        if init_cfg.expname == '':
+            from federatedscope.autotune.utils import generate_hpo_exp_name
+            init_cfg.expname = generate_hpo_exp_name(init_cfg)
+
     update_logger(init_cfg, clear_before_add=True)
     setup_seed(init_cfg.seed)
 
@@ -40,18 +47,23 @@
     else:
         client_cfgs = None
 
-    # federated dataset might change the number of clients
-    # thus, we allow the creation procedure of dataset to modify the global
-    # cfg object
-    data, modified_cfg = get_data(config=init_cfg.clone(),
-                                  client_cfgs=client_cfgs)
-    init_cfg.merge_from_other_cfg(modified_cfg)
-
-    init_cfg.freeze()
-
-    runner = get_runner(data=data,
-                        server_class=get_server_cls(init_cfg),
-                        client_class=get_client_cls(init_cfg),
-                        config=init_cfg.clone(),
-                        client_configs=client_cfgs)
-    _ = runner.run()
+    if init_cfg.hpo.use:
+        from federatedscope.autotune import get_scheduler, run_scheduler
+        scheduler = get_scheduler(init_cfg, client_cfgs)
+        run_scheduler(scheduler, init_cfg, client_cfgs)
+    else:
+        # federated dataset might change the number of clients
+        # thus, we allow the creation procedure of dataset to modify the global
+        # cfg object
+        data, modified_cfg = get_data(config=init_cfg.clone(),
+                                      client_cfgs=client_cfgs)
+        init_cfg.merge_from_other_cfg(modified_cfg)
+
+        init_cfg.freeze()
+
+        runner = get_runner(data=data,
+                            server_class=get_server_cls(init_cfg),
+                            client_class=get_client_cls(init_cfg),
+                            config=init_cfg.clone(),
+                            client_configs=client_cfgs)
+        _ = runner.run()
diff --git a/federatedscope/vertical_fl/worker/vertical_server.py b/federatedscope/vertical_fl/worker/vertical_server.py
index 2fd34faf7..4873351f2 100644
--- a/federatedscope/vertical_fl/worker/vertical_server.py
+++ b/federatedscope/vertical_fl/worker/vertical_server.py
@@ -53,7 +53,7 @@ def trigger_for_start(self):
         if self.check_client_join_in():
             self.broadcast_public_keys()
             self.broadcast_client_address()
-            self.trigger_for_feat_engr(self.broadcast_model_para)
+            self.trigger_for_train(self.broadcast_model_para)
 
     def broadcast_public_keys(self):
         self.comm_manager.send(
diff --git a/scripts/example_configs/cora/sha.yaml b/scripts/example_configs/cora/sha.yaml
index 00444abb6..e8a2b51d9 100644
--- a/scripts/example_configs/cora/sha.yaml
+++ b/scripts/example_configs/cora/sha.yaml
@@ -36,6 +36,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: sha
   num_workers: 0
   init_cand_num: 81
diff --git a/scripts/example_configs/cora/sha_wrap_fedex.yaml b/scripts/example_configs/cora/sha_wrap_fedex.yaml
index a5c729c14..deb6b1e9f 100644
--- a/scripts/example_configs/cora/sha_wrap_fedex.yaml
+++ b/scripts/example_configs/cora/sha_wrap_fedex.yaml
@@ -36,6 +36,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: sha
   num_workers: 0
   init_cand_num: 81
diff --git a/scripts/example_configs/cora/sha_wrap_fedex_arm.yaml b/scripts/example_configs/cora/sha_wrap_fedex_arm.yaml
index 6566791d5..28d8be2fb 100644
--- a/scripts/example_configs/cora/sha_wrap_fedex_arm.yaml
+++ b/scripts/example_configs/cora/sha_wrap_fedex_arm.yaml
@@ -36,6 +36,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: wrap_sha
   num_workers: 0
   init_cand_num: 81
diff --git a/scripts/example_configs/fed_node_cls.yaml b/scripts/example_configs/fed_node_cls.yaml
index 1f84b165d..5ceff4aaa 100644
--- a/scripts/example_configs/fed_node_cls.yaml
+++ b/scripts/example_configs/fed_node_cls.yaml
@@ -29,6 +29,7 @@ trainer:
 eval:
   metrics: ['acc', 'correct']
 hpo:
+  use: True
   scheduler: sha
   larger_better: True
   metric: 'server_global_eval.test_acc'
diff --git a/scripts/example_configs/femnist/avg/bo_gp.yaml b/scripts/example_configs/femnist/avg/bo_gp.yaml
index 35915c31f..3b6901195 100644
--- a/scripts/example_configs/femnist/avg/bo_gp.yaml
+++ b/scripts/example_configs/femnist/avg/bo_gp.yaml
@@ -40,6 +40,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: bo_gp
   num_workers: 0
   ss: 'scripts/example_configs/femnist/avg/ss.yaml'
diff --git a/scripts/example_configs/femnist/avg/bo_gp_wrap.yaml b/scripts/example_configs/femnist/avg/bo_gp_wrap.yaml
index 815325682..070254a0b 100644
--- a/scripts/example_configs/femnist/avg/bo_gp_wrap.yaml
+++ b/scripts/example_configs/femnist/avg/bo_gp_wrap.yaml
@@ -41,6 +41,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: wrap_bo_gp
   num_workers: 0
   init_cand_num: 100
diff --git a/scripts/example_configs/femnist/avg/bo_kde.yaml b/scripts/example_configs/femnist/avg/bo_kde.yaml
index e8096feeb..685af450a 100644
--- a/scripts/example_configs/femnist/avg/bo_kde.yaml
+++ b/scripts/example_configs/femnist/avg/bo_kde.yaml
@@ -40,6 +40,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: bo_kde
   num_workers: 0
   ss: 'scripts/example_configs/femnist/avg/ss.yaml'
diff --git a/scripts/example_configs/femnist/avg/bo_kde_wrap.yaml b/scripts/example_configs/femnist/avg/bo_kde_wrap.yaml
index 1ad4f0003..b06f02926 100644
--- a/scripts/example_configs/femnist/avg/bo_kde_wrap.yaml
+++ b/scripts/example_configs/femnist/avg/bo_kde_wrap.yaml
@@ -41,6 +41,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: wrap_bo_kde
   num_workers: 0
   init_cand_num: 100
diff --git a/scripts/example_configs/femnist/avg/bo_rf.yaml b/scripts/example_configs/femnist/avg/bo_rf.yaml
index 1e8bca28e..f69a5196b 100644
--- a/scripts/example_configs/femnist/avg/bo_rf.yaml
+++ b/scripts/example_configs/femnist/avg/bo_rf.yaml
@@ -40,6 +40,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: bo_rf
   num_workers: 0
   ss: 'scripts/example_configs/femnist/avg/ss.yaml'
diff --git a/scripts/example_configs/femnist/avg/bo_rf_wrap.yaml b/scripts/example_configs/femnist/avg/bo_rf_wrap.yaml
index f2977a2eb..6745090a2 100644
--- a/scripts/example_configs/femnist/avg/bo_rf_wrap.yaml
+++ b/scripts/example_configs/femnist/avg/bo_rf_wrap.yaml
@@ -41,6 +41,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: wrap_bo_rf
   num_workers: 0
   init_cand_num: 100
diff --git a/scripts/example_configs/femnist/avg/bohb.yaml b/scripts/example_configs/femnist/avg/bohb.yaml
index b60970464..5f14d3fe9 100644
--- a/scripts/example_configs/femnist/avg/bohb.yaml
+++ b/scripts/example_configs/femnist/avg/bohb.yaml
@@ -40,6 +40,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: bohb
   num_workers: 0
   ss: 'scripts/example_configs/femnist/avg/ss.yaml'
diff --git a/scripts/example_configs/femnist/avg/bohb_wrap.yaml b/scripts/example_configs/femnist/avg/bohb_wrap.yaml
index e3ca1f733..5613045bd 100644
--- a/scripts/example_configs/femnist/avg/bohb_wrap.yaml
+++ b/scripts/example_configs/femnist/avg/bohb_wrap.yaml
@@ -41,6 +41,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: wrap_bohb
   num_workers: 0
   init_cand_num: 100
diff --git a/scripts/example_configs/femnist/avg/hb.yaml b/scripts/example_configs/femnist/avg/hb.yaml
index f48d9de93..d2ce65fd5 100644
--- a/scripts/example_configs/femnist/avg/hb.yaml
+++ b/scripts/example_configs/femnist/avg/hb.yaml
@@ -40,6 +40,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: hb
   num_workers: 0
   ss: 'scripts/example_configs/femnist/avg/ss.yaml'
diff --git a/scripts/example_configs/femnist/avg/hb_wrap.yaml b/scripts/example_configs/femnist/avg/hb_wrap.yaml
index 1cfdeeca6..e38893918 100644
--- a/scripts/example_configs/femnist/avg/hb_wrap.yaml
+++ b/scripts/example_configs/femnist/avg/hb_wrap.yaml
@@ -41,6 +41,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: wrap_hb
   num_workers: 0
   init_cand_num: 100
diff --git a/scripts/example_configs/femnist/avg/rs.yaml b/scripts/example_configs/femnist/avg/rs.yaml
index d6dd868a0..f9bfbd788 100644
--- a/scripts/example_configs/femnist/avg/rs.yaml
+++ b/scripts/example_configs/femnist/avg/rs.yaml
@@ -40,6 +40,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: rs
   num_workers: 0
   init_cand_num: 10
diff --git a/scripts/example_configs/femnist/avg/rs_wrap.yaml b/scripts/example_configs/femnist/avg/rs_wrap.yaml
index 9d4680524..5bee56ca1 100644
--- a/scripts/example_configs/femnist/avg/rs_wrap.yaml
+++ b/scripts/example_configs/femnist/avg/rs_wrap.yaml
@@ -41,6 +41,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: wrap_rs
   num_workers: 0
   init_cand_num: 100
diff --git a/scripts/example_configs/femnist/avg/sha.yaml b/scripts/example_configs/femnist/avg/sha.yaml
index 0c9350fe0..f73a6a839 100644
--- a/scripts/example_configs/femnist/avg/sha.yaml
+++ b/scripts/example_configs/femnist/avg/sha.yaml
@@ -40,6 +40,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: sha
   num_workers: 0
   init_cand_num: 27
diff --git a/scripts/example_configs/femnist/avg/sha_wrap.yaml b/scripts/example_configs/femnist/avg/sha_wrap.yaml
index cf476e100..81e5bb579 100644
--- a/scripts/example_configs/femnist/avg/sha_wrap.yaml
+++ b/scripts/example_configs/femnist/avg/sha_wrap.yaml
@@ -41,6 +41,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: wrap_sha
   num_workers: 0
   init_cand_num: 27
diff --git a/scripts/example_configs/femnist/sha.yaml b/scripts/example_configs/femnist/sha.yaml
index 2343d43d6..266e67e79 100644
--- a/scripts/example_configs/femnist/sha.yaml
+++ b/scripts/example_configs/femnist/sha.yaml
@@ -40,6 +40,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: sha
   num_workers: 0
   init_cand_num: 27
diff --git a/scripts/example_configs/femnist/sha_wrap_fedex.yaml b/scripts/example_configs/femnist/sha_wrap_fedex.yaml
index 58309fe31..1fbbdde4e 100644
--- a/scripts/example_configs/femnist/sha_wrap_fedex.yaml
+++ b/scripts/example_configs/femnist/sha_wrap_fedex.yaml
@@ -40,6 +40,7 @@ eval:
   metrics: ['acc', 'correct', 'f1']
   split: ['test', 'val', 'train']
 hpo:
+  use: True
   scheduler: sha
   num_workers: 0
   init_cand_num: 81
diff --git a/scripts/example_configs/sha_wrap_fedex.yaml b/scripts/example_configs/sha_wrap_fedex.yaml
index 5bf9240c0..f37bf16b5 100644
--- a/scripts/example_configs/sha_wrap_fedex.yaml
+++ b/scripts/example_configs/sha_wrap_fedex.yaml
@@ -17,6 +17,7 @@ data:
 model:
   type: 'lr'
 hpo:
+  use: True
   scheduler: sha
   num_workers: 1
   init_cand_num: 5
diff --git a/scripts/example_configs/sha_wrap_fedex_arm.yaml b/scripts/example_configs/sha_wrap_fedex_arm.yaml
index 8fdda9d2b..b45cc87e5 100644
--- a/scripts/example_configs/sha_wrap_fedex_arm.yaml
+++ b/scripts/example_configs/sha_wrap_fedex_arm.yaml
@@ -19,6 +19,7 @@ model:
 fedopt:
   use: True
 hpo:
+  use: True
   scheduler: wrap_sha
   #num_workers: 1
   num_workers: 0
diff --git a/scripts/example_configs/toy_rs.yaml b/scripts/example_configs/toy_rs.yaml
index dbc6fe433..eb01af388 100644
--- a/scripts/example_configs/toy_rs.yaml
+++ b/scripts/example_configs/toy_rs.yaml
@@ -13,6 +13,7 @@ model:
 data:
   type: 'toy'
 hpo:
+  use: True
   num_workers: 3
   init_cand_num: 3
   ss: scripts/example_configs/toy_hpo_ss.yaml
diff --git a/scripts/example_configs/toy_sha.yaml b/scripts/example_configs/toy_sha.yaml
index ed72cc924..fd620c784 100644
--- a/scripts/example_configs/toy_sha.yaml
+++ b/scripts/example_configs/toy_sha.yaml
@@ -14,6 +14,7 @@ model:
 data:
   type: 'toy'
 hpo:
+  use: True
   scheduler: sha
   num_workers: 0
   init_cand_num: 5
diff --git a/scripts/wide_valley_exp_scripts/fedentsgd_on_cifar10.yaml b/scripts/wide_valley_exp_scripts/fedentsgd_on_cifar10.yaml
index 2d94ef1e8..dbabecaa2 100644
--- a/scripts/wide_valley_exp_scripts/fedentsgd_on_cifar10.yaml
+++ b/scripts/wide_valley_exp_scripts/fedentsgd_on_cifar10.yaml
@@ -54,6 +54,7 @@ eval:
   best_res_update_round_wise_key: test_acc
   count_flops: False
 hpo:
+  use: True
   scheduler: bo_gp
   num_workers: 0
   ss: 'scripts/wide_valley_exp_scripts/search_space_for_fedentsgd.yaml'

From be40e7bb6189e20dff110a6fee113782d66c9224 Mon Sep 17 00:00:00 2001
From: rayrayraykk <18007356109@163.com>
Date: Thu, 16 Feb 2023 10:59:39 +0800
Subject: [PATCH 2/3] fix minor bugs

---
 .../core/auxiliaries/worker_builder.py        | 45 +++++++++----------
 federatedscope/core/trainers/torch_trainer.py |  2 +-
 2 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/federatedscope/core/auxiliaries/worker_builder.py b/federatedscope/core/auxiliaries/worker_builder.py
index aa2866165..95255bc5a 100644
--- a/federatedscope/core/auxiliaries/worker_builder.py
+++ b/federatedscope/core/auxiliaries/worker_builder.py
@@ -45,16 +45,16 @@ def get_client_cls(cfg):
         worker_class = func(cfg.federate.method.lower())
         if worker_class is not None:
             return worker_class['client']
-    if cfg.hpo.use:
-        if cfg.hpo.fedex.use:
-            from federatedscope.autotune.fedex import FedExClient
-            return FedExClient
-        if cfg.hpo.fts.use:
-            from federatedscope.autotune.fts import FTSClient
-            return FTSClient
-        if cfg.hpo.pfedhpo.use:
-            from federatedscope.autotune.pfedhpo import pFedHPOClient
-            return pFedHPOClient
+
+    if cfg.hpo.fedex.use:
+        from federatedscope.autotune.fedex import FedExClient
+        return FedExClient
+    if cfg.hpo.fts.use:
+        from federatedscope.autotune.fts import FTSClient
+        return FTSClient
+    if cfg.hpo.pfedhpo.use:
+        from federatedscope.autotune.pfedhpo import pFedHPOClient
+        return pFedHPOClient
 
     if cfg.vertical.use:
         if cfg.vertical.algo == 'lr':
@@ -141,19 +141,18 @@ def get_server_cls(cfg):
         if worker_class is not None:
             return worker_class['server']
 
-    if cfg.hpo.use:
-        if cfg.hpo.fedex.use:
-            from federatedscope.autotune.fedex import FedExServer
-            return FedExServer
-        if cfg.hpo.fts.use:
-            from federatedscope.autotune.fts import FTSServer
-            return FTSServer
-        if cfg.hpo.pfedhpo.use and not cfg.hpo.pfedhpo.train_fl:
-            from federatedscope.autotune.pfedhpo import pFedHPOServer
-            return pFedHPOServer
-        if cfg.hpo.pfedhpo.use and cfg.hpo.pfedhpo.train_fl:
-            from federatedscope.autotune.pfedhpo import pFedHPOFLServer
-            return pFedHPOFLServer
+    if cfg.hpo.fedex.use:
+        from federatedscope.autotune.fedex import FedExServer
+        return FedExServer
+    if cfg.hpo.fts.use:
+        from federatedscope.autotune.fts import FTSServer
+        return FTSServer
+    if cfg.hpo.pfedhpo.use and not cfg.hpo.pfedhpo.train_fl:
+        from federatedscope.autotune.pfedhpo import pFedHPOServer
+        return pFedHPOServer
+    if cfg.hpo.pfedhpo.use and cfg.hpo.pfedhpo.train_fl:
+        from federatedscope.autotune.pfedhpo import pFedHPOFLServer
+        return pFedHPOFLServer
 
     if cfg.attack.attack_method.lower() in ['dlg', 'ig']:
         from federatedscope.attack.worker_as_attacker.server_attacker import\
diff --git a/federatedscope/core/trainers/torch_trainer.py b/federatedscope/core/trainers/torch_trainer.py
index c343cbabf..cb9685365 100644
--- a/federatedscope/core/trainers/torch_trainer.py
+++ b/federatedscope/core/trainers/torch_trainer.py
@@ -34,7 +34,7 @@ def get_model_para(self):
 
     def setup_data(self, ctx):
         """
-        Initialization data by ``cfg``.
+        Initializsetup_dataation data by ``cfg``.
         """
         if isinstance(ctx.data, ClientData):
             ctx.data.setup(ctx.cfg)

From 0146a883176cced64b5cc59d63bcf68e1d465d91 Mon Sep 17 00:00:00 2001
From: rayrayraykk <18007356109@163.com>
Date: Thu, 16 Feb 2023 11:00:44 +0800
Subject: [PATCH 3/3] fix docstr

---
 federatedscope/core/trainers/torch_trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/federatedscope/core/trainers/torch_trainer.py b/federatedscope/core/trainers/torch_trainer.py
index cb9685365..c343cbabf 100644
--- a/federatedscope/core/trainers/torch_trainer.py
+++ b/federatedscope/core/trainers/torch_trainer.py
@@ -34,7 +34,7 @@ def get_model_para(self):
 
     def setup_data(self, ctx):
         """
-        Initializsetup_dataation data by ``cfg``.
+        Initialization data by ``cfg``.
        """
         if isinstance(ctx.data, ClientData):
             ctx.data.setup(ctx.cfg)
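
---

Usage sketch (a minimal illustration, not part of the patches above; assumes
the patched FederatedScope tree is importable): the new `flatten_dict` helper
in federatedscope/autotune/utils.py turns the nested server cfg into
dot-separated keys (the same `hpo.sha.elim_rate` style used in the YAMLs),
which `wrap_autotune_server` feeds through `config2cmdargs` and broadcasts as
a 'cfg' message; the wrapped client then applies it via `merge_from_list` in
`callback_funcs_for_cfg`.

    # Minimal sketch; assumes the patched federatedscope package is on the
    # PYTHONPATH. Only nested mappings recurse; lists and scalars are kept
    # as-is, so the flattened dict is ready for merge_from_list-style pairs.
    from federatedscope.autotune.utils import flatten_dict

    nested = {'hpo': {'scheduler': 'sha', 'sha': {'budgets': [3, 9]}}}
    flat = flatten_dict(nested)
    assert flat == {'hpo.scheduler': 'sha', 'hpo.sha.budgets': [3, 9]}

To try the distributed-HPO example end to end, each role can be launched with
the standard entry point in its own shell, e.g.
`python federatedscope/main.py --cfg federatedscope/autotune/baseline/distributed_xgb_server_hpo.yaml`,
and likewise for distributed_xgb_client_1_hpo.yaml and
distributed_xgb_client_2_hpo.yaml.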