From 1f7f7df996ac2fd8018ecc33158f2b4bf301162a Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Thu, 21 Oct 2021 13:47:33 -0700 Subject: [PATCH] Updates to only use docker `--privileged` when required and check cpuset (#150) * Update numactl usage * Update error handling * Check to make sure it's in cpuset * Update forming cpu list * Style fixes and add unit tests * Test updates * Updates to figure out which cores are on which node * Update to print debug message * Update to organize the cpuset list by node: * Check args to see if docker should run with privileged * Add unit tests * Update numa_cores_per_instance for 'socket' setting * update for cores per instance 'socket' * Add doc update * Remove unused import * Style fixes * update to cpuset list * make str * limit length * update cores_per_node * Move num_physical_cores calculation * update num inter/intra threads * Fix intra threads * Updates to platform util to explain the core lists * Update unit tests * Unit test updates * Add tests with limited cpusets * Remove debug print * Add additional error handling and info * Update base benchmark util * Fix conditionals * Update messages * Update for numa_cores_per_instance 'socket' when sockets have different number of cores * Fix conditionals for checking numa cores per instance socket * Add another unit test to check the case when the --socket-id specified does not have any cores in the cpuset * Removing conditional in the validate function since it's being done in the init function * Add stderr=PIPE so that the terminal doesn't show an error when PlatformUtils is used before numactl is installed --- benchmarks/common/base_benchmark_util.py | 51 +++++--- benchmarks/common/base_model_init.py | 89 +++++++++----- benchmarks/common/platform_util.py | 110 +++++++++++++++++- .../common/tensorflow/container_init.sh | 14 ++- benchmarks/common/tensorflow/start.sh | 10 +- benchmarks/launch_benchmark.py | 6 +- .../ModelPackagesAdvancedOptions.md | 40 +++++++ 
tests/test_utils/io.py | 6 +- tests/test_utils/platform_config.py | 6 +- .../tensorflow/test_run_tf_benchmarks.py | 78 ++++++++++++- .../tf_model_args/tf_3d_unet_args.json | 3 +- .../tf_model_args/tf_3d_unet_mlperf_args.json | 18 ++- .../tf_model_args/tf_bert_args.json | 42 +++++-- .../tf_model_args/tf_densenet169_args.json | 9 +- .../tf_model_args/tf_dien_args.json | 21 ++-- .../tf_model_args/tf_faster_rcnn_args.json | 19 +-- .../tf_model_args/tf_gnmt_args.json | 9 +- .../tf_model_args/tf_inceptionv3_args.json | 46 ++++++-- .../tf_model_args/tf_inceptionv4_args.json | 15 ++- .../tf_model_args/tf_mask_rcnn_args.json | 16 ++- .../tf_model_args/tf_minigo_args.json | 9 +- .../tf_model_args/tf_mobilenet_v1_args.json | 35 ++++-- .../tensorflow/tf_model_args/tf_ncf_args.json | 17 ++- .../tf_model_args/tf_resnet101_args.json | 15 ++- .../tf_model_args/tf_resnet50_args.json | 28 +++-- .../tf_model_args/tf_resnet50v1_5_args.json | 59 +++++++--- .../tf_model_args/tf_rfcn_args.json | 18 ++- .../tf_model_args/tf_ssd_mobilenet_args.json | 28 ++++- .../tf_model_args/tf_ssd_resnet34_args.json | 40 +++++-- .../tf_transformer_lt_official_args.json | 6 +- .../tf_transformer_mlperf_args.json | 12 +- .../tf_model_args/tf_unet_args.json | 3 +- .../tf_model_args/tf_wavenet_args.json | 3 +- .../tf_model_args/tf_wide_deep_args.json | 6 +- .../tf_wide_deep_large_ds_args.json | 27 +++-- tests/unit/common/test_base_model_init.py | 43 +++++++ tests/unit/common/test_platform_util.py | 91 ++++++++++++++- tests/unit/test_launch_benchmark.py | 28 +++++ 38 files changed, 860 insertions(+), 216 deletions(-) diff --git a/benchmarks/common/base_benchmark_util.py b/benchmarks/common/base_benchmark_util.py index cbfd7e55b..ca86b2931 100644 --- a/benchmarks/common/base_benchmark_util.py +++ b/benchmarks/common/base_benchmark_util.py @@ -315,21 +315,34 @@ def _validate_args(self): if args.mpi: raise ValueError("--mpi_num_processes cannot be used together with --numa-cores-per-instance.") - if 
args.numa_cores_per_instance == "socket": - args.numa_cores_per_instance = self._platform_util.num_cores_per_socket - - if args.socket_id != -1: - if int(args.numa_cores_per_instance) > self._platform_util.num_cores_per_socket: - raise ValueError("The number of --numa-cores-per-instance ({}) cannot exceed the " - "number of cores per socket {} when a single socket (--socket-id {}) " - "is being used.".format(args.numa_cores_per_instance, - self._platform_util.num_cores_per_socket, - args.socket_id)) - else: - if int(args.numa_cores_per_instance) > system_num_cores: - raise ValueError("The number of --numa-cores-per-instance ({}) cannot exceed the " - "number of system cores ({}).".format(args.numa_cores_per_instance, - system_num_cores)) + if args.numa_cores_per_instance != "socket": + if args.socket_id != -1: + if int(args.numa_cores_per_instance) > self._platform_util.num_cores_per_socket: + raise ValueError("The number of --numa-cores-per-instance ({}) cannot exceed the " + "number of cores per socket {} when a single socket (--socket-id {}) " + "is being used.".format(args.numa_cores_per_instance, + self._platform_util.num_cores_per_socket, + args.socket_id)) + else: + if int(args.numa_cores_per_instance) > system_num_cores: + raise ValueError("The number of --numa-cores-per-instance ({}) cannot exceed the " + "number of system cores ({}).".format(args.numa_cores_per_instance, + system_num_cores)) + + # If socket id is specified and we have a cpuset, make sure that there are some cores in the specified socket. + # If cores are limited, then print out a note about that. 
+ if args.socket_id != -1 and self._platform_util.cpuset_cpus: + cpuset_len_for_socket = 0 + + if args.socket_id in self._platform_util.cpuset_cpus.keys(): + cpuset_len_for_socket = len(self._platform_util.cpuset_cpus[args.socket_id]) + + if cpuset_len_for_socket == 0: + sys.exit("ERROR: There are no socket id {} cores in the cpuset.".format(args.socket_id)) + elif cpuset_len_for_socket < self._platform_util.num_cores_per_socket: + print("Note: Socket id {} is specified, but the cpuset has limited this socket to {} cores. " + "This is less than the number of cores per socket on the system ({})". + format(args.socket_id, cpuset_len_for_socket, self._platform_util.num_cores_per_socket)) def initialize_model(self, args, unknown_args): """Create model initializer for the specified model""" @@ -340,7 +353,13 @@ def initialize_model(self, args, unknown_args): os.path.dirname(os.path.realpath(__file__))) if args.numa_cores_per_instance == "socket": - args.numa_cores_per_instance = self._platform_util.num_cores_per_socket + if self._platform_util.cpuset_cpus: + if args.socket_id != -1: + args.numa_cores_per_instance = len(self._platform_util.cpuset_cpus[args.socket_id]) + else: + args.numa_cores_per_instance = "socket" + else: + args.numa_cores_per_instance = self._platform_util.num_cores_per_socket # find the path to the model_init.py file filename = "{}.py".format(self.MODEL_INITIALIZER) diff --git a/benchmarks/common/base_model_init.py b/benchmarks/common/base_model_init.py index 71898c13a..65468f23c 100644 --- a/benchmarks/common/base_model_init.py +++ b/benchmarks/common/base_model_init.py @@ -152,26 +152,38 @@ def run_numactl_multi_instance(self, cmd, replace_unique_output_dir=None): swap out that path for a path with the instance number in the folder name so that each instance uses a unique output folder. 
""" - # Get the cores list and group them according to the number of cores per instance - cores_per_instance = int(self.args.numa_cores_per_instance) - cpu_cores_list = self.platform_util.cpu_core_list - - if self.args.socket_id != -1: - # If it's specified to just use a single socket, then only use the cores from that socket - if len(cpu_cores_list) > self.args.socket_id: - cpu_cores_list = cpu_cores_list[self.args.socket_id] + + if self.args.numa_cores_per_instance != "socket": + # Get the cores list and group them according to the number of cores per instance + cores_per_instance = int(self.args.numa_cores_per_instance) + cpu_cores_list = self.platform_util.cpu_core_list + + if self.args.socket_id != -1: + # If it's specified to just use a single socket, then only use the cores from that socket + if len(cpu_cores_list) > self.args.socket_id: + cpu_cores_list = cpu_cores_list[self.args.socket_id] + else: + raise ValueError("Error while trying to get the core list for socket {0}. " + "The core list does not have cores for socket {0}.\n " + "Core list: {1}\n".format(self.args.socket_id, str(cpu_cores_list))) else: - raise ValueError("Error while trying to get the core list for socket {0}. 
" - "The core list does not have cores for socket {0}.\n " - "Core list: {1}\n".format(self.args.socket_id, str(cpu_cores_list))) - else: - # Using cores from all sockets - combined_core_list = [] - for socket_cores in cpu_cores_list: - combined_core_list += socket_cores - cpu_cores_list = combined_core_list + # Using cores from all sockets + combined_core_list = [] + for socket_cores in cpu_cores_list: + combined_core_list += socket_cores + cpu_cores_list = combined_core_list - instance_cores_list = self.group_cores(cpu_cores_list, cores_per_instance) + instance_cores_list = self.group_cores(cpu_cores_list, cores_per_instance) + else: + instance_cores_list = [] + cores_per_instance = "socket" + # Cores should be grouped based on the cores for each socket + if self.args.socket_id != -1: + # Only using cores from one socket + instance_cores_list[0] = self.platform_util.cpu_core_list[self.args.socket_id] + else: + # Get the cores for each socket + instance_cores_list = self.platform_util.cpu_core_list # Setup the log file name with the model name, precision, mode, batch size (if there is one), # number of cores per instance. 
An extra {} is intentionally left in the log_filename_format @@ -188,11 +200,14 @@ def run_numactl_multi_instance(self, cmd, replace_unique_output_dir=None): # Loop through each instance and add that instance's command to a string multi_instance_command = "" for instance_num, core_list in enumerate(instance_cores_list): - if len(core_list) < int(cores_per_instance): + if cores_per_instance != "socket" and len(core_list) < int(cores_per_instance): print("NOTE: Skipping remainder of {} cores for instance {}" .format(len(core_list), instance_num)) continue + if len(core_list) == 0: + continue + prefix = ("OMP_NUM_THREADS={0} " "numactl --localalloc --physcpubind={1}").format( len(core_list), ",".join(core_list)) @@ -340,21 +355,36 @@ def set_num_inter_intra_threads(self, num_inter_threads=None, num_intra_threads= if self.args.numa_cores_per_instance: # Set default num inter/intra threads if the user didn't provide specific values + if self.args.numa_cores_per_instance == "socket": + if self.args.socket_id != -1: + inter_threads = len(self.platform_util.cpu_core_list[self.args.socket_id]) + else: + # since we can only have one value for inter threads and the number of cores + # per socket can vary, if the cpuset is limited, get the lowest core count + # per socket and use that as the num inter threads + inter_threads = min([len(i) for i in self.platform_util.cpu_core_list if len(i) > 0]) + else: + inter_threads = self.args.numa_cores_per_instance + if not self.args.num_inter_threads: self.args.num_inter_threads = 1 if not self.args.num_intra_threads: - self.args.num_intra_threads = self.args.numa_cores_per_instance + self.args.num_intra_threads = inter_threads if not self.args.data_num_inter_threads: self.args.data_num_inter_threads = 1 if not self.args.data_num_intra_threads: - self.args.data_num_intra_threads = self.args.numa_cores_per_instance + self.args.data_num_intra_threads = inter_threads elif self.args.socket_id != -1: if not self.args.num_inter_threads: 
self.args.num_inter_threads = 1 if not self.args.num_intra_threads: - self.args.num_intra_threads = \ - self.platform_util.num_cores_per_socket \ - if self.args.num_cores == -1 else self.args.num_cores + if self.args.num_cores != -1: + self.args.num_intra_threads = self.args.num_cores + elif self.platform_util.cpuset_cpus and \ + self.args.socket_id in self.platform_util.cpuset_cpus.keys(): + self.args.num_intra_threads = len(self.platform_util.cpuset_cpus[self.args.socket_id]) + else: + self.args.num_intra_threads = self.platform_util.num_cores_per_socket else: if not self.args.num_inter_threads: self.args.num_inter_threads = self.platform_util.num_cpu_sockets @@ -362,9 +392,14 @@ def set_num_inter_intra_threads(self, num_inter_threads=None, num_intra_threads= self.args.num_inter_threads = 1 if not self.args.num_intra_threads: if self.args.num_cores == -1: - self.args.num_intra_threads = \ - int(self.platform_util.num_cores_per_socket * - self.platform_util.num_cpu_sockets) + if self.platform_util.cpuset_cpus and len(self.platform_util.cpuset_cpus.keys()) > 0: + # Total up the number of cores in the cpuset + self.args.num_intra_threads = sum([len(self.platform_util.cpuset_cpus[socket_id]) + for socket_id in self.platform_util.cpuset_cpus.keys()]) + else: + self.args.num_intra_threads = \ + int(self.platform_util.num_cores_per_socket * + self.platform_util.num_cpu_sockets) if os.environ["MPI_NUM_PROCESSES"] != "None": self.args.num_intra_threads = self.platform_util.num_cores_per_socket - 2 else: diff --git a/benchmarks/common/platform_util.py b/benchmarks/common/platform_util.py index a831b1d70..6f6cfb07c 100644 --- a/benchmarks/common/platform_util.py +++ b/benchmarks/common/platform_util.py @@ -33,6 +33,8 @@ CORES_PER_SOCKET_STR_ = "Core(s) per socket" THREADS_PER_CORE_STR_ = "Thread(s) per core" LOGICAL_CPUS_STR_ = "CPU(s)" +NUMA_NODE_CPU_RANGE_STR_ = "NUMA node{} CPU(s):" +ONLINE_CPUS_LIST = "On-line CPU(s) list:" class CPUInfo(): @@ -192,8 +194,16 @@ def 
__init__(self, args): self.num_threads_per_core = 0 self.num_logical_cpus = 0 self.num_numa_nodes = 0 + + # Core list generated by numactl -H in the case where --numa-cores-per-instance is + # being used. It then gets pruned based on the cpuset_cpus, in case docker is + # limiting the cores that the container has access to self.cpu_core_list = [] + # Dictionary generated from the cpuset.cpus file (in linux_init) for the case where + # docker is limiting the number of cores that the container has access to + self.cpuset_cpus = None + os_type = system_platform.system() if "Windows" == os_type: self.windows_init() @@ -204,6 +214,45 @@ def __init__(self, args): else: raise ValueError("Unable to determine Operating system type.") + def _get_list_from_string_ranges(self, str_ranges): + """ + Converts a string of numbered ranges (comma separated numbers or ranges) to an + integer list. Duplicates should be removed and the integer list should be + ordered. + For example an input of "3-6,10,0-5" should return [0, 1, 2, 3, 4, 5, 6, 10] + """ + result_list = [] + + for section in str_ranges.split(","): + if "-" in section: + # Section is a range, so get the start and end values + start, end = section.split("-") + section_list = range(int(start), int(end) + 1) + result_list += section_list + else: + # This section is just a single number, not a range + result_list.append(int(section)) + + # Remove duplicates + result_list = list(set(result_list)) + + return result_list + + def _get_cpuset(self): + """ + Try to get the cpuset.cpus info, since lscpu does not know if docker has limited + the cpuset accessible to the container + """ + cpuset = "" + cpuset_cpus_file = "/sys/fs/cgroup/cpuset/cpuset.cpus" + if os.path.exists(cpuset_cpus_file): + with open(cpuset_cpus_file, "r") as f: + cpuset = f.read() + + if self.args.verbose: + print("cpuset.cpus: {}".format(cpuset)) + return cpuset + def linux_init(self): lscpu_cmd = "lscpu" try: @@ -219,6 +268,9 @@ def linux_init(self): 
print("Problem getting CPU info: {}".format(e)) sys.exit(1) + core_list_per_node = {} + online_cpus_list = "" + # parse it for line in cpu_info: # NUMA_NODES_STR_ = "NUMA node(s)" @@ -236,28 +288,76 @@ def linux_init(self): # LOGICAL_CPUS_STR_ = "CPU(s)" elif line.find(LOGICAL_CPUS_STR_) == 0: self.num_logical_cpus = int(line.split(":")[1].strip()) + # ONLINE_CPUS_LIST = "On-line CPU(s) list" + elif line.find(ONLINE_CPUS_LIST) == 0: + online_cpus_list = line.split(":")[1].strip() + else: + # Get the ranges of cores per node from NUMA node* CPU(s) + for node in range(0, self.num_numa_nodes): + if line.find(NUMA_NODE_CPU_RANGE_STR_.format(str(node))) == 0: + range_for_node = line.split(":")[1].strip() + range_list_for_node = self._get_list_from_string_ranges(range_for_node) + core_list_per_node[node] = range_list_for_node + + # Try to get the cpuset.cpus info, since lscpu does not know if the cpuset is limited + cpuset = self._get_cpuset() + if cpuset: + # If the cpuset is the same as the online_cpus_list, then we are using the whole + # machine, so let's avoid unnecessary complexity and don't bother with the cpuset_cpu list + if (online_cpus_list != "" and online_cpus_list != cpuset) or online_cpus_list == "": + self.cpuset_cpus = self._get_list_from_string_ranges(cpuset) # Uses numactl get the core number for each numa node and adds the cores for each - # node to the cpu_cores_list array - if self.num_numa_nodes > 0: + # node to the cpu_cores_list array. Only do this if the command is trying to use + # numa_cores_per_instance we can't count on numactl being installed otherwise and + # this list is only used for the numactl multi-instance runs. 
+ num_physical_cores = self.num_cpu_sockets * self.num_cores_per_socket + cores_per_node = int(num_physical_cores / self.num_numa_nodes) + if self.num_numa_nodes > 0 and self.args.numa_cores_per_instance is not None: try: # Get the list of cores - num_physical_cores = self.num_cpu_sockets * self.num_cores_per_socket - cores_per_node = int(num_physical_cores / self.num_numa_nodes) cpu_array_command = \ "numactl -H | grep 'node [0-9]* cpus:' |" \ "sed 's/.*node [0-9]* cpus: *//' | head -{0} |cut -f1-{1} -d' '".format( self.num_numa_nodes, int(cores_per_node)) cpu_array = subprocess.Popen( - cpu_array_command, shell=True, stdout=subprocess.PIPE).stdout.readlines() + cpu_array_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).stdout.readlines() for node_cpus in cpu_array: node_cpus = str(node_cpus).lstrip("b'").replace("\\n'", " ") self.cpu_core_list.append([x for x in node_cpus.split(" ") if x != '']) + + # If we have the cpuset list, cross check that list with our core list and + # remove cores that are not part of the cpuset list + if self.cpuset_cpus is not None: + for socket, core_list in enumerate(self.cpu_core_list): + self.cpu_core_list[socket] = [x for x in core_list if int(x) in self.cpuset_cpus] + + if (self.args.verbose): + print("Core list: {}".format(self.cpu_core_list), flush=True) + except Exception as e: print("Warning: An error occured when getting the list of cores using '{}':\n {}". 
format(cpu_array_command, e)) + if self.cpuset_cpus is not None: + # Reformat the cpuset_cpus list so that it's split up by node + for node in core_list_per_node.keys(): + core_list_per_node[node] = [x for x in core_list_per_node[node] if x in self.cpuset_cpus] + self.cpuset_cpus = core_list_per_node + + # Remove cores that aren't part of the cpu_core_list + for socket in self.cpuset_cpus.keys(): + if len(self.cpuset_cpus[socket]) > cores_per_node: + del self.cpuset_cpus[socket][cores_per_node:] + + # Remove keys with empty lists (sockets where there are no cores enabled in the cpuset) + self.cpuset_cpus = {k: v for k, v in self.cpuset_cpus.items() if v} + + # Update the number of sockets based on the cpuset + if len(self.cpuset_cpus.keys()) > 0: + self.num_cpu_sockets = len(self.cpuset_cpus.keys()) + def windows_init(self): NUM_SOCKETS_STR_ = "DeviceID" CORES_PER_SOCKET_STR_ = "NumberOfCores" diff --git a/benchmarks/common/tensorflow/container_init.sh b/benchmarks/common/tensorflow/container_init.sh index ff0bce322..b2ac40524 100755 --- a/benchmarks/common/tensorflow/container_init.sh +++ b/benchmarks/common/tensorflow/container_init.sh @@ -16,10 +16,12 @@ # # This file includes runtime installs for model containers - -if (( $(id -u) == 0 )); then - apt-get install numactl -y -else - echo "Please run as root" - exit 1 +if [[ $NUMA_CORES_PER_INSTANCE != "None" || $SOCKET_ID != "-1" || $NUM_CORES != "-1" ]]; then + if (( $(id -u) == 0 )); then + apt-get install numactl -y + else + echo "Please run as root" + exit 1 + fi fi + diff --git a/benchmarks/common/tensorflow/start.sh b/benchmarks/common/tensorflow/start.sh index 8095b8247..83aafafa7 100644 --- a/benchmarks/common/tensorflow/start.sh +++ b/benchmarks/common/tensorflow/start.sh @@ -154,10 +154,12 @@ if _running-in-container ; then # Call the framework's container_init.sh, if it exists if [ -f ${MOUNT_BENCHMARK}/common/${FRAMEWORK}/container_init.sh ]; then if [[ ${CENTOS_PLATFORM} == "True" ]] && [[ 
${NOINSTALL} != "True" ]]; then - yum update -y - yum install -y numactl - else - ${MOUNT_BENCHMARK}/common/${FRAMEWORK}/container_init.sh + if [[ $NUMA_CORES_PER_INSTANCE != "None" || $SOCKET_ID != "-1" || $NUM_CORES != "-1" ]]; then + yum update -y + yum install -y numactl + fi + else + ${MOUNT_BENCHMARK}/common/${FRAMEWORK}/container_init.sh fi fi # Call the model specific container_init.sh, if it exists diff --git a/benchmarks/launch_benchmark.py b/benchmarks/launch_benchmark.py index fb2318db6..2be4cf494 100644 --- a/benchmarks/launch_benchmark.py +++ b/benchmarks/launch_benchmark.py @@ -442,9 +442,13 @@ def run_docker_container(self, benchmark_scripts, intelai_models, if args.debug: docker_run_cmd.append("-it") + if args.numa_cores_per_instance is not None or args.socket_id != -1 or \ + args.num_cores != -1 or args.mpi is not None or args.num_mpi > 1: + docker_run_cmd.append("--privileged") + docker_shm_size = "--shm-size={}".format(args.shm_size) docker_run_cmd = docker_run_cmd + env_vars + volume_mounts + [ - docker_shm_size, "--privileged", "-u", "root:root", "-w", + docker_shm_size, "-u", "root:root", "-w", workspace, args.docker_image, "/bin/bash"] if not args.debug: diff --git a/quickstart/common/tensorflow/ModelPackagesAdvancedOptions.md b/quickstart/common/tensorflow/ModelPackagesAdvancedOptions.md index 2b19c8442..c53c5b6db 100644 --- a/quickstart/common/tensorflow/ModelPackagesAdvancedOptions.md +++ b/quickstart/common/tensorflow/ModelPackagesAdvancedOptions.md @@ -116,6 +116,46 @@ docker run \ --data-location ${DATASET_DIR} +If a cpuset is specified along with `--numa-cores-per-instance`, the cores +used for each instance will be limited to those specified as part of the cpuset. +Also, note that since `--numa-cores-per-instance` uses `numactl`, it needs to +be run with `--privilege`. + +
+MODEL_ZOO_DIR=<path to the model zoo directory>
+PRETRAINED_MODEL=<path to the pretrained model .pb file>
+OUTPUT_DIR=<directory where log files will be written>
+
+docker run --rm --privileged --init \
+    --volume $PRETRAINED_MODEL:$PRETRAINED_MODEL \
+    --volume $MODEL_ZOO_DIR:$MODEL_ZOO_DIR \
+    --volume $OUTPUT_DIR:$OUTPUT_DIR \
+    --env http_proxy=$http_proxy \
+    --env https_proxy=$https_proxy \
+    --env PRETRAINED_MODEL=$PRETRAINED_MODEL \
+    --env OUTPUT_DIR=$OUTPUT_DIR \
+    -w $MODEL_ZOO_DIR \
+    --cpuset-cpus "0-7,28-35" \
+    -it intel/intel-optimized-tensorflow:latest \
+    python benchmarks/launch_benchmark.py \
+    --in-graph ${PRETRAINED_MODEL} \
+    --model-name resnet50v1_5 \
+    --framework tensorflow \
+    --precision bfloat16 \
+    --mode inference \
+    --batch-size=1 \
+    --output-dir ${OUTPUT_DIR} \
+    --benchmark-only \
+    --numa-cores-per-instance 4
+
+# The command above ends up running the following instances:
+# OMP_NUM_THREADS=4 numactl --localalloc --physcpubind=0,1,2,3 python eval_image_classifier_inference.py --input-graph=resnet50_v1_5_bfloat16.pb --num-inter-threads=1 --num-intra-threads=4 --num-cores=28 --batch-size=1 --warmup-steps=10 --steps=50 --data-num-inter-threads=1 --data-num-intra-threads=4 >> resnet50v1_5_bfloat16_inference_bs1_cores4_instance0.log 2>&1 & \
+# OMP_NUM_THREADS=4 numactl --localalloc --physcpubind=4,5,6,7 python eval_image_classifier_inference.py --input-graph=resnet50_v1_5_bfloat16.pb --num-inter-threads=1 --num-intra-threads=4 --num-cores=28 --batch-size=1 --warmup-steps=10 --steps=50 --data-num-inter-threads=1 --data-num-intra-threads=4 >> resnet50v1_5_bfloat16_inference_bs1_cores4_instance1.log 2>&1 & \
+# OMP_NUM_THREADS=4 numactl --localalloc --physcpubind=28,29,30,31 python eval_image_classifier_inference.py --input-graph=resnet50_v1_5_bfloat16.pb --num-inter-threads=1 --num-intra-threads=4 --num-cores=28 --batch-size=1 --warmup-steps=10 --steps=50 --data-num-inter-threads=1 --data-num-intra-threads=4 >> resnet50v1_5_bfloat16_inference_bs1_cores4_instance2.log 2>&1 & \
+# OMP_NUM_THREADS=4 numactl --localalloc --physcpubind=32,33,34,35 python eval_image_classifier_inference.py --input-graph=resnet50_v1_5_bfloat16.pb --num-inter-threads=1 --num-intra-threads=4 --num-cores=28 --batch-size=1 --warmup-steps=10 --steps=50 --data-num-inter-threads=1 --data-num-intra-threads=4 >> resnet50v1_5_bfloat16_inference_bs1_cores4_instance3.log 2>&1 & \
+# wait
+
+ ## Mounting local model packages in docker A download of the model package can be run in a docker container by mounting the diff --git a/tests/test_utils/io.py b/tests/test_utils/io.py index 21ee943f2..a9809b549 100644 --- a/tests/test_utils/io.py +++ b/tests/test_utils/io.py @@ -33,6 +33,10 @@ def parse_json_files(json_dir_path): with open(file_path) as f: data = json.load(f) for x in data: + # Use 0-111 as the default cpuset, if it's not specified in the json + cpuset = "0-111" + if 'cpuset' in x.keys(): + cpuset = x['cpuset'] values.append( - tuple((x['input'], x['output'], model_file + " :: " + x['_comment']))) + tuple((x['input'], x['output'], model_file + " :: " + x['_comment'], cpuset))) return values diff --git a/tests/test_utils/platform_config.py b/tests/test_utils/platform_config.py index 2ae2384af..6d070434f 100644 --- a/tests/test_utils/platform_config.py +++ b/tests/test_utils/platform_config.py @@ -29,7 +29,11 @@ "Thread(s) per core: 2\n" "Core(s) per socket: 28\n" "Socket(s): 2\n" - "NUMA node(s): 2\n") + "NUMA node(s): 2\n" + "On-line CPU(s) list: 0-111\n" + "NUMA node0 CPU(s): 0-27,56-83\n" + "NUMA node1 CPU(s): 28-55,84-111\n") + NUMA_CORES_OUTPUT = ['0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27', '28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55'] diff --git a/tests/unit/common/tensorflow/test_run_tf_benchmarks.py b/tests/unit/common/tensorflow/test_run_tf_benchmarks.py index f4a850815..6f86ca95a 100644 --- a/tests/unit/common/tensorflow/test_run_tf_benchmarks.py +++ b/tests/unit/common/tensorflow/test_run_tf_benchmarks.py @@ -63,7 +63,7 @@ def clear_kmp_env_vars(): test_arg_values = parse_model_args_file() -@pytest.mark.parametrize("test_args,expected_cmd,comment", test_arg_values) +@pytest.mark.parametrize("test_args,expected_cmd,comment,cpuset", test_arg_values) @patch("os.mkdir") @patch("shutil.rmtree") @patch("os.listdir") @@ -74,14 +74,15 @@ def clear_kmp_env_vars(): 
@patch("os.chdir") @patch("os.remove") @patch("glob.glob") +@patch("common.platform_util.PlatformUtil._get_cpuset") @patch("common.platform_util.os") @patch("common.platform_util.system_platform") @patch("common.platform_util.subprocess") @patch("common.base_model_init.BaseModelInitializer.run_command") -def test_run_benchmark(mock_run_command, mock_subprocess, mock_platform, mock_os, +def test_run_benchmark(mock_run_command, mock_subprocess, mock_platform, mock_os, mock_get_cpuset, mock_glob, mock_remove, mock_chdir, mock_stat, mock_path_exists, mock_is_file, mock_is_dir, mock_listdir, mock_rmtree, mock_mkdir, - test_args, expected_cmd, comment): + test_args, expected_cmd, comment, cpuset): """ Runs through executing the specified run_tf_benchmarks.py command from the test_args and verifying that the model_init file calls run_command with @@ -103,7 +104,8 @@ def test_run_benchmark(mock_run_command, mock_subprocess, mock_platform, mock_os if match_per_socket and match_per_socket.lastindex >= 1: os.environ["MPI_NUM_PROCESSES_PER_SOCKET"] = match_per_socket.group(1) - mock_path_exists.return_value = True + mock_os.path.exists.return_value = True + mock_get_cpuset.return_value = cpuset mock_is_dir.return_value = True mock_is_file.return_value = True mock_stat.return_value = MagicMock(st_nlink=0) @@ -128,3 +130,71 @@ def test_run_benchmark(mock_run_command, mock_subprocess, mock_platform, mock_os # use fnmatch in case we have file names with wildcards (like timestamps in output files) assert fnmatch.fnmatch(actual_arg, expected_arg), \ "Expected: {}\nActual: {}".format(expected_cmd, call_args) + + +@pytest.mark.parametrize("test_args,socket_id,cpuset", + [["run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 " + "--mode inference --model-name inceptionv3 --batch-size 128 " + "--in-graph /final_int8_inceptionv3.pb --intelai-models . 
--socket-id 1 " + "--benchmark-only", "1", "0-2"], + ["run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 " + "--mode inference --model-name inceptionv3 --batch-size 128 " + "--in-graph /final_int8_inceptionv3.pb --intelai-models . --socket-id 0 " + "--benchmark-only", "0", "50-55"]]) +@patch("os.mkdir") +@patch("shutil.rmtree") +@patch("os.listdir") +@patch("os.path.isdir") +@patch("os.path.isfile") +@patch("os.path.exists") +@patch("os.stat") +@patch("os.chdir") +@patch("os.remove") +@patch("glob.glob") +@patch("common.platform_util.PlatformUtil._get_cpuset") +@patch("common.platform_util.os") +@patch("common.platform_util.system_platform") +@patch("common.platform_util.subprocess") +@patch("common.base_model_init.BaseModelInitializer.run_command") +def test_run_benchmark_bad_socket(mock_run_command, mock_subprocess, mock_platform, mock_os, mock_get_cpuset, + mock_glob, mock_remove, mock_chdir, mock_stat, mock_path_exists, + mock_is_file, mock_is_dir, mock_listdir, mock_rmtree, mock_mkdir, + test_args, socket_id, cpuset): + """ + Checks to ensure that the proper error handling is done when the cpuset does not include any cores + for the specified socket_id + """ + + os.environ["PYTHON_EXE"] = "python" + if "mpi" not in test_args: + os.environ["MPI_NUM_PROCESSES"] = "None" + os.environ["MPI_HOSTNAMES"] = "None" + else: + if "--mpi_num_processes=" in test_args: + match_mpi_procs = re.search('--mpi_num_processes=([0-9]+)', test_args) + if match_mpi_procs and match_mpi_procs.lastindex >= 1: + os.environ["MPI_NUM_PROCESSES"] = match_mpi_procs.group(1) + if "--mpi_num_processes_per_socket=" in test_args: + match_per_socket = re.search('--mpi_num_processes_per_socket=([0-9]+)', test_args) + if match_per_socket and match_per_socket.lastindex >= 1: + os.environ["MPI_NUM_PROCESSES_PER_SOCKET"] = match_per_socket.group(1) + + mock_os.path.exists.return_value = True + mock_get_cpuset.return_value = cpuset + mock_is_dir.return_value = True 
+ mock_is_file.return_value = True + mock_stat.return_value = MagicMock(st_nlink=0) + parse_model_args_file() + mock_listdir.return_value = ["data.record"] + mock_glob.return_value = ["/usr/lib/libtcmalloc.so.4.2.6"] + clear_kmp_env_vars() + platform_config.set_mock_system_type(mock_platform) + platform_config.set_mock_os_access(mock_os) + platform_config.set_mock_lscpu_subprocess_values(mock_subprocess) + test_args = re.sub(" +", " ", test_args) # get rid of extra spaces in the test_args string + test_arg_list = test_args.split(" ") + with pytest.raises(SystemExit, + match="ERROR: There are no socket id {} cores in the cpuset.".format(socket_id)): + with patch.object(sys, "argv", test_arg_list): + model_benchmark = ModelBenchmarkUtil() + model_benchmark.main() diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_3d_unet_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_3d_unet_args.json index b5cd928b5..9172efa48 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_3d_unet_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_3d_unet_args.json @@ -2,6 +2,7 @@ { "_comment": "3d_unet_inference", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=3d_unet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/tumor_segmentation_model.h5 --data-location=/dataset", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/brats/predict.py --inter 1 --intra 28 --nw 1 --nb 5 --bs 1" + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/brats/predict.py --inter 1 --intra 28 --nw 1 --nb 5 --bs 1", + "cpuset": "0-111" } ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_3d_unet_mlperf_args.json 
b/tests/unit/common/tensorflow/tf_model_args/tf_3d_unet_mlperf_args.json index cd1b686c2..c2bab43be 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_3d_unet_mlperf_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_3d_unet_mlperf_args.json @@ -2,31 +2,37 @@ { "_comment": "3d_unet_mlperf_fp32_inference", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=3d_unet_mlperf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --benchmark-only --in-graph=/in_graph/3dunet_dynamic_ndhwc.pb --warmup-steps=20 --steps=100", - "output": "python /workspace/intelai_models/inference/fp32/brats/run_performance.py --input-graph=/in_graph/3dunet_dynamic_ndhwc.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=1 --model-name=3d_unet_mlperf --steps=100 --warmup-steps=20" + "output": "python /workspace/intelai_models/inference/fp32/brats/run_performance.py --input-graph=/in_graph/3dunet_dynamic_ndhwc.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=1 --model-name=3d_unet_mlperf --steps=100 --warmup-steps=20", + "cpuset": "0-111" }, { "_comment": "3d_unet_mlperf_fp32_inference_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=3d_unet_mlperf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --accuracy-only --in-graph=/in_graph/3dunet_dynamic_ndhwc.pb --data-location=/dataset/MICCAI_BraTS_2019_Data_Training", - "output": "python /workspace/intelai_models/inference/fp32/brats/run_accuracy.py --input-graph=/in_graph/3dunet_dynamic_ndhwc.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=1 --model-name=3d_unet_mlperf --data-location=/dataset/MICCAI_BraTS_2019_Data_Training --accuracy-only" + "output": "python 
/workspace/intelai_models/inference/fp32/brats/run_accuracy.py --input-graph=/in_graph/3dunet_dynamic_ndhwc.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=1 --model-name=3d_unet_mlperf --data-location=/dataset/MICCAI_BraTS_2019_Data_Training --accuracy-only", + "cpuset": "0-111" }, { "_comment": "3d_unet_mlperf_int8_inference", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=3d_unet_mlperf --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --benchmark-only --in-graph=/in_graph/3dunet_int8_fully_quantized_perchannel.pb --warmup-steps=20 --steps=100", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/brats/run_performance.py --input-graph=/in_graph/3dunet_int8_fully_quantized_perchannel.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=1 --model-name=3d_unet_mlperf --steps=100 --warmup-steps=20" + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/brats/run_performance.py --input-graph=/in_graph/3dunet_int8_fully_quantized_perchannel.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=1 --model-name=3d_unet_mlperf --steps=100 --warmup-steps=20", + "cpuset": "0-111" }, { "_comment": "3d_unet_mlperf_int8_inference_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=3d_unet_mlperf --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --accuracy-only --in-graph=/in_graph/3dunet_int8_fully_quantized_perchannel.pb --data-location=/dataset/MICCAI_BraTS_2019_Data_Training", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/brats/run_accuracy.py --input-graph=/in_graph/3dunet_int8_fully_quantized_perchannel.pb 
--num-inter-threads=2 --num-intra-threads=56 --batch-size=1 --model-name=3d_unet_mlperf --data-location=/dataset/MICCAI_BraTS_2019_Data_Training --accuracy-only" + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/brats/run_accuracy.py --input-graph=/in_graph/3dunet_int8_fully_quantized_perchannel.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=1 --model-name=3d_unet_mlperf --data-location=/dataset/MICCAI_BraTS_2019_Data_Training --accuracy-only", + "cpuset": "0-111" }, { "_comment": "3d_unet_mlperf_bfloat16_inference", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=3d_unet_mlperf --precision=bfloat16 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --benchmark-only --in-graph=/in_graph/3dunet_dynamic_ndhwc.pb --warmup-steps=20 --steps=100", - "output": "python /workspace/intelai_models/inference/bfloat16/brats/run_performance.py --input-graph=/in_graph/3dunet_dynamic_ndhwc.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=1 --model-name=3d_unet_mlperf --steps=100 --warmup-steps=20" + "output": "python /workspace/intelai_models/inference/bfloat16/brats/run_performance.py --input-graph=/in_graph/3dunet_dynamic_ndhwc.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=1 --model-name=3d_unet_mlperf --steps=100 --warmup-steps=20", + "cpuset": "0-111" }, { "_comment": "3d_unet_mlperf_bfloat16_inference_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=3d_unet_mlperf --precision=bfloat16 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --accuracy-only --in-graph=/in_graph/3dunet_dynamic_ndhwc.pb --data-location=/dataset/MICCAI_BraTS_2019_Data_Training", - "output": "python 
/workspace/intelai_models/inference/bfloat16/brats/run_accuracy.py --input-graph=/in_graph/3dunet_dynamic_ndhwc.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=1 --model-name=3d_unet_mlperf --data-location=/dataset/MICCAI_BraTS_2019_Data_Training --accuracy-only" + "output": "python /workspace/intelai_models/inference/bfloat16/brats/run_accuracy.py --input-graph=/in_graph/3dunet_dynamic_ndhwc.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=1 --model-name=3d_unet_mlperf --data-location=/dataset/MICCAI_BraTS_2019_Data_Training --accuracy-only", + "cpuset": "0-111" } ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_bert_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_bert_args.json index 582fbc647..1585de31f 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_bert_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_bert_args.json @@ -2,51 +2,73 @@ { "_comment": "bert_fp32_inference", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=bert --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --num-cores=28 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --data-location=/dataset --num-inter-threads=1 --num-intra-threads=28 --disable-tcmalloc=True --task_name=XNLI --max_seq_length=128 --batch-size=8 --learning_rate=5e-5", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/run_classifier.py --data_dir=/dataset --output_dir=/workspace/benchmarks/common/tensorflow/logs --vocab_file=/checkpoints/vocab.txt --bert_config_file=/checkpoints/bert_config.json --init_checkpoint=/checkpoints/bert_model.ckpt --task_name=XNLI --max_seq_length=128 --eval_batch_size=8 --learning_rate=5e-05 --num_inter_threads=1 --num_intra_threads=28 
--do_train=false --do_eval=true" + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/run_classifier.py --data_dir=/dataset --output_dir=/workspace/benchmarks/common/tensorflow/logs --vocab_file=/checkpoints/vocab.txt --bert_config_file=/checkpoints/bert_config.json --init_checkpoint=/checkpoints/bert_model.ckpt --task_name=XNLI --max_seq_length=128 --eval_batch_size=8 --learning_rate=5e-05 --num_inter_threads=1 --num_intra_threads=28 --do_train=false --do_eval=true", + "cpuset": "0-111" }, { "_comment": "bert_large_fp32_squad_inference", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --in-graph=/in_graph/bert.pb --output-dir=/workspace/logs --batch-size=128 --data-location=/dataset --infer-option=SQuAD", - "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=fp32 --output_dir=/workspace/logs --predict_batch_size=128 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30" + "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=fp32 --output_dir=/workspace/logs --predict_batch_size=128 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30", + "cpuset": "0-111" }, { "_comment": 
"bert_large_fp32_squad_profile", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --in-graph=/in_graph/bert.pb --output-dir=/workspace/logs --batch-size=128 --data-location=/dataset --infer-option=SQuAD --profile=True", - "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=fp32 --output_dir=/workspace/logs --predict_batch_size=128 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --mode=profile --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30" + "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=fp32 --output_dir=/workspace/logs --predict_batch_size=128 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --mode=profile --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30", + "cpuset": "0-111" }, { "_comment": "bert_large_fp32_squad_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --in-graph=/in_graph/bert.pb --output-dir=/workspace/logs --batch-size=128 --data-location=/dataset --infer-option=SQuAD --accuracy-only", - "output": "python /workspace/intelai_models/inference/run_squad.py 
--init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=fp32 --output_dir=/workspace/logs --predict_batch_size=128 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --mode=accuracy --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30" + "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=fp32 --output_dir=/workspace/logs --predict_batch_size=128 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --mode=accuracy --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30", + "cpuset": "0-111" }, { "_comment": "bert_large_int8_squad_inference", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --in-graph=/in_graph/bert.pb --output-dir=/workspace/logs --batch-size=32 --data-location=/dataset --infer-option=SQuAD", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=int8 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30" + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 
python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=int8 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30", + "cpuset": "0-111" }, { "_comment": "bert_large_int8_inference_optional_args", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --in-graph=/in_graph/bert.pb --output-dir=/workspace/logs --data-location=/dataset --infer-option=SQuAD --num-intra-threads=28 --num-inter-threads=1 --benchmark-only --doc-stride=128 --max-seq-length=384", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=int8 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --mode=benchmark --doc_stride=128 --max_seq_length=384 --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28 --warmup_steps=10 --steps=30" + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=int8 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False 
--optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --mode=benchmark --doc_stride=128 --max_seq_length=384 --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28 --warmup_steps=10 --steps=30", + "cpuset": "0-111" }, { "_comment": "bert_large_int8_squad_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --in-graph=/in_graph/bert.pb --output-dir=/workspace/logs --batch-size=32 --data-location=/dataset --infer-option=SQuAD --accuracy-only", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=int8 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --mode=accuracy --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30" + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=int8 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --mode=accuracy --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30", + "cpuset": "0-111" }, { "_comment": "bert_large_bfloat16_squad_inference", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=bfloat16 
--mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --batch-size=32 --output-dir=/workspace/logs --infer-option=SQuAD --data-location=/dataset --benchmark-only", - "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=bfloat16 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True --do_predict=True --mode=benchmark --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30" + "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=bfloat16 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True --do_predict=True --mode=benchmark --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30", + "cpuset": "0-111" }, { "_comment": "bert_large_bfloat16_squad_profile", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=bfloat16 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --batch-size=32 --output-dir=/workspace/logs --infer-option=SQuAD --data-location=/dataset --profile=True", - "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=bfloat16 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True 
--do_predict=True --mode=profile --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30" + "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=bfloat16 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True --do_predict=True --mode=profile --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30", + "cpuset": "0-111" }, { "_comment": "bert_large_bfloat16_squad_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=bfloat16 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --batch-size=32 --output-dir=/workspace/logs --infer-option=SQuAD --data-location=/dataset --accuracy-only", - "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=bfloat16 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True --do_predict=True --mode=accuracy --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 --warmup_steps=10 --steps=30" + "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=bfloat16 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True --do_predict=True --mode=accuracy --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=56 
--warmup_steps=10 --steps=30", + "cpuset": "0-111" + }, + { + "_comment": "bert_large_bfloat16_squad_inference_cpuset", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=bfloat16 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --batch-size=32 --output-dir=/workspace/logs --infer-option=SQuAD --data-location=/dataset --benchmark-only", + "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=bfloat16 --output_dir=/workspace/logs --predict_batch_size=32 --experimental_gelu=False --optimized_softmax=True --do_predict=True --mode=benchmark --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=8 --warmup_steps=10 --steps=30", + "cpuset": "0-7" + }, + { + "_comment": "bert_large_fp32_squad_inference_cpuset", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=bert_large --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --checkpoint=/checkpoints --intelai-models=/workspace/intelai_models --in-graph=/in_graph/bert.pb --output-dir=/workspace/logs --batch-size=128 --data-location=/dataset --infer-option=SQuAD", + "output": "python /workspace/intelai_models/inference/run_squad.py --init_checkpoint=/checkpoints/model.ckpt-3649 --vocab_file=/dataset/vocab.txt --bert_config_file=/dataset/bert_config.json --predict_file=/dataset/dev-v1.1.json --precision=fp32 --output_dir=/workspace/logs --predict_batch_size=128 --experimental_gelu=False --optimized_softmax=True --input_graph=/in_graph/bert.pb --do_predict=True --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=16 --warmup_steps=10 --steps=30", + "cpuset": "0-7,28-35" } ] diff --git 
a/tests/unit/common/tensorflow/tf_model_args/tf_densenet169_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_densenet169_args.json index 2a6e1d877..75ef1158c 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_densenet169_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_densenet169_args.json @@ -1,15 +1,18 @@ [ { "_comment": "densenet169_fp32_latency", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb", + "cpuset": "0-111" }, { "_comment": "densenet169_fp32_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb --data-location=/dataset", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/accuracy.py --num_intra_threads=28 --num_inter_threads=1 
--batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb --data_location=/dataset"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/accuracy.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb --data_location=/dataset", + "cpuset": "0-111" }, { "_comment": "densenet169_fp32_throughput", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb"} + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb", + "cpuset": "0-111" } ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_dien_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_dien_args.json index a9ad5d01b..f0fa78c7f 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_dien_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_dien_args.json @@ -2,36 +2,43 @@ { "_comment": "dien_fp32_training", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=dien --precision=fp32 --mode=training --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --data-location=/dataset/dien-dataset-folder 
--socket-id=0 --batch-size 128", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/training/fp32/train.py --batch_size 128 --num_inter_threads 1 --num_intra_threads 28 --data_location /dataset/dien-dataset-folder --mode train" + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/training/fp32/train.py --batch_size 128 --num_inter_threads 1 --num_intra_threads 28 --data_location /dataset/dien-dataset-folder --mode train", + "cpuset": "0-111" }, { "_comment": "dien_fp32_inference_throughput", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=dien --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --data-location=/dataset/dien-dataset-folder --in-graph=/workspace/dien_fp32_static_rnn_graph.pb --socket-id 0 --batch-size 128 --num-intra-threads 26 --num-inter-threads 1 --graph_type=static", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference_pb.py --batch_size 128 --num_inter_threads 1 --num_intra_threads 26 --data_location /dataset/dien-dataset-folder --data_type fp32 --input_graph /workspace/dien_fp32_static_rnn_graph.pb --graph_type static" + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference_pb.py --batch_size 128 --num_inter_threads 1 --num_intra_threads 26 --data_location /dataset/dien-dataset-folder --data_type fp32 --input_graph /workspace/dien_fp32_static_rnn_graph.pb --graph_type static", + "cpuset": "0-111" }, { "_comment": "dien_fp32_inference_latency", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=dien --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --data-location=/dataset/dien-dataset-folder --in-graph=/workspace/dien_fp32_static_rnn_graph.pb --socket-id 0 --batch-size 1 
--num-intra-threads 26 --num-inter-threads 1 --graph_type=dynamic --exact-max-length=100 --num-iterations=10", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference_pb.py --batch_size 1 --num_inter_threads 1 --num_intra_threads 26 --data_location /dataset/dien-dataset-folder --data_type fp32 --input_graph /workspace/dien_fp32_static_rnn_graph.pb --graph_type dynamic --exact_max_length 100 --num_iterations 10" + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference_pb.py --batch_size 1 --num_inter_threads 1 --num_intra_threads 26 --data_location /dataset/dien-dataset-folder --data_type fp32 --input_graph /workspace/dien_fp32_static_rnn_graph.pb --graph_type dynamic --exact_max_length 100 --num_iterations 10", + "cpuset": "0-111" }, { "_comment": "dien_fp32_inference_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=dien --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --data-location=/dataset/dien-dataset-folder --in-graph=/workspace/dien_fp32_static_rnn_graph.pb --socket-id 0 --batch-size 128 --num-intra-threads 26 --num-inter-threads 1 --accuracy-only --graph_type=static", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference_pb.py --batch_size 128 --num_inter_threads 1 --num_intra_threads 26 --data_location /dataset/dien-dataset-folder --data_type fp32 --input_graph /workspace/dien_fp32_static_rnn_graph.pb --accuracy_only --graph_type static" + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference_pb.py --batch_size 128 --num_inter_threads 1 --num_intra_threads 26 --data_location /dataset/dien-dataset-folder --data_type fp32 --input_graph /workspace/dien_fp32_static_rnn_graph.pb --accuracy_only --graph_type static", + "cpuset": "0-111" }, { "_comment": 
"dien_bfloat16_inference_throughput", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=dien --precision=bfloat16 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --data-location=/dataset/dien-dataset-folder --in-graph=/workspace/dien_bfloat16_pretrained_model.pb --socket-id 0 --batch-size 128 --num-intra-threads 26 --num-inter-threads 1 --graph_type=static", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference_pb.py --batch_size 128 --num_inter_threads 1 --num_intra_threads 26 --data_location /dataset/dien-dataset-folder --data_type bfloat16 --input_graph /workspace/dien_bfloat16_pretrained_model.pb --graph_type static" + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference_pb.py --batch_size 128 --num_inter_threads 1 --num_intra_threads 26 --data_location /dataset/dien-dataset-folder --data_type bfloat16 --input_graph /workspace/dien_bfloat16_pretrained_model.pb --graph_type static", + "cpuset": "0-111" }, { "_comment": "dien_bfloat16_inference_latency", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=dien --precision=bfloat16 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --data-location=/dataset/dien-dataset-folder --in-graph=/workspace/dien_bfloat16_pretrained_model.pb --socket-id 0 --batch-size 1 --num-intra-threads 26 --num-inter-threads 1 --graph_type=dynamic --exact-max-length=100 --num-iterations=10", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference_pb.py --batch_size 1 --num_inter_threads 1 --num_intra_threads 26 --data_location /dataset/dien-dataset-folder --data_type bfloat16 --input_graph /workspace/dien_bfloat16_pretrained_model.pb --graph_type dynamic --exact_max_length 100 --num_iterations 10" + "output": "numactl 
--cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference_pb.py --batch_size 1 --num_inter_threads 1 --num_intra_threads 26 --data_location /dataset/dien-dataset-folder --data_type bfloat16 --input_graph /workspace/dien_bfloat16_pretrained_model.pb --graph_type dynamic --exact_max_length 100 --num_iterations 10", + "cpuset": "0-111" }, { "_comment": "dien_bfloat16_inference_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=dien --precision=bfloat16 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --data-location=/dataset/dien-dataset-folder --in-graph=/workspace/dien_bfloat16_pretrained_model.pb --socket-id 0 --batch-size 128 --num-intra-threads 26 --num-inter-threads 1 --accuracy-only --graph_type=static", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference_pb.py --batch_size 128 --num_inter_threads 1 --num_intra_threads 26 --data_location /dataset/dien-dataset-folder --data_type bfloat16 --input_graph /workspace/dien_bfloat16_pretrained_model.pb --accuracy_only --graph_type static" + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference_pb.py --batch_size 128 --num_inter_threads 1 --num_intra_threads 26 --data_location /dataset/dien-dataset-folder --data_type bfloat16 --input_graph /workspace/dien_bfloat16_pretrained_model.pb --accuracy_only --graph_type static", + "cpuset": "0-111" } ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_faster_rcnn_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_faster_rcnn_args.json index 01ee8dad0..a50701953 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_faster_rcnn_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_faster_rcnn_args.json @@ -1,27 +1,32 @@ [ { "_comment": "FP32 accuracy command", "input": "run_tf_benchmark.py --framework=tensorflow 
--use-case=object_detection --model-name=faster_rcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=-1 --accuracy-only --verbose --checkpoint=/checkpoints --data-location=/dataset --in-graph=/in_graph/frozen_inference_graph.pb", - "output": "bash /workspace/intelai_models/inference/fp32/coco_accuracy.sh /in_graph/frozen_inference_graph.pb /dataset/coco_val.record /workspace/models"}, + "output": "bash /workspace/intelai_models/inference/fp32/coco_accuracy.sh /in_graph/frozen_inference_graph.pb /dataset/coco_val.record /workspace/models", + "cpuset": "0-111"}, { "_comment": "FP32 benchmark command", "input": "run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name faster_rcnn --checkpoint /checkpoints --intelai-models . --model-source-dir . --socket-id 0 --verbose --config_file=pipeline.config", - "output": "numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 1 --num_intra_threads 28 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval"}, + "output": "numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 1 --num_intra_threads 28 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval", + "cpuset": "0-111"}, { "_comment": "FP32 benchmark command with custom --num_inter_threads 4 --num_intra_threads 16", "input": "run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name faster_rcnn --checkpoint /checkpoints --intelai-models . --model-source-dir . 
--socket-id 0 --verbose --config_file=pipeline.config --num-inter-threads 4 --num-intra-threads 16", - "output": "numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 4 --num_intra_threads 16 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval"}, + "output": "numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 4 --num_intra_threads 16 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval", + "cpuset": "0-111"}, { "_comment": "Int8 command for throughput benchmark with --number-of-steps enabled.", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 --benchmark-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset --number-of-steps=500", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/run_frozen_graph_rcnn.py -g /in_graph/pretrained_int8_faster_rcnn_model.pb -n 500 -d /dataset --num-inter-threads 2 --num-intra-threads 56"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/run_frozen_graph_rcnn.py -g /in_graph/pretrained_int8_faster_rcnn_model.pb -n 500 -d /dataset --num-inter-threads 2 --num-intra-threads 56", + "cpuset": "0-111"}, { "_comment": "Int8 accuracy command", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 --accuracy-only --verbose 
--in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 /workspace/intelai_models/inference/int8/coco_int8.sh /in_graph/pretrained_int8_faster_rcnn_model.pb /dataset /workspace/models"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 /workspace/intelai_models/inference/int8/coco_int8.sh /in_graph/pretrained_int8_faster_rcnn_model.pb /dataset /workspace/models", + "cpuset": "0-111"}, { "_comment": "FP32 benchmark command", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 --benchmark-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/run_frozen_graph_rcnn.py -g /in_graph/pretrained_int8_faster_rcnn_model.pb -d /dataset --num-inter-threads 2 --num-intra-threads 56" - } + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/run_frozen_graph_rcnn.py -g /in_graph/pretrained_int8_faster_rcnn_model.pb -d /dataset --num-inter-threads 2 --num-intra-threads 56", + "cpuset": "0-111"} ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_gnmt_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_gnmt_args.json index 6c48cf5ac..a054ab315 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_gnmt_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_gnmt_args.json @@ -1,13 +1,16 @@ [ { "_comment": "gnmt_fp32_latency", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=mlperf_gnmt --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks 
--intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --data-location=/dataset --in-graph=workspace/mlperf_gnmt_fp32_pretrained_model.pb", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/run_inference.py --in_graph=workspace/mlperf_gnmt_fp32_pretrained_model.pb --batch_size=1 --num_inter_threads=1 --num_intra_threads=28 --src_vocab_file=/dataset/vocab.bpe.32000.en --tgt_vocab_file=/dataset/vocab.bpe.32000.de --inference_input_file=/dataset/newstest2014.tok.bpe.32000.en --inference_ref_file=/dataset/newstest2014.tok.bpe.32000.de"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/run_inference.py --in_graph=workspace/mlperf_gnmt_fp32_pretrained_model.pb --batch_size=1 --num_inter_threads=1 --num_intra_threads=28 --src_vocab_file=/dataset/vocab.bpe.32000.en --tgt_vocab_file=/dataset/vocab.bpe.32000.de --inference_input_file=/dataset/newstest2014.tok.bpe.32000.en --inference_ref_file=/dataset/newstest2014.tok.bpe.32000.de", + "cpuset": "0-111"}, { "_comment": "gnmt_fp32_throughput", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=mlperf_gnmt --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --data-location=/dataset --in-graph=workspace/mlperf_gnmt_fp32_pretrained_model.pb", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/run_inference.py --in_graph=workspace/mlperf_gnmt_fp32_pretrained_model.pb --batch_size=32 --num_inter_threads=1 --num_intra_threads=28 --src_vocab_file=/dataset/vocab.bpe.32000.en --tgt_vocab_file=/dataset/vocab.bpe.32000.de 
--inference_input_file=/dataset/newstest2014.tok.bpe.32000.en --inference_ref_file=/dataset/newstest2014.tok.bpe.32000.de"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/run_inference.py --in_graph=workspace/mlperf_gnmt_fp32_pretrained_model.pb --batch_size=32 --num_inter_threads=1 --num_intra_threads=28 --src_vocab_file=/dataset/vocab.bpe.32000.en --tgt_vocab_file=/dataset/vocab.bpe.32000.de --inference_input_file=/dataset/newstest2014.tok.bpe.32000.en --inference_ref_file=/dataset/newstest2014.tok.bpe.32000.de", + "cpuset": "0-111"}, { "_comment": "gnmt_fp32_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=mlperf_gnmt --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --data-location=/dataset --in-graph=workspace/mlperf_gnmt_fp32_pretrained_model.pb", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/run_inference.py --in_graph=workspace/mlperf_gnmt_fp32_pretrained_model.pb --batch_size=32 --num_inter_threads=1 --num_intra_threads=28 --src_vocab_file=/dataset/vocab.bpe.32000.en --tgt_vocab_file=/dataset/vocab.bpe.32000.de --inference_input_file=/dataset/newstest2014.tok.bpe.32000.en --inference_ref_file=/dataset/newstest2014.tok.bpe.32000.de"} + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/run_inference.py --in_graph=workspace/mlperf_gnmt_fp32_pretrained_model.pb --batch_size=32 --num_inter_threads=1 --num_intra_threads=28 --src_vocab_file=/dataset/vocab.bpe.32000.en --tgt_vocab_file=/dataset/vocab.bpe.32000.de --inference_input_file=/dataset/newstest2014.tok.bpe.32000.en --inference_ref_file=/dataset/newstest2014.tok.bpe.32000.de", + "cpuset": "0-111"} ] diff --git 
a/tests/unit/common/tensorflow/tf_model_args/tf_inceptionv3_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_inceptionv3_args.json index fa3764b76..191be23e6 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_inceptionv3_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_inceptionv3_args.json @@ -1,44 +1,68 @@ [ { "_comment": "inceptionv3_int8_accuracy", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 100 --in-graph /final_int8_inceptionv3.pb --intelai-models . --accuracy-only --verbose", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./int8/accuracy.py --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/final_int8_inceptionv3.pb"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./int8/accuracy.py --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/final_int8_inceptionv3.pb", + "cpuset": "0-111"}, { "_comment": "inception_v3_int8_latency_default_inter_intra", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 1 --in-graph /final_int8_inceptionv3.pb --intelai-models . 
--benchmark-only --socket-id 0 --verbose", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28", + "cpuset": "0-111"}, { "_comment": "inceptionv3_int8_throughput", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /final_int8_inceptionv3.pb --intelai-models . --benchmark-only --socket-id 0 --verbose", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28", + "cpuset": "0-111"}, { "_comment": "inceptionv3_int8_throughput_steps_warmup-steps", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inceptionv3 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/inception_frozen_max_min.pb --steps=200 --warmup-steps=20", - "output": 
"LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/benchmark.py --warmup_steps=20 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/in_graph/inception_frozen_max_min.pb --steps=200 --num_cores=28"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/benchmark.py --warmup_steps=20 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/in_graph/inception_frozen_max_min.pb --steps=200 --num_cores=28", + "cpuset": "0-111"}, { "_comment": "inceptionv3_int8_latency_steps_warmup-steps", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inceptionv3 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/inception_frozen_max_min.pb --steps=200 --warmup-steps=20", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/benchmark.py --warmup_steps=20 --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/inception_frozen_max_min.pb --steps=200 --num_cores=28"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/benchmark.py --warmup_steps=20 --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/inception_frozen_max_min.pb --steps=200 --num_cores=28", + "cpuset": "0-111"}, { "_comment": "inceptionv3_int8_throughput_disable-tcmalloc", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph 
/final_int8_inceptionv3.pb --intelai-models . --benchmark-only --socket-id 0 --disable-tcmalloc=True", - "output": "numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28"}, + "output": "numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28", + "cpuset": "0-111"}, { "_comment": "inceptionv3_fp32_accuracy", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 100 --accuracy-only --data-location /dataset --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --verbose", - "output": "python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=2 --num-intra-threads=56 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only"}, + "output": "python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=2 --num-intra-threads=56 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only", + "cpuset": "0-111"}, { "_comment": "inceptionv3_fp32_latency", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 1 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . 
--socket-id 0 --verbose", - "output": "numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28" - }, + "output": "numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28", + "cpuset": "0-111"}, { "_comment": "inceptionv3_fp32_throughput", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --socket-id 0 --verbose", - "output": "numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28"}, + "output": "numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28", + "cpuset": "0-111"}, { "_comment": "inceptionv3_fp32_throughput_inter_intra", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . 
--socket-id 0 --verbose --num-inter-threads 4 --num-intra-threads 16", - "output": "numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28"} + "output": "numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28", + "cpuset": "0-111"}, + + { "_comment": "inceptionv3_fp32_throughput_inter_intra_cpuset", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --socket-id 0 --verbose --num-inter-threads 4 --num-intra-threads 16", + "output": "numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28", + "cpuset": "0-7"}, + + { "_comment": "inceptionv3_fp32_throughput_cpuset", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . 
--verbose", + "output": "python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=11 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28", + "cpuset": "0-7,58-60"}, + + { "_comment": "inceptionv3_int8_throughput_cpuset", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /final_int8_inceptionv3.pb --intelai-models . --benchmark-only --verbose", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=9 --num_inter_threads=1 --batch_size=128 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28", + "cpuset": "28-34,50,55"} ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_inceptionv4_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_inceptionv4_args.json index ff8dbb6ba..b8a009dcb 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_inceptionv4_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_inceptionv4_args.json @@ -1,21 +1,26 @@ [ { "_comment": "inceptionv4_int8_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/accuracy.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28 --data_location=/dataset"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python 
/workspace/intelai_models/inference/accuracy.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28 --data_location=/dataset", + "cpuset": "0-111"}, { "_comment": "inceptionv4_int8_latency_default_inter_intra", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/benchmark.py --batch_size=1 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/benchmark.py --batch_size=1 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28", + "cpuset": "0-111"}, { "_comment": "inceptionv4_int8_throughput", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/benchmark.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 
--membind=0 python /workspace/intelai_models/inference/benchmark.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28", + "cpuset": "0-111"}, { "_comment": "inceptionv4_fp32_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=fp32 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --data-location=/dataset", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/accuracy.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --num_intra_threads=28 --data_location=/dataset"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/accuracy.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --num_intra_threads=28 --data_location=/dataset", + "cpuset": "0-111"}, { "_comment": "inceptionv4_fp32_batch_inf", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=fp32 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --benchmark-only --in-graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --data-location=/dataset --steps=200", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/benchmark.py --batch_size=240 --num_intra_threads=28 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --warmup_steps=10 --steps=200"} + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/benchmark.py --batch_size=240 --num_intra_threads=28 
--num_inter_threads=2 --input_graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --warmup_steps=10 --steps=200", + "cpuset": "0-111"} ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_mask_rcnn_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_mask_rcnn_args.json index 7eb68e8a6..8b78b7fb4 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_mask_rcnn_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_mask_rcnn_args.json @@ -1,9 +1,21 @@ [ { "_comment": "FP32 benchmark", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=maskrcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --data-location=/dataset", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/coco.py evaluate --dataset=/dataset --num_inter_threads 1 --num_intra_threads 28 --nw 5 --nb 50 --model=coco --infbs 1"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/coco.py evaluate --dataset=/dataset --num_inter_threads 1 --num_intra_threads 28 --nw 5 --nb 50 --model=coco --infbs 1", + "cpuset": "0-111"}, { "_comment": "FP32 benchmark with --num-inter-threads 4 --num-intra-threads 16", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=maskrcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --data-location=/dataset --num-inter-threads 4 --num-intra-threads 16", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/coco.py evaluate --dataset=/dataset --num_inter_threads 4 --num_intra_threads 16 --nw 5 --nb 50 --model=coco --infbs 1"} + "output": "numactl --cpunodebind=0 
--membind=0 python /workspace/intelai_models/inference/fp32/coco.py evaluate --dataset=/dataset --num_inter_threads 4 --num_intra_threads 16 --nw 5 --nb 50 --model=coco --infbs 1", + "cpuset": "0-111"}, + + { "_comment": "FP32 benchmark with cpuset 1 socket", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=maskrcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size=1 --benchmark-only --verbose --data-location=/dataset", + "output": "python /workspace/intelai_models/inference/fp32/coco.py evaluate --dataset=/dataset --num_inter_threads 1 --num_intra_threads 16 --nw 5 --nb 50 --model=coco --infbs 1", + "cpuset": "0-15"}, + + { "_comment": "FP32 benchmark with cpuset 2 socket", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=maskrcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size=1 --benchmark-only --verbose --data-location=/dataset", + "output": "python /workspace/intelai_models/inference/fp32/coco.py evaluate --dataset=/dataset --num_inter_threads 2 --num_intra_threads 16 --nw 5 --nb 50 --model=coco --infbs 1", + "cpuset": "0-7,28-35"} ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_minigo_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_minigo_args.json index 23aec10ec..194726bcd 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_minigo_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_minigo_args.json @@ -2,16 +2,19 @@ { "_comment": "minigo_fp32_training", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=reinforcement --model-name=minigo --precision=fp32 --mode=training --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --steps=30 --quantization=True", - "output": "./run.sh True" + "output": "./run.sh True", + "cpuset": "0-111" 
}, { "_comment": "minigo_fp32_training_multi_node", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=reinforcement --model-name=minigo --precision=fp32 --mode=training --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --steps=30 --quantization=True --num-train-nodes=2 --multi-node=True", - "output": "./run_mn.sh 2 True" + "output": "./run_mn.sh 2 True", + "cpuset": "0-111" }, { "_comment": "minigo_fp32_training_multi_node_large_scale", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=reinforcement --model-name=minigo --precision=fp32 --mode=training --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --steps=30 --quantization=True --num-train-nodes=2 --num-eval-nodes=1 --large-scale=True --multi-node=True", - "output": "./run_mn.sh 2 1 True" + "output": "./run_mn.sh 2 1 True", + "cpuset": "0-111" } ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_mobilenet_v1_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_mobilenet_v1_args.json index 858d92e9a..b8a99e8c4 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_mobilenet_v1_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_mobilenet_v1_args.json @@ -1,43 +1,58 @@ [ { "_comment": "mobilenet_v1_fp32_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --checkpoint=/checkpoints --in-graph=/in_graph/mobilenet_v1_1.0_224_frozen.pb --data-location=/dataset", - "output": "python /workspace/intelai_models/inference/accuracy.py --precision=fp32 --batch_size=100 --data_location=/dataset --num_intra_threads=56 --num_inter_threads=2 --input_graph=/in_graph/mobilenet_v1_1.0_224_frozen.pb --input_height=224 --input_width=224 --input_layer=input 
--output_layer=MobilenetV1/Predictions/Reshape_1"}, + "output": "python /workspace/intelai_models/inference/accuracy.py --precision=fp32 --batch_size=100 --data_location=/dataset --num_intra_threads=56 --num_inter_threads=2 --input_graph=/in_graph/mobilenet_v1_1.0_224_frozen.pb --input_height=224 --input_width=224 --input_layer=input --output_layer=MobilenetV1/Predictions/Reshape_1", + "cpuset": "0-111"}, { "_comment": "mobilenet_v1_fp32_latency", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset", - "output": "numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/benchmark.py --precision=fp32 --batch_size=100 --num_intra_threads=28 --num_inter_threads=2 --input_height=224 --input_width=224 --warmup_steps=10 --steps=50 --input_layer=input --output_layer=MobilenetV1/Predictions/Reshape_1"}, + "output": "numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/benchmark.py --precision=fp32 --batch_size=100 --num_intra_threads=28 --num_inter_threads=2 --input_height=224 --input_width=224 --warmup_steps=10 --steps=50 --input_layer=input --output_layer=MobilenetV1/Predictions/Reshape_1", + "cpuset": "0-111"}, { "_comment": "mobilenet_v1_fp32_throughput", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset", - "output": "numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/benchmark.py --precision=fp32 --batch_size=100 --num_intra_threads=28 
--num_inter_threads=2 --input_height=224 --input_width=224 --warmup_steps=10 --steps=50 --input_layer=input --output_layer=MobilenetV1/Predictions/Reshape_1"}, + "output": "numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/benchmark.py --precision=fp32 --batch_size=100 --num_intra_threads=28 --num_inter_threads=2 --input_height=224 --input_width=224 --warmup_steps=10 --steps=50 --input_layer=input --output_layer=MobilenetV1/Predictions/Reshape_1", + "cpuset": "0-111"}, { "_comment": "mobilenet_v1_fp32_dummy_data_output-dir", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints", - "output": "numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/benchmark.py --precision=fp32 --batch_size=100 --num_intra_threads=28 --num_inter_threads=2 --input_height=224 --input_width=224 --warmup_steps=10 --steps=50 --input_layer=input --output_layer=MobilenetV1/Predictions/Reshape_1"}, + "output": "numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/benchmark.py --precision=fp32 --batch_size=100 --num_intra_threads=28 --num_inter_threads=2 --input_height=224 --input_width=224 --warmup_steps=10 --steps=50 --input_layer=input --output_layer=MobilenetV1/Predictions/Reshape_1", + "cpuset": "0-111"}, { "_comment": "mobilenet_v1_int8_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs 
--accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --data-location=/dataset --input_height=224 --input_width=224", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/accuracy.py --input_height=224 --input_width=224 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --data_location=/dataset --input_layer=input"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/accuracy.py --input_height=224 --input_width=224 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --data_location=/dataset --input_layer=input", + "cpuset": "0-111"}, { "_comment": "mobilenet_v1_int8_latency", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_height=224 --input_width=224 --warmup_steps=10 --steps=50", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/benchmark.py --input_height=224 --input_width=224 --warmup_steps=10 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb 
--input_layer=input --steps=50"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/benchmark.py --input_height=224 --input_width=224 --warmup_steps=10 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_layer=input --steps=50", + "cpuset": "0-111"}, { "_comment": "mobilenet_v1_int8_throughput", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_height=224 --input_width=224 --warmup_steps=10 --steps=50", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/benchmark.py --input_height=224 --input_width=224 --warmup_steps=10 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=240 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_layer=input --steps=50"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/benchmark.py --input_height=224 --input_width=224 --warmup_steps=10 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=240 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_layer=input --steps=50", + "cpuset": "0-111"}, { "_comment": "mobilenet_v1_bfloat16_latency", "input": "run_tf_benchmark.py 
--framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=bfloat16 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --batch-size=1 --socket-id=0 --data-location=/dataset/ImageNet_Validation --in-graph=/workspace/mobilenetv1.pb --input_height=200 --input_width=300 --steps=500 --warmup_steps=100", - "output": "numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/benchmark.py --precision=bfloat16 --batch_size=1 --num_intra_threads=28 --num_inter_threads=2 --input_graph=/workspace/mobilenetv1.pb --input_height=200 --input_width=300 --warmup_steps=100 --steps=500 --input_layer=input --output_layer=MobilenetV1/Predictions/Reshape_1"}, + "output": "numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/benchmark.py --precision=bfloat16 --batch_size=1 --num_intra_threads=28 --num_inter_threads=2 --input_graph=/workspace/mobilenetv1.pb --input_height=200 --input_width=300 --warmup_steps=100 --steps=500 --input_layer=input --output_layer=MobilenetV1/Predictions/Reshape_1", + "cpuset": "0-111"}, { "_comment": "mobilenet_v1_bfloat16_batch", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=bfloat16 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --batch-size 100 --socket-id=0 --data-location=/dataset/ImageNet_Validation --in-graph=/workspace/mobilenetv1.pb --input_height=150 --input_width=170 --steps=100 --warmup_steps=10", - "output": "numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/benchmark.py --precision=bfloat16 --batch_size=100 --num_intra_threads=28 --num_inter_threads=2 --input_graph=/workspace/mobilenetv1.pb --input_height=150 --input_width=170 --warmup_steps=10 --steps=100 --input_layer=input --output_layer=MobilenetV1/Predictions/Reshape_1"}, + "output": "numactl --cpunodebind=0 -l python 
/workspace/intelai_models/inference/benchmark.py --precision=bfloat16 --batch_size=100 --num_intra_threads=28 --num_inter_threads=2 --input_graph=/workspace/mobilenetv1.pb --input_height=150 --input_width=170 --warmup_steps=10 --steps=100 --input_layer=input --output_layer=MobilenetV1/Predictions/Reshape_1", + "cpuset": "0-111"}, { "_comment": "mobilenet_v1_bfloat16_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=bfloat16 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --batch-size 150 --data-location=/dataset --accuracy-only --in-graph=/workspace/mobilenetv1.pb", - "output": "python /workspace/intelai_models/inference/accuracy.py --precision=bfloat16 --batch_size=150 --data_location=/dataset --num_intra_threads=56 --num_inter_threads=2 --input_graph=/workspace/mobilenetv1.pb --input_height=224 --input_width=224 --input_layer=input --output_layer=MobilenetV1/Predictions/Reshape_1"} + "output": "python /workspace/intelai_models/inference/accuracy.py --precision=bfloat16 --batch_size=150 --data_location=/dataset --num_intra_threads=56 --num_inter_threads=2 --input_graph=/workspace/mobilenetv1.pb --input_height=224 --input_width=224 --input_layer=input --output_layer=MobilenetV1/Predictions/Reshape_1", + "cpuset": "0-111"}, + + { "_comment": "mobilenet_v1_bfloat16_batch_cpuset", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=bfloat16 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --batch-size 100 --data-location=/dataset/ImageNet_Validation --in-graph=/workspace/mobilenetv1.pb --input_height=150 --input_width=170 --steps=100 --warmup_steps=10", + "output": "python /workspace/intelai_models/inference/benchmark.py --precision=bfloat16 --batch_size=100 --num_intra_threads=16 --num_inter_threads=2 
--input_graph=/workspace/mobilenetv1.pb --input_height=150 --input_width=170 --warmup_steps=10 --steps=100 --input_layer=input --output_layer=MobilenetV1/Predictions/Reshape_1", + "cpuset": "0-7,28-35"} ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_ncf_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_ncf_args.json index 4cbeed73a..815ab35ac 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_ncf_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_ncf_args.json @@ -1,21 +1,26 @@ [ { "_comment": "FP32 latency benchmark", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=1 --inference_only --benchmark_only"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=1 --inference_only --benchmark_only", + "cpuset": "0-111"}, { "_comment": "Fp32 accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=256 --socket-id 0 --accuracy-only --verbose --checkpoint=/checkpoints --data-location=/dataset", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints 
--intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=256 --inference_only --accuracy_only"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=256 --inference_only --accuracy_only", + "cpuset": "0-111"}, { "_comment": "FP32 Throughput benchmark", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=256 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=256 --inference_only --benchmark_only"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=256 --inference_only --benchmark_only", + "cpuset": "0-111"}, { "_comment": "NCF FP32 Training", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=training --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=98304 --num-inter-thread=2 --dataset=ml-20m --clean=1 --te=12", - "output": "python /workspace/intelai_models/training/ncf_estimator_main.py -dd=None -md=None -bs=98304 -hk=examplespersecondhook --dataset=ml-20m --layers=256,256,128,64 --num_factors=64 --eval_batch_size 160000 --learning_rate 0.003821 --beta1 0.783529 --beta2 0.909003 --epsilon 1.45439e-07 
--hr_threshold 0.635 --ml_perf --clean=1 --te=12"}, + "output": "python /workspace/intelai_models/training/ncf_estimator_main.py -dd=None -md=None -bs=98304 -hk=examplespersecondhook --dataset=ml-20m --layers=256,256,128,64 --num_factors=64 --eval_batch_size 160000 --learning_rate 0.003821 --beta1 0.783529 --beta2 0.909003 --epsilon 1.45439e-07 --hr_threshold 0.635 --ml_perf --clean=1 --te=12", + "cpuset": "0-111"}, { "_comment": "NCF BFloat16 Training", - "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=bfloat16 --mode=training --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=98304 --num-inter-thread=2 --dataset=ml-20m --clean=1 --te=12", - "output": "python /workspace/intelai_models/training/ncf_estimator_main.py --use_bfloat16 -dd=None -md=None -bs=98304 -hk=examplespersecondhook --dataset=ml-20m --layers=256,256,128,64 --num_factors=64 --eval_batch_size 160000 --learning_rate 0.003821 --beta1 0.783529 --beta2 0.909003 --epsilon 1.45439e-07 --hr_threshold 0.635 --ml_perf --clean=1 --te=12"} + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=bfloat16 --mode=training --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=98304 --num-inter-thread=2 --dataset=ml-20m --clean=1 --te=12", + "output": "python /workspace/intelai_models/training/ncf_estimator_main.py --use_bfloat16 -dd=None -md=None -bs=98304 -hk=examplespersecondhook --dataset=ml-20m --layers=256,256,128,64 --num_factors=64 --eval_batch_size 160000 --learning_rate 0.003821 --beta1 0.783529 --beta2 0.909003 --epsilon 1.45439e-07 --hr_threshold 0.635 --ml_perf --clean=1 --te=12", + "cpuset": "0-111"} ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_resnet101_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_resnet101_args.json index 6e327c8d8..4f1eded04 100644 
--- a/tests/unit/common/tensorflow/tf_model_args/tf_resnet101_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_resnet101_args.json @@ -1,21 +1,26 @@ [ { "_comment": "resnet101_fp32_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --accuracy-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb --data-location=/dataset", - "output": "python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=2 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=56 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only"}, + "output": "python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=2 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=56 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only", + "cpuset": "0-111"}, { "_comment": "resnet101_fp32_throughput", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb 
--num-intra-threads=28 --warmup-steps=10 --steps=50", + "cpuset": "0-111"}, { "_comment": "resnet101_int8_latency", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_int8_model.pb", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=1 --num-intra-threads=28 --num-inter-threads=1 --input-graph=/in_graph/resnet101_int8_model.pb --warmup-steps=40 --steps=100"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=1 --num-intra-threads=28 --num-inter-threads=1 --input-graph=/in_graph/resnet101_int8_model.pb --warmup-steps=40 --steps=100", + "cpuset": "0-111"}, { "_comment": "resnet101_int8_inference calibration", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --accuracy-only --calibration-only --in-graph=/in_graph/resnet101_int8_model.pb", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/calibration.py --batch_size=1 --num_intra_threads=28 --num_inter_threads=1 --input_graph=/in_graph/resnet101_int8_model.pb"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/calibration.py --batch_size=1 --num_intra_threads=28 --num_inter_threads=1 
--input_graph=/in_graph/resnet101_int8_model.pb", + "cpuset": "0-111"}, { "_comment": "resnet101_fp32_latency", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=1 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50"} + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=1 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50", + "cpuset": "0-111"} ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_resnet50_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_resnet50_args.json index ec0916102..72c734308 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_resnet50_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_resnet50_args.json @@ -1,40 +1,48 @@ [ { "_comment": "resnet50_fp32_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size 100 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/freezed_resnet50.pb --accuracy-only --data-location=/dataset", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=100 
--data-location=/dataset --accuracy-only --num-cores=28 --warmup-steps=10 --steps=50"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=100 --data-location=/dataset --accuracy-only --num-cores=28 --warmup-steps=10 --steps=50", + "cpuset": "0-111"}, { "_comment": "resnet50_fp32_latency_default_inter_intra", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 128 --in-graph /freezed_resnet50.pb --intelai-models . --socket-id 0 --verbose", - "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28"}, + "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28", + "cpuset": "0-111"}, { "_comment": "resnet50_fp32_latency_inter_intra", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 1 --in-graph /freezed_resnet50.pb --intelai-models . 
--socket-id 0 --verbose --num-inter-threads 4 --num-intra-threads 16", - "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28"}, + "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28", + "cpuset": "0-111"}, { "_comment": "resnet50_fp32_throughput_inter_intra", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 128 --in-graph /freezed_resnet50.pb --intelai-models . --socket-id 0 --verbose", - "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28"}, + "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28", + "cpuset": "0-111"}, { "_comment": "resnet50_int8_throughput_output-dir", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/resnet50_int8_pretrained_model.pb --steps=200 --warmup-steps=20", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 
python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50_int8_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=20 --steps=200"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50_int8_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=20 --steps=200", + "cpuset": "0-111"}, { "_comment": "resnet50_int8_data_calibration", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/resnet50_int8_pretrained_model.pb --data-location=/dataset --calibration-only", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/int8/generate_calibration_data.py --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/in_graph/resnet50_int8_pretrained_model.pb --data_location=/dataset"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/int8/generate_calibration_data.py --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/in_graph/resnet50_int8_pretrained_model.pb --data_location=/dataset", + "cpuset": "0-111"}, { "_comment": "resnet50_fp32_throughput_output-results", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 
--output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --output-results --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/resnet50_fp32_pretrained_model.pb --data-location=/dataset", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --results-file-path /workspace/benchmarks/common/tensorflow/logs/resnet50_fp32_inference_results*.txt"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --results-file-path /workspace/benchmarks/common/tensorflow/logs/resnet50_fp32_inference_results*.txt", + "cpuset": "0-111"}, { "_comment": "resnet50_int8_accuracy", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name resnet50 --batch-size 100 --data-location /dataset --in-graph /final_int8_resnet50.pb --intelai-models . 
--accuracy-only --verbose", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./inference/eval_image_classifier_inference.py --input-graph=/final_int8_resnet50.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./inference/eval_image_classifier_inference.py --input-graph=/final_int8_resnet50.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only", + "cpuset": "0-111"}, { "_comment": "resnet50_int8_throughput_steps_warmup-steps", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/resnet50_int8_pretrained_model.pb --steps=200 --warmup-steps=20", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50_int8_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=20 --steps=200" - } + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50_int8_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=20 --steps=200", + "cpuset": "0-111"} ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_resnet50v1_5_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_resnet50v1_5_args.json index 885eda88f..d0ab8877a 100644 
--- a/tests/unit/common/tensorflow/tf_model_args/tf_resnet50v1_5_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_resnet50v1_5_args.json @@ -1,75 +1,98 @@ [ { "_comment": "resnet50v1_5_fp32_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50v1_5 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size 100 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/freezed_resnet50v1_5.pb --accuracy-only --data-location=/dataset", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/freezed_resnet50v1_5.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=100 --data-location=/dataset --accuracy-only --num-cores=28 --warmup-steps=10 --steps=50"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/freezed_resnet50v1_5.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=100 --data-location=/dataset --accuracy-only --num-cores=28 --warmup-steps=10 --steps=50", + "cpuset": "0-111"}, { "_comment": "resnet50v1_5_fp32_latency_default_inter_intra", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50v1_5 --batch-size 128 --in-graph /freezed_resnet50v1_5.pb --intelai-models . 
--socket-id 0 --verbose", - "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50v1_5.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28"}, + "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50v1_5.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28", + "cpuset": "0-111"}, { "_comment": "resnet50v1_5_fp32_latency_inter_intra", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50v1_5 --batch-size 1 --in-graph /freezed_resnet50v1_5.pb --intelai-models . --socket-id 0 --verbose --num-inter-threads 4 --num-intra-threads 16", - "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50v1_5.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28"}, + "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50v1_5.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28", + "cpuset": "0-111"}, { "_comment": "resnet50v1_5_fp32_throughput_inter_intra", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50v1_5 --batch-size 128 --in-graph /freezed_resnet50v1_5.pb --intelai-models . 
--socket-id 0 --verbose", - "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50v1_5.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28"}, + "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50v1_5.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28", + "cpuset": "0-111"}, { "_comment": "resnet50v1_5_int8_throughput_output-dir", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50v1_5 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/resnet50v1_5_int8_pretrained_model.pb --steps=200 --warmup-steps=20", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50v1_5_int8_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=20 --steps=200"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50v1_5_int8_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=20 --steps=200", + "cpuset": "0-111"}, { "_comment": "resnet50v1_5_int8_data_calibration", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50v1_5 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks 
--intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/resnet50v1_5_int8_pretrained_model.pb --data-location=/dataset --calibration-only", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/int8/generate_calibration_data.py --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/in_graph/resnet50v1_5_int8_pretrained_model.pb --data_location=/dataset"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/int8/generate_calibration_data.py --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/in_graph/resnet50v1_5_int8_pretrained_model.pb --data_location=/dataset", + "cpuset": "0-111"}, { "_comment": "resnet50v1_5_fp32_throughput_output-results", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50v1_5 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --output-results --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/resnet50v1_5_fp32_pretrained_model.pb --data-location=/dataset", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50v1_5_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --results-file-path /workspace/benchmarks/common/tensorflow/logs/resnet50v1_5_fp32_inference_results*.txt"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py 
--input-graph=/in_graph/resnet50v1_5_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --results-file-path /workspace/benchmarks/common/tensorflow/logs/resnet50v1_5_fp32_inference_results*.txt", + "cpuset": "0-111"}, { "_comment": "resnet50v1_5_int8_accuracy", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name resnet50v1_5 --batch-size 100 --data-location /dataset --in-graph /final_int8_resnet50v1_5.pb --intelai-models . --accuracy-only --verbose", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./inference/eval_image_classifier_inference.py --input-graph=/final_int8_resnet50v1_5.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./inference/eval_image_classifier_inference.py --input-graph=/final_int8_resnet50v1_5.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only", + "cpuset": "0-111"}, { "_comment": "resnet50v1_5_int8_throughput_steps_warmup-steps", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50v1_5 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/resnet50v1_5_int8_pretrained_model.pb --steps=200 --warmup-steps=20", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py 
--input-graph=/in_graph/resnet50v1_5_int8_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=20 --steps=200"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50v1_5_int8_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=20 --steps=200", + "cpuset": "0-111"}, { "_comment": "resnet50v1_5_bfloat16_batch_inference", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision bfloat16 --mode inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --model-name resnet50v1_5 --batch-size=128 --data-location=/dataset --in-graph=resnet50v1_5.pb", - "output": "python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=resnet50v1_5.pb --num-inter-threads=2 --num-intra-threads=56 --num-cores=28 --batch-size=128 --warmup-steps=10 --steps=50 --data-location=/dataset"}, + "output": "python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=resnet50v1_5.pb --num-inter-threads=2 --num-intra-threads=56 --num-cores=28 --batch-size=128 --warmup-steps=10 --steps=50 --data-location=/dataset", + "cpuset": "0-111"}, { "_comment": "resnet50v1_5_bfloat16_online_inference", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision bfloat16 --mode inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --model-name resnet50v1_5 --batch-size=1 --data-location=/dataset --in-graph=resnet50v1_5.pb", - "output": "python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=resnet50v1_5.pb --num-inter-threads=2 --num-intra-threads=56 --num-cores=28 
--batch-size=1 --warmup-steps=10 --steps=50 --data-location=/dataset"}, + "output": "python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=resnet50v1_5.pb --num-inter-threads=2 --num-intra-threads=56 --num-cores=28 --batch-size=1 --warmup-steps=10 --steps=50 --data-location=/dataset", + "cpuset": "0-111"}, { "_comment": "resnet50v1_5_bfloat16_inference_accuracy", "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision bfloat16 --mode inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --model-name resnet50v1_5 --batch-size=100 --data-location=/dataset --in-graph=resnet50v1_5.pb --accuracy-only", - "output": "python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=resnet50v1_5.pb --num-inter-threads=2 --num-intra-threads=56 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only"}, + "output": "python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=resnet50v1_5.pb --num-inter-threads=2 --num-intra-threads=56 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only", + "cpuset": "0-111"}, { "_comment": "resnet50v1_5_fp32_multi_instance_one_socket", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50v1_5 --precision=fp32 --mode=inference --intelai-models=/workspace/intelai_models --batch-size 100 --socket-id 0 --numa-cores-per-instance socket --in-graph=/in_graph/freezed_resnet50v1_5.pb --benchmark-only", - "output": "python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/freezed_resnet50v1_5.pb --num-inter-threads=1 --num-intra-threads=28 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-num-inter-threads=1 --data-num-intra-threads=28"}, + 
"output": "python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/freezed_resnet50v1_5.pb --num-inter-threads=1 --num-intra-threads=28 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-num-inter-threads=1 --data-num-intra-threads=28", + "cpuset": "0-111"}, { "_comment": "resnet50v1_5_fp32_multi_instance_all_sockets", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50v1_5 --precision=fp32 --mode=inference --intelai-models=/workspace/intelai_models --batch-size 100 --numa-cores-per-instance socket --in-graph=/in_graph/freezed_resnet50v1_5.pb --benchmark-only", - "output": "python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/freezed_resnet50v1_5.pb --num-inter-threads=1 --num-intra-threads=28 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-num-inter-threads=1 --data-num-intra-threads=28"}, + "output": "python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/freezed_resnet50v1_5.pb --num-inter-threads=1 --num-intra-threads=28 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-num-inter-threads=1 --data-num-intra-threads=28", + "cpuset": "0-111"}, { "_comment": "resnet50v1_5_fp32_training", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50v1_5 --precision=fp32 --mode=training --intelai-models=/workspace/intelai_models --batch-size=256 --checkpoint=/workspace/checkpoints --output-dir=/workspace/logs --data-location=/dataset --steps=100 --train_epochs=6 --epochs_between_evals=2", - "output": "python /workspace/intelai_models/training/mlperf_resnet/imagenet_main.py 2 --batch_size=256 --max_train_steps=100 --train_epochs=6 --epochs_between_evals=2 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 56 --version 1 --resnet_size 50 --data_format=channels_last 
--data_dir=/dataset --model_dir=/workspace/checkpoints"}, + "output": "python /workspace/intelai_models/training/mlperf_resnet/imagenet_main.py 2 --batch_size=256 --max_train_steps=100 --train_epochs=6 --epochs_between_evals=2 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 56 --version 1 --resnet_size 50 --data_format=channels_last --data_dir=/dataset --model_dir=/workspace/checkpoints", + "cpuset": "0-111"}, { "_comment": "resnet50v1_5_fp32_distributed_training", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50v1_5 --precision=fp32 --mode=training --intelai-models=/workspace/intelai_models --batch-size=256 --checkpoint=/workspace/checkpoints --output-dir=/workspace/logs --mpi_num_processes=2 --mpi_num_processes_per_socket=1 --data-location=/dataset", - "output": "mpirun --allow-run-as-root -n 2 --map-by socket python /workspace/intelai_models/training/mlperf_resnet/imagenet_main.py 2 --batch_size=256 --max_train_steps=112590 --train_epochs=72 --epochs_between_evals=1 --inter_op_parallelism_threads 1 --intra_op_parallelism_threads 26 --version 1 --resnet_size 50 --data_format=channels_last --data_dir=/dataset --model_dir=/workspace/checkpoints"}, + "output": "mpirun --allow-run-as-root -n 2 --map-by socket python /workspace/intelai_models/training/mlperf_resnet/imagenet_main.py 2 --batch_size=256 --max_train_steps=112590 --train_epochs=72 --epochs_between_evals=1 --inter_op_parallelism_threads 1 --intra_op_parallelism_threads 26 --version 1 --resnet_size 50 --data_format=channels_last --data_dir=/dataset --model_dir=/workspace/checkpoints", + "cpuset": "0-111"}, { "_comment": "resnet50v1_5_bfloat16_training", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50v1_5 --precision=bfloat16 --mode=training --intelai-models=/workspace/intelai_models --checkpoint=/workspace/checkpoints --output-dir=/workspace/logs --steps=300 --train_epochs=10 
--epochs_between_evals=2", - "output": "python /workspace/intelai_models/training/mlperf_resnet/imagenet_main.py 2 --batch_size=64 --max_train_steps=300 --train_epochs=10 --epochs_between_evals=2 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 56 --version 1 --resnet_size 50 --data_format=channels_last --model_dir=/workspace/checkpoints --use_bfloat16"}, + "output": "python /workspace/intelai_models/training/mlperf_resnet/imagenet_main.py 2 --batch_size=64 --max_train_steps=300 --train_epochs=10 --epochs_between_evals=2 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 56 --version 1 --resnet_size 50 --data_format=channels_last --model_dir=/workspace/checkpoints --use_bfloat16", + "cpuset": "0-111"}, { "_comment": "resnet50v1_5_bfloat16_distributed_training", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50v1_5 --precision=bfloat16 --mode=training --intelai-models=/workspace/intelai_models --checkpoint=/workspace/checkpoints --mpi_num_processes=4 --mpi_num_processes_per_socket=2 --output-dir=/workspace/logs", - "output": "mpirun --allow-run-as-root -n 4 --map-by ppr:2:socket:pe=14 --cpus-per-proc 14 python /workspace/intelai_models/training/mlperf_resnet/imagenet_main.py 2 --batch_size=64 --max_train_steps=112590 --train_epochs=72 --epochs_between_evals=1 --inter_op_parallelism_threads 1 --intra_op_parallelism_threads 26 --version 1 --resnet_size 50 --data_format=channels_last --model_dir=/workspace/checkpoints --use_bfloat16"} + "output": "mpirun --allow-run-as-root -n 4 --map-by ppr:2:socket:pe=14 --cpus-per-proc 14 python /workspace/intelai_models/training/mlperf_resnet/imagenet_main.py 2 --batch_size=64 --max_train_steps=112590 --train_epochs=72 --epochs_between_evals=1 --inter_op_parallelism_threads 1 --intra_op_parallelism_threads 26 --version 1 --resnet_size 50 --data_format=channels_last --model_dir=/workspace/checkpoints --use_bfloat16", + "cpuset": "0-111"}, + + { 
"_comment": "resnet50v1_5_bfloat16_online_inference_cpuset", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision bfloat16 --mode inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --model-name resnet50v1_5 --batch-size=1 --data-location=/dataset --in-graph=resnet50v1_5.pb", + "output": "python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=resnet50v1_5.pb --num-inter-threads=1 --num-intra-threads=16 --num-cores=28 --batch-size=1 --warmup-steps=10 --steps=50 --data-location=/dataset", + "cpuset": "2-17"} ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_rfcn_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_rfcn_args.json index 8301a4db9..ec759e926 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_rfcn_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_rfcn_args.json @@ -1,25 +1,31 @@ [ { "_comment": "rfcn_fp32_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --verbose --in-graph=/in_graph/frozen_inference_graph.pb --data-location=/dataset --accuracy-only --split=accuracy_message", - "output": "FROZEN_GRAPH=/in_graph/frozen_inference_graph.pb TF_RECORD_FILES=/dataset/data.record SPLIT=accuracy_message TF_MODELS_ROOT=/workspace/models /workspace/intelai_models/inference/fp32/coco_mAP.sh"}, + "output": "FROZEN_GRAPH=/in_graph/frozen_inference_graph.pb TF_RECORD_FILES=/dataset/data.record SPLIT=accuracy_message TF_MODELS_ROOT=/workspace/models /workspace/intelai_models/inference/fp32/coco_mAP.sh", + "cpuset": "0-111"}, { "_comment": "rfcn_fp32", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=fp32 
--mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id 0 --verbose --in-graph=/in_graph/rfcn_resnet101_fp32_coco_pretrained_model.pb --data-location=/dataset --benchmark-only --number_of_steps=500", - "output": "numactl -N 0 -m 0 python /workspace/intelai_models/inference/fp32/run_rfcn_inference.py -m /workspace/models -g /in_graph/rfcn_resnet101_fp32_coco_pretrained_model.pb -x 500 -d /dataset --num-inter-threads 1 --num-intra-threads 28"}, + "output": "numactl -N 0 -m 0 python /workspace/intelai_models/inference/fp32/run_rfcn_inference.py -m /workspace/models -g /in_graph/rfcn_resnet101_fp32_coco_pretrained_model.pb -x 500 -d /dataset --num-inter-threads 1 --num-intra-threads 28", + "cpuset": "0-111"}, { "_comment": "rfcn_fp32_with_optional_args", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size=-1 --socket-id=1 --verbose --in-graph=/in_graph/rfcn_resnet101_fp32_coco_pretrained_model.pb --data-location=/dataset --benchmark-only --num-cores=8 --visualize", - "output": "numactl -C +0,1,2,4 -N 1 -m 1 python /workspace/intelai_models/inference/fp32/run_rfcn_inference.py -m /workspace/models -g /in_graph/rfcn_resnet101_fp32_coco_pretrained_model.pb --num-intra-threads 8 --num-inter-threads 1 - v -d /dataset"}, + "output": "numactl -C +0,1,2,4 -N 1 -m 1 python /workspace/intelai_models/inference/fp32/run_rfcn_inference.py -m /workspace/models -g /in_graph/rfcn_resnet101_fp32_coco_pretrained_model.pb --num-intra-threads 8 --num-inter-threads 1 - v -d /dataset", + "cpuset": "0-111"}, { "_comment": "rfcn_int8_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=int8 --mode=inference --model-source-dir=/workspace/models 
--intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --verbose --in-graph=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb --data-location=/dataset --accuracy-only --split=accuracy_message", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 FROZEN_GRAPH=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb TF_RECORD_FILE=/dataset SPLIT=accuracy_message TF_MODELS_ROOT=/workspace/models /workspace/intelai_models/inference/int8/coco_mAP.sh"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 FROZEN_GRAPH=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb TF_RECORD_FILE=/dataset SPLIT=accuracy_message TF_MODELS_ROOT=/workspace/models /workspace/intelai_models/inference/int8/coco_mAP.sh", + "cpuset": "0-111"}, { "_comment": "rfcn_int8", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id 0 --verbose --in-graph=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb --data-location=/dataset --benchmark-only --number_of_steps=500", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl -N 0 -m 0 python /workspace/intelai_models/inference/int8/run_rfcn_inference.py -m /workspace/models -g /in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb -x 500 -d /dataset --num-inter-threads 1 --num-intra-threads 28"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl -N 0 -m 0 python /workspace/intelai_models/inference/int8/run_rfcn_inference.py -m /workspace/models -g /in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb -x 500 -d /dataset --num-inter-threads 1 --num-intra-threads 28", + "cpuset": "0-111"}, { "_comment": "rfcn_int8_with_optional_args", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=int8 --mode=inference --model-source-dir=/workspace/models 
--intelai-models=/workspace/intelai_models --batch-size=-1 --socket-id 1 --verbose --in-graph=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb --data-location=/dataset --benchmark-only --num-cores=8 --print_accuracy --evaluate_tensor=Tensor --visualize", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl -N 0 -m 0 python /workspace/intelai_models/inference/int8/run_rfcn_inference.py -m /workspace/models -g /in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb --num-intra-threads 28 --num-inter-threads 1 -x 500 -d /dataset LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl -C +0,1,2,4 -N 1 -m 1 python /workspace/intelai_models/inference/int8/run_rfcn_inference.py -m /workspace/models -g /in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb --num-intra-threads 8 --num-inter-threads 1 - v -d /dataset -e Tensor - p"} + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl -N 0 -m 0 python /workspace/intelai_models/inference/int8/run_rfcn_inference.py -m /workspace/models -g /in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb --num-intra-threads 28 --num-inter-threads 1 -x 500 -d /dataset LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl -C +0,1,2,4 -N 1 -m 1 python /workspace/intelai_models/inference/int8/run_rfcn_inference.py -m /workspace/models -g /in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb --num-intra-threads 8 --num-inter-threads 1 - v -d /dataset -e Tensor - p", + "cpuset": "0-111"} ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_ssd_mobilenet_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_ssd_mobilenet_args.json index c4a2be048..b3659c5b3 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_ssd_mobilenet_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_ssd_mobilenet_args.json @@ -1,25 +1,41 @@ [ { "_comment": "ssd_mobilenet_fp32_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=fp32 --mode=inference 
--model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --accuracy-only --verbose --in-graph=/in_graph/frozen_inference_graph.pb --benchmark-dir=/workspace/benchmarks --data-location=/dataset", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/infer_detections.py -g /in_graph/frozen_inference_graph.pb -i 1000 -w 200 -a 28 -e 1 -d /dataset -r"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/infer_detections.py -g /in_graph/frozen_inference_graph.pb -i 1000 -w 200 -a 28 -e 1 -d /dataset -r", + "cpuset": "0-111"}, { "_comment": "ssd_mobilenet_fp32", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/frozen_inference_graph.pb --data-location=/dataset", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/infer_detections.py -g /in_graph/frozen_inference_graph.pb -i 1000 -w 200 -a 28 -e 1 -d /dataset -b -1"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/infer_detections.py -g /in_graph/frozen_inference_graph.pb -i 1000 -w 200 -a 28 -e 1 -d /dataset -b -1", + "cpuset": "0-111"}, { "_comment": "ssd_mobilenet_int8_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models 
--in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb --data-location=/dataset", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/infer_detections.py -g /in_graph/ssdmobilenet_int8_pretrained_model.pb -i 1000 -w 200 -a 28 -e 1 -d /dataset -r"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/infer_detections.py -g /in_graph/ssdmobilenet_int8_pretrained_model.pb -i 1000 -w 200 -a 28 -e 1 -d /dataset -r", + "cpuset": "0-111"}, { "_comment": "ssd_mobilenet_int8", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size=1 --socket-id 0 --data-location=/dataset --verbose --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb --benchmark-only --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/infer_detections.py -g /in_graph/ssdmobilenet_int8_pretrained_model.pb -i 1000 -w 200 -a 28 -e 1 -d /dataset -b 1"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/infer_detections.py -g /in_graph/ssdmobilenet_int8_pretrained_model.pb -i 1000 -w 200 -a 28 -e 1 -d /dataset -b 1", + "cpuset": "0-111"}, { "_comment": "ssd_mobilenet_bfloat16_inference", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=bfloat16 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --socket-id=0 --data-location=/dataset/coco_val.record --benchmark-only 
--in-graph=/in_graph/ssdmobilenet_fp32_pretrained_model_combinedNMS.pb", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/infer_detections.py -g /in_graph/ssdmobilenet_fp32_pretrained_model_combinedNMS.pb -i 1000 -w 200 -a 28 -e 1 -p bfloat16 -d /dataset/coco_val.record -b -1"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/infer_detections.py -g /in_graph/ssdmobilenet_fp32_pretrained_model_combinedNMS.pb -i 1000 -w 200 -a 28 -e 1 -p bfloat16 -d /dataset/coco_val.record -b -1", + "cpuset": "0-111"}, { "_comment": "ssd_mobilenet_bfloat16_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=bfloat16 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --socket-id=0 --data-location=/dataset/coco_val.record --accuracy-only --in-graph=/in_graph/ssdmobilenet_fp32_pretrained_model_combinedNMS.pb", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/infer_detections.py -g /in_graph/ssdmobilenet_fp32_pretrained_model_combinedNMS.pb -i 1000 -w 200 -a 28 -e 1 -p bfloat16 -d /dataset/coco_val.record -r"} + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/infer_detections.py -g /in_graph/ssdmobilenet_fp32_pretrained_model_combinedNMS.pb -i 1000 -w 200 -a 28 -e 1 -p bfloat16 -d /dataset/coco_val.record -r", + "cpuset": "0-111"}, + + { "_comment": "ssd_mobilenet_bfloat16_inference", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=bfloat16 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --data-location=/dataset/coco_val.record --benchmark-only --in-graph=/in_graph/ssdmobilenet_fp32_pretrained_model_combinedNMS.pb", + "output": "python 
/workspace/intelai_models/inference/infer_detections.py -g /in_graph/ssdmobilenet_fp32_pretrained_model_combinedNMS.pb -i 1000 -w 200 -a 16 -e 1 -p bfloat16 -d /dataset/coco_val.record -b -1", + "cpuset": "0-7,8-15"}, + + { "_comment": "ssd_mobilenet_fp32_cpuset", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --batch-size=-1 --benchmark-only --verbose --in-graph=/in_graph/frozen_inference_graph.pb --data-location=/dataset", + "output": "python /workspace/intelai_models/inference/infer_detections.py -g /in_graph/frozen_inference_graph.pb -i 1000 -w 200 -a 8 -e 2 -d /dataset -b -1", + "cpuset": "25-28,0-3"} ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_ssd_resnet34_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_ssd_resnet34_args.json index 7619c3650..aca0d5496 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_ssd_resnet34_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_ssd_resnet34_args.json @@ -1,41 +1,61 @@ [ { "_comment": "ssd_resnet34_bfloat16_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=bfloat16 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/bfloat16/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 --input-size 300 --warmup-steps 200 --steps 800 
--accuracy-only --data-location /dataset"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/bfloat16/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 --input-size 300 --warmup-steps 200 --steps 800 --accuracy-only --data-location /dataset", + "cpuset": "0-111"}, { "_comment": "ssd_resnet34_fp32_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 --input-size 300 --warmup-steps 200 --steps 800 --accuracy-only --data-location /dataset"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 --input-size 300 --warmup-steps 200 --steps 800 --accuracy-only --data-location /dataset", + "cpuset": "0-111"}, { "_comment": "ssd_resnet34_fp32", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose 
--model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 --input-size 300 --warmup-steps 200 --steps 800"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 --input-size 300 --warmup-steps 200 --steps 800", + "cpuset": "0-111"}, { "_comment": "ssd_resnet34_fp32_training", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=fp32 --mode=training --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --num-train-steps=500 --benchmark-only --model-source-dir=/workspace/models --data-location=/dataset --num-inter-threads=1 --num-intra-threads=27 --disable-tcmalloc=True --mpi_num_processes=2 --mpi_num_processes_per_socket=1", - "output": "mpirun --allow-run-as-root -n 2 --map-by socket python /tmp/benchmark_ssd_resnet34/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --data_dir /dataset --batch_size 32 --num_inter_threads 1 --num_intra_threads 27 --model=ssd300 --data_name coco --mkl=True --device=cpu --data_format=NHWC --variable_update=horovod --horovod_device=cpu --kmp_affinity=granularity=fine,verbose,compact,1,0 --kmp_settings=1 --kmp_blocktime=1 --weight_decay 0.0005 --num_warmup_batches 0 --num_batches 500"}, + "output": "mpirun --allow-run-as-root -n 2 --map-by socket python /tmp/benchmark_ssd_resnet34/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --data_dir /dataset --batch_size 32 
--num_inter_threads 1 --num_intra_threads 27 --model=ssd300 --data_name coco --mkl=True --device=cpu --data_format=NHWC --variable_update=horovod --horovod_device=cpu --kmp_affinity=granularity=fine,verbose,compact,1,0 --kmp_settings=1 --kmp_blocktime=1 --weight_decay 0.0005 --num_warmup_batches 0 --num_batches 500", + "cpuset": "0-111"}, { "_comment": "ssd_resnet34_fp32_training_calc_threads", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=fp32 --mode=training --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --num-train-steps=500 --benchmark-only --model-source-dir=/workspace/models --data-location=/dataset --disable-tcmalloc=True --mpi_num_processes=2 --mpi_num_processes_per_socket=1 --timeline=file", - "output": "mpirun --allow-run-as-root -n 2 --map-by socket python /tmp/benchmark_ssd_resnet34/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --data_dir /dataset --batch_size 32 --num_inter_threads 1 --num_intra_threads 54 --model=ssd300 --data_name coco --mkl=True --device=cpu --data_format=NHWC --variable_update=horovod --horovod_device=cpu --kmp_affinity=granularity=fine,verbose,compact,1,0 --kmp_settings=1 --kmp_blocktime=1 --use_chrome_trace_format=True --trace_file=file --weight_decay 0.0005 --num_warmup_batches 0 --num_batches 500"}, + "output": "mpirun --allow-run-as-root -n 2 --map-by socket python /tmp/benchmark_ssd_resnet34/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --data_dir /dataset --batch_size 32 --num_inter_threads 1 --num_intra_threads 54 --model=ssd300 --data_name coco --mkl=True --device=cpu --data_format=NHWC --variable_update=horovod --horovod_device=cpu --kmp_affinity=granularity=fine,verbose,compact,1,0 --kmp_settings=1 --kmp_blocktime=1 --use_chrome_trace_format=True --trace_file=file --weight_decay 0.0005 --num_warmup_batches 0 
--num_batches 500", + "cpuset": "0-111"}, { "_comment": "ssd_resnet34_int8_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 --input-size 300 --warmup-steps 200 --steps 800 --accuracy-only --data-location /dataset"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 --input-size 300 --warmup-steps 200 --steps 800 --accuracy-only --data-location /dataset", + "cpuset": "0-111"}, { "_comment": "ssd_resnet34_int8", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/infer_detections.py --input-graph 
/in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 --input-size 300 --warmup-steps 200 --steps 800"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 --input-size 300 --warmup-steps 200 --steps 800", + "cpuset": "0-111"}, { "_comment": "ssd_resnet34_bfloat16_training", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=bfloat16 --mode=training --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --data-location=/dataset --num-cores=52 --num-inter-threads=1 --num-intra-threads=52 --batch-size=100 --weight_decay=1e-4 --num-train-steps=100 --num_warmup_batches=20 --mpi_num_processes=1 --output-dir=/workspace/logs", - "output": "mpirun --allow-run-as-root -n 1 --map-by socket python /tmp/benchmark_ssd_resnet34/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --data_dir /dataset --batch_size 100 --num_inter_threads 1 --num_intra_threads 52 --model=ssd300 --data_name coco --mkl=True --device=cpu --data_format=NHWC --variable_update=horovod --horovod_device=cpu --kmp_affinity=granularity=fine,verbose,compact,1,0 --kmp_settings=1 --kmp_blocktime=1 --weight_decay 0.0001 --num_warmup_batches 20 --num_batches 100"}, + "output": "mpirun --allow-run-as-root -n 1 --map-by socket python /tmp/benchmark_ssd_resnet34/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --data_dir /dataset --batch_size 100 --num_inter_threads 1 --num_intra_threads 52 --model=ssd300 --data_name coco --mkl=True --device=cpu --data_format=NHWC --variable_update=horovod --horovod_device=cpu --kmp_affinity=granularity=fine,verbose,compact,1,0 --kmp_settings=1 --kmp_blocktime=1 --weight_decay 0.0001 --num_warmup_batches 20 
--num_batches 100", + "cpuset": "0-111"}, { "_comment": "ssd_resnet34_bfloat16_training_accuracy", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=bfloat16 --mode=training --accuracy-only --num-cores=52 --num-inter-threads=1 --num-intra-threads=52 --batch-size=100 --data-location=/dataset --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models", - "output": "python /tmp/benchmark_ssd_resnet34/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --data_dir /dataset --batch_size 100 --num_inter_threads 1 --num_intra_threads 52 --model=ssd300 --data_name coco --mkl=True --device=cpu --data_format=NHWC --variable_update=horovod --horovod_device=cpu --kmp_affinity=granularity=fine,verbose,compact,1,0 --kmp_settings=1 --kmp_blocktime=1 --train_dir=None --eval=true --num_eval_epochs=1 --print_training_accuracy=True"}, + "output": "python /tmp/benchmark_ssd_resnet34/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --data_dir /dataset --batch_size 100 --num_inter_threads 1 --num_intra_threads 52 --model=ssd300 --data_name coco --mkl=True --device=cpu --data_format=NHWC --variable_update=horovod --horovod_device=cpu --kmp_affinity=granularity=fine,verbose,compact,1,0 --kmp_settings=1 --kmp_blocktime=1 --train_dir=None --eval=true --num_eval_epochs=1 --print_training_accuracy=True", + "cpuset": "0-111"}, { "_comment": "ssd_resnet34_bfloat16_training_with_backbone_model", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=bfloat16 --mode=training --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --data-location=/dataset --num-cores=52 --num-inter-threads=1 --num-intra-threads=50 --batch-size=100 --mpi_num_processes=4 --mpi_num_processes_per_socket=1 --epochs=60 --checkpoint /checkpoints --backbone-model=/workspace/ssd-backbone --output-dir=/workspace/logs", - "output": "mpirun 
--allow-run-as-root -n 4 --map-by socket python /tmp/benchmark_ssd_resnet34/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --data_dir /dataset --batch_size 100 --num_inter_threads 1 --num_intra_threads 50 --model=ssd300 --data_name coco --mkl=True --device=cpu --data_format=NHWC --variable_update=horovod --horovod_device=cpu --kmp_affinity=granularity=fine,verbose,compact,1,0 --kmp_settings=1 --kmp_blocktime=1 --backbone_model_path=/workspace/ssd-backbone/model.ckpt-28152 --optimizer=momentum --weight_decay=0.0005 --momentum=0.9 --num_epochs=60 --num_warmup_batches=0 --train_dir=/checkpoints --save_model_steps=10000"} + "output": "mpirun --allow-run-as-root -n 4 --map-by socket python /tmp/benchmark_ssd_resnet34/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --data_dir /dataset --batch_size 100 --num_inter_threads 1 --num_intra_threads 50 --model=ssd300 --data_name coco --mkl=True --device=cpu --data_format=NHWC --variable_update=horovod --horovod_device=cpu --kmp_affinity=granularity=fine,verbose,compact,1,0 --kmp_settings=1 --kmp_blocktime=1 --backbone_model_path=/workspace/ssd-backbone/model.ckpt-28152 --optimizer=momentum --weight_decay=0.0005 --momentum=0.9 --num_epochs=60 --num_warmup_batches=0 --train_dir=/checkpoints --save_model_steps=10000", + "cpuset": "0-111"}, + + { "_comment": "ssd_resnet34_fp32_accuracy_cpuset", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --batch-size=1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset", + "output": "python /workspace/intelai_models/inference/fp32/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 14 --input-size 
300 --warmup-steps 200 --steps 800 --accuracy-only --data-location /dataset", + "cpuset": "5-15,20-22"}, + + { "_comment": "ssd_resnet34_bfloat16_accuracy_cpuset", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=bfloat16 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --batch-size=1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset", + "output": "python /workspace/intelai_models/inference/bfloat16/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 8 --input-size 300 --warmup-steps 200 --steps 800 --accuracy-only --data-location /dataset", + "cpuset": "28-35"} ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_transformer_lt_official_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_transformer_lt_official_args.json index f2e73edba..1cce64c70 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_transformer_lt_official_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_transformer_lt_official_args.json @@ -2,11 +2,13 @@ { "_comment": "Transformer LT official FP32 online inference", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --batch-size=1 --socket-id=0 --benchmark-only --in-graph=fp32_graphdef.pb --data-location=/dataset --output-dir=/workspace/logs --file=newstest2014.en --file_out=out_translate.txt --reference=newstest2014.de --vocab_file=vocab.txt", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_ab.py 
--param_set=big --in_graph=fp32_graphdef.pb --batch_size=1 --file=newstest2014.en --file_out=/workspace/logs/out_translate.txt --vocab_file=vocab.txt --num_inter=1 --num_intra=28" + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_ab.py --param_set=big --in_graph=fp32_graphdef.pb --batch_size=1 --file=newstest2014.en --file_out=/workspace/logs/out_translate.txt --vocab_file=vocab.txt --num_inter=1 --num_intra=28", + "cpuset": "0-111" }, { "_comment": "Transformer LT official FP32 batch inference", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --batch-size=64 --socket-id=0 --benchmark-only --in-graph=fp32_graphdef.pb --data-location=/dataset --output-dir=/workspace/logs --file=newstest2014.en --file_out=out_translate.txt --reference=newstest2014.de --vocab_file=vocab.txt", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_ab.py --param_set=big --in_graph=fp32_graphdef.pb --batch_size=64 --file=newstest2014.en --file_out=/workspace/logs/out_translate.txt --vocab_file=vocab.txt --num_inter=1 --num_intra=28" + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_ab.py --param_set=big --in_graph=fp32_graphdef.pb --batch_size=64 --file=newstest2014.en --file_out=/workspace/logs/out_translate.txt --vocab_file=vocab.txt --num_inter=1 --num_intra=28", + "cpuset": "0-111" } ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_transformer_mlperf_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_transformer_mlperf_args.json index 879f52eec..2ae9eb560 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_transformer_mlperf_args.json +++ 
b/tests/unit/common/tensorflow/tf_model_args/tf_transformer_mlperf_args.json @@ -2,21 +2,25 @@ { "_comment": "Transformer MLPerf FP32 inference", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_mlperf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --batch-size=64 -i=0 --in-graph=graph.pb --data-location=/dataset --file=newstest2014.en --file_out=translate.txt --reference=newstest2014.de", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/transformer/translate.py --params=big --input_graph=graph.pb --batch_size=64 --test_mode=inference --warmup_steps=3 --steps=100 --vocab_file= --file=newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --data_dir=/dataset --num_inter=None --num_intra=None" + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/transformer/translate.py --params=big --input_graph=graph.pb --batch_size=64 --test_mode=inference --warmup_steps=3 --steps=100 --vocab_file= --file=newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --data_dir=/dataset --num_inter=None --num_intra=None", + "cpuset": "0-111" }, { "_comment": "Transformer MLPerf BFloat16 inference", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_mlperf --precision=bfloat16 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --batch-size=64 -i=0 --in-graph=graph.pb --data-location=/dataset --file=newstest2014.en --file_out=translate.txt --reference=newstest2014.de", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/bfloat16/transformer/translate.py --params=big --input_graph=graph.pb 
--batch_size=64 --test_mode=inference --warmup_steps=3 --steps=100 --vocab_file= --file=newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --data_dir=/dataset --num_inter=None --num_intra=None" + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/bfloat16/transformer/translate.py --params=big --input_graph=graph.pb --batch_size=64 --test_mode=inference --warmup_steps=3 --steps=100 --vocab_file= --file=newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --data_dir=/dataset --num_inter=None --num_intra=None", + "cpuset": "0-111" }, { "_comment": "Transformer MLPerf FP32 training", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_mlperf --precision=fp32 --mode=training --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --socket-id 0 --data-location /dataset --output-dir=/workspace/logs --batch-size=5120 --random_seed=11 --train_steps=2 --steps_between_eval=1 --params=big --save_checkpoints=Yes --do_eval=Yes --print_iter=10", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/training/fp32/transformer/transformer_main.py --data_dir=/dataset --model_dir=/workspace/logs --batch_size=5120 --random_seed=11 --params=big --train_steps=2 --steps_between_eval=1 --do_eval=Yes --save_checkpoints=Yes --save_profile=No --print_iter=10 --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28 --learning_rate=2 --static_batch=No" + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/training/fp32/transformer/transformer_main.py --data_dir=/dataset --model_dir=/workspace/logs --batch_size=5120 --random_seed=11 --params=big --train_steps=2 --steps_between_eval=1 --do_eval=Yes --save_checkpoints=Yes --save_profile=No --print_iter=10 --inter_op_parallelism_threads=1 
--intra_op_parallelism_threads=28 --learning_rate=2 --static_batch=No", + "cpuset": "0-111" }, { "_comment": "Transformer MLPerf BFloat16 training", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_mlperf --precision=bfloat16 --mode=training --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --socket-id 0 --data-location /dataset --output-dir=/workspace/logs --batch-size=5120 --random_seed=11 --train_steps=2 --steps_between_eval=1 --params=big --save_checkpoints=Yes --do_eval=Yes --print_iter=10", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/training/bfloat16/transformer/transformer_main.py --data_dir=/dataset --model_dir=/workspace/logs --batch_size=5120 --random_seed=11 --params=big --train_steps=2 --steps_between_eval=1 --do_eval=Yes --save_checkpoints=Yes --save_profile=No --print_iter=10 --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28 --learning_rate=2 --static_batch=No" + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/training/bfloat16/transformer/transformer_main.py --data_dir=/dataset --model_dir=/workspace/logs --batch_size=5120 --random_seed=11 --params=big --train_steps=2 --steps_between_eval=1 --do_eval=Yes --save_checkpoints=Yes --save_profile=No --print_iter=10 --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28 --learning_rate=2 --static_batch=No", + "cpuset": "0-111" } ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_unet_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_unet_args.json index 37a5f21d7..651e6c368 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_unet_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_unet_args.json @@ -1,5 +1,6 @@ [ { "_comment": "FP32 benchmark", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=unet 
--precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --checkpoint_name=model.ckpt", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/unet_infer.py -bs 1 -cp /checkpoints/model.ckpt --num_inter_threads 1 --num_intra_threads 28 -nw 80 -nb 400"} + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/unet_infer.py -bs 1 -cp /checkpoints/model.ckpt --num_inter_threads 1 --num_intra_threads 28 -nw 80 -nb 400", + "cpuset": "0-111"} ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_wavenet_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_wavenet_args.json index 99ef0e147..577b25562 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_wavenet_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_wavenet_args.json @@ -1,5 +1,6 @@ [ { "_comment": "FP32 benchmark command", "input": "run_tf_benchmark.py --framework tensorflow --use-case text_to_speech --precision fp32 --mode inference --model-name wavenet --num-cores 1 --checkpoint /checkpoints --intelai-models . --model-source-dir . 
--socket-id 0 --verbose --checkpoint_name=model.ckpt-99 --sample=8510", - "output": "numactl --physcpubind=0-0 --membind=0 python generate.py /checkpoints/model.ckpt-99 --num_inter_threads=1 --num_intra_threads=1 --sample=8510"} + "output": "numactl --physcpubind=0-0 --membind=0 python generate.py /checkpoints/model.ckpt-99 --num_inter_threads=1 --num_intra_threads=1 --sample=8510", + "cpuset": "0-111"} ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_wide_deep_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_wide_deep_args.json index bff19d1e6..3ba041ec4 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_wide_deep_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_wide_deep_args.json @@ -1,9 +1,11 @@ [ { "_comment": "wide_deep_small_fp32_batch_inference", "input": "run_tf_benchmark.py --framework tensorflow --use-case recommendation --precision fp32 --mode inference --model-name wide_deep --batch-size 1024 --data-location /dataset --checkpoint /checkpoints --intelai-models . 
--verbose", - "output": "OMP_NUM_THREADS=1 numactl --cpunodebind=0 --membind=0 python inference/fp32/wide_deep_inference.py --data_dir=/dataset --model_dir=/checkpoints --batch_size=1024"}, + "output": "OMP_NUM_THREADS=1 numactl --cpunodebind=0 --membind=0 python inference/fp32/wide_deep_inference.py --data_dir=/dataset --model_dir=/checkpoints --batch_size=1024", + "cpuset": "0-111"}, { "_comment": "wide_deep_small_fp32_online_inference", "input": "run_tf_benchmark.py --framework tensorflow --use-case recommendation --precision fp32 --mode inference --model-name wide_deep --data-location /dataset --checkpoint /checkpoints --intelai-models /workspace/models --verbose", - "output": "OMP_NUM_THREADS=1 numactl --cpunodebind=0 --membind=0 python inference/fp32/wide_deep_inference.py --data_dir=/dataset --model_dir=/checkpoints --batch_size=1"} + "output": "OMP_NUM_THREADS=1 numactl --cpunodebind=0 --membind=0 python inference/fp32/wide_deep_inference.py --data_dir=/dataset --model_dir=/checkpoints --batch_size=1", + "cpuset": "0-111"} ] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_wide_deep_large_ds_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_wide_deep_large_ds_args.json index 29d5b8813..717dda7c1 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_wide_deep_large_ds_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_wide_deep_large_ds_args.json @@ -1,38 +1,47 @@ [ { "_comment": "wide_deep_large_int8", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python 
/workspace/intelai_models/inference/inference.py --data_location=/dataset --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/inference.py --data_location=/dataset --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb", + "cpuset": "0-111"}, { "_comment": "wide_deep_large_int8_28_cores", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=28 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/inference.py --batch_size=1 --data_location=/dataset --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/inference.py --batch_size=1 --data_location=/dataset --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb", + "cpuset": "0-111"}, { "_comment": "wide_deep_large_int8_latency", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/inference.py --batch_size=1 --data_location=/dataset 
--input_graph=/in_graph/wide_deep_int8_pretrained_model.pb"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/inference.py --batch_size=1 --data_location=/dataset --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb", + "cpuset": "0-111"}, { "_comment": "wide_deep_large_int8_throughput", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=512 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset", - "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/inference.py --batch_size=512 --data_location=/dataset --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb"}, + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/inference.py --batch_size=512 --data_location=/dataset --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb", + "cpuset": "0-111"}, { "_comment": "wide_deep_large_fp32", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset", - "output": "python /workspace/intelai_models/inference/inference.py --data_location=/dataset --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb"}, + "output": "python /workspace/intelai_models/inference/inference.py 
--data_location=/dataset --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb", + "cpuset": "0-111"}, { "_comment": "wide_deep_large_fp32_28_cores", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=28 --batch-size=512 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset", - "output": "python /workspace/intelai_models/inference/inference.py --batch_size=512 --data_location=/dataset --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb"}, + "output": "python /workspace/intelai_models/inference/inference.py --batch_size=512 --data_location=/dataset --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb", + "cpuset": "0-111"}, { "_comment": "wide_deep_large_fp32_throughput", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=512 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset", - "output": "python /workspace/intelai_models/inference/inference.py --batch_size=512 --data_location=/dataset --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb"}, + "output": "python /workspace/intelai_models/inference/inference.py --batch_size=512 --data_location=/dataset --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb", + "cpuset": "0-111"}, { "_comment": "wide_deep_large_fp32_latency", "input": "run_tf_benchmark.py --framework=tensorflow 
--use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset", - "output": "python /workspace/intelai_models/inference/inference.py --batch_size=1 --data_location=/dataset --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb"}, + "output": "python /workspace/intelai_models/inference/inference.py --batch_size=1 --data_location=/dataset --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb", + "cpuset": "0-111"}, { "_comment": "wide_deep_large_ds_fp32training", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=training --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=512 --output-dir=/workspace/logs --mode training --checkpoint /checkpoint_dir --data-location=/dataset", - "output": "python /workspace/intelai_models/training/train.py --batch_size=512 --data_location=/dataset --checkpoint=/checkpoint_dir --output_dir=/workspace/logs"} + "output": "python /workspace/intelai_models/training/train.py --batch_size=512 --data_location=/dataset --checkpoint=/checkpoint_dir --output_dir=/workspace/logs", + "cpuset": "0-111"} ] diff --git a/tests/unit/common/test_base_model_init.py b/tests/unit/common/test_base_model_init.py index 65bb0ba78..4ce28a6bd 100644 --- a/tests/unit/common/test_base_model_init.py +++ b/tests/unit/common/test_base_model_init.py @@ -328,3 +328,46 @@ def test_numa_multi_instance_run_command( for cpu_bind in expected_cpu_bind: assert "numactl --localalloc --physcpubind={} {} >> {}".\ format(cpu_bind, 
test_run_command, test_output_dir) in system_call_args + + +@pytest.mark.parametrize('test_num_instances,test_socket_id,test_num_cores,test_cpu_list,test_cpuset,' + 'expected_inter_threads,expected_intra_threads', + [[2, -1, -1, [['0', '1'], ['2', '3']], {0: ['0', '1'], 1: ['2', '3']}, 1, 2], + [None, 0, -1, [['1', '2', '3'], ['10', '11']], {0: ['1', '2', '3'], 1: ['10', '11']}, 1, 3], + [None, 1, -1, [['1', '2', '3'], ['10', '11']], {0: ['1', '2', '3'], 1: ['10', '11']}, 1, 2], + [None, 1, -1, [['1', '2', '3'], ['10', '11']], None, 1, 3], + [None, 1, 8, [['1', '2', '3'], ['10', '11']], {0: ['1', '2', '3'], 1: ['10', '11']}, 1, 8]]) +@patch("os.path.exists") +@patch("benchmarks.common.base_model_init.open") +@patch("common.platform_util.os") +@patch("common.platform_util.system_platform") +@patch("common.platform_util.subprocess") +@patch("os.system") +def test_num_inter_intra_threads_settings( + mock_system, mock_subprocess, mock_platform, mock_os, mock_open, + mock_path_exists, test_num_instances, test_socket_id, test_num_cores, + test_cpu_list, test_cpuset, expected_inter_threads, expected_intra_threads): + """ + Tests the base model init function that determines the num_inter_threads and + num_intra_threads values. 
+ """ + platform_util = MagicMock(cpu_core_list=test_cpu_list, cpuset_cpus=test_cpuset, + num_cores_per_socket=len(test_cpu_list[0])) + test_output_dir = "/tmp/output" + args = MagicMock(verbose=True, model_name=test_model_name, batch_size=100, + numa_cores_per_instance=test_num_instances, precision="fp32", + output_dir=test_output_dir, socket_id=test_socket_id, num_cores=test_num_cores, + num_inter_threads=None, num_intra_threads=None) + os.environ["PYTHON_EXE"] = "python" + os.environ["MPI_HOSTNAMES"] = "None" + os.environ["MPI_NUM_PROCESSES"] = "None" + base_model_init = BaseModelInitializer(args, [], platform_util) + + mock_path_exists.return_value = True + + # Get the number of inter/intra threads and compared to the expected values + base_model_init.set_num_inter_intra_threads() + print(base_model_init.args.num_inter_threads) + print(base_model_init.args.num_intra_threads) + assert base_model_init.args.num_inter_threads == expected_inter_threads + assert base_model_init.args.num_intra_threads == expected_intra_threads diff --git a/tests/unit/common/test_platform_util.py b/tests/unit/common/test_platform_util.py index 8796c5887..b05b71442 100644 --- a/tests/unit/common/test_platform_util.py +++ b/tests/unit/common/test_platform_util.py @@ -21,7 +21,7 @@ import json import pytest import os -from mock import MagicMock +from mock import MagicMock, mock_open, patch from benchmarks.common.platform_util import PlatformUtil, CPUInfo from test_utils import platform_config @@ -53,13 +53,20 @@ def platform_mock(patch): return patch("system_platform.system") -def test_platform_util_lscpu_parsing(platform_mock, subprocess_mock, os_mock): +@pytest.fixture +def read_mock(patch): + return patch("read") + + +@patch("benchmarks.common.platform_util.PlatformUtil._get_cpuset") +def test_platform_util_lscpu_parsing(get_cpuset_mock, platform_mock, subprocess_mock, os_mock): """ Verifies that platform_utils gives us the proper values that we expect based on the lscpu_output string 
provided. """ platform_mock.return_value = platform_config.SYSTEM_TYPE os_mock.return_value = True + get_cpuset_mock.return_value = "0-111" subprocess_mock.return_value = platform_config.LSCPU_OUTPUT platform_util = PlatformUtil(MagicMock(verbose=True)) platform_util.linux_init() @@ -137,11 +144,13 @@ def test_cpu_info_binding_information_no_numa(subprocess_mock): assert generated_value == expected_value -def test_numa_cpu_core_list(subprocess_mock, subprocess_popen_mock, platform_mock, os_mock): +@patch("benchmarks.common.platform_util.PlatformUtil._get_cpuset") +def test_numa_cpu_core_list(get_cpuset_mock, subprocess_mock, subprocess_popen_mock, platform_mock, os_mock): """ Test the platform utils to ensure that we are getting the proper core lists """ subprocess_mock.return_value = platform_config.LSCPU_OUTPUT subprocess_popen_mock.return_value.stdout.readlines.return_value = platform_config.NUMA_CORES_OUTPUT platform_mock.return_value = platform_config.SYSTEM_TYPE + get_cpuset_mock.return_value = "0-111" os_mock.return_value = True subprocess_mock.return_value = platform_config.LSCPU_OUTPUT platform_util = PlatformUtil(MagicMock(verbose=True)) @@ -169,3 +178,79 @@ def test_platform_util_wmic_parsing(platform_mock, subprocess_mock, os_mock): assert platform_util.num_threads_per_core == 28 assert platform_util.num_logical_cpus == 56 assert platform_util.num_numa_nodes == 0 + + +@patch("benchmarks.common.platform_util.PlatformUtil._get_cpuset") +@pytest.mark.parametrize('cpuset_range,expected_list', + [['0-5', [0, 1, 2, 3, 4, 5]], + ['0-3,7,6', [0, 1, 2, 3, 6, 7]], + ['2-3,7,9-11,20', [2, 3, 7, 9, 10, 11, 20]], + ['0-3,7-6,11,11', [0, 1, 2, 3, 11]], + ['7-9,5-10,6,4', [4, 5, 6, 7, 8, 9, 10]]]) +def test_get_list_from_string_ranges(get_cpuset_mock, platform_mock, subprocess_mock, os_mock, + cpuset_range, expected_list,): + """ + Tests the PlatformUtils _get_list_from_string_ranges function that converts string + number ranges to an integer list. 
+ """ + platform_mock.return_value = platform_config.SYSTEM_TYPE + subprocess_mock.return_value = platform_config.LSCPU_OUTPUT + get_cpuset_mock.return_value = cpuset_range + os_mock.return_value = True + platform_util = PlatformUtil(MagicMock()) + result = platform_util._get_list_from_string_ranges(cpuset_range) + assert result == expected_list + + +@pytest.mark.parametrize('cpuset_range,expected_core_list', + [["0-7,28-35", + [["0", "1", "2", "3", "4", "5", "6", "7"], + ["28", "29", "30", "31", "32", "33", "34", "35"]]], + ["0,2-5,20,29-32,1", + [["0", "1", "2", "3", "4", "5", "20"], + ["29", "30", "31", "32"]]]]) +@patch("os.path.exists") +def test_numa_cpu_core_list_cpuset(path_exists_mock, subprocess_mock, subprocess_popen_mock, + platform_mock, os_mock, cpuset_range, expected_core_list): + """ Test the platform utils to ensure that we are getting the proper core lists """ + subprocess_mock.return_value = platform_config.LSCPU_OUTPUT + subprocess_popen_mock.return_value.stdout.readlines.return_value = platform_config.NUMA_CORES_OUTPUT + platform_mock.return_value = platform_config.SYSTEM_TYPE + os_mock.return_value = True + subprocess_mock.return_value = platform_config.LSCPU_OUTPUT + path_exists_mock.return_value = True + cpuset_mock = mock_open(read_data=cpuset_range) + with patch("builtins.open", cpuset_mock): + platform_util = PlatformUtil(MagicMock(verbose=True, numa_cores_per_instance=4)) + + # ensure there are 2 items in the list since there are 2 sockets + assert len(platform_util.cpu_core_list) == 2 + + # Check that the core list matches the ranges defined for the cpuset file read + assert platform_util.cpu_core_list == expected_core_list + + +@patch("benchmarks.common.platform_util.PlatformUtil._get_cpuset") +@pytest.mark.parametrize('cpuset_range,expected_num_sockets', + [['0-5', 1], + ['0-3,7,6', 1], + ['2-3,7,9-11,20', 1], + ['0-3,7-6,11,11', 1], + ['7-9,5-10,6,4', 1], + ['0-111', 2], + ['28-32,84-90', 1]]) +def 
test_platform_utils_num_sockets_with_cpuset(get_cpuset_mock, platform_mock, subprocess_mock, + os_mock, cpuset_range, expected_num_sockets): + """ + Checks that the number of sockets in platform_utils reflects the proper value based on + the cpuset. If the cores being used by the container in the cpuset are all on one socket, + then the num_cpu_sockets should be 1, even if the system itself has 2 sockets (since the + container only has access to 1). + """ + platform_mock.return_value = platform_config.SYSTEM_TYPE + os_mock.return_value = True + get_cpuset_mock.return_value = cpuset_range + subprocess_mock.return_value = platform_config.LSCPU_OUTPUT + platform_util = PlatformUtil(MagicMock(verbose=True)) + platform_util.linux_init() + assert platform_util.num_cpu_sockets == expected_num_sockets diff --git a/tests/unit/test_launch_benchmark.py b/tests/unit/test_launch_benchmark.py index 6bc73e892..cab62775a 100644 --- a/tests/unit/test_launch_benchmark.py +++ b/tests/unit/test_launch_benchmark.py @@ -343,3 +343,31 @@ def test_disable_tcmalloc(launch_benchmark, mock_popen, # convert the run command args to a string and then check for the custom volume mounts docker_run_cmd = " ".join(args[0]) assert "--env DISABLE_TCMALLOC=".format(expected_disable_tcmalloc) in docker_run_cmd + + +@pytest.mark.parametrize("numa_cores_per_instance_arg,socket_id_args,num_cores_arg,mpi_num_proc_arg,run_privileged", + [["4", -1, -1, None, True], + [None, -1, -1, None, False], + ["socket", -1, -1, None, True], + [None, 0, -1, None, True], + [None, 1, -1, None, True], + [None, -1, 8, None, True], + [None, -1, -1, 2, True]]) +def test_launch_benchmark_docker_privileged(launch_benchmark, mock_popen, platform_mock, + numa_cores_per_instance_arg, socket_id_args, + num_cores_arg, mpi_num_proc_arg, run_privileged): + """ + Verifies that docker only runs with --privileged when it needs to (if args that + run multi-instance or numactl are used). 
+ """ + launch_benchmark.args.numa_cores_per_instance = numa_cores_per_instance_arg + launch_benchmark.args.socket_id = socket_id_args + launch_benchmark.args.num_cores = num_cores_arg + launch_benchmark.args.mpi = mpi_num_proc_arg + platform_mock.return_value = platform_config.OS_TYPE + launch_benchmark.main() + assert mock_popen.called + args, _ = mock_popen.call_args + # convert the run command args to a string and then check for the docker args + docker_run_cmd = " ".join(args[0]) + assert ("--privileged" in docker_run_cmd) == run_privileged