Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allows specifying socket/numa node for BESS #946

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions core/bessctl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,12 @@ class BESSControlImpl final : public BESSControl::Service {
if (!is_cpu_present(core)) {
return return_with_error(response, EINVAL, "Invalid core %d", core);
}

if (rte_lcore_to_socket_id(core) != (unsigned int) FLAGS_n) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style guide nit: could you explicitly brace initialize, or perhaps static_cast, here instead of c-style casting?

return return_with_error(response, EINVAL, "Invalid core %d not on socket %d",
core, FLAGS_n);
}

if (is_worker_active(wid)) {
return return_with_error(response, EEXIST, "worker:%d is already active",
wid);
Expand Down
21 changes: 14 additions & 7 deletions core/dpdk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ class CmdLineOpts {
std::vector<char *> argv_;
};

void init_eal(int dpdk_mb_per_socket, std::string nonworker_corelist) {
void init_eal(int dpdk_mb_per_socket, int socket, std::string nonworker_corelist) {
CmdLineOpts rte_args{
"bessd",
"--master-lcore",
Expand All @@ -118,11 +118,18 @@ void init_eal(int dpdk_mb_per_socket, std::string nonworker_corelist) {
// memory in advance. We allocate 512MB (this is shared among nodes).
rte_args.Append({"-m", "512"});
} else {
std::string opt_socket_mem = std::to_string(dpdk_mb_per_socket);
for (int i = 1; i < NumNumaNodes(); i++) {
opt_socket_mem += "," + std::to_string(dpdk_mb_per_socket);
std::string opt_socket_mem;
for (int i = 0; i < NumNumaNodes(); i++) {
if (i == socket) {
opt_socket_mem += std::to_string(dpdk_mb_per_socket);
} else {
opt_socket_mem += "0";
}
if (i+1 != NumNumaNodes()) {
opt_socket_mem += ",";
}
}

LOG(INFO) << "socket mem set as: " << opt_socket_mem;
rte_args.Append({"--socket-mem", opt_socket_mem});

// Unlink mapped hugepage files so that memory can be reclaimed as soon as
Expand Down Expand Up @@ -213,13 +220,13 @@ bool IsDpdkInitialized() {
return is_initialized;
}

void InitDpdk(int dpdk_mb_per_socket) {
void InitDpdk(int dpdk_mb_per_socket, int socket) {
current_worker.SetNonWorker();

if (!is_initialized) {
is_initialized = true;
LOG(INFO) << "Initializing DPDK";
init_eal(dpdk_mb_per_socket, GetNonWorkerCoreList());
init_eal(dpdk_mb_per_socket, socket, GetNonWorkerCoreList());
}
}

Expand Down
2 changes: 1 addition & 1 deletion core/dpdk.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ bool IsDpdkInitialized();

// Initialize DPDK, with the specified amount of hugepage memory.
// Safe to call multiple times.
void InitDpdk(int dpdk_mb_per_socket = 0);
void InitDpdk(int dpdk_mb_per_socket = 0, int socket = 0);

} // namespace bess

Expand Down
20 changes: 11 additions & 9 deletions core/drivers/pmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

#include "../utils/ether.h"
#include "../utils/format.h"
#include "../opts.h"

/*!
* The following are deprecated. Ignore us.
Expand Down Expand Up @@ -266,11 +267,19 @@ CommandResponse PMDPort::Init(const bess::pb::PMDPortArg &arg) {
}
rte_eth_promiscuous_enable(ret_port_id);

int sid = rte_eth_dev_socket_id(ret_port_id);

/* if socket_id is invalid, set to arg */
if (sid < 0 || sid > RTE_MAX_NUMA_NODES) {
sid = FLAGS_n;
LOG(WARNING) << "Unable to detect socket ID, defaulting to: " << sid;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

perhaps include the name of PMDPort being configured in this log?

} else {
LOG(INFO) << "socket ID in pmd is: " <<sid;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same thing about including the port name here

}

// NOTE: As of DPDK 17.02, TX queues should be initialized first.
// Otherwise the DPDK virtio PMD will crash in rte_eth_rx_burst() later.
for (i = 0; i < num_txq; i++) {
int sid = 0; /* XXX */

ret = rte_eth_tx_queue_setup(ret_port_id, i, queue_size[PACKET_DIR_OUT],
sid, &eth_txconf);
if (ret != 0) {
Expand All @@ -279,13 +288,6 @@ CommandResponse PMDPort::Init(const bess::pb::PMDPortArg &arg) {
}

for (i = 0; i < num_rxq; i++) {
int sid = rte_eth_dev_socket_id(ret_port_id);

/* if socket_id is invalid, set to 0 */
if (sid < 0 || sid > RTE_MAX_NUMA_NODES) {
sid = 0;
}

ret = rte_eth_rx_queue_setup(ret_port_id, i, queue_size[PACKET_DIR_INC],
sid, &eth_rxconf,
bess::PacketPool::GetDefaultPool(sid)->pool());
Expand Down
13 changes: 13 additions & 0 deletions core/opts.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

#include "bessd.h"
#include "worker.h"
#include "memory.h"

// Port this BESS instance listens on.
// Panda came up with this default number
Expand Down Expand Up @@ -106,6 +107,18 @@ DEFINE_int32(m, 1024,
static const bool _m_dummy[[maybe_unused]] =
google::RegisterFlagValidator(&FLAGS_m, &ValidateMegabytesPerSocket);

static bool ValidateNumaNode(const char *, int32_t value) {
if (value < 0 || value >= bess::NumNumaNodes()) {
LOG(ERROR) << "Invalid Numa node: " << value;
return false;
}
return true;
}
DEFINE_int32(n, 0,
"Specifies which numa node to run BESS on");
static const bool _n_dummy[[maybe_unused]] =
google::RegisterFlagValidator(&FLAGS_n, &ValidateNumaNode);

static bool ValidateBuffersPerSocket(const char *, int32_t value) {
if (value <= 0) {
LOG(ERROR) << "Invalid number of buffers: " << value;
Expand Down
1 change: 1 addition & 0 deletions core/opts.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ DECLARE_string(b);
DECLARE_int32(p);
DECLARE_string(grpc_url);
DECLARE_int32(m);
DECLARE_int32(n);
DECLARE_bool(skip_root_check);
DECLARE_string(modules);
DECLARE_bool(core_dump);
Expand Down
32 changes: 15 additions & 17 deletions core/packet_pool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,27 +36,25 @@ void DoMunmap(rte_mempool_memhdr *memhdr, void *) {
PacketPool *PacketPool::default_pools_[RTE_MAX_NUMA_NODES];

void PacketPool::CreateDefaultPools(size_t capacity) {
InitDpdk(FLAGS_dpdk ? FLAGS_m : 0);
InitDpdk(FLAGS_dpdk ? FLAGS_m : 0, FLAGS_n);

rte_dump_physmem_layout(stdout);

for (int sid = 0; sid < NumNumaNodes(); sid++) {
if (FLAGS_m == 0) {
LOG(WARNING) << "Hugepage is disabled! Creating PlainPacketPool for "
<< capacity << " packets on node " << sid;
default_pools_[sid] = new PlainPacketPool(capacity, sid);
} else if (FLAGS_dpdk) {
LOG(INFO) << "Creating DpdkPacketPool for " << capacity
<< " packets on node " << sid;
default_pools_[sid] = new DpdkPacketPool(capacity, sid);
} else {
LOG(INFO) << "Creating BessPacketPool for " << capacity
<< " packets on node " << sid;
default_pools_[sid] = new BessPacketPool(capacity, sid);
}
CHECK(default_pools_[sid])
<< "Packet pool allocation on node " << sid << " failed!";
if (FLAGS_m == 0) {
LOG(WARNING) << "Hugepage is disabled! Creating PlainPacketPool for "
<< capacity << " packets on node " << FLAGS_n;
default_pools_[FLAGS_n] = new PlainPacketPool(capacity, FLAGS_n);
} else if (FLAGS_dpdk) {
LOG(INFO) << "Creating DpdkPacketPool for " << capacity
<< " packets on node " << FLAGS_n;
default_pools_[FLAGS_n] = new DpdkPacketPool(capacity, FLAGS_n);
} else {
LOG(INFO) << "Creating BessPacketPool for " << capacity
<< " packets on node " << FLAGS_n;
default_pools_[FLAGS_n] = new BessPacketPool(capacity, FLAGS_n);
}
CHECK(default_pools_[FLAGS_n])
<< "Packet pool allocation on node " << FLAGS_n << " failed!";
}

PacketPool::PacketPool(size_t capacity, int socket_id) {
Expand Down
17 changes: 11 additions & 6 deletions core/worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -290,15 +290,20 @@ void *Worker::Run(void *_arg) {
/* for workers, wid == rte_lcore_id() */
wid_ = arg->wid;
core_ = arg->core;
socket_ = rte_socket_id();
socket_ = FLAGS_n;

// For some reason, rte_socket_id() does not return a correct NUMA ID.
// Nevertheless, BESS should not crash.
if (socket_ == SOCKET_ID_ANY) {
LOG(WARNING) << "rte_socket_id() returned -1 for " << arg->core;
socket_ = 0;
if (rte_lcore_to_socket_id((unsigned int) core_) != (unsigned int) socket_) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style guide nit: could you explicitly brace initialize, or perhaps static_cast, here instead of c-style casting?

LOG(ERROR) << "Core specified: " << core_ << " does not exist on socket: "
<< socket_ << ". Cannot create worker";

delete scheduler_;
delete rand_;

return nullptr;
}

LOG(INFO) << "Running worker: " << wid_ << " on core " << core_ << " on socket " << socket_;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: perhaps demote this to VLOG(1)


fd_event_ = eventfd(0, 0);
CHECK_GE(fd_event_, 0);

Expand Down