Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ROX-26025: Sanitize UTF-8 strings in process signals #1857

Merged
merged 9 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 29 additions & 10 deletions collector/lib/ProcessSignalFormatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,11 @@ std::string extract_proc_args(sinsp_threadinfo* tinfo) {
}
std::ostringstream args;
for (auto it = tinfo->m_args.begin(); it != tinfo->m_args.end();) {
args << *it++;
auto arg = *it++;
auto arg_sanitized = SanitizedUTF8(arg);

args << ((arg_sanitized ? *arg_sanitized : arg));

if (it != tinfo->m_args.end()) {
args << " ";
}
Expand Down Expand Up @@ -114,23 +118,36 @@ ProcessSignal* ProcessSignalFormatter::CreateProcessSignal(sinsp_evt* event) {
const std::string* name = event_extractor_->get_comm(event);
const std::string* exepath = event_extractor_->get_exepath(event);

std::optional<std::string> name_sanitized;
std::optional<std::string> exepath_sanitized;

if (name) {
name_sanitized = SanitizedUTF8(*name);
}

if (exepath) {
exepath_sanitized = SanitizedUTF8(*exepath);
}

// set name (if name is missing or empty, try to use exec_file_path)
if (name && !name->empty() && *name != "<NA>") {
signal->set_name(*name);
signal->set_name(name_sanitized ? *name_sanitized : *name);
} else if (exepath && !exepath->empty() && *exepath != "<NA>") {
signal->set_name(*exepath);
signal->set_name(exepath_sanitized ? *exepath_sanitized : *exepath);
}

// set exec_file_path (if exec_file_path is missing or empty, try to use name)
if (exepath && !exepath->empty() && *exepath != "<NA>") {
signal->set_exec_file_path(*exepath);
signal->set_exec_file_path(exepath_sanitized ? *exepath_sanitized : *exepath);
} else if (name && !name->empty() && *name != "<NA>") {
signal->set_exec_file_path(*name);
signal->set_exec_file_path(name_sanitized ? *name_sanitized : *name);
}

// set process arguments
if (const char* args = event_extractor_->get_proc_args(event)) {
signal->set_args(args);
std::string args_str = args;
auto args_sanitized = SanitizedUTF8(args_str);
signal->set_args(args_sanitized ? *args_sanitized : args_str);
}

// set pid
Expand Down Expand Up @@ -180,20 +197,22 @@ ProcessSignal* ProcessSignalFormatter::CreateProcessSignal(sinsp_threadinfo* tin
signal->set_id(UUIDStr());

const auto& name = tinfo->m_comm;
auto name_sanitized = SanitizedUTF8(name);
const auto& exepath = tinfo->m_exepath;
auto exepath_sanitized = SanitizedUTF8(exepath);

// set name (if name is missing or empty, try to use exec_file_path)
if (!name.empty() && name != "<NA>") {
signal->set_name(name);
signal->set_name(name_sanitized ? *name_sanitized : name);
} else if (!exepath.empty() && exepath != "<NA>") {
signal->set_name(exepath);
signal->set_name(exepath_sanitized ? *exepath_sanitized : exepath);
}

// set exec_file_path (if exec_file_path is missing or empty, try to use name)
if (!exepath.empty() && exepath != "<NA>") {
signal->set_exec_file_path(exepath);
signal->set_exec_file_path(exepath_sanitized ? *exepath_sanitized : exepath);
} else if (!name.empty() && name != "<NA>") {
signal->set_exec_file_path(name);
signal->set_exec_file_path(name_sanitized ? *name_sanitized : name);
}

// set the process as coming from a scrape as opposed to an exec
Expand Down
15 changes: 15 additions & 0 deletions collector/lib/Utility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ extern "C" {

#include <libsinsp/sinsp.h>

#include <google/protobuf/stubs/common.h>

#include "HostInfo.h"
#include "Logging.h"
#include "Utility.h"
Expand Down Expand Up @@ -230,4 +232,17 @@ std::optional<std::string_view> ExtractContainerIDFromCgroup(std::string_view cg
}
return std::make_optional(container_id_part.substr(0, SHORT_CONTAINER_ID_LENGTH));
}

std::optional<std::string> SanitizedUTF8(const std::string& str) {
std::unique_ptr<char[]> work_buffer(new char[str.size()]);
char* sanitized;

sanitized = google::protobuf::internal::UTF8CoerceToStructurallyValid(str, work_buffer.get(), '?');

if (sanitized != work_buffer.get()) {
return std::nullopt;
}
return std::string(sanitized, str.size());
}

} // namespace collector
8 changes: 7 additions & 1 deletion collector/lib/Utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,12 @@ ScopedLock<Mutex> Lock(Mutex& mutex) {
#define ssizeof(x) static_cast<ssize_t>(sizeof(x))

std::optional<std::string_view> ExtractContainerIDFromCgroup(std::string_view cgroup);
} // namespace collector

// Replace any occurrence of an invalid UTF-8 sequence with the '?' character
// Returns :
// - a new string with invalid characters replaced.
// - nullopt if there is no invalid character (the input string is valid).
std::optional<std::string> SanitizedUTF8(const std::string& str);

} // namespace collector
#endif // _UTILITY_H_
21 changes: 21 additions & 0 deletions collector/test/UtilityTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,25 @@ TEST(ExtractContainerIDFromCgroupTest, TestExtractContainerIDFromCgroup) {
EXPECT_EQ(short_container_id, c.expected_output);
}
}

TEST(SanitizeUTF8Test, TestSanitizeUTF8_Invalid) {
std::string input(
"ab\x80"
"cd");
std::string expected_output("ab?cd");

auto output = SanitizedUTF8(input);

EXPECT_TRUE(output.has_value());
EXPECT_EQ(*output, expected_output);
ovalenti marked this conversation as resolved.
Show resolved Hide resolved
}

TEST(SanitizeUTF8Test, TestSanitizeUTF8_Valid) {
std::string input("abcd");

auto output = SanitizedUTF8(input);

EXPECT_FALSE(output);
}

} // namespace collector
Loading