Skip to content

Commit

Permalink
ROX-26025: Backport UTF-8 sanitizing fix for 4.5 (#1863)
Browse files Browse the repository at this point in the history
* ROX-26025: Sanitize UTF-8 strings in process signals (#1857)

Replace every byte of an invalid UTF-8 sequence with '?'

* Pin ansible version to <2.17 to avoid loop bug (#1837)

---------

Co-authored-by: Mauro Ezequiel Moltrasio <[email protected]>
Co-authored-by: Giles Hutton <[email protected]>
  • Loading branch information
3 people authored Sep 26, 2024
1 parent 26b423f commit 1491bba
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 12 deletions.
3 changes: 2 additions & 1 deletion ansible/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# TODO: unpin if RHEL 8 VMs on GCP update their python interpreter to 3.7+
# https://github.com/ansible/ansible/blob/v2.17.0/changelogs/CHANGELOG-v2.17.rst#removed-features-previously-deprecated
ansible<10
ansible-core==2.16.10
ansible==9.7.0
# TODO: unpin after https://github.com/docker/docker-py gets a release with
# https://github.com/docker/docker-py/commit/7785ad913ddf2d86478f08278bb2c488d05a29ff
requests==2.31.0
Expand Down
1 change: 1 addition & 0 deletions ansible/requirements.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
collections:
- google.cloud
- community.general
- community.docker
- containers.podman
Expand Down
41 changes: 31 additions & 10 deletions collector/lib/ProcessSignalFormatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,11 @@ std::string extract_proc_args(sinsp_threadinfo* tinfo) {
if (tinfo->m_args.empty()) return "";
std::ostringstream args;
for (auto it = tinfo->m_args.begin(); it != tinfo->m_args.end();) {
args << *it++;
auto arg = *it++;
auto arg_sanitized = SanitizedUTF8(arg);

args << ((arg_sanitized ? *arg_sanitized : arg));

if (it != tinfo->m_args.end()) args << " ";
}
return args.str();
Expand Down Expand Up @@ -106,22 +110,37 @@ ProcessSignal* ProcessSignalFormatter::CreateProcessSignal(sinsp_evt* event) {
const std::string* name = event_extractor_->get_comm(event);
const std::string* exepath = event_extractor_->get_exepath(event);

std::optional<std::string> name_sanitized;
std::optional<std::string> exepath_sanitized;

if (name) {
name_sanitized = SanitizedUTF8(*name);
}

if (exepath) {
exepath_sanitized = SanitizedUTF8(*exepath);
}

// set name (if name is missing or empty, try to use exec_file_path)
if (name && !name->empty() && *name != "<NA>") {
signal->set_name(*name);
signal->set_name(name_sanitized ? *name_sanitized : *name);
} else if (exepath && !exepath->empty() && *exepath != "<NA>") {
signal->set_name(*exepath);
signal->set_name(exepath_sanitized ? *exepath_sanitized : *exepath);
}

// set exec_file_path (if exec_file_path is missing or empty, try to use name)
if (exepath && !exepath->empty() && *exepath != "<NA>") {
signal->set_exec_file_path(*exepath);
signal->set_exec_file_path(exepath_sanitized ? *exepath_sanitized : *exepath);
} else if (name && !name->empty() && *name != "<NA>") {
signal->set_exec_file_path(*name);
signal->set_exec_file_path(name_sanitized ? *name_sanitized : *name);
}

// set process arguments
if (const char* args = event_extractor_->get_proc_args(event)) signal->set_args(args);
if (const char* args = event_extractor_->get_proc_args(event)) {
std::string args_str = args;
auto args_sanitized = SanitizedUTF8(args_str);
signal->set_args(args_sanitized ? *args_sanitized : args_str);
}

// set pid
if (const int64_t* pid = event_extractor_->get_pid(event)) signal->set_pid(*pid);
Expand Down Expand Up @@ -164,20 +183,22 @@ ProcessSignal* ProcessSignalFormatter::CreateProcessSignal(sinsp_threadinfo* tin
signal->set_id(UUIDStr());

const auto& name = tinfo->m_comm;
auto name_sanitized = SanitizedUTF8(name);
const auto& exepath = tinfo->m_exepath;
auto exepath_sanitized = SanitizedUTF8(exepath);

// set name (if name is missing or empty, try to use exec_file_path)
if (!name.empty() && name != "<NA>") {
signal->set_name(name);
signal->set_name(name_sanitized ? *name_sanitized : name);
} else if (!exepath.empty() && exepath != "<NA>") {
signal->set_name(exepath);
signal->set_name(exepath_sanitized ? *exepath_sanitized : exepath);
}

// set exec_file_path (if exec_file_path is missing or empty, try to use name)
if (!exepath.empty() && exepath != "<NA>") {
signal->set_exec_file_path(exepath);
signal->set_exec_file_path(exepath_sanitized ? *exepath_sanitized : exepath);
} else if (!name.empty() && name != "<NA>") {
signal->set_exec_file_path(name);
signal->set_exec_file_path(name_sanitized ? *name_sanitized : name);
}

// set the process as coming from a scrape as opposed to an exec
Expand Down
15 changes: 15 additions & 0 deletions collector/lib/Utility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ extern "C" {

#include <libsinsp/sinsp.h>

#include <google/protobuf/stubs/common.h>

#include "HostInfo.h"
#include "Logging.h"
#include "Utility.h"
Expand Down Expand Up @@ -247,4 +249,17 @@ std::optional<std::string_view> ExtractContainerIDFromCgroup(std::string_view cg
}
return std::make_optional(container_id_part.substr(0, SHORT_CONTAINER_ID_LENGTH));
}

std::optional<std::string> SanitizedUTF8(const std::string& str) {
std::unique_ptr<char[]> work_buffer(new char[str.size()]);
char* sanitized;

sanitized = google::protobuf::internal::UTF8CoerceToStructurallyValid(str, work_buffer.get(), '?');

if (sanitized != work_buffer.get()) {
return std::nullopt;
}
return std::string(sanitized, str.size());
}

} // namespace collector
8 changes: 7 additions & 1 deletion collector/lib/Utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ ScopedLock<Mutex> Lock(Mutex& mutex) {
extern const std::string kKernelModulesDir;

std::optional<std::string_view> ExtractContainerIDFromCgroup(std::string_view cgroup);
} // namespace collector

// Replace any occurrence of an invalid UTF-8 sequence with the '?' character
// Returns :
// - a new string with invalid characters replaced.
// - nullopt if there is no invalid character (the input string is valid).
std::optional<std::string> SanitizedUTF8(const std::string& str);

} // namespace collector
#endif // _UTILITY_H_
21 changes: 21 additions & 0 deletions collector/test/UtilityTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,25 @@ TEST(ExtractContainerIDFromCgroupTest, TestExtractContainerIDFromCgroup) {
EXPECT_EQ(short_container_id, c.expected_output);
}
}

TEST(SanitizeUTF8Test, TestSanitizeUTF8_Invalid) {
std::string input(
"ab\x80"
"cd");
std::string expected_output("ab?cd");

auto output = SanitizedUTF8(input);

EXPECT_TRUE(output.has_value());
EXPECT_EQ(*output, expected_output);
}

TEST(SanitizeUTF8Test, TestSanitizeUTF8_Valid) {
std::string input("abcd");

auto output = SanitizedUTF8(input);

EXPECT_FALSE(output);
}

} // namespace collector

0 comments on commit 1491bba

Please sign in to comment.