Commit

Adding agent correctness tests (#845)
pszkamruk-splunk authored Jul 17, 2023
1 parent 6d3ca53 commit 0312ea3
Showing 6 changed files with 335 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -4,3 +4,5 @@
# Helm
**/charts/*.tgz
helm-charts/splunk-otel-collector/Chart.lock

**/__pycache__/*
31 changes: 31 additions & 0 deletions test/config_yaml_files/agent_tests_values.yaml
@@ -0,0 +1,31 @@
clusterName: sck-otel
splunkPlatform:
  endpoint:
  insecureSkipVerify: true
  # token: ${CI_SPLUNK_HEC_TOKEN}
  token: "a6b5e77f-d5f6-415a-bd43-930cecb12959"
  logsEnabled: true
  metricsEnabled: false
  metricsIndex:
  index: main
logsEngine: otel
agent:
  resources:
    limits:
      cpu: 300m
      # This value is being used as a source for default memory_limiter processor configurations
      memory: 500Mi
logsCollection:
  # Container logs collection
  containers:
    enabled: true
    # Container runtime. One of `docker`, `cri-o`, or `containerd`
    # Automatically discovered if not set.
    containerRuntime: ""
    # Paths of logfiles to exclude. object type is array:
    # i.e. to exclude `kube-system` namespace,
    # excludePaths: ["/var/log/pods/kube-system_*/*/*.log"]
    excludePaths: []
    # Boolean for ingesting the agent's own log
    excludeAgentLogs: false
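
Note: the tests added below apply this values file through k8s_helper.upgrade_helm, layering per-test --set overrides (index, token, endpoint) on top of it. A minimal sketch of that call, with placeholder values where the real tests read CI environment variables:

import os

from k8s_agent_pod_tests import k8s_helper

# Overrides mirroring the ones built in test_agent_correctness_tests.py;
# the index value is a placeholder and the token/host come from CI env vars.
overrides = {
    "splunkPlatform.index": "main",
    "splunkPlatform.token": os.environ.get("CI_SPLUNK_HEC_TOKEN", "dummy-token"),
    "splunkPlatform.endpoint": "https://"
    + os.environ.get("CI_SPLUNK_HOST", "localhost")
    + ":8088/services/collector",
}

# Renders "helm upgrade ci-sck --values <file> --set k=v ..." and waits for the
# collector pods to report Running before the tests query Splunk.
k8s_helper.upgrade_helm("config_yaml_files/agent_tests_values.yaml", overrides)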
5 changes: 3 additions & 2 deletions test/conftest.py
@@ -48,8 +48,9 @@ def pytest_unconfigure(config):
                                           search_query)],
                                       password=config.getoption("--splunk-password"))
     print("index=" + index + " event count=" + str(len(events)))
-    for event in events:
-        print(event)
+    print(f"Number of events for index: {index}: {len(events)}")
+    # for event in events:
+    #     print(event)


 @pytest.fixture(scope="function")
Empty file.
103 changes: 103 additions & 0 deletions test/k8s_agent_pod_tests/k8s_helper.py
@@ -0,0 +1,103 @@
import os
import time
import logging
import sys

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
formatter = logging.Formatter("%(message)s")
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(formatter)
logger.addHandler(handler)


DEFAULT_LOGS_DIR = "logs/"
GET_PODS_FILE_NAME = DEFAULT_LOGS_DIR + "get_pods.out"
AGENT_POD_LOGS = DEFAULT_LOGS_DIR + "agent_pod_logs.out"


def get_pod_full_name(pod):
    os.system("kubectl get pods > " + GET_PODS_FILE_NAME)
    lines = get_log_file_content(GET_PODS_FILE_NAME)
    for line in lines:
        tmp = line.split()
        logger.info(tmp)
        if pod in tmp[0]:
            logger.info(f"{pod} full name is: {tmp[0]}")
            return tmp[0]
    return "pod_name_not_found"


def get_log_file_content(log_file_name):
    with open(log_file_name) as f:
        lines = f.readlines()
    return lines


def get_pod_logs(pod_full_name):
    os.system(f"kubectl logs {pod_full_name} > {AGENT_POD_LOGS}")
    return get_log_file_content(AGENT_POD_LOGS)


def check_if_upgrade_successful(upgrade_log_name):
    upgrade_success_log = "has been upgraded. Happy Helming!"
    lines = get_log_file_content(upgrade_log_name)
    for line in lines:
        if upgrade_success_log in line:
            logger.info("upgrade successful")
            return True
    logger.error("upgrade failed")
    logger.info(lines)
    return False


def prepare_set_yaml_fields_cmd(fields_dict):
    cmd = ""
    if fields_dict is not None:
        for k, v in fields_dict.items():
            cmd = cmd + " --set " + k + "=" + v
    return cmd


def create_dir_if_not_exists(dir_name):
    # Check whether the specified path exists or not
    is_exist = os.path.exists(dir_name)
    if not is_exist:
        os.makedirs(dir_name)
        logger.info("The new directory is created!")


def upgrade_helm(yaml_file, fields_dict=None):
    logger.info("=======================")
    create_dir_if_not_exists(DEFAULT_LOGS_DIR)
    upgrade_sck_log = DEFAULT_LOGS_DIR + "upgrade.log"
    set_yaml_fields_cmd = prepare_set_yaml_fields_cmd(fields_dict)
    os.system(
        f"helm upgrade ci-sck --values {yaml_file}"
        + set_yaml_fields_cmd
        + f" ./../helm-charts/splunk-otel-collector/ > {upgrade_sck_log}"
    )
    check_if_upgrade_successful(upgrade_sck_log)
    wait_for_pods_initialization()


def wait_for_pods_initialization():
    break_infinite_looping_counter = 60
    for x in range(break_infinite_looping_counter):
        time.sleep(1)
        counter = 0
        get_pods_logs = DEFAULT_LOGS_DIR + "get_pods_wait_for_pods.log"
        os.system(f"kubectl get pods > {get_pods_logs}")
        lines = get_log_file_content(get_pods_logs)
        # skip the first line/row - header row
        for line in lines[1:]:
            if "Running" == line.split()[2]:
                counter += 1
            else:
                logger.info(
                    f"Not ready pod: {line.split()[0]}, status: {line.split()[2]}"
                )
        if counter == len(lines) - 1:
            break
    time.sleep(5)  # wait for ingesting logs into splunk after connector is ready
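
To illustrate the helper's behaviour (a sketch; it assumes the k8s_agent_pod_tests package is importable, e.g. when running from the test/ directory, and the override keys are examples only): prepare_set_yaml_fields_cmd turns a dict into a chain of --set flags, which upgrade_helm appends to the helm upgrade ci-sck command before polling kubectl get pods.

from k8s_agent_pod_tests import k8s_helper

# Example overrides for illustration only.
fragment = k8s_helper.prepare_set_yaml_fields_cmd(
    {"splunkPlatform.index": "main", "splunkPlatform.logsEnabled": "true"}
)
print(fragment)
# -> " --set splunkPlatform.index=main --set splunkPlatform.logsEnabled=true"

# upgrade_helm("config_yaml_files/agent_tests_values.yaml", overrides) then runs roughly:
#   helm upgrade ci-sck --values config_yaml_files/agent_tests_values.yaml \
#     --set ... ./../helm-charts/splunk-otel-collector/ > logs/upgrade.log
# and waits (up to ~60s) for all pods listed by `kubectl get pods` to be Running.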
196 changes: 196 additions & 0 deletions test/k8s_agent_pod_tests/test_agent_correctness_tests.py
@@ -0,0 +1,196 @@
import os
import re
import time

import pytest
import logging
import sys

import yaml

from ..common import check_events_from_splunk
from k8s_agent_pod_tests import k8s_helper

AGENT_VALUES_YAML = "config_yaml_files/agent_tests_values.yaml"

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
formatter = logging.Formatter("%(message)s")
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(formatter)
logger.addHandler(handler)


INDEX_MAIN = "main"


@pytest.fixture(scope="module", autouse=True)
def setup_for_agent_tests():
    # Set up code before the tests
    logger.info("Setup: prepare env before agent tests")
    # currently tests are setting their own collector configuration

    # Yield control to the tests
    yield

    # Teardown code after the tests
    logger.info("Teardown: clean up after agent tests")
    default_yaml_file = "./../ci_scripts/sck_otel_values.yaml"
    yaml_fields_recall = {
        "splunkPlatform.index": os.environ.get("CI_INDEX_EVENTS"),
        "splunkPlatform.metricsIndex": os.environ.get("CI_INDEX_METRICS"),
        "splunkPlatform.token": os.environ.get("CI_SPLUNK_HEC_TOKEN"),
        "splunkPlatform.endpoint": "https://"
        + os.environ.get("CI_SPLUNK_HOST")
        + ":8088/services/collector",
    }
    k8s_helper.upgrade_helm(default_yaml_file, yaml_fields_recall)


def test_agent_logs_metadata(setup):
    """
    Test that agent logs have the correct metadata:
    - source
    - sourcetype
    - index
    """
    # prepare connector for the test
    yaml_file = AGENT_VALUES_YAML
    yaml_fields = {
        "splunkPlatform.index": INDEX_MAIN,
        "splunkPlatform.token": os.environ.get("CI_SPLUNK_HEC_TOKEN"),
        "splunkPlatform.endpoint": "https://"
        + os.environ.get("CI_SPLUNK_HOST")
        + ":8088/services/collector",
    }
    k8s_helper.upgrade_helm(yaml_file, yaml_fields)

    full_pod_name = k8s_helper.get_pod_full_name("agent")
    search_query = (
        "index="
        + INDEX_MAIN
        + " k8s.pod.name="
        + full_pod_name
        + ' "Everything is ready. Begin running and processing data."'
    )
    logger.info(f"Query: {search_query}")
    events = check_events_from_splunk(
        start_time="-5m@m",
        url=setup["splunkd_url"],
        user=setup["splunk_user"],
        query=["search {0}".format(search_query)],
        password=setup["splunk_password"],
    )
    logger.info("Splunk received %s events in the last 5 minutes", len(events))
    assert len(events) == 1
    event = events[0]
    sourcetype = "kube:container:otel-collector"
    source_regex_part = "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
    source_pattern = (
        r"^/var/log/pods/default_"
        + full_pod_name
        + "_"
        + source_regex_part
        + "/otel-collector/0.log$"
    )
    assert INDEX_MAIN == event["index"]
    assert full_pod_name == event["k8s.pod.name"]
    assert sourcetype == event["_sourcetype"]
    assert re.match(
        source_pattern, event["source"]
    ), f"Source does not match the pattern {source_pattern}"


def test_all_agent_logs_correctly_ingested_into_splunk(setup):
    """
    Test that agent logs are correctly ingested into Splunk
    """
    logger.info("testing that agent logs are correctly ingested into Splunk")
    # prepare connector for the test
    yaml_file = AGENT_VALUES_YAML
    yaml_fields = {
        "splunkPlatform.index": INDEX_MAIN,
        "splunkPlatform.token": os.environ.get("CI_SPLUNK_HEC_TOKEN"),
        "splunkPlatform.endpoint": "https://"
        + os.environ.get("CI_SPLUNK_HOST")
        + ":8088/services/collector",
    }
    k8s_helper.upgrade_helm(yaml_file, yaml_fields)

    full_pod_name = k8s_helper.get_pod_full_name("agent")
    search_query = (
        "index="
        + INDEX_MAIN
        + " k8s.pod.name="
        + full_pod_name
        + " source=*/otel-collector/*.log"
    )
    logger.info(f"Query: {search_query}")
    events = check_events_from_splunk(
        start_time="-5m@m",
        url=setup["splunkd_url"],
        user=setup["splunk_user"],
        query=["search {0}".format(search_query)],
        password=setup["splunk_password"],
    )
    logger.info("Splunk received %s events in the last 5 minutes", len(events))
    assert len(events) >= 1  # ensure that we are getting logs
    agent_logs = k8s_helper.get_pod_logs(full_pod_name)
    match_counter = 0
    for event in events:
        for line in agent_logs:
            if event["_raw"].strip() == line.strip():
                match_counter += 1
                break
    assert len(events) == match_counter


def test_no_agent_logs_ingested_into_splunk_with_exclude_agent_logs_flag(setup):
    """
    Test that agent logs are not ingested into Splunk when the exclude agent logs flag is set
    """
    logger.info(
        "Testing that agent logs are not ingested into Splunk when the exclude agent logs flag is set"
    )
    # prepare connector for the test
    yaml_file = AGENT_VALUES_YAML
    # Open the YAML file for reading
    with open(yaml_file, "r") as file:
        data = yaml.safe_load(file)  # Parse the YAML data

    # Modify data
    data["logsCollection"]["containers"]["excludeAgentLogs"] = True

    # write YAML file
    new_yaml = "exclude_agent_logs.yaml"
    with open(new_yaml, "w") as file:
        yaml.safe_dump(data, file)

    yaml_fields = {
        "splunkPlatform.index": INDEX_MAIN,
        "splunkPlatform.token": os.environ.get("CI_SPLUNK_HEC_TOKEN"),
        "splunkPlatform.endpoint": "https://"
        + os.environ.get("CI_SPLUNK_HOST")
        + ":8088/services/collector",
    }
    k8s_helper.upgrade_helm(new_yaml, yaml_fields)
    time.sleep(10)  # wait a bit to allow time for any potential log ingestion

    search_query = (
        "index="
        + INDEX_MAIN
        + " k8s.pod.name="
        + k8s_helper.get_pod_full_name("agent")
        + " source=*/otel-collector/*.log"
    )
    logger.info(f"Query: {search_query}")
    events = check_events_from_splunk(
        start_time="-5m@m",
        url=setup["splunkd_url"],
        user=setup["splunk_user"],
        query=["search {0}".format(search_query)],
        password=setup["splunk_password"],
    )
    logger.info("Splunk received %s events in the last 5 minutes", len(events))
    assert len(events) == 0  # ensure that we are not getting any logs
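
For reference, a hedged sketch of how these tests might be invoked locally from the test/ directory; --splunk-password is visible in the conftest.py hunk above, while --splunkd-url and --splunk-user are assumptions based on the setup fixture keys, and CI_SPLUNK_HOST / CI_SPLUNK_HEC_TOKEN must be exported as in CI.

import pytest

# Hypothetical local run; option names other than --splunk-password are assumptions
# based on the setup fixture keys (splunkd_url, splunk_user, splunk_password).
pytest.main(
    [
        "k8s_agent_pod_tests/test_agent_correctness_tests.py",
        "--splunkd-url", "https://localhost:8089",  # placeholder Splunk management URL
        "--splunk-user", "admin",                   # placeholder credentials
        "--splunk-password", "changeme",
    ]
)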
