Skip to content

Commit

Permalink
Fix container failing to start with UDS (#169)
Browse files Browse the repository at this point in the history
As far as I can tell there is an issue with binding a socket and
`chmod`-ing it on macOS. I have repro'd the issue here:

```
>>> server_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
>>>
>>> server_socket.bind(socket_path)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'socket_path' is not defined
>>> server_socket.bind("/tmp/run/test.sock")
>>> import os
>>> os.chmod("/tmp/run/test.sock", 0o722)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
OSError: [Errno 22] Invalid argument: '/tmp/run/test.sock'
```

I think it has something to do with volumes and sockets not playing
nicely together on Docker for Mac.

Anyway, this crashes the test agent on start-up and it's not the end of
the world if the permissions don't get set. So fail softly in this case.
  • Loading branch information
Kyle-Verhoog authored Feb 16, 2024
1 parent 1fb5dc9 commit 8ab55f5
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 1 deletion.
5 changes: 4 additions & 1 deletion ddapm_test_agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -1267,7 +1267,10 @@ def main(args: Optional[List[str]] = None) -> None:
os.unlink(parsed_args.trace_uds_socket)
apm_sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
apm_sock.bind(parsed_args.trace_uds_socket)
os.chmod(parsed_args.trace_uds_socket, 0o722)
try:
os.chmod(parsed_args.trace_uds_socket, 0o722)
except OSError as e:
log.warning("could not set permissions on UDS socket %r due to %r", parsed_args.trace_uds_socket, str(e))
atexit.register(lambda: os.unlink(parsed_args.trace_uds_socket))

if parsed_args.trace_request_delay is not None:
Expand Down
5 changes: 5 additions & 0 deletions releasenotes/notes/uds-container-8138f8b1438b1a0a.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
fixes:
- |
Work around an issue where `chmod`ing the UDS socket in containers causes
the testagent to crash. Instead, a warning is emitted.
101 changes: 101 additions & 0 deletions tests/test_container.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import contextlib
import os
import platform
import subprocess
import time
from typing import Dict
from typing import Generator
from typing import List
from typing import Tuple

import pytest


# Skip every test in this module when the SKIP_CONTAINER environment variable
# is set (to any value, including the empty string).
pytestmark = pytest.mark.skipif(os.getenv("SKIP_CONTAINER") is not None, reason="SKIP_CONTAINER set")


class DockerContainer:
    """Lightweight handle to a docker container, identified by its container id."""

    def __init__(self, cid: str):
        self.id = cid

    def logs(self):
        """Return the output of ``docker logs`` for this container.

        Returns:
            A ``(stdout, stderr)`` tuple of decoded strings.

        Raises:
            subprocess.CalledProcessError: If ``docker logs`` exits non-zero.
        """
        completed = subprocess.run(
            ["docker", "logs", self.id],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
        out_text = completed.stdout.decode()
        err_text = completed.stderr.decode()
        return out_text, err_text


@contextlib.contextmanager
def docker_run(
    image: str,
    env: Dict[str, str],
    volumes: List[str],
    cmd: List[str] = [],
    ports: List[Tuple[str, str]] = [],
) -> Generator[DockerContainer, None, None]:
    """Start a detached docker container and kill it when the ``with`` block exits.

    The container is killed even when the body of the ``with`` block raises,
    so a failing test does not leave a container running.

    Args:
        image: Image reference passed to ``docker run``.
        env: Environment variables; each item is passed as ``-e KEY=VALUE``.
        volumes: Volume specifications; each is passed verbatim to ``-v``.
        cmd: Extra arguments appended after the image name.
        ports: Pairs ``(a, b)``; each is passed as ``-p a:b``.

    Yields:
        A ``DockerContainer`` wrapping the id printed by ``docker run``.

    Raises:
        AssertionError: If ``docker run`` exits non-zero or prints no id.
        subprocess.CalledProcessError: If ``docker kill`` fails on exit.
    """
    # NOTE: the list defaults above are only ever read, never mutated, so
    # sharing them across calls is harmless here.
    _cmd: List[str] = [
        "docker",
        "run",
        "-i",
        "--rm",
        "--detach",
    ]
    for k, v in env.items():
        _cmd.extend(["-e", "%s=%s" % (k, v)])
    for v in volumes:
        _cmd.extend(["-v", v])
    for k, v in ports:
        _cmd.extend(["-p", "%s:%s" % (k, v)])
    _cmd += [image]
    _cmd.extend(cmd)

    # Run the docker container; with --detach, docker prints the container id.
    p = subprocess.run(_cmd, capture_output=True)
    assert p.returncode == 0, p.stderr
    cid = p.stdout.decode().strip()
    assert cid
    try:
        yield DockerContainer(cid)
    finally:
        # Kill the container. This must live in ``finally``: an exception
        # raised inside the ``with`` body is re-raised at the ``yield`` above
        # and would otherwise skip the cleanup.
        subprocess.run(["docker", "kill", cid], capture_output=True, check=True)


@pytest.fixture(scope="session")
def build_image():
    """Session-scoped fixture that builds the test image and removes it afterwards.

    Runs ``docker build`` with ``Dockerfile`` and ``.`` as the build context,
    tagging the result ``ddapm-test-agent:test``. After the session the image
    is force-removed with ``docker rmi -f``. Either command raises
    ``subprocess.CalledProcessError`` on a non-zero exit.
    """
    build_argv = ["docker", "build", "-t", "ddapm-test-agent:test", "-f", "Dockerfile", "."]
    remove_argv = ["docker", "rmi", "-f", "ddapm-test-agent:test"]

    subprocess.run(build_argv, check=True)
    yield
    subprocess.run(remove_argv, check=True)


@pytest.mark.skipif(platform.system() == "Linux", reason="No socket mounting issues on Linux")
def test_container_uds(build_image, tmp_path_factory):
    """Run the agent image with a mounted UDS directory and wait for the chmod warning.

    The container's stderr is polled up to 50 times, 0.1 s apart, for the
    text "could not set permissions"; the test fails if it never appears.
    """
    uds_dir = tmp_path_factory.mktemp("uds")
    volume_spec = f"{str(uds_dir)}:/opt/datadog-agent/run"
    container_env = {"DD_APM_RECEIVER_SOCKET": "/opt/datadog-agent/run/apm.socket"}

    with docker_run(
        image="ddapm-test-agent:test",
        volumes=[volume_spec],
        env=container_env,
    ) as container:
        warning_seen = False
        for _attempt in range(50):
            _out, err = container.logs()
            if "could not set permissions" in err:
                warning_seen = True
                break
            time.sleep(0.1)
        if not warning_seen:
            raise Exception("Test agent did not start in time: %s" % err)

0 comments on commit 8ab55f5

Please sign in to comment.