From fec6080cae399d3c43a6696f63c35e09c5aaecbb Mon Sep 17 00:00:00 2001 From: Dawei Huang Date: Fri, 8 Nov 2024 13:22:36 -0600 Subject: [PATCH 1/9] DBUS API for GNOI System.SetPackage (#171) Create image_service class for supporting GNOI implementation for SetPackage. TESTED: unit test. --- host_modules/image_service.py | 135 ++++++++ tests/host_modules/image_service_test.py | 372 +++++++++++++++++++++++ 2 files changed, 507 insertions(+) create mode 100644 host_modules/image_service.py create mode 100644 tests/host_modules/image_service_test.py diff --git a/host_modules/image_service.py b/host_modules/image_service.py new file mode 100644 index 00000000..e30e1293 --- /dev/null +++ b/host_modules/image_service.py @@ -0,0 +1,135 @@ +""" +This module provides services related to SONiC images, including: +1) Downloading images +2) Installing images +3) Calculating checksums for images +""" + +import errno +import hashlib +import logging +import os +import requests +import stat +import subprocess + +from host_modules import host_service +import tempfile + +MOD_NAME = "image_service" + +DEFAULT_IMAGE_SAVE_AS = "/tmp/downloaded-sonic.bin" + +logger = logging.getLogger(__name__) + + +class ImageService(host_service.HostModule): + """DBus endpoint that handles downloading and installing SONiC images""" + + @host_service.method( + host_service.bus_name(MOD_NAME), in_signature="ss", out_signature="is" + ) + def download(self, image_url, save_as): + """ + Download a SONiC image. + + Args: + image_url: url for remote image. + save_as: local path for the downloaded image. The directory must exist and be *all* writable. + """ + logger.info("Download new sonic image from {} as {}".format(image_url, save_as)) + # Check if the directory exists, is absolute and has write permission. 
+ if not os.path.isabs(save_as): + logger.error("The path {} is not an absolute path".format(save_as)) + return errno.EINVAL, "Path is not absolute" + dir = os.path.dirname(save_as) + if not os.path.isdir(dir): + logger.error("Directory {} does not exist".format(dir)) + return errno.ENOENT, "Directory does not exist" + st_mode = os.stat(dir).st_mode + if ( + not (st_mode & stat.S_IWUSR) + or not (st_mode & stat.S_IWGRP) + or not (st_mode & stat.S_IWOTH) + ): + logger.error("Directory {} is not all writable {}".format(dir, st_mode)) + return errno.EACCES, "Directory is not all writable" + try: + response = requests.get(image_url, stream=True) + if response.status_code != 200: + logger.error( + "Failed to download image: HTTP status code {}".format( + response.status_code + ) + ) + return errno.EIO, "HTTP error: {}".format(response.status_code) + + with tempfile.NamedTemporaryFile(dir="/tmp", delete=False) as tmp_file: + for chunk in response.iter_content(chunk_size=8192): + tmp_file.write(chunk) + temp_file_path = tmp_file.name + os.replace(temp_file_path, save_as) + return 0, "Download successful" + except Exception as e: + logger.error("Failed to write downloaded image to disk: {}".format(e)) + return errno.EIO, str(e) + + @host_service.method( + host_service.bus_name(MOD_NAME), in_signature="s", out_signature="is" + ) + def install(self, where): + """ + Install a a sonic image: + + Args: + where: either a local path or a remote url pointing to the image. 
+ """ + logger.info("Using sonic-installer to install the image at {}.".format(where)) + cmd = ["/usr/local/bin/sonic-installer", "install", "-y", where] + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + msg = "" + if result.returncode: + lines = result.stderr.decode().split("\n") + for line in lines: + if "Error" in line: + msg = line + break + return result.returncode, msg + + @host_service.method( + host_service.bus_name(MOD_NAME), in_signature="ss", out_signature="is" + ) + def checksum(self, file_path, algorithm): + """ + Calculate the checksum of a file. + + Args: + file_path: path to the file. + algorithm: checksum algorithm to use (sha256, sha512, md5). + """ + + logger.info("Calculating {} checksum for file {}".format(algorithm, file_path)) + + if not os.path.isfile(file_path): + logger.error("File {} does not exist".format(file_path)) + return errno.ENOENT, "File does not exist" + + hash_func = None + if algorithm == "sha256": + hash_func = hashlib.sha256() + elif algorithm == "sha512": + hash_func = hashlib.sha512() + elif algorithm == "md5": + hash_func = hashlib.md5() + else: + logger.error("Unsupported algorithm: {}".format(algorithm)) + return errno.EINVAL, "Unsupported algorithm" + + try: + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_func.update(chunk) + return 0, hash_func.hexdigest() + except Exception as e: + logger.error("Failed to calculate checksum: {}".format(e)) + return errno.EIO, str(e) diff --git a/tests/host_modules/image_service_test.py b/tests/host_modules/image_service_test.py new file mode 100644 index 00000000..22e70e82 --- /dev/null +++ b/tests/host_modules/image_service_test.py @@ -0,0 +1,372 @@ +import hashlib +import subprocess +import sys +import os +import stat +import pytest +from unittest import mock +from host_modules.image_service import ImageService + + +class TestImageService(object): + @mock.patch("dbus.SystemBus") + 
@mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + @mock.patch("os.path.isdir") + @mock.patch("os.stat") + @mock.patch("requests.get") + def test_download_success( + self, mock_get, mock_stat, mock_isdir, MockInit, MockBusName, MockSystemBus + ): + """ + Test that the `download` method successfully downloads an image when the directory exists and is writable. + """ + # Arrange + image_service = ImageService(mod_name="image_service") + image_url = "http://example.com/sonic_image.img" + save_as = "/tmp/sonic_image.img" + mock_isdir.return_value = True + mock_stat.return_value.st_mode = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH + mock_response = mock.Mock() + mock_response.iter_content = lambda chunk_size: [b"data"] + mock_response.status_code = 200 + mock_response.iter_content = lambda chunk_size: iter([b"data"]) + mock_get.return_value = mock_response + + # Act + rc, msg = image_service.download(image_url, save_as) + + # Assert + assert rc == 0, "wrong return value" + assert ( + "download" in msg.lower() and "successful" in msg.lower() + ), "message should contains 'download' and 'successful'" + mock_get.assert_called_once_with(image_url, stream=True) + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + @mock.patch("os.path.isdir") + def test_download_fail_no_dir( + self, mock_isdir, MockInit, MockBusName, MockSystemBus + ): + """ + Test that the `download` method fails when the directory does not exist. 
+ """ + # Arrange + image_service = ImageService(mod_name="image_service") + image_url = "http://example.com/sonic_image.img" + save_as = "/nonexistent_dir/sonic_image.img" + mock_isdir.return_value = False + + # Act + rc, msg = image_service.download(image_url, save_as) + + # Assert + assert rc != 0, "wrong return value" + assert ( + "not" in msg.lower() and "exist" in msg.lower() + ), "message should contains 'not' and 'exist'" + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + @mock.patch("os.path.isdir") + @mock.patch("os.stat") + def test_download_fail_missing_other_write( + self, mock_stat, mock_isdir, MockInit, MockBusName, MockSystemBus + ): + """ + Test that the `download` method fails when the directory is not writable by others. + """ + # Arrange + image_service = ImageService(mod_name="image_service") + image_url = "http://example.com/sonic_image.img" + save_as = "/tmp/sonic_image.img" + mock_isdir.return_value = True + mock_stat.return_value.st_mode = ( + stat.S_IWUSR | stat.S_IWGRP + ) # Missing write permission for others + + # Act + rc, msg = image_service.download(image_url, save_as) + + # Assert + assert rc != 0, "wrong return value" + assert ( + "permission" in msg.lower() or "writable" in msg.lower() + ), "message should contain 'permission' or 'writable'" + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + def test_download_failed_relative_path(self, MockInit, MockBusName, MockSystemBus): + """ + Test that the `download` method fails when the save_as path is not absolute. 
+ """ + # Arrange + image_service = ImageService(mod_name="image_service") + image_url = "http://example.com/sonic_image.img" + save_as = "relative/path/sonic_image.img" + + # Act + rc, msg = image_service.download(image_url, save_as) + + # Assert + assert rc != 0, "wrong return value" + assert "absolute" in msg.lower(), "message should contain 'absolute'" + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + @mock.patch("os.path.isdir") + @mock.patch("os.stat") + @mock.patch("requests.get") + def test_download_failed_not_found( + self, mock_get, mock_stat, mock_isdir, MockInit, MockBusName, MockSystemBus + ): + """ + Test that the `download` method fails when the image URL is not found (404 error). + """ + # Arrange + image_service = ImageService(mod_name="image_service") + image_url = "http://example.com/nonexistent_image.img" + save_as = "/tmp/sonic_image.img" + mock_isdir.return_value = True + mock_stat.return_value.st_mode = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH + mock_response = mock.Mock() + mock_response.status_code = 404 + mock_get.return_value = mock_response + + # Act + rc, msg = image_service.download(image_url, save_as) + + # Assert + assert rc != 0, "wrong return value" + assert ( + "404" in msg and "error" in msg.lower() + ), "message should contain '404' and 'error'" + mock_get.assert_called_once_with(image_url, stream=True) + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + @mock.patch("os.path.isdir") + @mock.patch("os.stat") + @mock.patch("requests.get") + @mock.patch("tempfile.NamedTemporaryFile") + def test_download_fail_write_io_exception( + self, + mock_tempfile, + mock_get, + mock_stat, + mock_isdir, + MockInit, + MockBusName, + MockSystemBus, + ): + """ + Test that the `download` method fails when there is an IOError while writing the file. 
+ """ + # Arrange + image_service = ImageService(mod_name="image_service") + image_url = "http://example.com/sonic_image.img" + save_as = "/tmp/sonic_image.img" + mock_isdir.return_value = True + mock_stat.return_value.st_mode = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH + mock_response = mock.Mock() + mock_response.iter_content = lambda chunk_size: [b"data"] + mock_response.status_code = 200 + mock_get.return_value = mock_response + mock_tempfile.side_effect = IOError("Disk write error") + + # Act + rc, msg = image_service.download(image_url, save_as) + + # Assert + assert rc != 0, "wrong return value" + assert ( + "disk write error" in msg.lower() + ), "message should contain 'disk write error'" + mock_get.assert_called_once_with(image_url, stream=True) + mock_tempfile.assert_called_once_with( + delete=False, dir=os.path.dirname(save_as) + ) + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + @mock.patch("subprocess.run") + def test_install_success(self, mock_run, MockInit, MockBusName, MockSystemBus): + """ + Test that the `install` method successfully installs an image. 
+ """ + # Arrange + image_service = ImageService(mod_name="image_service") + where = "/tmp/sonic_image.img" + mock_result = mock.Mock() + mock_result.returncode = 0 + mock_result.stderr = b"" + mock_run.return_value = mock_result + + # Act + rc, msg = image_service.install(where) + + # Assert + assert rc == 0, "wrong return value" + assert msg == "", "message should be empty on success" + mock_run.assert_called_once_with( + ["/usr/local/bin/sonic-installer", "install", "-y", where], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + @mock.patch("subprocess.run") + def test_install_fail(self, mock_run, MockInit, MockBusName, MockSystemBus): + """ + Test that the `install` method fails when the installation command returns a non-zero exit code. + """ + # Arrange + image_service = ImageService(mod_name="image_service") + where = "/tmp/sonic_image.img" + mock_result = mock.Mock() + mock_result.returncode = 1 + mock_result.stderr = b"Error: Installation failed" + mock_run.return_value = mock_result + + # Act + rc, msg = image_service.install(where) + + # Assert + assert rc != 0, "wrong return value" + assert "Error" in msg, "message should contain 'Error'" + mock_run.assert_called_once_with( + ["/usr/local/bin/sonic-installer", "install", "-y", where], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + @pytest.mark.parametrize( + "algorithm, expected_checksum", + [ + ("sha256", hashlib.sha256(b"test data").hexdigest()), + ("sha512", hashlib.sha512(b"test data").hexdigest()), + ("md5", hashlib.md5(b"test data").hexdigest()), + ], + ) + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + @mock.patch("os.path.isfile") + @mock.patch("builtins.open", new_callable=mock.mock_open, read_data=b"test data") + def test_checksum( + self, + mock_open, + mock_isfile, + MockInit, + 
MockBusName, + MockSystemBus, + algorithm, + expected_checksum, + ): + """ + Test that the `checksum` method correctly calculates the checksum of a file for different algorithms. + """ + # Arrange + image_service = ImageService(mod_name="image_service") + file_path = "/tmp/test_file.img" + mock_isfile.return_value = True + + # Act + rc, checksum = image_service.checksum(file_path, algorithm) + + # Assert + assert rc == 0, "wrong return value" + assert checksum == expected_checksum, "checksum does not match expected value" + mock_isfile.assert_called_once_with(file_path) + mock_open.assert_called_once_with(file_path, "rb") + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + @mock.patch("os.path.isfile") + def test_checksum_no_such_file( + self, mock_isfile, MockInit, MockBusName, MockSystemBus + ): + """ + Test that the `checksum` method fails when the file does not exist. + """ + # Arrange + image_service = ImageService(mod_name="image_service") + file_path = "/nonexistent_dir/test_file.img" + algorithm = "sha256" + mock_isfile.return_value = False + + # Act + rc, msg = image_service.checksum(file_path, algorithm) + + # Assert + assert rc != 0, "wrong return value" + assert "not exist" in msg.lower(), "message should contain 'not exist'" + mock_isfile.assert_called_once_with(file_path) + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + @mock.patch("os.path.isfile") + def test_checksum_unsupported_algorithm( + self, mock_isfile, MockInit, MockBusName, MockSystemBus + ): + """ + Test that the `checksum` method fails when an unsupported algorithm is provided. 
+ """ + # Arrange + image_service = ImageService(mod_name="image_service") + file_path = "/tmp/test_file.img" + algorithm = "unsupported_algo" + mock_isfile.return_value = True + + # Act + rc, msg = image_service.checksum(file_path, algorithm) + + # Assert + assert rc != 0, "wrong return value" + assert ( + "unsupported algorithm" in msg.lower() + ), "message should contain 'unsupported algorithm'" + mock_isfile.assert_called_once_with(file_path) + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + @mock.patch("os.path.isfile") + @mock.patch("builtins.open", new_callable=mock.mock_open, read_data=b"test data") + def test_checksum_general_exception( + self, mock_open, mock_isfile, MockInit, MockBusName, MockSystemBus + ): + """ + Test that the `checksum` method handles general exceptions during file reading. + """ + # Arrange + image_service = ImageService(mod_name="image_service") + file_path = "/tmp/test_file.img" + algorithm = "sha256" + mock_isfile.return_value = True + + with mock.patch.object(hashlib, algorithm) as mock_hash_func: + mock_hash_instance = mock_hash_func.return_value + mock_hash_instance.update.side_effect = Exception("General error") + + # Act + rc, msg = image_service.checksum(file_path, algorithm) + + # Assert + assert rc != 0, "wrong return value" + assert ( + "general error" in msg.lower() + ), "message should contain 'general error'" + mock_isfile.assert_called_once_with(file_path) + mock_open.assert_called_once_with(file_path, "rb") From ff73070d7def8689bc92dd0e28d3dab5d0254450 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam <163894573+vvolam@users.noreply.github.com> Date: Fri, 8 Nov 2024 15:44:16 -0800 Subject: [PATCH 2/9] Add execute_reboot dbus_interface (#164) As part of sonic-net/sonic-gnmi#286 changes, we require dbus support for executing HALT method. This PR adds support for execute_reboot for invoking reboot methods. 
--- host_modules/systemd_service.py | 22 +++++++++ tests/host_modules/systemd_service_test.py | 54 ++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/host_modules/systemd_service.py b/host_modules/systemd_service.py index 19a6dd60..974af1e0 100644 --- a/host_modules/systemd_service.py +++ b/host_modules/systemd_service.py @@ -1,5 +1,6 @@ """Systemd service handler""" +from enum import Enum from host_modules import host_service import subprocess @@ -7,6 +8,11 @@ ALLOWED_SERVICES = ['snmp', 'swss', 'dhcp_relay', 'radv', 'restapi', 'lldp', 'sshd', 'pmon', 'rsyslog', 'telemetry'] EXIT_FAILURE = 1 +# Define an Enum for Reboot Methods which are defined as in +# https://github.com/openconfig/gnoi/blob/main/system/system.pb.go#L27 +class RebootMethod(Enum): + COLD = 1 + HALT = 3 class SystemdService(host_service.HostModule): """ @@ -48,3 +54,19 @@ def stop_service(self, service): if result.returncode: msg = result.stderr.decode() return result.returncode, msg + + @host_service.method(host_service.bus_name(MOD_NAME), in_signature='i', out_signature='is') + def execute_reboot(self, rebootmethod): + if rebootmethod == RebootMethod.COLD: + cmd = ['/usr/local/bin/reboot'] + elif rebootmethod == RebootMethod.HALT: + cmd = ['/usr/local/bin/reboot','-p'] + else: + return EXIT_FAILURE, "{}: Invalid reboot method: {}".format(MOD_NAME, rebootmethod) + + result = subprocess.run(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + msg = '' + if result.returncode: + msg = result.stderr.decode() + + return result.returncode, msg diff --git a/tests/host_modules/systemd_service_test.py b/tests/host_modules/systemd_service_test.py index d8d5083f..54961498 100644 --- a/tests/host_modules/systemd_service_test.py +++ b/tests/host_modules/systemd_service_test.py @@ -84,3 +84,57 @@ def test_service_stop_empty(self, MockInit, MockBusName, MockSystemBus): ret, msg = systemd_service_stub.stop_service(service) assert ret == 1 assert "stop_service called with no 
service specified" in msg + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + def test_execute_reboot_cold(self, MockInit, MockBusName, MockSystemBus): + # Mock subprocess.run + with mock.patch("subprocess.run") as mock_run: + # Mock the result of subprocess.run + res_mock = mock.Mock() + test_ret = 0 + test_msg = b"Succeeded" + res_mock.configure_mock(returncode=test_ret, stderr=test_msg) + mock_run.return_value = res_mock + + method = systemd_service.RebootMethod.COLD + systemd_service_stub = systemd_service.SystemdService(systemd_service.MOD_NAME) + + # Execute the reboot method + ret, msg = systemd_service_stub.execute_reboot(method) + + # Assert the correct command was called + call_args = mock_run.call_args[0][0] + assert "/usr/local/bin/reboot" in call_args, f"Expected 'reboot' command, but got: {call_args}" + + # Assert the return values are correct + assert ret == test_ret, f"Expected return code {test_ret}, got {ret}" + assert msg == "", f"Expected return message '', got {msg}" + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + def test_execute_reboot_halt(self, MockInit, MockBusName, MockSystemBus): + # Mock subprocess.run + with mock.patch("subprocess.run") as mock_run: + # Mock the result of subprocess.run + res_mock = mock.Mock() + test_ret = 0 + test_msg = b"Succeeded" + res_mock.configure_mock(returncode=test_ret, stderr=test_msg) + mock_run.return_value = res_mock + + method = systemd_service.RebootMethod.HALT + systemd_service_stub = systemd_service.SystemdService(systemd_service.MOD_NAME) + + # Execute the reboot method + ret, msg = systemd_service_stub.execute_reboot(method) + + # Assert the correct command was called + call_args = mock_run.call_args[0][0] + assert "/usr/local/bin/reboot" in call_args, f"Expected 'reboot' command, but got: {call_args}" + + # Assert the return values are correct + assert ret == 
test_ret, f"Expected return code {test_ret}, got {ret}" + assert msg == "", f"Expected return message '', got {msg}" From c05d43e24dca74fd2cae83dad9936232c1011cb3 Mon Sep 17 00:00:00 2001 From: anamehra <54692434+anamehra@users.noreply.github.com> Date: Mon, 18 Nov 2024 12:51:58 -0800 Subject: [PATCH 3/9] featured: use run() to run cli commands in place of check_call() (#177) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: sonic-net/sonic-buildimage#20662 During some reboots, it was observed that some times featured.service script command fails to start the services like pmon, snmp, lldp etc. From logs, it was observed that 'sudo systemctl enable ' command failed with errorcode 13 (SIGPIPE. 2024 Oct 29 01:31:26.191236 aaa14-rp INFO featured: Running cmd: '['sudo', 'systemctl', 'unmask', 'pmon.service']' 2024 Oct 29 01:31:26.211167 aaa14-rp INFO systemd[1]: Reloading. 2024 Oct 29 01:31:27.212381 aaa14-rp INFO featured: Running cmd: '['sudo', 'systemctl', 'enable', 'pmon.service']' 2024 Oct 29 01:31:27.232428 aaa14-rp INFO systemd[1]: Reloading. 2024 Oct 29 01:31:28.135667 aaa14-rp ERR featured: ['sudo', 'systemctl', 'enable', 'pmon.service'] - failed: return code - -13, output:#012None 2024 Oct 29 01:31:28.135746 aaa14-rp ERR featured: Feature 'pmon.service' failed to be enabled and started 2024 Oct 29 01:34:08.661711 aaa14-rp INFO featured: Running cmd: '['sudo', 'systemctl', 'enable', 'gnmi.service']' 2024 Oct 29 01:34:08.677242 aaa14-rp INFO systemd[1]: Reloading. 2024 Oct 29 01:34:09.316554 aaa14-rp ERR featured: ['sudo', 'systemctl', 'enable', 'gnmi.service'] - failed: return code - -13, output:#012None 2024 Oct 29 01:34:09.316791 aaa14-rp ERR featured: Feature 'gnmi.service' failed to be enabled and started The issue does not recover and the pmon and other services never starts. On supervisor this also leads to swss, syncd and other related docker to stay down. 
In general systemctl enable does not work for some services like pmon, snmp, lldp etc as there is no WantBy directive set for these services in unit file. The command returns stderr : "The unit files have no installation config (WantedBy=, RequiredBy=, Also=, Alias= settings in the [Install] section, and DefaultInstance= for template units). This means they are not meant to be enabled using systemctl. Possible reasons for having this kind of units are: • A unit may be statically enabled by being symlinked from another unit's .wants/ or .requires/ directory. • A unit's purpose may be to act as a helper for some other unit which has a requirement dependency on it. • A unit may be started when needed via activation (socket, path, timer, D-Bus, udev, scripted systemctl call, ...). • In case of template units, the unit is meant to be enabled with some instance name specified. ” featured python script uses subprocess.check_call() function to invoke the command which looks like is not very reliable at handling the stderr and may cause SIGPIPE with big buffer data. Modifying the function to use subprocess.run() resolves this issue. run() is more reliable at handing the return data. Validated the change with multiple reboots. 
--- scripts/featured | 6 +- tests/featured/featured_test.py | 123 ++++++++-------- tests/featured/test_vectors.py | 250 ++++++++++++++++---------------- 3 files changed, 192 insertions(+), 187 deletions(-) diff --git a/scripts/featured b/scripts/featured index 1f812bf0..a3c91c28 100644 --- a/scripts/featured +++ b/scripts/featured @@ -26,7 +26,11 @@ PORT_INIT_TIMEOUT_SEC = 180 def run_cmd(cmd, log_err=True, raise_exception=False): try: - subprocess.check_call(cmd) + result = subprocess.run(cmd, + capture_output=True, + check=True, text=True) + syslog.syslog(syslog.LOG_INFO, "Output: {} , Stderr: {}" + .format(result.stdout, result.stderr)) except Exception as err: if log_err: syslog.syslog(syslog.LOG_ERR, "{} - failed: return code - {}, output:\n{}" diff --git a/tests/featured/featured_test.py b/tests/featured/featured_test.py index dbb3ca29..a42d2bb8 100644 --- a/tests/featured/featured_test.py +++ b/tests/featured/featured_test.py @@ -175,9 +175,9 @@ def test_sync_state_field(self, test_scenario_name, config_data, fs): feature_table_state_db_calls = self.get_state_db_set_calls(feature_table) self.checks_systemd_config_file(device_type, config_data['config_db']['FEATURE'], feature_systemd_name_map) - mocked_subprocess.check_call.assert_has_calls(config_data['enable_feature_subprocess_calls'], + mocked_subprocess.run.assert_has_calls(config_data['enable_feature_subprocess_calls'], any_order=True) - mocked_subprocess.check_call.assert_has_calls(config_data['daemon_reload_subprocess_call'], + mocked_subprocess.run.assert_has_calls(config_data['daemon_reload_subprocess_call'], any_order=True) feature_state_table_mock.set.assert_has_calls(feature_table_state_db_calls) self.checks_systemd_config_file(device_type, config_data['config_db']['FEATURE'], feature_systemd_name_map) @@ -227,9 +227,9 @@ def test_handler(self, test_scenario_name, config_data, fs): feature_systemd_name_map[feature_name] = feature_names self.checks_systemd_config_file(device_type, 
config_data['config_db']['FEATURE'], feature_systemd_name_map) - mocked_subprocess.check_call.assert_has_calls(config_data['enable_feature_subprocess_calls'], + mocked_subprocess.run.assert_has_calls(config_data['enable_feature_subprocess_calls'], any_order=True) - mocked_subprocess.check_call.assert_has_calls(config_data['daemon_reload_subprocess_call'], + mocked_subprocess.run.assert_has_calls(config_data['daemon_reload_subprocess_call'], any_order=True) def test_feature_config_parsing(self): @@ -375,15 +375,15 @@ def test_feature_events(self, mock_syslog, get_runtime): daemon.start(time.time()) except TimeoutError as e: pass - expected = [call(['sudo', 'systemctl', 'daemon-reload']), - call(['sudo', 'systemctl', 'unmask', 'dhcp_relay.service']), - call(['sudo', 'systemctl', 'enable', 'dhcp_relay.service']), - call(['sudo', 'systemctl', 'start', 'dhcp_relay.service']), - call(['sudo', 'systemctl', 'daemon-reload']), - call(['sudo', 'systemctl', 'unmask', 'mux.service']), - call(['sudo', 'systemctl', 'enable', 'mux.service']), - call(['sudo', 'systemctl', 'start', 'mux.service'])] - mocked_subprocess.check_call.assert_has_calls(expected) + expected = [call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'dhcp_relay.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'dhcp_relay.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'dhcp_relay.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'mux.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'mux.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'mux.service'], capture_output=True, check=True, text=True)] + 
mocked_subprocess.run.assert_has_calls(expected, any_order=True) # Change the state to disabled MockSelect.reset_event_queue() @@ -393,10 +393,10 @@ def test_feature_events(self, mock_syslog, get_runtime): daemon.start(time.time()) except TimeoutError: pass - expected = [call(['sudo', 'systemctl', 'stop', 'dhcp_relay.service']), - call(['sudo', 'systemctl', 'disable', 'dhcp_relay.service']), - call(['sudo', 'systemctl', 'mask', 'dhcp_relay.service'])] - mocked_subprocess.check_call.assert_has_calls(expected) + expected = [call(['sudo', 'systemctl', 'stop', 'dhcp_relay.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'disable', 'dhcp_relay.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'mask', 'dhcp_relay.service'], capture_output=True, check=True, text=True)] + mocked_subprocess.run.assert_has_calls(expected, any_order=True) def test_delayed_service(self, mock_syslog, get_runtime): MockSelect.set_event_queue([('FEATURE', 'dhcp_relay'), @@ -417,20 +417,20 @@ def test_delayed_service(self, mock_syslog, get_runtime): daemon.start(time.time()) except TimeoutError: pass - expected = [call(['sudo', 'systemctl', 'daemon-reload']), - call(['sudo', 'systemctl', 'unmask', 'dhcp_relay.service']), - call(['sudo', 'systemctl', 'enable', 'dhcp_relay.service']), - call(['sudo', 'systemctl', 'start', 'dhcp_relay.service']), - call(['sudo', 'systemctl', 'daemon-reload']), - call(['sudo', 'systemctl', 'unmask', 'mux.service']), - call(['sudo', 'systemctl', 'enable', 'mux.service']), - call(['sudo', 'systemctl', 'start', 'mux.service']), - call(['sudo', 'systemctl', 'daemon-reload']), - call(['sudo', 'systemctl', 'unmask', 'telemetry.service']), - call(['sudo', 'systemctl', 'enable', 'telemetry.service']), - call(['sudo', 'systemctl', 'start', 'telemetry.service'])] - - mocked_subprocess.check_call.assert_has_calls(expected) + expected = [call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, 
check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'dhcp_relay.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'dhcp_relay.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'dhcp_relay.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'mux.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'mux.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'mux.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'telemetry.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'telemetry.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'telemetry.service'], capture_output=True, check=True, text=True)] + + mocked_subprocess.run.assert_has_calls(expected, any_order=True) def test_advanced_reboot(self, mock_syslog, get_runtime): MockRestartWaiter.advancedReboot = True @@ -442,25 +442,26 @@ def test_advanced_reboot(self, mock_syslog, get_runtime): daemon = featured.FeatureDaemon() daemon.render_all_feature_states() daemon.register_callbacks() - try: + try: daemon.start(time.time()) except TimeoutError: - pass - expected = [call(['sudo', 'systemctl', 'daemon-reload']), - call(['sudo', 'systemctl', 'unmask', 'dhcp_relay.service']), - call(['sudo', 'systemctl', 'enable', 'dhcp_relay.service']), - call(['sudo', 'systemctl', 'start', 'dhcp_relay.service']), - call(['sudo', 'systemctl', 'daemon-reload']), - call(['sudo', 'systemctl', 'unmask', 'mux.service']), - call(['sudo', 'systemctl', 'enable', 'mux.service']), - call(['sudo', 'systemctl', 
'start', 'mux.service']), - call(['sudo', 'systemctl', 'daemon-reload']), - call(['sudo', 'systemctl', 'unmask', 'telemetry.service']), - call(['sudo', 'systemctl', 'enable', 'telemetry.service']), - call(['sudo', 'systemctl', 'start', 'telemetry.service'])] - - mocked_subprocess.check_call.assert_has_calls(expected) - + pass + expected = [ + call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'dhcp_relay.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'dhcp_relay.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'dhcp_relay.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'mux.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'mux.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'mux.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'telemetry.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'telemetry.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'telemetry.service'], capture_output=True, check=True, text=True)] + + mocked_subprocess.run.assert_has_calls(expected, any_order=True) + def test_portinit_timeout(self, mock_syslog, get_runtime): print(MockConfigDb.CONFIG_DB) MockSelect.NUM_TIMEOUT_TRIES = 1 @@ -479,16 +480,16 @@ def test_portinit_timeout(self, mock_syslog, get_runtime): daemon.start(0.0) except TimeoutError: pass - expected = [call(['sudo', 'systemctl', 'daemon-reload']), - call(['sudo', 'systemctl', 'unmask', 'dhcp_relay.service']), - 
call(['sudo', 'systemctl', 'enable', 'dhcp_relay.service']), - call(['sudo', 'systemctl', 'start', 'dhcp_relay.service']), - call(['sudo', 'systemctl', 'daemon-reload']), - call(['sudo', 'systemctl', 'unmask', 'mux.service']), - call(['sudo', 'systemctl', 'enable', 'mux.service']), - call(['sudo', 'systemctl', 'start', 'mux.service']), - call(['sudo', 'systemctl', 'daemon-reload']), - call(['sudo', 'systemctl', 'unmask', 'telemetry.service']), - call(['sudo', 'systemctl', 'enable', 'telemetry.service']), - call(['sudo', 'systemctl', 'start', 'telemetry.service'])] - mocked_subprocess.check_call.assert_has_calls(expected) + expected = [call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'dhcp_relay.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'dhcp_relay.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'dhcp_relay.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'mux.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'mux.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'mux.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'telemetry.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'telemetry.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'telemetry.service'], capture_output=True, check=True, text=True)] + mocked_subprocess.run.assert_has_calls(expected, any_order=True) diff --git a/tests/featured/test_vectors.py b/tests/featured/test_vectors.py 
index b62186eb..1a5552b2 100644 --- a/tests/featured/test_vectors.py +++ b/tests/featured/test_vectors.py @@ -97,18 +97,18 @@ }, }, "enable_feature_subprocess_calls": [ - call(["sudo", "systemctl", "unmask", "dhcp_relay.service"]), - call(["sudo", "systemctl", "enable", "dhcp_relay.service"]), - call(["sudo", "systemctl", "start", "dhcp_relay.service"]), - call(["sudo", "systemctl", "unmask", "mux.service"]), - call(["sudo", "systemctl", "enable", "mux.service"]), - call(["sudo", "systemctl", "start", "mux.service"]), - call(["sudo", "systemctl", "unmask", "telemetry.service"]), - call(["sudo", "systemctl", "enable", "telemetry.service"]), - call(["sudo", "systemctl", "start", "telemetry.service"]), + call(["sudo", "systemctl", "unmask", "dhcp_relay.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "dhcp_relay.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", "dhcp_relay.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "mux.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "mux.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", "mux.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "telemetry.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "telemetry.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", "telemetry.service"], capture_output=True, check=True, text=True), ], "daemon_reload_subprocess_call": [ - call(["sudo", "systemctl", "daemon-reload"]), + call(["sudo", "systemctl", "daemon-reload"], capture_output=True, check=True, text=True), ], "popen_attributes": { 'communicate.return_value': ('output', 'error') @@ -211,18 +211,18 @@ }, }, "enable_feature_subprocess_calls": [ - call(["sudo", "systemctl", "stop", 
"mux.service"]), - call(["sudo", "systemctl", "disable", "mux.service"]), - call(["sudo", "systemctl", "mask", "mux.service"]), - call(["sudo", "systemctl", "unmask", "telemetry.service"]), - call(["sudo", "systemctl", "enable", "telemetry.service"]), - call(["sudo", "systemctl", "start", "telemetry.service"]), - call(["sudo", "systemctl", "unmask", "sflow.service"]), - call(["sudo", "systemctl", "enable", "sflow.service"]), - call(["sudo", "systemctl", "start", "sflow.service"]), + call(["sudo", "systemctl", "stop", "mux.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "disable", "mux.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "mask", "mux.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "telemetry.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "telemetry.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", "telemetry.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "sflow.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "sflow.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", "sflow.service"], capture_output=True, check=True, text=True), ], "daemon_reload_subprocess_call": [ - call(["sudo", "systemctl", "daemon-reload"]), + call(["sudo", "systemctl", "daemon-reload"], capture_output=True, check=True, text=True), ], "popen_attributes": { 'communicate.return_value': ('output', 'error') @@ -307,15 +307,15 @@ }, }, "enable_feature_subprocess_calls": [ - call(["sudo", "systemctl", "stop", "mux.service"]), - call(["sudo", "systemctl", "disable", "mux.service"]), - call(["sudo", "systemctl", "mask", "mux.service"]), - call(["sudo", "systemctl", "unmask", "telemetry.service"]), - call(["sudo", "systemctl", "enable", 
"telemetry.service"]), - call(["sudo", "systemctl", "start", "telemetry.service"]), + call(["sudo", "systemctl", "stop", "mux.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "disable", "mux.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "mask", "mux.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "telemetry.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "telemetry.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", "telemetry.service"], capture_output=True, check=True, text=True), ], "daemon_reload_subprocess_call": [ - call(["sudo", "systemctl", "daemon-reload"]), + call(["sudo", "systemctl", "daemon-reload"], capture_output=True, check=True, text=True), ], "popen_attributes": { 'communicate.return_value': ('output', 'error') @@ -400,18 +400,18 @@ }, }, "enable_feature_subprocess_calls": [ - call(["sudo", "systemctl", "unmask", "dhcp_relay.service"]), - call(["sudo", "systemctl", "enable", "dhcp_relay.service"]), - call(["sudo", "systemctl", "start", "dhcp_relay.service"]), - call(["sudo", "systemctl", "stop", "mux.service"]), - call(["sudo", "systemctl", "disable", "mux.service"]), - call(["sudo", "systemctl", "mask", "mux.service"]), - call(["sudo", "systemctl", "unmask", "telemetry.service"]), - call(["sudo", "systemctl", "enable", "telemetry.service"]), - call(["sudo", "systemctl", "start", "telemetry.service"]), + call(["sudo", "systemctl", "unmask", "dhcp_relay.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "dhcp_relay.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", "dhcp_relay.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "stop", "mux.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", 
"disable", "mux.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "mask", "mux.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "telemetry.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "telemetry.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", "telemetry.service"], capture_output=True, check=True, text=True), ], "daemon_reload_subprocess_call": [ - call(["sudo", "systemctl", "daemon-reload"]), + call(["sudo", "systemctl", "daemon-reload"], capture_output=True, check=True, text=True), ], "popen_attributes": { 'communicate.return_value': ('output', 'error') @@ -498,7 +498,7 @@ }, "enable_feature_subprocess_calls": [], "daemon_reload_subprocess_call": [ - call(["sudo", "systemctl", "daemon-reload"]), + call(["sudo", "systemctl", "daemon-reload"], capture_output=True, check=True, text=True), ], "popen_attributes": { 'communicate.return_value': ('enabled', 'error') @@ -595,16 +595,16 @@ }, }, "enable_feature_subprocess_calls": [ - call(["sudo", "systemctl", "stop", "bgp.service"]), - call(["sudo", "systemctl", "disable", "bgp.service"]), - call(["sudo", "systemctl", "mask", "bgp.service"]), - call(["sudo", "systemctl", "unmask", "syncd.service"]), - call(["sudo", "systemctl", "enable", "syncd.service"]), - call(["sudo", "systemctl", "start", "syncd.service"]), + call(["sudo", "systemctl", "stop", "bgp.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "disable", "bgp.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "mask", "bgp.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "syncd.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "syncd.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", 
"syncd.service"], capture_output=True, check=True, text=True), ], "daemon_reload_subprocess_call": [ - call(["sudo", "systemctl", "daemon-reload"]), + call(["sudo", "systemctl", "daemon-reload"], capture_output=True, check=True, text=True), ], "popen_attributes": { 'communicate.return_value': ('output', 'error') @@ -701,16 +701,16 @@ }, }, "enable_feature_subprocess_calls": [ - call(["sudo", "systemctl", "stop", "bgp.service"]), - call(["sudo", "systemctl", "disable", "bgp.service"]), - call(["sudo", "systemctl", "mask", "bgp.service"]), - call(["sudo", "systemctl", "unmask", "syncd.service"]), - call(["sudo", "systemctl", "enable", "syncd.service"]), - call(["sudo", "systemctl", "start", "syncd.service"]), + call(["sudo", "systemctl", "stop", "bgp.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "disable", "bgp.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "mask", "bgp.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "syncd.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "syncd.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", "syncd.service"], capture_output=True, check=True, text=True), ], "daemon_reload_subprocess_call": [ - call(["sudo", "systemctl", "daemon-reload"]), + call(["sudo", "systemctl", "daemon-reload"], capture_output=True, check=True, text=True), ], "popen_attributes": { 'communicate.return_value': ('output', 'error') @@ -807,19 +807,19 @@ }, }, "enable_feature_subprocess_calls": [ - call(["sudo", "systemctl", "start", "bgp.service"]), - call(["sudo", "systemctl", "enable", "bgp.service"]), - call(["sudo", "systemctl", "unmask", "bgp.service"]), - call(["sudo", "systemctl", "start", "teamd.service"]), - call(["sudo", "systemctl", "enable", "teamd.service"]), - call(["sudo", "systemctl", "unmask", "teamd.service"]), - call(["sudo", 
"systemctl", "unmask", "syncd.service"]), - call(["sudo", "systemctl", "enable", "syncd.service"]), - call(["sudo", "systemctl", "start", "syncd.service"]), + call(["sudo", "systemctl", "start", "bgp.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "bgp.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "bgp.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", "teamd.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "teamd.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "teamd.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "syncd.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "syncd.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", "syncd.service"], capture_output=True, check=True, text=True), ], "daemon_reload_subprocess_call": [ - call(["sudo", "systemctl", "daemon-reload"]), + call(["sudo", "systemctl", "daemon-reload"], capture_output=True, check=True, text=True), ], "popen_attributes": { 'communicate.return_value': ('output', 'error') @@ -916,19 +916,19 @@ }, }, "enable_feature_subprocess_calls": [ - call(["sudo", "systemctl", "start", "bgp.service"]), - call(["sudo", "systemctl", "enable", "bgp.service"]), - call(["sudo", "systemctl", "unmask", "bgp.service"]), - call(["sudo", "systemctl", "start", "teamd.service"]), - call(["sudo", "systemctl", "enable", "teamd.service"]), - call(["sudo", "systemctl", "unmask", "teamd.service"]), - call(["sudo", "systemctl", "unmask", "syncd.service"]), - call(["sudo", "systemctl", "enable", "syncd.service"]), - call(["sudo", "systemctl", "start", "syncd.service"]), + call(["sudo", "systemctl", "start", "bgp.service"], capture_output=True, check=True, text=True), + 
call(["sudo", "systemctl", "enable", "bgp.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "bgp.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", "teamd.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "teamd.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "teamd.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "syncd.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "syncd.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", "syncd.service"], capture_output=True, check=True, text=True), ], "daemon_reload_subprocess_call": [ - call(["sudo", "systemctl", "daemon-reload"]), + call(["sudo", "systemctl", "daemon-reload"], capture_output=True, check=True, text=True), ], "popen_attributes": { 'communicate.return_value': ('output', 'error') @@ -1026,33 +1026,33 @@ }, }, "enable_feature_subprocess_calls": [ - call(["sudo", "systemctl", "stop", "bgp@0.service"]), - call(["sudo", "systemctl", "disable", "bgp@0.service"]), - call(["sudo", "systemctl", "mask", "bgp@0.service"]), - call(["sudo", "systemctl", "stop", "bgp@1.service"]), - call(["sudo", "systemctl", "disable", "bgp@1.service"]), - call(["sudo", "systemctl", "mask", "bgp@1.service"]), - call(["sudo", "systemctl", "start", "teamd@0.service"]), - call(["sudo", "systemctl", "enable", "teamd@0.service"]), - call(["sudo", "systemctl", "unmask", "teamd@0.service"]), - call(["sudo", "systemctl", "start", "teamd@1.service"]), - call(["sudo", "systemctl", "enable", "teamd@1.service"]), - call(["sudo", "systemctl", "unmask", "teamd@1.service"]), - call(["sudo", "systemctl", "stop", "lldp@0.service"]), - call(["sudo", "systemctl", "disable", "lldp@0.service"]), - call(["sudo", "systemctl", "mask", "lldp@0.service"]), 
- call(["sudo", "systemctl", "stop", "lldp@1.service"]), - call(["sudo", "systemctl", "disable", "lldp@1.service"]), - call(["sudo", "systemctl", "mask", "lldp@1.service"]), - call(["sudo", "systemctl", "start", "syncd@0.service"]), - call(["sudo", "systemctl", "enable", "syncd@0.service"]), - call(["sudo", "systemctl", "unmask", "syncd@0.service"]), - call(["sudo", "systemctl", "start", "syncd@1.service"]), - call(["sudo", "systemctl", "enable", "syncd@1.service"]), - call(["sudo", "systemctl", "unmask", "syncd@1.service"]), + call(["sudo", "systemctl", "stop", "bgp@0.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "disable", "bgp@0.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "mask", "bgp@0.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "stop", "bgp@1.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "disable", "bgp@1.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "mask", "bgp@1.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", "teamd@0.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "teamd@0.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "teamd@0.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", "teamd@1.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "teamd@1.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "teamd@1.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "stop", "lldp@0.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "disable", "lldp@0.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", 
"mask", "lldp@0.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "stop", "lldp@1.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "disable", "lldp@1.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "mask", "lldp@1.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", "syncd@0.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "syncd@0.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "syncd@0.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "start", "syncd@1.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "enable", "syncd@1.service"], capture_output=True, check=True, text=True), + call(["sudo", "systemctl", "unmask", "syncd@1.service"], capture_output=True, check=True, text=True), ], "daemon_reload_subprocess_call": [ - call(["sudo", "systemctl", "daemon-reload"]), + call(["sudo", "systemctl", "daemon-reload"], capture_output=True, check=True, text=True), ], "popen_attributes": { 'communicate.return_value': ('output', 'error') @@ -1167,36 +1167,36 @@ }, }, "enable_feature_subprocess_calls": [ - call(['sudo', 'systemctl', 'unmask', 'bgp@0.service']), - call(['sudo', 'systemctl', 'enable', 'bgp@0.service']), - call(['sudo', 'systemctl', 'start', 'bgp@0.service']), - call(['sudo', 'systemctl', 'unmask', 'bgp@1.service']), - call(['sudo', 'systemctl', 'enable', 'bgp@1.service']), - call(['sudo', 'systemctl', 'start', 'bgp@1.service']), - call(['sudo', 'systemctl', 'unmask', 'teamd@0.service']), - call(['sudo', 'systemctl', 'enable', 'teamd@0.service']), - call(['sudo', 'systemctl', 'start', 'teamd@0.service']), - call(['sudo', 'systemctl', 'unmask', 'teamd@1.service']), - call(['sudo', 'systemctl', 'enable', 'teamd@1.service']), - call(['sudo', 'systemctl', 
'start', 'teamd@1.service']), - call(['sudo', 'systemctl', 'mask', 'lldp.service']), - call(['sudo', 'systemctl', 'disable', 'lldp.service']), - call(['sudo', 'systemctl', 'stop', 'lldp.service']), - call(['sudo', 'systemctl', 'unmask', 'lldp@0.service']), - call(['sudo', 'systemctl', 'enable', 'lldp@0.service']), - call(['sudo', 'systemctl', 'start', 'lldp@0.service']), - call(['sudo', 'systemctl', 'unmask', 'lldp@1.service']), - call(['sudo', 'systemctl', 'enable', 'lldp@1.service']), - call(['sudo', 'systemctl', 'start', 'lldp@1.service']), - call(['sudo', 'systemctl', 'unmask', 'macsec@0.service']), - call(['sudo', 'systemctl', 'enable', 'macsec@0.service']), - call(['sudo', 'systemctl', 'start', 'macsec@0.service']), - call(['sudo', 'systemctl', 'unmask', 'macsec@1.service']), - call(['sudo', 'systemctl', 'enable', 'macsec@1.service']), - call(['sudo', 'systemctl', 'start', 'macsec@1.service']) + call(['sudo', 'systemctl', 'unmask', 'bgp@0.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'bgp@0.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'bgp@0.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'bgp@1.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'bgp@1.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'bgp@1.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'teamd@0.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'teamd@0.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'teamd@0.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'teamd@1.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'teamd@1.service'], 
capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'teamd@1.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'mask', 'lldp.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'disable', 'lldp.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'stop', 'lldp.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'lldp@0.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'lldp@0.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'lldp@0.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'lldp@1.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'lldp@1.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'lldp@1.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'macsec@0.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'macsec@0.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'macsec@0.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'unmask', 'macsec@1.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'enable', 'macsec@1.service'], capture_output=True, check=True, text=True), + call(['sudo', 'systemctl', 'start', 'macsec@1.service'], capture_output=True, check=True, text=True) ], "daemon_reload_subprocess_call": [ - call(["sudo", "systemctl", "daemon-reload"]), + call(["sudo", "systemctl", "daemon-reload"], capture_output=True, check=True, text=True), ], "popen_attributes": { 'communicate.return_value': ('output', 'error') From c15aebc8dd214f7ed21b4afde65a280d73aa5f35 Mon Sep 17 
00:00:00 2001 From: jhli-cisco <93410383+jhli-cisco@users.noreply.github.com> Date: Wed, 20 Nov 2024 16:16:08 -0800 Subject: [PATCH 4/9] [cisco|express-boot]: Add support for cisco express boot in sonic-host-services (#90) * add express-boot support * add ut test coverage --- scripts/determine-reboot-cause | 4 ++++ tests/determine-reboot-cause_test.py | 11 +++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/scripts/determine-reboot-cause b/scripts/determine-reboot-cause index 2406a646..67722cfc 100755 --- a/scripts/determine-reboot-cause +++ b/scripts/determine-reboot-cause @@ -38,6 +38,7 @@ REBOOT_TYPE_KEXEC_FILE = "/proc/cmdline" # To extract the commom part of them, we should have the following PATTERN REBOOT_TYPE_KEXEC_PATTERN_WARM = ".*SONIC_BOOT_TYPE=(warm|fastfast).*" REBOOT_TYPE_KEXEC_PATTERN_FAST = ".*SONIC_BOOT_TYPE=(fast|fast-reboot).*" +REBOOT_TYPE_KEXEC_PATTERN_EXPRESS = ".*SONIC_BOOT_TYPE=(express).*" REBOOT_CAUSE_UNKNOWN = "Unknown" REBOOT_CAUSE_NON_HARDWARE = "Non-Hardware" @@ -58,6 +59,9 @@ def parse_warmfast_reboot_from_proc_cmdline(): m = re.search(REBOOT_TYPE_KEXEC_PATTERN_FAST, cause_file_kexec) if m and m.group(1): return 'fast-reboot' + m = re.search(REBOOT_TYPE_KEXEC_PATTERN_EXPRESS, cause_file_kexec) + if m and m.group(1): + return 'express-reboot' return None diff --git a/tests/determine-reboot-cause_test.py b/tests/determine-reboot-cause_test.py index 6bf52fe5..61ca4f2e 100644 --- a/tests/determine-reboot-cause_test.py +++ b/tests/determine-reboot-cause_test.py @@ -39,8 +39,10 @@ EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE = "warm" -PROC_CMDLINE_CONTENTS = """\ -BOOT_IMAGE=/image-20191130.52/boot/vmlinuz-4.9.0-11-2-amd64 root=/dev/sda4 rw console=tty0 console=ttyS1,9600n8 quiet net.ifnames=0 biosdevname=0 loop=image-20191130.52/fs.squashfs loopfstype=squashfs apparmor=1 security=apparmor varlog_size=4096 usbcore.autosuspend=-1 module_blacklist=gpio_ich SONIC_BOOT_TYPE=warm""" +PROC_CMDLINE_EXPRESS_BOOT_CONTENTS 
= """\ +BOOT_IMAGE=/image-20191130.52/boot/vmlinuz-4.9.0-11-2-amd64 root=/dev/sda4 rw console=tty0 console=ttyS1,9600n8 quiet net.ifnames=0 biosdevname=0 loop=image-20191130.52/fs.squashfs loopfstype=squashfs apparmor=1 security=apparmor varlog_size=4096 usbcore.autosuspend=-1 module_blacklist=gpio_ich SONIC_BOOT_TYPE=express""" + +EXPECTED_PARSE_EXPRESS_REBOOT_FROM_PROC_CMDLINE = "express-reboot" REBOOT_CAUSE_CONTENTS = """\ User issued 'warm-reboot' command [User: admin, Time: Mon Nov 2 22:37:45 UTC 2020]""" @@ -81,6 +83,11 @@ def test_parse_warmfast_reboot_from_proc_cmdline(self): result = determine_reboot_cause.parse_warmfast_reboot_from_proc_cmdline() assert result == EXPECTED_PARSE_WARMFAST_REBOOT_FROM_PROC_CMDLINE open_mocked.assert_called_once_with("/proc/cmdline") + express_mocked = mock.mock_open(read_data=PROC_CMDLINE_EXPRESS_BOOT_CONTENTS) + with mock.patch("{}.open".format(BUILTINS), express_mocked): + result = determine_reboot_cause.parse_warmfast_reboot_from_proc_cmdline() + assert result == EXPECTED_PARSE_EXPRESS_REBOOT_FROM_PROC_CMDLINE + express_mocked.assert_called_once_with("/proc/cmdline") def test_find_software_reboot_cause_user(self): with mock.patch("os.path.isfile") as mock_isfile: From 89aead2c34eb95102328c4730fce534190ee5dac Mon Sep 17 00:00:00 2001 From: Dawei Huang Date: Fri, 22 Nov 2024 16:04:00 -0600 Subject: [PATCH 5/9] DBUS API for Containerz.StopContainer (#179) DBUS API for Containerz.StopContainer --- host_modules/docker_service.py | 143 +++++++++++++++ tests/host_modules/docker_service_test.py | 209 ++++++++++++++++++++++ 2 files changed, 352 insertions(+) create mode 100644 host_modules/docker_service.py create mode 100644 tests/host_modules/docker_service_test.py diff --git a/host_modules/docker_service.py b/host_modules/docker_service.py new file mode 100644 index 00000000..f1c7fc8c --- /dev/null +++ b/host_modules/docker_service.py @@ -0,0 +1,143 @@ +"""Docker service handler""" + +from host_modules import host_service 
import docker
import signal
import errno

MOD_NAME = "docker_service"

# The set of allowed containers that can be managed by this service.
# First element is the image name, second element is the container name.
ALLOWED_CONTAINERS = [
    ("docker-syncd-brcm", "syncd"),
    ("docker-acms", "acms"),
    ("docker-sonic-gnmi", "gnmi"),
    ("docker-sonic-telemetry", "telemetry"),
    ("docker-snmp", "snmp"),
    ("docker-platform-monitor", "pmon"),
    ("docker-lldp", "lldp"),
    ("docker-dhcp-relay", "dhcp_relay"),
    ("docker-router-advertiser", "radv"),
    ("docker-teamd", "teamd"),
    ("docker-fpm-frr", "bgp"),
    ("docker-orchagent", "swss"),
    ("docker-sonic-restapi", "restapi"),
    ("docker-eventd", "eventd"),
    ("docker-database", "database"),
]


def is_allowed_container(container):
    """
    Check if the container is allowed to be managed by this service.

    Args:
        container (str): The container name.

    Returns:
        bool: True if the container is allowed, False otherwise.
    """
    return any(container == allowed for _, allowed in ALLOWED_CONTAINERS)


class DockerService(host_service.HostModule):
    """
    DBus endpoint that executes the docker command (stop/kill/restart).
    """

    def _get_container(self, container):
        """
        Validate a container name against the allow-list and look it up.

        Args:
            container (str): The name or ID of the Docker container.

        Returns:
            tuple: (0, container_object) on success, or (errno.EPERM, message)
            when the container is not in the allow-list.

        Raises:
            docker.errors.NotFound: If the container does not exist.
        """
        # Check the allow-list first so a disallowed request never creates a
        # docker client connection (the original checked after from_env()).
        if not is_allowed_container(container):
            return (
                errno.EPERM,
                "Container {} is not allowed to be managed by this service.".format(
                    container
                ),
            )
        client = docker.from_env()
        return 0, client.containers.get(container)

    @host_service.method(
        host_service.bus_name(MOD_NAME), in_signature="s", out_signature="is"
    )
    def stop(self, container):
        """
        Stop a running Docker container.

        Args:
            container (str): The name or ID of the Docker container.

        Returns:
            tuple: A tuple containing the exit code (int) and a message indicating the result of the operation.
        """
        try:
            rc, result = self._get_container(container)
            if rc != 0:
                return rc, result
            result.stop()
            return 0, "Container {} has been stopped.".format(result.name)
        except docker.errors.NotFound:
            return errno.ENOENT, "Container {} does not exist.".format(container)
        except Exception as e:
            # BUGFIX: the original rebound ``container`` to the container
            # object before this point, so a failure after the lookup printed
            # the object's repr instead of the requested name.
            return 1, "Failed to stop container {}: {}".format(container, str(e))

    @host_service.method(
        host_service.bus_name(MOD_NAME), in_signature="si", out_signature="is"
    )
    def kill(self, container, signal=signal.SIGKILL):
        """
        Kill or send a signal to a running Docker container.

        Args:
            container (str): The name or ID of the Docker container.
            signal (int): The signal to send. Defaults to SIGKILL.

        Returns:
            tuple: A tuple containing the exit code (int) and a message indicating the result of the operation.

        Note:
            The ``signal`` parameter keeps its public name (callers may use it
            as a keyword) even though it shadows the ``signal`` module inside
            this method; the default is evaluated at definition time, so the
            shadowing is harmless.
        """
        try:
            rc, result = self._get_container(container)
            if rc != 0:
                return rc, result
            result.kill(signal=signal)
            return 0, "Container {} has been killed with signal {}.".format(
                result.name, signal
            )
        except docker.errors.NotFound:
            return errno.ENOENT, "Container {} does not exist.".format(container)
        except Exception as e:
            return 1, "Failed to kill container {}: {}".format(container, str(e))

    @host_service.method(
        host_service.bus_name(MOD_NAME), in_signature="s", out_signature="is"
    )
    def restart(self, container):
        """
        Restart a running Docker container.

        Args:
            container (str): The name or ID of the Docker container.

        Returns:
            tuple: A tuple containing the exit code (int) and a message indicating the result of the operation.
        """
        try:
            rc, result = self._get_container(container)
            if rc != 0:
                return rc, result
            result.restart()
            return 0, "Container {} has been restarted.".format(result.name)
        except docker.errors.NotFound:
            return errno.ENOENT, "Container {} does not exist.".format(container)
        except Exception as e:
            return 1, "Failed to restart container {}: {}".format(container, str(e))


# ---- tests/host_modules/docker_service_test.py (continues in next section) ----

import errno
import docker
from unittest import mock
from host_modules.docker_service import DockerService

MOD_NAME = "docker_service"


class TestDockerService(object):
    @mock.patch("dbus.SystemBus")
    @mock.patch("dbus.service.BusName")
    @mock.patch("dbus.service.Object.__init__")
    def test_docker_stop_success(self, MockInit, MockBusName, MockSystemBus):
        """stop() succeeds for an allowed, existing container."""
        mock_docker_client = mock.Mock()
        mock_docker_client.containers.get.return_value.stop.return_value = None

        with mock.patch.object(docker, "from_env", return_value=mock_docker_client):
            docker_service = DockerService(MOD_NAME)
            rc, _ = docker_service.stop("syncd")

        assert rc == 0, "Return code is wrong"
        mock_docker_client.containers.get.assert_called_once_with("syncd")
        mock_docker_client.containers.get.return_value.stop.assert_called_once()

    @mock.patch("dbus.SystemBus")
    @mock.patch("dbus.service.BusName")
    @mock.patch("dbus.service.Object.__init__")
    def test_docker_stop_fail_disallowed(self, MockInit, MockBusName, MockSystemBus):
        """stop() returns EPERM for a container outside the allow-list."""
        mock_docker_client = mock.Mock()

        with mock.patch.object(docker, "from_env", return_value=mock_docker_client):
            docker_service = DockerService(MOD_NAME)
            rc, msg = docker_service.stop("bad-container")

        assert rc == errno.EPERM, "Return code is wrong"
        assert (
            "not" in msg and "allowed" in msg
        ), "Message should contain 'not' and 'allowed'"
docker_service.stop("bad-container") + + assert rc == errno.EPERM, "Return code is wrong" + assert ( + "not" in msg and "allowed" in msg + ), "Message should contain 'not' and 'allowed'" + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + def test_docker_stop_fail_not_exist(self, MockInit, MockBusName, MockSystemBus): + mock_docker_client = mock.Mock() + mock_docker_client.containers.get.side_effect = docker.errors.NotFound( + "Container not found" + ) + + with mock.patch.object(docker, "from_env", return_value=mock_docker_client): + docker_service = DockerService(MOD_NAME) + rc, msg = docker_service.stop("syncd") + + assert rc == errno.ENOENT, "Return code is wrong" + assert ( + "not" in msg and "exist" in msg + ), "Message should contain 'not' and 'exist'" + mock_docker_client.containers.get.assert_called_once_with("syncd") + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + def test_docker_stop_fail_api_error(self, MockInit, MockBusName, MockSystemBus): + mock_docker_client = mock.Mock() + mock_docker_client.containers.get.return_value.stop.side_effect = ( + docker.errors.APIError("API error") + ) + + with mock.patch.object(docker, "from_env", return_value=mock_docker_client): + docker_service = DockerService(MOD_NAME) + rc, msg = docker_service.stop("syncd") + + assert rc != 0, "Return code is wrong" + assert "API error" in msg, "Message should contain 'API error'" + mock_docker_client.containers.get.assert_called_once_with("syncd") + mock_docker_client.containers.get.return_value.stop.assert_called_once() + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + def test_docker_kill_success(self, MockInit, MockBusName, MockSystemBus): + mock_docker_client = mock.Mock() + mock_docker_client.containers.get.return_value.kill.return_value = None + + with 
mock.patch.object(docker, "from_env", return_value=mock_docker_client): + docker_service = DockerService(MOD_NAME) + rc, _ = docker_service.kill("syncd") + + assert rc == 0, "Return code is wrong" + mock_docker_client.containers.get.assert_called_once_with("syncd") + mock_docker_client.containers.get.return_value.kill.assert_called_once() + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + def test_docker_kill_fail_disallowed(self, MockInit, MockBusName, MockSystemBus): + mock_docker_client = mock.Mock() + + with mock.patch.object(docker, "from_env", return_value=mock_docker_client): + docker_service = DockerService(MOD_NAME) + rc, msg = docker_service.kill("bad-container") + + assert rc == errno.EPERM, "Return code is wrong" + assert ( + "not" in msg and "allowed" in msg + ), "Message should contain 'not' and 'allowed'" + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + def test_docker_kill_fail_not_found(self, MockInit, MockBusName, MockSystemBus): + mock_docker_client = mock.Mock() + mock_docker_client.containers.get.side_effect = docker.errors.NotFound( + "Container not found" + ) + + with mock.patch.object(docker, "from_env", return_value=mock_docker_client): + docker_service = DockerService(MOD_NAME) + rc, msg = docker_service.kill("syncd") + + assert rc == errno.ENOENT, "Return code is wrong" + assert ( + "not" in msg and "exist" in msg + ), "Message should contain 'not' and 'exist'" + mock_docker_client.containers.get.assert_called_once_with("syncd") + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + def test_docker_kill_fail_api_error(self, MockInit, MockBusName, MockSystemBus): + mock_docker_client = mock.Mock() + mock_docker_client.containers.get.return_value.kill.side_effect = ( + docker.errors.APIError("API error") + ) + + with 
mock.patch.object(docker, "from_env", return_value=mock_docker_client): + docker_service = DockerService(MOD_NAME) + rc, msg = docker_service.kill("syncd") + + assert rc != 0, "Return code is wrong" + assert "API error" in msg, "Message should contain 'API error'" + mock_docker_client.containers.get.assert_called_once_with("syncd") + mock_docker_client.containers.get.return_value.kill.assert_called_once() + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + def test_docker_restart_success(self, MockInit, MockBusName, MockSystemBus): + mock_docker_client = mock.Mock() + mock_docker_client.containers.get.return_value.restart.return_value = None + + with mock.patch.object(docker, "from_env", return_value=mock_docker_client): + docker_service = DockerService(MOD_NAME) + rc, _ = docker_service.restart("syncd") + + assert rc == 0, "Return code is wrong" + mock_docker_client.containers.get.assert_called_once_with("syncd") + mock_docker_client.containers.get.return_value.restart.assert_called_once() + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + def test_docker_restart_fail_disallowed(self, MockInit, MockBusName, MockSystemBus): + mock_docker_client = mock.Mock() + + with mock.patch.object(docker, "from_env", return_value=mock_docker_client): + docker_service = DockerService(MOD_NAME) + rc, msg = docker_service.restart("bad-container") + + assert rc == errno.EPERM, "Return code is wrong" + assert ( + "not" in msg and "allowed" in msg + ), "Message should contain 'not' and 'allowed'" + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + def test_docker_restart_fail_not_found(self, MockInit, MockBusName, MockSystemBus): + mock_docker_client = mock.Mock() + mock_docker_client.containers.get.side_effect = docker.errors.NotFound( + "Container not found" + ) + + with 
mock.patch.object(docker, "from_env", return_value=mock_docker_client): + docker_service = DockerService(MOD_NAME) + rc, msg = docker_service.restart("syncd") + + assert rc == errno.ENOENT, "Return code is wrong" + assert ( + "not" in msg and "exist" in msg + ), "Message should contain 'not' and 'exist'" + mock_docker_client.containers.get.assert_called_once_with("syncd") + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + def test_docker_restart_fail_api_error(self, MockInit, MockBusName, MockSystemBus): + mock_docker_client = mock.Mock() + mock_docker_client.containers.get.return_value.restart.side_effect = ( + docker.errors.APIError("API error") + ) + + with mock.patch.object(docker, "from_env", return_value=mock_docker_client): + docker_service = DockerService(MOD_NAME) + rc, msg = docker_service.restart("syncd") + + assert rc != 0, "Return code is wrong" + assert "API error" in msg, "Message should contain 'API error'" + mock_docker_client.containers.get.assert_called_once_with("syncd") + mock_docker_client.containers.get.return_value.restart.assert_called_once() From 438e54aca68507d70b37100d00e4be3647c7f59f Mon Sep 17 00:00:00 2001 From: Yevhen Fastiuk Date: Mon, 2 Dec 2024 20:00:32 +0200 Subject: [PATCH 6/9] [Logrotate] Update log rotate configuration via ConfigDB (#61) * Add log rotation configuration mechanism * Handle changes in LOGGING table Signed-off-by: Yevhen Fastiuk * Added UT for logrotate service in hostcfgd * Align run_cmd function input arguments Signed-off-by: Yevhen Fastiuk --------- Signed-off-by: Yevhen Fastiuk --- scripts/hostcfgd | 51 ++++++++++++++ tests/hostcfgd/hostcfgd_logging_test.py | 91 +++++++++++++++++++++++++ tests/hostcfgd/test_logging_vectors.py | 48 +++++++++++++ 3 files changed, 190 insertions(+) create mode 100644 tests/hostcfgd/hostcfgd_logging_test.py create mode 100644 tests/hostcfgd/test_logging_vectors.py diff --git a/scripts/hostcfgd b/scripts/hostcfgd 
class LoggingCfg(object):
    """Logging Config Daemon

    Handles changes in LOGGING table.
    1) Handle change of debug/syslog log files config

    On a config change, restarts the ``logrotate-config`` service so the
    logrotate config files get regenerated from the new settings.
    """
    def __init__(self):
        # Last-applied LOGGING table contents, keyed by log file name
        # (e.g. 'syslog', 'debug').
        self.cache = {}

    def load(self, logging_cfg=None):
        """Load the initial logging file configuration.

        Args:
            logging_cfg: initial LOGGING table contents as a dict, or None
                for an empty configuration.
        """
        # BUGFIX: the original signature used a mutable default argument
        # (logging_cfg={}) and stored the caller's dict by reference, so the
        # shared default object (and later mutation through the caller's
        # reference) could corrupt the cache. Take a shallow copy instead.
        self.cache = dict(logging_cfg) if logging_cfg else {}
        syslog.syslog(syslog.LOG_DEBUG, f'Initial logging config: {self.cache}')

    def update_logging_cfg(self, key, data):
        """Apply logging configuration

        The daemon restarts logrotate-config which will regenerate logrotate
        config files.
        Args:
            key: DB table's key that was triggered change (basically it is a
                 config file)
            data: File's config data
        """
        syslog.syslog(syslog.LOG_DEBUG, 'LoggingCfg: logging files cfg update')
        if self.cache.get(key) != data:
            syslog.syslog(syslog.LOG_INFO,
                          f'Set logging file {key} config: {data}')
            try:
                run_cmd(['systemctl', 'restart', 'logrotate-config'], True, True)
            except Exception:
                # Leave the cache untouched so the change is retried on the
                # next notification for this key.
                syslog.syslog(syslog.LOG_ERR, f'Failed to update {key} message')
                return

            # Update cache only after the restart succeeded
            self.cache[key] = data
import importlib.machinery
import importlib.util
import os
import sys

from copy import copy
from swsscommon import swsscommon
from syslog import syslog, LOG_ERR
from tests.hostcfgd.test_logging_vectors \
    import HOSTCFGD_TEST_LOGGING_VECTOR as logging_test_data
from tests.common.mock_configdb import MockConfigDb, MockDBConnector
from unittest import TestCase, mock

# Directory layout used to locate the hostcfgd script under test.
test_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
modules_path = os.path.dirname(test_path)
scripts_path = os.path.join(modules_path, "scripts")
src_path = os.path.dirname(modules_path)
templates_path = os.path.join(src_path, "sonic-host-services-data/templates")
output_path = os.path.join(test_path, "hostcfgd/output")
sample_output_path = os.path.join(test_path, "hostcfgd/sample_output")
sys.path.insert(0, modules_path)

# Load the file under test (hostcfgd has no .py suffix, so import it by path)
hostcfgd_path = os.path.join(scripts_path, 'hostcfgd')
loader = importlib.machinery.SourceFileLoader('hostcfgd', hostcfgd_path)
spec = importlib.util.spec_from_loader(loader.name, loader)
hostcfgd = importlib.util.module_from_spec(spec)
loader.exec_module(hostcfgd)
sys.modules['hostcfgd'] = hostcfgd

# Mock swsscommon classes
hostcfgd.ConfigDBConnector = MockConfigDb
hostcfgd.DBConnector = MockDBConnector
hostcfgd.Table = mock.Mock()
hostcfgd.run_cmd = mock.Mock()


class TestHostcfgLogging(TestCase):
    """hostcfgd daemon tests for LOGGING (logrotate) table handling."""

    def __init__(self, *args, **kwargs):
        super(TestHostcfgLogging, self).__init__(*args, **kwargs)
        self.daemon = None

    def setUp(self):
        """Start a daemon instance and load the initial LOGGING config."""
        MockConfigDb.set_config_db(logging_test_data['initial'])
        self.daemon = hostcfgd.HostConfigDaemon()

        initial_cfg = self.daemon.config_db.get_table(
            swsscommon.CFG_LOGGING_TABLE_NAME)

        assert self.daemon.loggingcfg.cache == {}
        self.daemon.loggingcfg.load(initial_cfg)
        assert self.daemon.loggingcfg.cache != {}

        # Drop any run_cmd calls recorded during initialization
        hostcfgd.run_cmd.reset_mock()

    def tearDown(self):
        MockConfigDb.set_config_db({})
        self.daemon = None

    def update_config(self, config_name):
        """Push the named test-vector config through the LOGGING handler."""
        MockConfigDb.mod_config_db(logging_test_data[config_name])

        logging_table = logging_test_data[config_name]['LOGGING']
        for logfile in ('syslog', 'debug'):
            self.daemon.logging_handler(key=logfile, op=None,
                                        data=logging_table[logfile])

    def assert_applied(self, config_name):
        """Apply a vector and assert the cache changed and a restart ran.

        Args:
            config_name: str: Test vectors config name

        Assert:
            AssertionError when the config wasn't applied
        """
        orig_cache = copy(self.daemon.loggingcfg.cache)
        self.update_config(config_name)
        assert self.daemon.loggingcfg.cache != orig_cache
        hostcfgd.run_cmd.assert_called()

    def test_rsyslog_handle_modified(self):
        self.assert_applied('modified')


# ---- tests/hostcfgd/test_logging_vectors.py ----

'''
    hostcfgd test logging configuration vector
'''

HOSTCFGD_TEST_LOGGING_VECTOR = {
    'initial': {
        'DEVICE_METADATA': {
            'localhost': {
                'hostname': 'logrotate',
            },
        },
        'LOGGING': {
            'syslog': {
                'disk_percentage': '',
                'frequency': 'daily',
                'max_number': '20',
                'size': '10.0'
            },
            'debug': {
                'disk_percentage': '',
                'frequency': 'daily',
                'max_number': '10',
                'size': '20.0'
            }
        },
        "SSH_SERVER": {
            "POLICIES": {
                "max_sessions": "100"
            }
        }
    },
    'modified': {
        'LOGGING': {
            'syslog': {
                'disk_percentage': '',
                'frequency': 'weekly',
                'max_number': '100',
                'size': '20.0'
            },
            'debug': {
                'disk_percentage': '',
                'frequency': 'weekly',
                'max_number': '20',
                'size': '100.0'
            }
        }
    }
}
dpkg -i libyang_1.0.73_*.deb sudo dpkg -i libswsscommon_1.0.0_amd64.deb sudo dpkg -i python3-swsscommon_1.0.0_amd64.deb - workingDirectory: $(Pipeline.Workspace)/target/debs/bullseye/ + workingDirectory: $(Pipeline.Workspace)/target/debs/bookworm/ displayName: 'Install Debian dependencies' - script: | @@ -71,20 +71,22 @@ stages: sudo pip3 install sonic_config_engine-1.0-py3-none-any.whl sudo pip3 install sonic_platform_common-1.0-py3-none-any.whl sudo pip3 install sonic_utilities-1.2-py3-none-any.whl - workingDirectory: $(Pipeline.Workspace)/target/python-wheels/bullseye/ + workingDirectory: $(Pipeline.Workspace)/target/python-wheels/bookworm/ displayName: 'Install Python dependencies' - script: | set -ex # Install .NET CORE curl -sSL https://packages.microsoft.com/keys/microsoft.asc | sudo apt-key add - - sudo apt-add-repository https://packages.microsoft.com/debian/11/prod + sudo apt-add-repository https://packages.microsoft.com/debian/12/prod sudo apt-get update - sudo apt-get install -y dotnet-sdk-5.0 + sudo apt-get install -y dotnet-sdk-8.0 displayName: "Install .NET CORE" - script: | - python3 setup.py test + pip3 install ".[testing]" + pip3 uninstall --yes sonic-host-services + pytest displayName: 'Test Python 3' - task: PublishTestResults@2 @@ -103,8 +105,7 @@ stages: displayName: 'Publish Python 3 test coverage' - script: | - set -e - python3 setup.py bdist_wheel + python3 -m build -n displayName: 'Build Python 3 wheel' - publish: '$(System.DefaultWorkingDirectory)/dist/' From b0b3ca54614ac6fd350f2609f1ebd74757b72f14 Mon Sep 17 00:00:00 2001 From: kanza-latif Date: Tue, 10 Dec 2024 17:53:11 +0500 Subject: [PATCH 8/9] Support for Memory Statistics Host-Services (#167) * Added memory statistics host services * updated the hostcfgd file * set the indentation and parmater names of schema Signed-off-by: Arham-Nasir * added key validation and log error for handling updates Signed-off-by: Arham-Nasir * added memory_statistics message function Signed-off-by: 
class MemoryStatisticsCfg:
    """
    The MemoryStatisticsCfg class manages the configuration updates for the MemoryStatisticsDaemon, a daemon
    responsible for collecting memory usage statistics. It monitors configuration changes in ConfigDB and, based
    on those updates, performs actions such as restarting, shutting down, or reloading the daemon.

    Attributes:
        VALID_KEYS (list): List of valid configuration keys ("enabled", "sampling_interval", "retention_period").
        PID_FILE_PATH (str): Path where the daemon's process ID (PID) is stored.
        DAEMON_EXEC_PATH (str): Path to the executable file of the memory statistics daemon.
        DAEMON_PROCESS_NAME (str): Name of the daemon process used for validation.
    """
    VALID_KEYS = ["enabled", "sampling_interval", "retention_period"]
    PID_FILE_PATH = '/var/run/memory_statistics_daemon.pid'
    DAEMON_EXEC_PATH = '/usr/bin/memory_statistics_service.py'
    DAEMON_PROCESS_NAME = 'memory_statistics_service.py'

    def __init__(self, config_db):
        """
        Initialize MemoryStatisticsCfg with a configuration database.

        Parameters:
            config_db (object): Instance of the configuration database (ConfigDB) used to retrieve and
                apply configuration changes.
        """
        # Defaults mirror the daemon's out-of-the-box behavior: disabled,
        # 5-minute sampling, 15-day retention (units per the daemon's docs —
        # TODO confirm).
        self.cache = {
            "enabled": "false",
            "sampling_interval": "5",
            "retention_period": "15"
        }
        self.config_db = config_db

    def load(self, memory_statistics_config: dict):
        """
        Load the initial memory statistics configuration from a provided dictionary.

        Parameters:
            memory_statistics_config (dict): Dictionary containing the initial configuration values.
                Invalid keys are logged and skipped.
        """
        syslog.syslog(syslog.LOG_INFO, 'MemoryStatisticsCfg: Loading initial configuration')

        if not memory_statistics_config:
            memory_statistics_config = {}

        for key, value in memory_statistics_config.items():
            if key not in self.VALID_KEYS:
                syslog.syslog(syslog.LOG_ERR, f"MemoryStatisticsCfg: Invalid key '{key}' in initial configuration.")
                continue
            self.memory_statistics_update(key, value)

    def memory_statistics_update(self, key, data):
        """
        Handles updates for each configuration setting, validates the data, and updates the cache if the value changes.

        The cache is only updated after the setting has been applied without
        raising, so the cache reflects what was actually attempted successfully.

        Parameters:
            key (str): Configuration key, e.g., "enabled", "sampling_interval", or "retention_period".
            data (str): The new value for the configuration key.
        """
        if key not in self.VALID_KEYS:
            syslog.syslog(syslog.LOG_ERR, f"MemoryStatisticsCfg: Invalid key '{key}' received.")
            return

        data = str(data)
        if key in ["retention_period", "sampling_interval"] and (not data.isdigit() or int(data) <= 0):
            syslog.syslog(syslog.LOG_ERR, f"MemoryStatisticsCfg: Invalid value '{data}' for key '{key}'. Must be a positive integer.")
            return
        # BUGFIX: "enabled" was previously applied unvalidated, so any typo
        # (e.g. "ture") was silently treated as "disable the daemon".
        # Validate it the same way the numeric keys are validated.
        if key == "enabled" and data.lower() not in ("true", "false"):
            syslog.syslog(syslog.LOG_ERR, f"MemoryStatisticsCfg: Invalid value '{data}' for key '{key}'. Must be 'true' or 'false'.")
            return

        if data != self.cache.get(key):
            syslog.syslog(syslog.LOG_INFO, f"MemoryStatisticsCfg: Detected change in '{key}' to '{data}'")
            try:
                self.apply_setting(key, data)
                self.cache[key] = data
            except Exception as e:
                syslog.syslog(syslog.LOG_ERR, f'MemoryStatisticsCfg: Failed to manage MemoryStatisticsDaemon: {e}')

    def apply_setting(self, key, data):
        """
        Apply the setting based on the key. If "enabled" is set to true or false, start or stop the daemon.
        For other keys, reload the daemon configuration.

        BUGFIX: the original wrapped this dispatch in a broad try/except that
        swallowed every exception, which made the caller's error handling (and
        its "update cache only on success" contract) dead code. Exceptions now
        propagate to the caller; the low-level helpers below remain
        best-effort and log their own failures.

        Parameters:
            key (str): The specific configuration setting being updated.
            data (str): The value for the setting (already validated).
        """
        if key == "enabled":
            if data.lower() == "true":
                self.restart_memory_statistics()
            else:
                self.shutdown_memory_statistics()
        else:
            self.reload_memory_statistics()

    def restart_memory_statistics(self):
        """Restarts the memory statistics daemon by first shutting it down (if running) and then starting it again."""
        try:
            self.shutdown_memory_statistics()
            # Give the old process a moment to release its PID file before
            # launching the replacement.
            time.sleep(1)
            syslog.syslog(syslog.LOG_INFO, "MemoryStatisticsCfg: Starting MemoryStatisticsDaemon")
            subprocess.Popen([self.DAEMON_EXEC_PATH])
        except Exception as e:
            syslog.syslog(syslog.LOG_ERR, f"MemoryStatisticsCfg: Failed to start MemoryStatisticsDaemon: {e}")

    def reload_memory_statistics(self):
        """Sends a SIGHUP signal to the daemon to reload its configuration without restarting."""
        pid = self.get_memory_statistics_pid()
        if pid:
            try:
                os.kill(pid, signal.SIGHUP)
                syslog.syslog(syslog.LOG_INFO, "MemoryStatisticsCfg: Sent SIGHUP to reload daemon configuration")
            except Exception as e:
                syslog.syslog(syslog.LOG_ERR, f"MemoryStatisticsCfg: Failed to reload MemoryStatisticsDaemon: {e}")

    def shutdown_memory_statistics(self):
        """Sends a SIGTERM signal to gracefully shut down the daemon."""
        pid = self.get_memory_statistics_pid()
        if pid:
            try:
                os.kill(pid, signal.SIGTERM)
                syslog.syslog(syslog.LOG_INFO, "MemoryStatisticsCfg: Sent SIGTERM to stop MemoryStatisticsDaemon")
                self.wait_for_shutdown(pid)
            except Exception as e:
                syslog.syslog(syslog.LOG_ERR, f"MemoryStatisticsCfg: Failed to shutdown MemoryStatisticsDaemon: {e}")

    def wait_for_shutdown(self, pid, timeout=10):
        """
        Waits for the daemon process to terminate gracefully within a given timeout.

        Parameters:
            pid (int): Process ID of the daemon to shut down.
            timeout (int): Maximum wait time in seconds for the process to terminate (default is 10 seconds).
        """
        try:
            process = psutil.Process(pid)
            process.wait(timeout=timeout)
            syslog.syslog(syslog.LOG_INFO, "MemoryStatisticsCfg: MemoryStatisticsDaemon stopped gracefully")
        except psutil.TimeoutExpired:
            syslog.syslog(syslog.LOG_WARNING, f"MemoryStatisticsCfg: Timed out while waiting for daemon (PID {pid}) to shut down.")
        except psutil.NoSuchProcess:
            syslog.syslog(syslog.LOG_WARNING, "MemoryStatisticsCfg: MemoryStatisticsDaemon process not found.")
        except Exception as e:
            syslog.syslog(syslog.LOG_ERR, f"MemoryStatisticsCfg: Exception in wait_for_shutdown(): {e}")

    def get_memory_statistics_pid(self):
        """
        Retrieves the PID of the currently running daemon from the PID file, verifying it matches the expected daemon.

        Returns:
            int or None: Returns the PID if the process is running and matches the expected daemon; otherwise, returns None.
        """
        try:
            with open(self.PID_FILE_PATH, 'r') as pid_file:
                pid = int(pid_file.read().strip())
            # Guard against stale PID files: the PID must exist AND belong to
            # the memory statistics daemon, not a recycled PID.
            if psutil.pid_exists(pid):
                process = psutil.Process(pid)
                if process.name() == self.DAEMON_PROCESS_NAME:
                    return pid
                else:
                    syslog.syslog(syslog.LOG_WARNING, f"MemoryStatisticsCfg: PID {pid} does not correspond to {self.DAEMON_PROCESS_NAME}.")
            else:
                syslog.syslog(syslog.LOG_WARNING, "MemoryStatisticsCfg: PID does not exist.")
        except FileNotFoundError:
            syslog.syslog(syslog.LOG_WARNING, "MemoryStatisticsCfg: PID file not found. Daemon might not be running.")
        except ValueError:
            syslog.syslog(syslog.LOG_ERR, "MemoryStatisticsCfg: PID file contents invalid.")
        except Exception as e:
            syslog.syslog(syslog.LOG_ERR, f"MemoryStatisticsCfg: {type(e).__name__} failed to retrieve MemoryStatisticsDaemon PID: {e}")
        return None
syslog.syslog(syslog.LOG_INFO, 'Memory_Statistics handler...') + try: + self.memorystatisticscfg.memory_statistics_update(key, data) + except Exception as e: + syslog.syslog(syslog.LOG_ERR, f"MemoryStatisticsCfg: Error while handling memory statistics update: {e}") + def device_metadata_handler(self, key, op, data): syslog.syslog(syslog.LOG_INFO, 'DeviceMeta handler...') self.devmetacfg.hostname_update(data) @@ -2156,6 +2331,7 @@ class HostConfigDaemon: self.config_db.subscribe('LDAP_SERVER', make_callback(self.ldap_server_handler)) self.config_db.subscribe('PASSW_HARDENING', make_callback(self.passwh_handler)) self.config_db.subscribe('SSH_SERVER', make_callback(self.ssh_handler)) + self.config_db.subscribe('MEMORY_STATISTICS',make_callback(self.memory_statistics_handler)) # Handle SERIAL_CONSOLE self.config_db.subscribe('SERIAL_CONSOLE', make_callback(self.serial_console_config_handler)) # Handle IPTables configuration @@ -2170,7 +2346,7 @@ class HostConfigDaemon: # Handle DEVICE_MEATADATA changes self.config_db.subscribe(swsscommon.CFG_DEVICE_METADATA_TABLE_NAME, make_callback(self.device_metadata_handler)) - + # Handle MGMT_VRF_CONFIG changes self.config_db.subscribe(swsscommon.CFG_MGMT_VRF_CONFIG_TABLE_NAME, make_callback(self.mgmt_vrf_handler)) @@ -2221,4 +2397,3 @@ def main(): if __name__ == "__main__": main() - diff --git a/tests/hostcfgd/hostcfgd_test.py b/tests/hostcfgd/hostcfgd_test.py index 9ec3f658..fbb432f7 100644 --- a/tests/hostcfgd/hostcfgd_test.py +++ b/tests/hostcfgd/hostcfgd_test.py @@ -1,17 +1,17 @@ import os import sys import time +import signal +import psutil import swsscommon as swsscommon_package from sonic_py_common import device_info from swsscommon import swsscommon - from parameterized import parameterized from sonic_py_common.general import load_module_from_source from unittest import TestCase, mock from .test_vectors import HOSTCFG_DAEMON_INIT_CFG_DB, HOSTCFG_DAEMON_CFG_DB from tests.common.mock_configdb import MockConfigDb, 
class TestMemoryStatisticsCfgd(TestCase):
    """Test suite for MemoryStatisticsCfg, which handles memory statistics
    configuration and daemon management."""

    def setUp(self):
        """Set up a default MEMORY_STATISTICS config and a fresh MemoryStatisticsCfg."""
        MockConfigDb.CONFIG_DB['MEMORY_STATISTICS'] = {
            'memory_statistics': {
                'enabled': 'false',
                'sampling_interval': '5',
                'retention_period': '15'
            }
        }
        self.mem_stat_cfg = hostcfgd.MemoryStatisticsCfg(MockConfigDb.CONFIG_DB)

    def tearDown(self):
        """Clean up the mock config DB after each test case."""
        MockConfigDb.CONFIG_DB = {}

    # Group 1: Configuration Loading Tests
    def test_load_with_invalid_key(self):
        """Loading a config with an unknown key must log the invalid key."""
        config = {'invalid_key': 'value', 'enabled': 'true'}
        with mock.patch('hostcfgd.syslog.syslog') as mock_syslog:
            self.mem_stat_cfg.load(config)
            mock_syslog.assert_any_call(mock.ANY, "MemoryStatisticsCfg: Invalid key 'invalid_key' in initial configuration.")

    def test_load_with_empty_config(self):
        """Loading None must still log that initial configuration is being loaded."""
        with mock.patch('hostcfgd.syslog.syslog') as mock_syslog:
            self.mem_stat_cfg.load(None)
            mock_syslog.assert_any_call(mock.ANY, "MemoryStatisticsCfg: Loading initial configuration")

    # Group 2: Configuration Update Tests
    def test_memory_statistics_update_invalid_key(self):
        """Updating an unknown key must log the invalid key and change nothing."""
        with mock.patch('hostcfgd.syslog.syslog') as mock_syslog:
            self.mem_stat_cfg.memory_statistics_update('invalid_key', 'value')
            mock_syslog.assert_any_call(mock.ANY, "MemoryStatisticsCfg: Invalid key 'invalid_key' received.")

    def test_memory_statistics_update_invalid_numeric_value(self):
        """Numeric settings must reject non-positive values with a log message."""
        with mock.patch('hostcfgd.syslog.syslog') as mock_syslog:
            self.mem_stat_cfg.memory_statistics_update('sampling_interval', '-1')
            mock_syslog.assert_any_call(mock.ANY, "MemoryStatisticsCfg: Invalid value '-1' for key 'sampling_interval'. Must be a positive integer.")

    def test_memory_statistics_update_same_value(self):
        """Re-applying the cached value must not trigger apply_setting."""
        with mock.patch.object(self.mem_stat_cfg, 'apply_setting') as mock_apply:
            self.mem_stat_cfg.memory_statistics_update('sampling_interval', '5')
            mock_apply.assert_not_called()

    # Group 3: Daemon Management Tests
    @mock.patch('hostcfgd.subprocess.Popen')
    @mock.patch('hostcfgd.os.kill')
    def test_restart_memory_statistics_success(self, mock_kill, mock_popen):
        """Restart must SIGTERM the old daemon and spawn a new process."""
        with mock.patch('hostcfgd.syslog.syslog'):
            with mock.patch.object(self.mem_stat_cfg, 'get_memory_statistics_pid', return_value=123):
                self.mem_stat_cfg.restart_memory_statistics()
                mock_kill.assert_called_with(123, signal.SIGTERM)
                mock_popen.assert_called_once()

    @mock.patch('hostcfgd.subprocess.Popen')
    def test_restart_memory_statistics_failure(self, mock_popen):
        """A Popen failure during restart must be logged, not raised."""
        mock_popen.side_effect = Exception("Failed to start")
        with mock.patch('hostcfgd.syslog.syslog') as mock_syslog:
            self.mem_stat_cfg.restart_memory_statistics()
            mock_syslog.assert_any_call(mock.ANY, "MemoryStatisticsCfg: Failed to start MemoryStatisticsDaemon: Failed to start")

    # Group 4: PID Management Tests
    def test_get_memory_statistics_pid_success(self):
        """A valid PID file pointing at the daemon process returns the PID."""
        mock_process = mock.Mock()
        mock_process.name.return_value = "memory_statistics_service.py"

        with mock.patch('builtins.open', mock.mock_open(read_data="123")), \
                mock.patch('hostcfgd.psutil.pid_exists', return_value=True), \
                mock.patch('hostcfgd.psutil.Process', return_value=mock_process):
            pid = self.mem_stat_cfg.get_memory_statistics_pid()
            self.assertEqual(pid, 123)

    def test_get_memory_statistics_pid_file_not_found(self):
        """A missing PID file returns None and logs the condition."""
        with mock.patch('builtins.open', side_effect=FileNotFoundError):
            with mock.patch('hostcfgd.syslog.syslog') as mock_syslog:
                pid = self.mem_stat_cfg.get_memory_statistics_pid()
                self.assertIsNone(pid)
                mock_syslog.assert_any_call(mock.ANY, "MemoryStatisticsCfg: PID file not found. Daemon might not be running.")

    def test_get_memory_statistics_pid_invalid_content(self):
        """Non-numeric PID file contents return None and log the condition."""
        mock_open = mock.mock_open(read_data="invalid")
        with mock.patch('builtins.open', mock_open):
            with mock.patch('hostcfgd.syslog.syslog') as mock_syslog:
                pid = self.mem_stat_cfg.get_memory_statistics_pid()
                self.assertIsNone(pid)
                mock_syslog.assert_any_call(mock.ANY, "MemoryStatisticsCfg: PID file contents invalid.")

    @mock.patch('hostcfgd.psutil.pid_exists', return_value=True)
    @mock.patch('hostcfgd.psutil.Process')
    def test_get_memory_statistics_pid_wrong_process(self, mock_process, mock_pid_exists):
        """A live PID belonging to a different process returns None and logs it."""
        mock_process_instance = mock.Mock()
        mock_process_instance.name.return_value = "wrong_process"
        mock_process.return_value = mock_process_instance

        mock_open = mock.mock_open(read_data="123")
        with mock.patch('builtins.open', mock_open):
            with mock.patch('hostcfgd.syslog.syslog') as mock_syslog:
                pid = self.mem_stat_cfg.get_memory_statistics_pid()
                self.assertIsNone(pid)
                mock_syslog.assert_any_call(mock.ANY, "MemoryStatisticsCfg: PID 123 does not correspond to memory_statistics_service.py.")

    @mock.patch('hostcfgd.psutil.pid_exists', return_value=False)
    def test_get_memory_statistics_pid_nonexistent(self, mock_pid_exists):
        """A PID that no longer exists returns None and logs it."""
        mock_open = mock.mock_open(read_data="123")
        with mock.patch('builtins.open', mock_open):
            with mock.patch('hostcfgd.syslog.syslog') as mock_syslog:
                pid = self.mem_stat_cfg.get_memory_statistics_pid()
                self.assertIsNone(pid)
                mock_syslog.assert_any_call(mock.ANY, "MemoryStatisticsCfg: PID does not exist.")

    # Group 5: Enable/Disable Tests
    def test_memory_statistics_enable(self):
        """Enabling must restart the daemon and update the cache."""
        with mock.patch.object(self.mem_stat_cfg, 'restart_memory_statistics') as mock_restart:
            self.mem_stat_cfg.memory_statistics_update('enabled', 'true')
            mock_restart.assert_called_once()
            self.assertEqual(self.mem_stat_cfg.cache['enabled'], 'true')

    def test_apply_setting_with_non_enabled_key(self):
        """Changing sampling_interval/retention_period must reload, not restart."""
        with mock.patch.object(self.mem_stat_cfg, 'reload_memory_statistics') as mock_reload:
            self.mem_stat_cfg.apply_setting('sampling_interval', '10')
            mock_reload.assert_called_once()

    def test_apply_setting_with_enabled_false(self):
        """apply_setting('enabled', 'false') must shut the daemon down."""
        with mock.patch.object(self.mem_stat_cfg, 'shutdown_memory_statistics') as mock_shutdown:
            self.mem_stat_cfg.apply_setting('enabled', 'false')
            mock_shutdown.assert_called_once()

    def test_memory_statistics_disable(self):
        """Disabling must go through apply_setting and update the cache."""
        self.mem_stat_cfg.cache['enabled'] = 'true'
        with mock.patch.object(self.mem_stat_cfg, 'apply_setting') as mock_apply:
            self.mem_stat_cfg.memory_statistics_update('enabled', 'false')
            mock_apply.assert_called_once_with('enabled', 'false')
            self.assertEqual(self.mem_stat_cfg.cache['enabled'], 'false')

    def test_memory_statistics_disable_with_shutdown(self):
        """Disabling a running daemon must SIGTERM it and wait for shutdown."""
        self.mem_stat_cfg.cache['enabled'] = 'true'

        with mock.patch.object(self.mem_stat_cfg, 'get_memory_statistics_pid', return_value=123) as mock_get_pid, \
                mock.patch('hostcfgd.os.kill') as mock_kill, \
                mock.patch.object(self.mem_stat_cfg, 'wait_for_shutdown') as mock_wait:

            self.mem_stat_cfg.memory_statistics_update('enabled', 'false')

            mock_get_pid.assert_called_once()
            mock_kill.assert_called_once_with(123, signal.SIGTERM)
            mock_wait.assert_called_once_with(123)

        self.assertEqual(self.mem_stat_cfg.cache['enabled'], 'false')

    def test_memory_statistics_disable_no_running_daemon(self):
        """Disabling with no running daemon must not attempt a kill."""
        self.mem_stat_cfg.cache['enabled'] = 'true'

        with mock.patch.object(self.mem_stat_cfg, 'get_memory_statistics_pid', return_value=None) as mock_get_pid:
            self.mem_stat_cfg.memory_statistics_update('enabled', 'false')
            mock_get_pid.assert_called_once()

        self.assertEqual(self.mem_stat_cfg.cache['enabled'], 'false')

    # Group 6: Reload Tests
    def test_reload_memory_statistics_success(self):
        """Reload must deliver SIGHUP to the running daemon."""
        with mock.patch.object(self.mem_stat_cfg, 'get_memory_statistics_pid', return_value=123), \
                mock.patch('hostcfgd.os.kill') as mock_kill, \
                mock.patch('hostcfgd.syslog.syslog'):
            self.mem_stat_cfg.reload_memory_statistics()
            mock_kill.assert_called_once_with(123, signal.SIGHUP)

    def test_reload_memory_statistics_no_pid(self):
        """Reload with no running daemon must not signal anything."""
        with mock.patch.object(self.mem_stat_cfg, 'get_memory_statistics_pid', return_value=None), \
                mock.patch('hostcfgd.os.kill') as mock_kill:
            self.mem_stat_cfg.reload_memory_statistics()
            mock_kill.assert_not_called()

    def test_reload_memory_statistics_failure(self):
        """A kill failure during reload must be logged, not raised."""
        with mock.patch.object(self.mem_stat_cfg, 'get_memory_statistics_pid', return_value=123) as mock_get_pid, \
                mock.patch('hostcfgd.os.kill', side_effect=Exception("Test error")), \
                mock.patch('hostcfgd.syslog.syslog') as mock_syslog:

            self.mem_stat_cfg.reload_memory_statistics()

            mock_get_pid.assert_called_once()
            mock_syslog.assert_any_call(mock.ANY, "MemoryStatisticsCfg: Failed to reload MemoryStatisticsDaemon: Test error")

    # Group 7: Shutdown Tests
    def test_shutdown_memory_statistics_success(self):
        """Shutdown must SIGTERM the daemon process."""
        with mock.patch.object(self.mem_stat_cfg, 'get_memory_statistics_pid', return_value=123), \
                mock.patch('hostcfgd.os.kill') as mock_kill, \
                mock.patch.object(self.mem_stat_cfg, 'wait_for_shutdown'), \
                mock.patch('hostcfgd.syslog.syslog'):
            self.mem_stat_cfg.shutdown_memory_statistics()
            mock_kill.assert_called_once_with(123, signal.SIGTERM)

    def test_wait_for_shutdown_timeout(self):
        """A daemon ignoring SIGTERM must produce a timeout log, not a crash."""
        mock_process = mock.Mock()
        mock_process.wait.side_effect = psutil.TimeoutExpired(123, 10)
        with mock.patch('hostcfgd.psutil.Process', return_value=mock_process), \
                mock.patch('hostcfgd.syslog.syslog') as mock_syslog:
            self.mem_stat_cfg.wait_for_shutdown(123)
            mock_syslog.assert_any_call(mock.ANY, "MemoryStatisticsCfg: Timed out while waiting for daemon (PID 123) to shut down.")

    @mock.patch('hostcfgd.psutil.Process')
    def test_wait_for_shutdown_no_process(self, mock_process):
        """Waiting on a process that already exited must be handled gracefully."""
        mock_process.side_effect = psutil.NoSuchProcess(123)

        with mock.patch('hostcfgd.syslog.syslog') as mock_syslog:
            self.mem_stat_cfg.wait_for_shutdown(123)
            mock_syslog.assert_any_call(mock.ANY, "MemoryStatisticsCfg: MemoryStatisticsDaemon process not found.")

    def test_shutdown_memory_statistics_failure(self):
        """A kill failure during shutdown must be logged, not raised."""
        with mock.patch.object(self.mem_stat_cfg, 'get_memory_statistics_pid', return_value=123) as mock_get_pid, \
                mock.patch('hostcfgd.os.kill', side_effect=Exception("Test error")), \
                mock.patch('hostcfgd.syslog.syslog') as mock_syslog:

            self.mem_stat_cfg.shutdown_memory_statistics()

            mock_get_pid.assert_called_once()
            mock_syslog.assert_any_call(mock.ANY, "MemoryStatisticsCfg: Failed to shutdown MemoryStatisticsDaemon: Test error")

    def test_wait_for_shutdown_success(self):
        """A clean exit within the timeout must log a graceful stop."""
        mock_process = mock.Mock()
        with mock.patch('hostcfgd.psutil.Process', return_value=mock_process) as mock_process_class, \
                mock.patch('hostcfgd.syslog.syslog') as mock_syslog:

            self.mem_stat_cfg.wait_for_shutdown(123)

            mock_process_class.assert_called_once_with(123)
            mock_process.wait.assert_called_once_with(timeout=10)
            mock_syslog.assert_any_call(mock.ANY, "MemoryStatisticsCfg: MemoryStatisticsDaemon stopped gracefully")

    # Group 8: Error Handling Tests
    def test_memory_statistics_update_exception_handling(self):
        """An apply_setting failure during update must be logged, not raised."""
        with mock.patch.object(self.mem_stat_cfg, 'apply_setting', side_effect=Exception("Test error")), \
                mock.patch('hostcfgd.syslog.syslog') as mock_syslog:
            self.mem_stat_cfg.memory_statistics_update('enabled', 'true')
            mock_syslog.assert_any_call(mock.ANY, "MemoryStatisticsCfg: Failed to manage MemoryStatisticsDaemon: Test error")

    def test_apply_setting_exception(self):
        """A restart failure inside apply_setting must be logged with the key."""
        with mock.patch.object(self.mem_stat_cfg, 'restart_memory_statistics',
                               side_effect=Exception("Test error")):
            with mock.patch('hostcfgd.syslog.syslog') as mock_syslog:
                self.mem_stat_cfg.apply_setting('enabled', 'true')
                mock_syslog.assert_any_call(mock.ANY,
                                            "MemoryStatisticsCfg: Exception in apply_setting() for key 'enabled': Test error")

    @mock.patch('hostcfgd.psutil.Process')
    def test_get_memory_statistics_pid_exception(self, mock_process):
        """An unexpected psutil failure must return None and log the error."""
        mock_process.side_effect = Exception("Unexpected error")
        mock_open = mock.mock_open(read_data="123")

        with mock.patch('hostcfgd.psutil.pid_exists', return_value=True):
            with mock.patch('builtins.open', mock_open):
                with mock.patch('hostcfgd.syslog.syslog') as mock_syslog:
                    pid = self.mem_stat_cfg.get_memory_statistics_pid()
                    self.assertIsNone(pid)
                    mock_syslog.assert_any_call(mock.ANY,
                                                "MemoryStatisticsCfg: Exception failed to retrieve MemoryStatisticsDaemon PID: Unexpected error")

    def test_memory_statistics_handler_exception(self):
        """An update failure inside the daemon-level handler must be logged."""
        daemon = hostcfgd.HostConfigDaemon()
        with mock.patch.object(daemon.memorystatisticscfg, 'memory_statistics_update',
                               side_effect=Exception("Handler error")):
            with mock.patch('hostcfgd.syslog.syslog') as mock_syslog:
                daemon.memory_statistics_handler('enabled', None, 'true')
                mock_syslog.assert_any_call(mock.ANY,
                                            "MemoryStatisticsCfg: Error while handling memory statistics update: Handler error")

    @mock.patch('hostcfgd.psutil.Process')
    def test_wait_for_shutdown_general_exception(self, mock_process):
        """A non-psutil failure in wait_for_shutdown must be logged, not raised."""
        mock_process.side_effect = Exception("Unexpected shutdown error")
        with mock.patch('hostcfgd.syslog.syslog') as mock_syslog:
            self.mem_stat_cfg.wait_for_shutdown(123)
            mock_syslog.assert_any_call(mock.ANY,
                                        "MemoryStatisticsCfg: Exception in wait_for_shutdown(): Unexpected shutdown error")

    # NOTE(review): this duplicates test_get_memory_statistics_pid_wrong_process
    # (same mocks, same assertion, only the fake process name differs);
    # consider consolidating the two.
    def test_process_name_mismatch(self):
        """A PID whose process name does not match the daemon must return None."""
        mock_process = mock.Mock()
        mock_process.name.return_value = "wrong_process_name"

        with mock.patch('builtins.open', mock.mock_open(read_data="123")), \
                mock.patch('hostcfgd.psutil.pid_exists', return_value=True), \
                mock.patch('hostcfgd.psutil.Process', return_value=mock_process), \
                mock.patch('hostcfgd.syslog.syslog') as mock_syslog:
            pid = self.mem_stat_cfg.get_memory_statistics_pid()
            self.assertIsNone(pid)
            mock_syslog.assert_any_call(mock.ANY, "MemoryStatisticsCfg: PID 123 does not correspond to memory_statistics_service.py.")
Dec 2024 11:20:50 +0800 Subject: [PATCH 9/9] Implement run function for docker_services * DBUS Services required for GNOI Containerz StartContainer. * Add a placeholder function that prevent arbitrary request to run any container. * Update to only allow running known images. * Rewrite image validation so it get recognized by semgrep. * Rewrite command validation so it get recognize by semgrep. * maybe semgrep will recognize inline function * semgrep only allow hardcoded image name. We need to bypass it. * add documentation. * documentation need to be before nosemgrep * Add allowed_image_name and use it to verify the run function. * address copilot comment. * address comment and reformat. * increase test coverage and address comment. * Seperate allowed images and allowed containers. * Address comment. * add bmp container to allowed list. * add validation to kwargs in run. * fix test error. * update allow list --- host_modules/docker_service.py | 240 ++++++++++++++++------ tests/host_modules/docker_service_test.py | 80 ++++++++ 2 files changed, 255 insertions(+), 65 deletions(-) diff --git a/host_modules/docker_service.py b/host_modules/docker_service.py index f1c7fc8c..a88ed368 100644 --- a/host_modules/docker_service.py +++ b/host_modules/docker_service.py @@ -4,44 +4,108 @@ import docker import signal import errno +import logging MOD_NAME = "docker_service" # The set of allowed containers that can be managed by this service. -# First element is the image name, second element is the container name. 
# The set of allowed containers that can be managed by this service.
ALLOWED_CONTAINERS = {
    "bgp",
    "bmp",
    "database",
    "dhcp_relay",
    "eventd",
    "gnmi",
    "lldp",
    "pmon",
    "radv",
    "restapi",
    "snmp",
    "swss",
    "syncd",
    "teamd",
    "telemetry",
}

# The set of allowed images that can be managed by this service.
ALLOWED_IMAGES = {
    "docker-database",
    "docker-dhcp-relay",
    "docker-eventd",
    "docker-fpm-frr",
    "docker-lldp",
    "docker-orchagent",
    "docker-platform-monitor",
    "docker-router-advertiser",
    "docker-snmp",
    "docker-sonic-bmp",
    "docker-sonic-gnmi",
    "docker-sonic-restapi",
    "docker-sonic-telemetry",
    "docker-syncd-brcm",
    "docker-syncd-cisco",
    "docker-teamd",
}


def is_allowed_image(image):
    """
    Check if the image is allowed to be managed by this service.

    Args:
        image (str): The image name, optionally suffixed with ":tag".

    Returns:
        bool: True if the image is allowed, False otherwise.
    """
    image_name = image.split(":")[0]  # Remove tag if present
    return image_name in ALLOWED_IMAGES


def get_sonic_container(container_id):
    """
    Get a SONiC docker container by name.

    Args:
        container_id (str): The container name.

    Returns:
        The docker container object.

    Raises:
        PermissionError: if the container is not in ALLOWED_CONTAINERS.
        docker.errors.NotFound: if the container does not exist.
    """
    client = docker.from_env()
    if container_id not in ALLOWED_CONTAINERS:
        raise PermissionError(
            "Container {} is not allowed to be managed by this service.".format(
                container_id
            )
        )
    container = client.containers.get(container_id)
    return container


def validate_docker_run_options(kwargs):
    """
    Validate the keyword arguments passed to the Docker container run API.

    Raises:
        ValueError: if any option is considered unsafe.
    """
    # Disallow privileged mode for security reasons.
    if kwargs.get("privileged", False):
        raise ValueError("Privileged mode is not allowed for security reasons.")
    # Disallow sensitive directories to be mounted.
    # NOTE(review): a plain prefix test also rejects e.g. "/etcfoo";
    # over-blocking is the safe direction here, so this is left as-is.
    sensitive_dirs = ["/etc", "/var", "/usr"]
    for bind in kwargs.get("volumes", {}).keys():
        for sensitive_dir in sensitive_dirs:
            if bind.startswith(sensitive_dir):
                raise ValueError(
                    "Mounting sensitive directories is not allowed for security reasons."
                )
    # Disallow running containers as root.
    # NOTE(review): only the literal user name "root" is rejected; uid "0"
    # would still pass -- confirm whether that should be blocked as well.
    if kwargs.get("user", None) == "root":
        raise ValueError(
            "Running containers as root is not allowed for security reasons."
        )
    # Disallow cap_add for security reasons.
    if kwargs.get("cap_add", None):
        raise ValueError(
            "Adding capabilities to containers is not allowed for security reasons."
        )
    # Disallow access to sensitive devices.
    if kwargs.get("devices", None):
        raise ValueError("Access to devices is not allowed for security reasons.")


class DockerService(host_service.HostModule):
    """DBus endpoint that manages the lifecycle of allowed SONiC docker containers."""

    @host_service.method(
        host_service.bus_name(MOD_NAME), in_signature="s", out_signature="is"
    )
    def stop(self, container_id):
        """
        Stop a running Docker container.

        Args:
            container_id (str): The name of the Docker container.

        Returns:
            tuple: A tuple containing the exit code (int) and a message indicating the result of the operation.
        """
        try:
            container = get_sonic_container(container_id)
            container.stop()
            return 0, "Container {} has been stopped.".format(container.name)
        except PermissionError:
            msg = "Container {} is not allowed to be managed by this service.".format(
                container_id
            )
            logging.error(msg)
            return errno.EPERM, msg
        except docker.errors.NotFound:
            msg = "Container {} does not exist.".format(container_id)
            logging.error(msg)
            return errno.ENOENT, msg
        except Exception as e:
            msg = "Failed to stop container {}: {}".format(container_id, str(e))
            logging.error(msg)
            return 1, msg

    @host_service.method(
        host_service.bus_name(MOD_NAME), in_signature="si", out_signature="is"
    )
    def kill(self, container_id, signal=signal.SIGKILL):
        """
        Kill or send a signal to a running Docker container.

        Args:
            container_id (str): The name or ID of the Docker container.
            signal (int): The signal to send. Defaults to SIGKILL.

        Returns:
            tuple: A tuple containing the exit code (int) and a message indicating the result of the operation.
        """
        try:
            container = get_sonic_container(container_id)
            container.kill(signal=signal)
            return 0, "Container {} has been killed with signal {}.".format(
                container.name, signal
            )
        except PermissionError:
            msg = "Container {} is not allowed to be managed by this service.".format(
                container_id
            )
            logging.error(msg)
            return errno.EPERM, msg
        except docker.errors.NotFound:
            msg = "Container {} does not exist.".format(container_id)
            logging.error(msg)
            return errno.ENOENT, msg
        except Exception as e:
            # Log before returning, consistent with stop().
            msg = "Failed to kill container {}: {}".format(container_id, str(e))
            logging.error(msg)
            return 1, msg

    @host_service.method(
        host_service.bus_name(MOD_NAME), in_signature="s", out_signature="is"
    )
    def restart(self, container_id):
        """
        Restart a running Docker container.

        Args:
            container_id (str): The name or ID of the Docker container.

        Returns:
            tuple: A tuple containing the exit code (int) and a message indicating the result of the operation.
        """
        try:
            container = get_sonic_container(container_id)
            container.restart()
            return 0, "Container {} has been restarted.".format(container.name)
        except PermissionError:
            # Log errors like stop()/kill() do, for consistent diagnostics.
            msg = "Container {} is not allowed to be managed by this service.".format(
                container_id
            )
            logging.error(msg)
            return errno.EPERM, msg
        except docker.errors.NotFound:
            msg = "Container {} does not exist.".format(container_id)
            logging.error(msg)
            return errno.ENOENT, msg
        except Exception as e:
            msg = "Failed to restart container {}: {}".format(container_id, str(e))
            logging.error(msg)
            return 1, msg

    @host_service.method(
        host_service.bus_name(MOD_NAME), in_signature="ssa{sv}", out_signature="is"
    )
    def run(self, image, command, kwargs):
        """
        Run a Docker container from an allowed image.

        Args:
            image (str): The name of the Docker image to run. Must be in ALLOWED_IMAGES.
            command (str): The command to run in the container. Only an empty string is permitted.
            kwargs (dict): Additional keyword arguments to pass to the Docker API.

        Returns:
            tuple: A tuple containing the exit code (int) and a message indicating the result of the operation.
        """
        try:
            client = docker.from_env()

            if not is_allowed_image(image):
                msg = "Image {} is not allowed to be managed by this service.".format(
                    image
                )
                logging.error(msg)
                return errno.EPERM, msg

            if command:
                return (
                    errno.EPERM,
                    "Only an empty string command is allowed. Non-empty commands are not permitted by this service.",
                )

            validate_docker_run_options(kwargs)

            # Semgrep cannot detect codes for validating image and command.
            # nosemgrep: python.docker.security.audit.docker-arbitrary-container-run.docker-arbitrary-container-run
            container = client.containers.run(image, command, **kwargs)
            return 0, "Container {} has been created.".format(container.name)
        except ValueError as e:
            # Bug fix: the original returned "Invalid argument.".format(str(e)),
            # a no-op format that silently discarded the validation detail.
            msg = "Invalid argument: {}".format(e)
            logging.error(msg)
            return errno.EINVAL, msg
        except docker.errors.ImageNotFound:
            msg = "Image {} not found.".format(image)
            logging.error(msg)
            return errno.ENOENT, msg
        except Exception as e:
            msg = "Failed to run image {}: {}".format(image, str(e))
            logging.error(msg)
            return 1, msg
mock_docker_client.containers.run.side_effect = docker.errors.ImageNotFound( + "Image not found" + ) + + with mock.patch.object(docker, "from_env", return_value=mock_docker_client): + docker_service = DockerService(MOD_NAME) + rc, msg = docker_service.run("docker-syncd-brcm:latest", "", {}) + + assert rc == errno.ENOENT, "Return code is wrong" + assert "not found" in msg, "Message should contain 'not found'" + mock_docker_client.containers.run.assert_called_once_with( + "docker-syncd-brcm:latest", "", **{} + ) + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + def test_docker_run_fail_api_error(self, MockInit, MockBusName, MockSystemBus): + mock_docker_client = mock.Mock() + mock_docker_client.containers.run.side_effect = docker.errors.APIError( + "API error" + ) + + with mock.patch.object(docker, "from_env", return_value=mock_docker_client): + docker_service = DockerService(MOD_NAME) + rc, msg = docker_service.run("docker-syncd-brcm:latest", "", {}) + + assert rc != 0, "Return code is wrong" + assert "API error" in msg, "Message should contain 'API error'" + mock_docker_client.containers.run.assert_called_once_with( + "docker-syncd-brcm:latest", "", **{} + ) + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + def test_docker_run_fail_image_not_allowed( + self, MockInit, MockBusName, MockSystemBus + ): + mock_docker_client = mock.Mock() + with mock.patch.object(docker, "from_env", return_value=mock_docker_client): + docker_service = DockerService(MOD_NAME) + rc, msg = docker_service.run("wrong_image_name", "", {}) + assert rc == errno.EPERM, "Return code is wrong" + + @mock.patch("dbus.SystemBus") + @mock.patch("dbus.service.BusName") + @mock.patch("dbus.service.Object.__init__") + def test_docker_run_fail_non_empty_command( + self, MockInit, MockBusName, MockSystemBus + ): + mock_docker_client = mock.Mock() + with 
mock.patch.object(docker, "from_env", return_value=mock_docker_client): + docker_service = DockerService(MOD_NAME) + rc, msg = docker_service.run("docker-syncd-brcm:latest", "rm -rf /", {}) + assert rc == errno.EPERM, "Return code is wrong"