From 5811c7446bd5ee07c181b516dbceed610e11492d Mon Sep 17 00:00:00 2001
From: Andrei Matveyeu
Date: Tue, 15 Oct 2024 07:00:49 +0200
Subject: [PATCH 1/2] Revert "Remove testrunner validation retries and cache (#86)"

This reverts commit 5418166454e874a9567ea820fb05349417f4f9e4.

Deviations from the plain revert:
- the retry loop and the final assert now use the same success test
  (digest is not None), so a value the assert accepts is no longer
  retried five times and left uncached
- no back-off sleep after the final attempt; a failing validation is
  reported without an extra 25 second delay
- the ':param timestamp:' docstring of set_timestamp describes the
  timestamp instead of repeating the container name description
---
Note: hunk line counts and the diffstat are updated for the deviations
above; the blob ids on the 'index' lines are those of the original series.

 python/src/etos_api/library/validator.py | 99 +++++++++++++++++++++++-
 1 file changed, 96 insertions(+), 3 deletions(-)

diff --git a/python/src/etos_api/library/validator.py b/python/src/etos_api/library/validator.py
index 5e63bec..c35c530 100644
--- a/python/src/etos_api/library/validator.py
+++ b/python/src/etos_api/library/validator.py
@@ -15,6 +15,8 @@
 # limitations under the License.
 """ETOS API suite validator module."""
 import logging
+import asyncio
+import time
 from typing import List, Union
 from uuid import UUID
 
@@ -24,12 +26,82 @@
 from pydantic import BaseModel  # pylint:disable=no-name-in-module
 from pydantic import ValidationError, conlist, constr, field_validator
 from pydantic.fields import PrivateAttr
+from opentelemetry import trace
 
 from etos_api.library.docker import Docker
 
 # pylint:disable=too-few-public-methods
 
 
+class TestRunnerValidationCache:
+    """Lazy test runner validation via in-memory cache."""
+
+    # Cache for lazy testrunner validation. Keys: container names, values: timestamp.
+    # Only passed validations are cached.
+    TESTRUNNER_VALIDATION_CACHE = {}
+    TESTRUNNER_VALIDATION_WINDOW = 1800  # seconds
+
+    lock = asyncio.Lock()
+
+    @classmethod
+    async def get_timestamp(cls, test_runner: str) -> Union[float, None]:
+        """Get latest passed validation timestamp for the given testrunner.
+
+        :param test_runner: test runner container name
+        :type test_runner: str
+        :return: validation timestamp or none if not found
+        :rtype: float or NoneType
+        """
+        async with cls.lock:
+            if test_runner in cls.TESTRUNNER_VALIDATION_CACHE:
+                return cls.TESTRUNNER_VALIDATION_CACHE[test_runner]
+            return None
+
+    @classmethod
+    async def set_timestamp(cls, test_runner: str, timestamp: float) -> None:
+        """Set passed validation timestamp for the given testrunner.
+
+        :param test_runner: test runner container name
+        :type test_runner: str
+        :param timestamp: time of the passed validation (seconds since the epoch)
+        :type timestamp: float
+        :return: none
+        :rtype: NoneType
+        """
+        async with cls.lock:
+            cls.TESTRUNNER_VALIDATION_CACHE[test_runner] = timestamp
+
+    @classmethod
+    async def remove(cls, test_runner: str) -> None:
+        """Remove the given test runner from the validation cache.
+
+        :param test_runner: test runner container name
+        :type test_runner: str
+        :return: none
+        :rtype: NoneType
+        """
+        async with cls.lock:
+            if test_runner in cls.TESTRUNNER_VALIDATION_CACHE:
+                del cls.TESTRUNNER_VALIDATION_CACHE[test_runner]
+
+    @classmethod
+    async def is_test_runner_valid(cls, test_runner: str) -> bool:
+        """Determine if the given test runner is valid.
+
+        :param test_runner: test runner container name
+        :type test_runner: str
+        :return: validation result from cache
+        :rtype: bool
+        """
+        timestamp = await cls.get_timestamp(test_runner)
+        if timestamp is None:
+            return False
+        if (timestamp + cls.TESTRUNNER_VALIDATION_WINDOW) > time.time():
+            return True
+        await cls.remove(test_runner)
+        return False
+
+
 class Environment(BaseModel):
     """ETOS suite definion 'ENVIRONMENT' constraint."""
 
@@ -179,6 +251,7 @@ async def validate(self, test_suite_url):
         :type test_suite_url: str
         :raises ValidationError: If the suite did not validate.
         """
+        span = trace.get_current_span()
         downloaded_suite = await self._download_suite(test_suite_url)
         assert (
             len(downloaded_suite) > 0
@@ -194,6 +267,26 @@ async def validate(self, test_suite_url):
                         test_runners.add(constraint.value)
             docker = Docker()
             for test_runner in test_runners:
-                assert (
-                    await docker.digest(test_runner) is not None
-                ), f"Test runner {test_runner} not found"
+                if await TestRunnerValidationCache.is_test_runner_valid(test_runner):
+                    self.logger.info("Using cached test runner validation result: %s", test_runner)
+                    continue
+                max_attempts = 5
+                for attempt in range(max_attempts):
+                    if attempt > 0:
+                        span.add_event(f"Test runner validation unsuccessful, retry #{attempt}")
+                        self.logger.warning(
+                            "Test runner %s validation unsuccessful, retry #%d",
+                            test_runner,
+                            attempt,
+                        )
+                    result = await docker.digest(test_runner)
+                    if result is not None:
+                        # only passed validations shall be cached
+                        await TestRunnerValidationCache.set_timestamp(test_runner, time.time())
+                        break
+                    if attempt < max_attempts - 1:
+                        # Quadratic back-off between attempts (1 + 4 + 9 + 16 = 30 seconds
+                        # in total); sleeping after the final attempt only delays the error.
+                        await asyncio.sleep((attempt + 1) ** 2)
+
+                assert result is not None, f"Test runner {test_runner} not found"

From 32f0d3e3d16fd876073fa7f8a534904b0a76c681 Mon Sep 17 00:00:00 2001
From: Andrei Matveyeu
Date: Tue, 15 Oct 2024 07:03:10 +0200
Subject: [PATCH 2/2] extend TESTRUNNER_VALIDATION_WINDOW to 1 week

---
 python/src/etos_api/library/validator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/src/etos_api/library/validator.py b/python/src/etos_api/library/validator.py
index c35c530..165b14e 100644
--- a/python/src/etos_api/library/validator.py
+++ b/python/src/etos_api/library/validator.py
@@ -39,7 +39,7 @@ class TestRunnerValidationCache:
     # Cache for lazy testrunner validation. Keys: container names, values: timestamp.
     # Only passed validations are cached.
     TESTRUNNER_VALIDATION_CACHE = {}
-    TESTRUNNER_VALIDATION_WINDOW = 1800  # seconds
+    TESTRUNNER_VALIDATION_WINDOW = 3600 * 24 * 7  # 1 week
 
     lock = asyncio.Lock()
 