From 7d4dfe44a8023cd9010f8cb81c88bea80bc13b59 Mon Sep 17 00:00:00 2001 From: Tim van Katwijk Date: Mon, 26 Dec 2022 21:30:58 +0100 Subject: [PATCH 1/4] add imageName class Signed-off-by: Tim van Katwijk --- dockerfile_parse/parser.py | 6 +- dockerfile_parse/util.py | 120 ++++++++++++++++++++++++++++++++++++- tests/test_parser.py | 112 +++++++++++++++++++++++++++++++++- 3 files changed, 234 insertions(+), 4 deletions(-) diff --git a/dockerfile_parse/parser.py b/dockerfile_parse/parser.py index 1fe07f0..9f740c1 100644 --- a/dockerfile_parse/parser.py +++ b/dockerfile_parse/parser.py @@ -16,7 +16,7 @@ from .constants import DOCKERFILE_FILENAME, COMMENT_INSTRUCTION from .util import (b2u, extract_key_values, get_key_val_dictionary, - u2b, Context, WordSplitter) + u2b, Context, WordSplitter, ImageName) logger = logging.getLogger(__name__) @@ -877,7 +877,9 @@ def image_from(from_value): )? """) match = re.match(regex, from_value) - return match.group('image', 'name') if match else (None, None) + image = ImageName.parse(match.group('image')) if match else None + name = match.group('name') if match else None + return image, name def _endline(line): diff --git a/dockerfile_parse/util.py b/dockerfile_parse/util.py index 1e2e298..c479924 100644 --- a/dockerfile_parse/util.py +++ b/dockerfile_parse/util.py @@ -48,7 +48,7 @@ def __init__(self, s, args=None, envs=None): :param envs: dict, environment variables to use; if None, do not attempt substitution """ - self.stream = StringIO(s) + self.stream = StringIO(str(s)) self.args = args self.envs = envs @@ -326,3 +326,121 @@ def get_values(self, context_type): if context_type.upper() == "LABEL": return self.labels raise ValueError("Unexpected context type: " + context_type) + + +class ImageName(object): + """Represent an image. + Naming Conventions + ================== + registry.somewhere/namespace/image_name:tag + |-----------------| registry, reg_uri + |---------| namespace + |--------------------------------------| repository + |--------------------| image name + |--| tag + |------------------------| image + |------------------------------------------| image + """ + + def __init__(self, registry=None, namespace=None, repo=None, tag=None): + self.registry = registry + self.namespace = namespace + self.repo = repo + self.tag = tag + + @classmethod + def parse(cls, image_name): + result = cls() + + if not image_name or str(image_name).isspace(): + return ImageName() + + if isinstance(image_name, cls): + return image_name + + # registry.org/namespace/repo:tag + s = image_name.split('/', 2) + + if len(s) == 2: + if '.' in s[0] or ':' in s[0]: + result.registry = s[0] if s[0] else None + else: + result.namespace = s[0] + elif len(s) == 3: + result.registry = s[0] if s[0] else None + result.namespace = s[1] + result.repo = s[-1] + + for sep in '@:': + try: + result.repo, result.tag = result.repo.rsplit(sep, 1) + except ValueError: + continue + break + + return result + + def to_str(self, registry=True, tag=True, explicit_tag=False, + explicit_namespace=False): + if self.repo is None: + raise RuntimeError('No image repository specified') + + result = self.get_repo(explicit_namespace) + + if tag and self.tag and ':' in self.tag: + result = '{0}@{1}'.format(result, self.tag) + elif tag and self.tag: + result = '{0}:{1}'.format(result, self.tag) + elif tag and explicit_tag: + result = '{0}:{1}'.format(result, 'latest') + + if registry and self.registry: + result = '{0}/{1}'.format(self.registry, result) + + return result + + def get_repo(self, explicit_namespace=False): + result = self.repo + if self.namespace: + result = '{0}/{1}'.format(self.namespace, result) + elif explicit_namespace: + result = '{0}/{1}'.format('library', result) + return result + + def enclose(self, organization): + if self.namespace == organization: + return + + repo_parts = [self.repo] + if self.namespace: + repo_parts.insert(0, self.namespace) + + self.namespace = organization + self.repo = '-'.join(repo_parts) + + def __str__(self): + return self.to_str(registry=True, tag=True) + + def __repr__(self): + return ( + "ImageName(registry={s.registry!r}, namespace={s.namespace!r}," + " repo={s.repo!r}, tag={s.tag!r})" + ).format(s=self) + + def __eq__(self, other): + if isinstance(other, str): + return self.__str__() == other + elif isinstance(other, ImageName): + return self.__dict__ == other.__dict__ + else: + return NotImplemented + + def __hash__(self): + return hash(self.to_str()) + + def copy(self): + return ImageName( + registry=self.registry, + namespace=self.namespace, + repo=self.repo, + tag=self.tag) diff --git a/tests/test_parser.py b/tests/test_parser.py index 5fc2888..dd07afc 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -19,7 +19,7 @@ from dockerfile_parse import DockerfileParser from dockerfile_parse.parser import image_from from dockerfile_parse.constants import COMMENT_INSTRUCTION -from dockerfile_parse.util import b2u, u2b, Context +from dockerfile_parse.util import b2u, u2b, Context, ImageName from tests.fixtures import dfparser, instruction NON_ASCII = "žluťoučký" @@ -28,6 +28,116 @@ instruction = instruction # pylint: disable=self-assigning-variable +@pytest.mark.parametrize(('image_string', 'dictionary'), [ + ( + " ", + {"namespace": None, "registry": None, "tag": None, "repo": None}, + ), ( + "registry.org/namespace/repo:tag", + {"namespace": "namespace", "registry": "registry.org", "tag": "tag", "repo": "repo"}, + ), ( + "/namespace/repo:tag", + {"namespace": "namespace", "registry": None, "tag": "tag", "repo": "repo"}, + ), ( + "registry.org/repo:tag", + {"namespace": None, "registry": "registry.org", "tag": "tag", "repo": "repo"}, + ), ( + "registry.org/repo", + {"namespace": None, "registry": "registry.org", "tag": None, "repo": "repo"}, + ), ( + "registry.org/repo@sha256:hash", + {"namespace": None, "registry": "registry.org", "tag": "sha256:hash", "repo": "repo"}, + ) +]) +class TestImageName(object): + def test_util_image_name_parse(self, image_string, dictionary): + image = ImageName.parse(image_string) + assert image.namespace == dictionary["namespace"] + assert image.registry == dictionary["registry"] + assert image.tag == dictionary["tag"] + assert image.repo == dictionary["repo"] + + def test_util_image_name_get_repo(self, image_string, dictionary): + image = ImageName.parse(image_string) + repo = "/".join(filter(None, (dictionary["namespace"], dictionary["repo"]))) + assert image.get_repo() == (repo if repo != "" else None) + assert image.get_repo(explicit_namespace=True) == "{0}/{1}".format( + dictionary["namespace"] if dictionary["namespace"] else "library", dictionary["repo"]) + + def test_util_image_name_to_str(self, image_string, dictionary): + image = ImageName.parse(image_string) + if dictionary["repo"] is None: + with pytest.raises(RuntimeError): + image.to_str() + else: + assert image.to_str() == image_string.lstrip('/') + assert image.to_str() == (image_string.lstrip('/') if image_string else None) + assert image.to_str(explicit_tag=True) == \ + image_string.lstrip('/') + (':latest' if dictionary["tag"] is None else "") + + def test_image_name_hash(self, image_string, dictionary): + image = ImageName.parse(image_string) + if dictionary["repo"] is None: + with pytest.raises(RuntimeError): + hash(image) + else: + hash(image) + + def test_image_name_repr(self, image_string, dictionary): + # so linter won't trip on unused argument + del dictionary + image = ImageName.parse(image_string) + repr(image) + + def test_image_name_comparison(self, image_string, dictionary): + # make sure that "==" is implemented correctly on both Python major releases + i1 = ImageName.parse(image_string) + i2 = ImageName(registry=dictionary["registry"], namespace=dictionary["namespace"], + repo=dictionary["repo"], + tag=dictionary["tag"]) + assert i1 == i2 + + i2 = ImageName(registry='foo.com', namespace='spam', repo='bar', tag='2') + # pylint: disable=unneeded-not + assert not i1 == i2 + + i1 = ImageName.parse(i2) + assert i1 == i2 + + i1 = i2.copy() + assert i1 == i2 + + +@pytest.mark.parametrize(('repo', 'organization', 'enclosed_repo'), ( + ('fedora', 'spam', 'spam/fedora'), + ('spam/fedora', 'spam', 'spam/fedora'), + ('spam/fedora', 'maps', 'maps/spam-fedora'), +)) +@pytest.mark.parametrize('registry', ( + 'example.registry.com', + 'example.registry.com:8888', + None, +)) +@pytest.mark.parametrize('tag', ('bacon', None)) +def test_image_name_enclose(repo, organization, enclosed_repo, registry, tag): + reference = repo + if tag: + reference = '{}:{}'.format(repo, tag) + if registry: + reference = '{}/{}'.format(registry, reference) + + image_name = ImageName.parse(reference) + assert image_name.get_repo() == repo + assert image_name.registry == registry + assert image_name.tag == tag + + image_name.enclose(organization) + assert image_name.get_repo() == enclosed_repo + # Verify that registry and tag are unaffected + assert image_name.registry == registry + assert image_name.tag == tag + + class TestDockerfileParser(object): def test_all_versions_match(self): def read_version(fp, regex): From a374d582da21ccbfb7d1db25d3e84599103e467f Mon Sep 17 00:00:00 2001 From: Tim van Katwijk Date: Mon, 26 Dec 2022 21:50:38 +0100 Subject: [PATCH 2/4] add srpm_build_deps for packit Signed-off-by: Tim van Katwijk --- .packit.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.packit.yaml b/.packit.yaml index edf3529..fc82613 100644 --- a/.packit.yaml +++ b/.packit.yaml @@ -17,3 +17,7 @@ jobs: targets: - fedora-all - epel-8-x86_64 + +srpm_build_deps: + - python3-pip + - python3-setuptools_scm From 5fe6ebb4872358b441ccd3817932d431a8f93d3e Mon Sep 17 00:00:00 2001 From: Tim van Katwijk Date: Mon, 26 Dec 2022 22:13:42 +0100 Subject: [PATCH 3/4] rename fixtures.py to conftest.py to make codeql and flake8 happy. Signed-off-by: Tim van Katwijk --- tests/{fixtures.py => conftest.py} | 0 tests/test_parser.py | 4 ---- 2 files changed, 4 deletions(-) rename tests/{fixtures.py => conftest.py} (100%) diff --git a/tests/fixtures.py b/tests/conftest.py similarity index 100% rename from tests/fixtures.py rename to tests/conftest.py diff --git a/tests/test_parser.py b/tests/test_parser.py index dd07afc..c5db795 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -20,12 +20,8 @@ from dockerfile_parse.parser import image_from from dockerfile_parse.constants import COMMENT_INSTRUCTION from dockerfile_parse.util import b2u, u2b, Context, ImageName -from tests.fixtures import dfparser, instruction NON_ASCII = "žluťoučký" -# flake8 does not understand fixtures: -dfparser = dfparser # pylint: disable=self-assigning-variable -instruction = instruction # pylint: disable=self-assigning-variable @pytest.mark.parametrize(('image_string', 'dictionary'), [ From 4c766444240dab2b13f01d8300dab12b1296a0f7 Mon Sep 17 00:00:00 2001 From: Tim van Katwijk Date: Fri, 6 Jan 2023 11:52:31 +0100 Subject: [PATCH 4/4] add image_name_from function and add deprecation warning to image_name Signed-off-by: Tim van Katwijk --- dockerfile_parse/parser.py | 26 +++++++++++++++++++------- tests/test_parser.py | 9 ++++++--- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/dockerfile_parse/parser.py b/dockerfile_parse/parser.py index 9f740c1..9255a22 100644 --- a/dockerfile_parse/parser.py +++ b/dockerfile_parse/parser.py @@ -11,6 +11,7 @@ import logging import os import re +import warnings from contextlib import contextmanager from shlex import quote @@ -238,6 +239,7 @@ def structure(self): "value": "yum -y update && yum clean all"} ] """ + def _rstrip_eol(text, line_continuation_char='\\'): text = text.rstrip() if text.endswith(line_continuation_char): @@ -262,8 +264,8 @@ def _clean_comment_line(line): lineno = -1 line_continuation_char = '\\' insnre = re.compile(r'^\s*(\S+)\s+(.*)$') # matched group is insn - contre = re.compile(r'^.*\\\s*$') # line continues? - commentre = re.compile(r'^\s*#') # line is a comment? + contre = re.compile(r'^.*\\\s*$') # line continues? + commentre = re.compile(r'^\s*#') # line is a comment? directive_possible = True # escape directive regex escape_directive_re = re.compile(r'^\s*#\s*escape\s*=\s*(\\|`)\s*$', re.I) @@ -354,7 +356,7 @@ def parent_images(self): top_args[key] = value elif instr['instruction'] == 'FROM': in_stage = True - image, _ = image_from(instr['value']) + image, _ = image_name_from(instr['value']) if image is not None: image = WordSplitter(image, args=top_args).dequote() parents.append(image) @@ -376,7 +378,7 @@ def parent_images(self, parents): if instr['instruction'] != 'FROM': continue - old_image, stage = image_from(instr['value']) + old_image, stage = image_name_from(instr['value']) if old_image is None: continue # broken FROM, fixing would just confuse things if not parents: @@ -393,7 +395,7 @@ def parent_images(self, parents): lines = self.lines for instr in reversed(change_instrs): - lines[instr['startline']:instr['endline']+1] = [instr['content']] + lines[instr['startline']:instr['endline'] + 1] = [instr['content']] self.lines = lines @@ -416,7 +418,7 @@ def baseimage(self, new_image): images = [] for instr in self.structure: if instr['instruction'] == 'FROM': - image, _ = image_from(instr['value']) + image, _ = image_name_from(instr['value']) if image is not None: images.append(image) if not images: @@ -762,7 +764,7 @@ def add_lines(self, *lines, **kwargs): for stage in range(len(froms)-2, -1, -1): # e.g. 0 for single or 2, 1, 0 for 3 stages start, finish = froms[stage], froms[stage+1] linenum = start['endline'] + 1 if at_start else finish['startline'] - image, _ = image_from(froms[stage].get('value') or '') + image, _ = image_name_from(froms[stage].get('value') or '') if skip_scratch and image == 'scratch': continue df_lines[linenum:linenum] = lines @@ -862,6 +864,16 @@ def context_structure(self): def image_from(from_value): + """ + :param from_value: string like "image:tag" or "image:tag AS name" + :return: tuple of the image and stage name, e.g. ("image:tag", None) + """ + warnings.warn("Use image_name_from instead.", DeprecationWarning) + image, name = image_name_from(from_value) + return str(image) if image else None, name + + +def image_name_from(from_value): """ :param from_value: string like "image:tag" or "image:tag AS name" :return: tuple of the image and stage name, e.g. ("image:tag", None) diff --git a/tests/test_parser.py b/tests/test_parser.py index c5db795..5ac6569 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -17,7 +17,7 @@ from textwrap import dedent from dockerfile_parse import DockerfileParser -from dockerfile_parse.parser import image_from +from dockerfile_parse.parser import image_from, image_name_from from dockerfile_parse.constants import COMMENT_INSTRUCTION from dockerfile_parse.util import b2u, u2b, Context, ImageName @@ -573,9 +573,12 @@ def test_get_instructions_from_df(self, dfparser, instruction, instr_value, ('registry.example.com:5000/foo/bar:baz', None), ) ]) - def test_image_from(self, from_value, expect): - result = image_from(from_value) + def test_image_name_from(self, from_value, expect): + result = image_name_from(from_value) + # image_from is deprecated. But we still want to test it. + deprecated_result = image_from(from_value) assert result == expect + assert deprecated_result == expect def test_parent_images(self, dfparser): FROM = ('my-builder:latest', 'rhel7:7.5')