class ImageName(object):
    """Represent a container image reference split into its components.

    Naming conventions
    ==================
    ::

        registry.somewhere/namespace/image_name:tag
        |----------------| |-------| |--------| |-|
             registry      namespace    repo    tag

    * ``get_repo()`` yields ``namespace/image_name``.
    * ``to_str()`` / ``str()`` yield the reference built from all parts
      that are set.

    A tag that itself contains ``:`` (e.g. ``sha256:<hex>``) is rendered
    with ``@`` instead of ``:`` by ``to_str()``.

    Instances compare equal to other ``ImageName`` objects with the same
    attributes and to the plain string produced by ``str()``.
    """

    def __init__(self, registry=None, namespace=None, repo=None, tag=None):
        """
        :param registry: str, registry host (optionally with port) or None
        :param namespace: str, namespace within the registry or None
        :param repo: str, repository (image) name or None
        :param tag: str, tag or digest or None
        """
        self.registry = registry
        self.namespace = namespace
        self.repo = repo
        self.tag = tag

    @classmethod
    def parse(cls, image_name):
        """Build an ImageName from a reference string.

        :param image_name: str or ImageName, e.g.
                           "registry.org/namespace/repo:tag"
        :return: ImageName; the very same object when an instance of this
                 class is passed in, an empty instance for an empty or
                 whitespace-only string
        """
        # Must be tested before any str-only method is called on the
        # argument: ImageName has no isspace().
        if isinstance(image_name, cls):
            return image_name

        result = cls()

        if not image_name or image_name.isspace():
            return result

        # registry.org/namespace/repo:tag
        parts = image_name.split('/', 2)

        if len(parts) == 2:
            # A dot or a colon (port) marks the first component as a
            # registry host; otherwise it is a namespace.
            if '.' in parts[0] or ':' in parts[0]:
                result.registry = parts[0] if parts[0] else None
            else:
                result.namespace = parts[0]
        elif len(parts) == 3:
            # A leading "/" produces an empty first component: no registry.
            result.registry = parts[0] if parts[0] else None
            result.namespace = parts[1]
        result.repo = parts[-1]

        # Try the digest separator first so that "repo@sha256:abc" is split
        # at "@" and not at the colon inside the digest.
        for sep in '@:':
            try:
                result.repo, result.tag = result.repo.rsplit(sep, 1)
            except ValueError:
                # Separator not present: unpacking a 1-item list failed
                # before anything was assigned.
                continue
            break

        return result

    def to_str(self, registry=True, tag=True, explicit_tag=False,
               explicit_namespace=False):
        """Render the image reference as a string.

        :param registry: bool, include the registry when one is set
        :param tag: bool, include the tag/digest when one is set
        :param explicit_tag: bool, append ":latest" when no tag is set
                             (only honoured if `tag` is true)
        :param explicit_namespace: bool, use "library" when no namespace
                                   is set
        :return: str
        :raises RuntimeError: when no repo is set
        """
        if self.repo is None:
            raise RuntimeError('No image repository specified')

        result = self.get_repo(explicit_namespace)

        if tag and self.tag and ':' in self.tag:
            # A colon inside the tag means it is a digest.
            result = '{0}@{1}'.format(result, self.tag)
        elif tag and self.tag:
            result = '{0}:{1}'.format(result, self.tag)
        elif tag and explicit_tag:
            result = '{0}:{1}'.format(result, 'latest')

        if registry and self.registry:
            result = '{0}/{1}'.format(self.registry, result)

        return result

    def get_repo(self, explicit_namespace=False):
        """Return "namespace/repo", or just the repo when no namespace is set.

        :param explicit_namespace: bool, use "library" when no namespace
                                   is set
        :return: str (or None when neither repo nor a namespace prefix
                 applies)
        """
        result = self.repo
        if self.namespace:
            result = '{0}/{1}'.format(self.namespace, result)
        elif explicit_namespace:
            result = '{0}/{1}'.format('library', result)
        return result

    def enclose(self, organization):
        """Move the image under `organization`, in place.

        The previous namespace (if any) is folded into the repo name,
        joined with "-". Nothing changes when the namespace already equals
        `organization`.

        :param organization: str, the new namespace
        """
        if self.namespace == organization:
            return

        repo_parts = [self.repo]
        if self.namespace:
            repo_parts.insert(0, self.namespace)

        self.namespace = organization
        self.repo = '-'.join(repo_parts)

    def __str__(self):
        return self.to_str(registry=True, tag=True)

    def __repr__(self):
        return (
            "ImageName(registry={s.registry!r}, namespace={s.namespace!r},"
            " repo={s.repo!r}, tag={s.tag!r})"
        ).format(s=self)

    def __eq__(self, other):
        if isinstance(other, str):
            # Without a repo there is no string form (to_str() would
            # raise), so such an instance equals no string.
            if self.repo is None:
                return False
            return self.to_str() == other
        if isinstance(other, ImageName):
            return self.__dict__ == other.__dict__
        return NotImplemented

    def __ne__(self, other):
        # Python 2 does not derive "!=" from __eq__.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Hash like the string form so that an instance and the string it
        # compares equal to share a hash. NOTE: attributes are mutable
        # (see enclose()), so the hash changes when they do.
        if self.repo is None:
            return hash((self.registry, self.namespace, self.repo, self.tag))
        return hash(self.to_str())

    def copy(self):
        """Return a new ImageName with the same four attributes."""
        return ImageName(
            registry=self.registry,
            namespace=self.namespace,
            repo=self.repo,
            tag=self.tag)
@pytest.mark.parametrize(('repo', 'organization', 'enclosed_repo'), (
    ('fedora', 'spam', 'spam/fedora'),
    ('spam/fedora', 'spam', 'spam/fedora'),
    ('spam/fedora', 'maps', 'maps/spam-fedora'),
))
@pytest.mark.parametrize('registry', (
    'example.registry.com',
    'example.registry.com:8888',
    None,
))
@pytest.mark.parametrize('tag', ('bacon', None))
def test_image_name_enclose(repo, organization, enclosed_repo, registry, tag):
    """Check that enclose() rewrites only the namespace/repo part.

    A pull spec is assembled from the parametrized pieces, parsed, and
    enclosed in `organization`; registry and tag must be the same before
    and after.
    """
    # Assemble "[registry/]repo[:tag]" from whichever parts are present.
    pullspec = '{}:{}'.format(repo, tag) if tag else repo
    if registry:
        pullspec = '{}/{}'.format(registry, pullspec)

    parsed = ImageName.parse(pullspec)

    # Sanity-check the parse before mutating the object.
    assert parsed.get_repo() == repo
    assert parsed.registry == registry
    assert parsed.tag == tag

    parsed.enclose(organization)

    assert parsed.get_repo() == enclosed_repo
    # Verify that registry and tag are unaffected
    assert parsed.registry == registry
    assert parsed.tag == tag
'content': ' # another comment\n', - 'value': 'another comment'}, - {'instruction': COMMENT_INSTRUCTION, - 'startline': 5, - 'endline': 5, - 'content': '# interrupt LABEL\n', - 'value': 'interrupt LABEL'}, - {'instruction': 'LABEL', - 'startline': 4, - 'endline': 6, - 'content': ' label foo \\\n bar \n', - 'value': 'foo bar'}, - {'instruction': 'USER', - 'startline': 7, - 'endline': 7, - 'content': 'USER {0}\n'.format(NON_ASCII), - 'value': '{0}'.format(NON_ASCII)}, - {'instruction': COMMENT_INSTRUCTION, - 'startline': 8, - 'endline': 8, - 'content': '# comment \\\n', - 'value': 'comment \\'}, - {'instruction': COMMENT_INSTRUCTION, - 'startline': 9, - 'endline': 9, - 'content': '# with \\ \n', - 'value': 'with \\ '}, - {'instruction': COMMENT_INSTRUCTION, - 'startline': 10, - 'endline': 10, - 'content': '# backslashes \\\\ \n', - 'value': 'backslashes \\\\ '}, - {'instruction': COMMENT_INSTRUCTION, - 'startline': 11, - 'endline': 11, - 'content': '#no space after hash\n', - 'value': 'no space after hash'}, - {'instruction': COMMENT_INSTRUCTION, - 'startline': 12, - 'endline': 12, - 'content': '# comment # with hash inside\n', - 'value': 'comment # with hash inside'}, - {'instruction': 'RUN', - 'startline': 13, - 'endline': 13, - 'content': 'RUN command1\n', - 'value': 'command1'}, - {'instruction': 'RUN', - 'startline': 14, - 'endline': 15, - 'content': 'RUN command2 && \\\n command3\n', - 'value': 'command2 && command3'}, - {'instruction': COMMENT_INSTRUCTION, - 'startline': 17, - 'endline': 17, - 'content': '# interrupt RUN\n', - 'value': 'interrupt RUN'}, - {'instruction': 'RUN', - 'startline': 16, - 'endline': 18, - 'content': 'RUN command4 && \\\n command5\n', - 'value': 'command4 && command5'}] + {'instruction': COMMENT_INSTRUCTION, + 'startline': 0, + 'endline': 0, + 'content': '# comment\n', + 'value': 'comment'}, + {'instruction': 'FROM', + 'startline': 1, + 'endline': 2, + 'content': ' From \\\n base\n', + 'value': 'base'}, + {'instruction': 
COMMENT_INSTRUCTION, + 'startline': 3, + 'endline': 3, + 'content': ' # another comment\n', + 'value': 'another comment'}, + {'instruction': COMMENT_INSTRUCTION, + 'startline': 5, + 'endline': 5, + 'content': '# interrupt LABEL\n', + 'value': 'interrupt LABEL'}, + {'instruction': 'LABEL', + 'startline': 4, + 'endline': 6, + 'content': ' label foo \\\n bar \n', + 'value': 'foo bar'}, + {'instruction': 'USER', + 'startline': 7, + 'endline': 7, + 'content': 'USER {0}\n'.format(NON_ASCII), + 'value': '{0}'.format(NON_ASCII)}, + {'instruction': COMMENT_INSTRUCTION, + 'startline': 8, + 'endline': 8, + 'content': '# comment \\\n', + 'value': 'comment \\'}, + {'instruction': COMMENT_INSTRUCTION, + 'startline': 9, + 'endline': 9, + 'content': '# with \\ \n', + 'value': 'with \\ '}, + {'instruction': COMMENT_INSTRUCTION, + 'startline': 10, + 'endline': 10, + 'content': '# backslashes \\\\ \n', + 'value': 'backslashes \\\\ '}, + {'instruction': COMMENT_INSTRUCTION, + 'startline': 11, + 'endline': 11, + 'content': '#no space after hash\n', + 'value': 'no space after hash'}, + {'instruction': COMMENT_INSTRUCTION, + 'startline': 12, + 'endline': 12, + 'content': '# comment # with hash inside\n', + 'value': 'comment # with hash inside'}, + {'instruction': 'RUN', + 'startline': 13, + 'endline': 13, + 'content': 'RUN command1\n', + 'value': 'command1'}, + {'instruction': 'RUN', + 'startline': 14, + 'endline': 15, + 'content': 'RUN command2 && \\\n command3\n', + 'value': 'command2 && command3'}, + {'instruction': COMMENT_INSTRUCTION, + 'startline': 17, + 'endline': 17, + 'content': '# interrupt RUN\n', + 'value': 'interrupt RUN'}, + {'instruction': 'RUN', + 'startline': 16, + 'endline': 18, + 'content': 'RUN command4 && \\\n command5\n', + 'value': 'command4 && command5'}] def test_invalid_dockerfile_structure(self, dfparser): '''Invalid instruction is reserverd.''' @@ -235,16 +315,16 @@ def test_invalid_dockerfile_structure(self, dfparser): apt-get install something """) assert 
dfparser.structure == [ - {'instruction': 'RUN', - 'startline': 0, - 'endline': 0, - 'content': 'RUN apt-get update\n', - 'value': 'apt-get update'}, - {'instruction': 'APT-GET', - 'startline': 1, - 'endline': 1, - 'content': ' apt-get install something\n', - 'value': 'install something'}] + {'instruction': 'RUN', + 'startline': 0, + 'endline': 0, + 'content': 'RUN apt-get update\n', + 'value': 'apt-get update'}, + {'instruction': 'APT-GET', + 'startline': 1, + 'endline': 1, + 'content': ' apt-get install something\n', + 'value': 'install something'}] def test_dockerfile_json(self, dfparser): dfparser.content = dedent("""\ @@ -451,23 +531,23 @@ def test_get_instructions_from_df(self, dfparser, instruction, instr_value, @pytest.mark.parametrize(('from_value', 'expect'), [ ( - " ", - (None, None), + " ", + (None, None), ), ( - " foo", - ('foo', None), + " foo", + ('foo', None), ), ( - "foo:bar as baz ", - ('foo:bar', 'baz'), + "foo:bar as baz ", + ('foo:bar', 'baz'), ), ( - "foo as baz", - ('foo', 'baz'), + "foo as baz", + ('foo', 'baz'), ), ( - "foo and some other junk", # we won't judge - ('foo', None), + "foo and some other junk", # we won't judge + ('foo', None), ), ( - "registry.example.com:5000/foo/bar:baz", - ('registry.example.com:5000/foo/bar:baz', None), + "registry.example.com:5000/foo/bar:baz", + ('registry.example.com:5000/foo/bar:baz', None), ) ]) def test_image_from(self, from_value, expect): @@ -621,54 +701,54 @@ def test_add_del_instruction(self, dfparser): @pytest.mark.parametrize(('existing', 'delete_key', 'expected'), [ - # Delete non-existing key - (['a b\n', - 'x="y z"\n'], - 'name', - KeyError()), - - # Simple remove - (['a b\n', - 'x="y z"\n'], - 'a', - ['x="y z"\n']), - - # Simple remove - (['a b\n', - 'x="y z"\n'], - 'x', - ['a b\n']), - - # Simple remove unicode - (['a b\n', - 'x="y ❤"\n'], - 'x', - ['a b\n']), - - # Simple remove unicode - (['a b\n', - '❤="y z"\n'], - '❤', - ['a b\n']), - - # Remove first of two instructions on the same 
line - (['a b\n', - 'x="y z"\n', - '"first"="first" "second"="second"\n'], - 'first', - ['a b\n', - 'x="y z"\n', - '"second"="second"\n']), - - # Remove second of two instructions on the same line - (['a b\n', - 'x="y z"\n', - '"first"="first" "second"="second"\n'], - 'second', - ['a b\n', - 'x="y z"\n', - '"first"="first"\n']), - ]) + # Delete non-existing key + (['a b\n', + 'x="y z"\n'], + 'name', + KeyError()), + + # Simple remove + (['a b\n', + 'x="y z"\n'], + 'a', + ['x="y z"\n']), + + # Simple remove + (['a b\n', + 'x="y z"\n'], + 'x', + ['a b\n']), + + # Simple remove unicode + (['a b\n', + 'x="y ❤"\n'], + 'x', + ['a b\n']), + + # Simple remove unicode + (['a b\n', + '❤="y z"\n'], + '❤', + ['a b\n']), + + # Remove first of two instructions on the same line + (['a b\n', + 'x="y z"\n', + '"first"="first" "second"="second"\n'], + 'first', + ['a b\n', + 'x="y z"\n', + '"second"="second"\n']), + + # Remove second of two instructions on the same line + (['a b\n', + 'x="y z"\n', + '"first"="first" "second"="second"\n'], + 'second', + ['a b\n', + 'x="y z"\n', + '"first"="first"\n']), + ]) def test_delete_instruction(self, dfparser, instruction, existing, delete_key, expected): existing = [instruction + ' ' + i for i in existing] if isinstance(expected, list): @@ -685,52 +765,53 @@ def test_delete_instruction(self, dfparser, instruction, existing, delete_key, e @pytest.mark.parametrize(('existing', 'new', 'expected'), [ - # Simple test: set an instruction - (['a b\n', - 'x="y z"\n'], - {'Name': 'New shiny project'}, - ['Name=\'New shiny project\'\n']), - - # Set two instructions - (['a b\n', - 'x="y z"\n'], - {'something': 'nothing', 'mine': 'yours'}, - ['something=nothing\n', 'mine=yours\n']), - - # Set instructions to what they already were: should be no difference - (['a b\n', - 'x="y z"\n', - '"first"="first" second=\'second value\'\n'], - {'a': 'b', 'x': 'y z', 'first': 'first', 'second': 'second value'}, - ['a b\n', - 'x="y z"\n', - '"first"="first" 
second=\'second value\'\n']), - - # Adjust one label of a multi-value LABEL/ENV statement - (['a b\n', - 'first=\'first value\' "second"=second\n', - 'x="y z"\n'], - {'first': 'changed', 'second': 'second'}, - ['first=changed "second"=second\n']), - - # Delete one label of a multi-value LABEL/ENV statement - (['a b\n', - 'x="y z"\n', - 'first=first second=second\n'], - {'second': 'second'}, - ['second=second\n']), - - # Nested quotes - (['"ownership"="Alice\'s label" other=value\n'], - {'ownership': "Alice's label"}, - # Keeps existing key quoting style - ['"ownership"="Alice\'s label"\n']), - - # Modify a single value that needs quoting - (['foo bar\n'], - {'foo': 'extra bar'}, - ["foo 'extra bar'\n"]), - ]) + # Simple test: set an instruction + (['a b\n', + 'x="y z"\n'], + {'Name': 'New shiny project'}, + ['Name=\'New shiny project\'\n']), + + # Set two instructions + (['a b\n', + 'x="y z"\n'], + {'something': 'nothing', 'mine': 'yours'}, + ['something=nothing\n', 'mine=yours\n']), + + # Set instructions to what they already were: should be no difference + (['a b\n', + 'x="y z"\n', + '"first"="first" second=\'second value\'\n'], + {'a': 'b', 'x': 'y z', 'first': 'first', + 'second': 'second value'}, + ['a b\n', + 'x="y z"\n', + '"first"="first" second=\'second value\'\n']), + + # Adjust one label of a multi-value LABEL/ENV statement + (['a b\n', + 'first=\'first value\' "second"=second\n', + 'x="y z"\n'], + {'first': 'changed', 'second': 'second'}, + ['first=changed "second"=second\n']), + + # Delete one label of a multi-value LABEL/ENV statement + (['a b\n', + 'x="y z"\n', + 'first=first second=second\n'], + {'second': 'second'}, + ['second=second\n']), + + # Nested quotes + (['"ownership"="Alice\'s label" other=value\n'], + {'ownership': "Alice's label"}, + # Keeps existing key quoting style + ['"ownership"="Alice\'s label"\n']), + + # Modify a single value that needs quoting + (['foo bar\n'], + {'foo': 'extra bar'}, + ["foo 'extra bar'\n"]), + ]) def 
test_setter(self, dfparser, instruction, existing, new, expected): existing = [instruction + ' ' + i for i in existing] if isinstance(expected, list): @@ -869,8 +950,8 @@ def test_arg_env_invalid(self, dfparser, instruction, label): pass @pytest.mark.parametrize(('instruction', 'attribute'), ( - ('ARG', 'args'), - ('ENV', 'envs'), + ('ARG', 'args'), + ('ENV', 'envs'), )) def test_arg_env_multistage(self, dfparser, instruction, attribute): dfparser.content = dedent("""\ @@ -1143,7 +1224,7 @@ def test_label_invalid(self, dfparser, label_value, bad_keyval, envs, action): dfparser.lines = [ "FROM scratch\n", - env_line, # has to appear before the LABEL line + env_line, # has to appear before the LABEL line "LABEL {0}\n".format(label_value), ] with pytest.raises(ValueError) as exc_info: @@ -1205,35 +1286,35 @@ def test_add_lines_stage_edge(self, dfparser): @pytest.mark.parametrize(('anchor', 'raises'), [ ( - 3, None + 3, None ), ( - 'CMD xyz ❤\n', None + 'CMD xyz ❤\n', None ), ( - dict( - content='CMD xyz ❤\n', - startline=3, - endline=3, - instruction='CMD', - value='xyz ❤' - ), - None + dict( + content='CMD xyz ❤\n', + startline=3, + endline=3, + instruction='CMD', + value='xyz ❤' + ), + None ), ( - -2, AssertionError + -2, AssertionError ), ( - 20, AssertionError + 20, AssertionError ), ( - 2.0, RuntimeError + 2.0, RuntimeError ), ( - 'not there', RuntimeError + 'not there', RuntimeError ), ( - dict(), AssertionError + dict(), AssertionError ), ]) def test_add_lines_at(self, dfparser, anchor, raises): @@ -1388,7 +1469,7 @@ def _test_escape_directive(self, dfparser, escape_value, used_line_continuation) 'startline': 2, 'endline': 3, 'content': 'RUN touch foo; {line_cont}\n touch bar\n'.format( - line_cont=used_line_continuation + line_cont=used_line_continuation ), 'value': 'touch foo; touch bar' } @@ -1448,7 +1529,7 @@ def _test_escape_after_syntax_directive(self, dfparser, escape_value, used_line_ 'startline': 3, 'endline': 4, 'content': 'RUN touch foo; 
{line_cont}\n touch bar\n'.format( - line_cont=used_line_continuation + line_cont=used_line_continuation ), 'value': 'touch foo; touch bar' }