Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move ImageName class from osbs client to utils of dockerfile parser. #139

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .packit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,7 @@ jobs:
targets:
- fedora-all
- epel-8-x86_64

srpm_build_deps:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was apparently still missing in the master repo: I'm not 100% certain if this is correct. Please verify.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still not sure what this fix is fixing, we had not such issue previously

- python3-pip
- python3-setuptools_scm
32 changes: 23 additions & 9 deletions dockerfile_parse/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@
import logging
import os
import re
import warnings
from contextlib import contextmanager
from shlex import quote

from .constants import DOCKERFILE_FILENAME, COMMENT_INSTRUCTION
from .util import (b2u, extract_key_values, get_key_val_dictionary,
u2b, Context, WordSplitter)
u2b, Context, WordSplitter, ImageName)


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -238,6 +239,7 @@ def structure(self):
"value": "yum -y update && yum clean all"}
]
"""

def _rstrip_eol(text, line_continuation_char='\\'):
text = text.rstrip()
if text.endswith(line_continuation_char):
Expand All @@ -262,8 +264,8 @@ def _clean_comment_line(line):
lineno = -1
line_continuation_char = '\\'
insnre = re.compile(r'^\s*(\S+)\s+(.*)$') # matched group is insn
contre = re.compile(r'^.*\\\s*$') # line continues?
commentre = re.compile(r'^\s*#') # line is a comment?
contre = re.compile(r'^.*\\\s*$') # line continues?
commentre = re.compile(r'^\s*#') # line is a comment?
directive_possible = True
# escape directive regex
escape_directive_re = re.compile(r'^\s*#\s*escape\s*=\s*(\\|`)\s*$', re.I)
Expand Down Expand Up @@ -354,7 +356,7 @@ def parent_images(self):
top_args[key] = value
elif instr['instruction'] == 'FROM':
in_stage = True
image, _ = image_from(instr['value'])
image, _ = image_name_from(instr['value'])
if image is not None:
image = WordSplitter(image, args=top_args).dequote()
parents.append(image)
Expand All @@ -376,7 +378,7 @@ def parent_images(self, parents):
if instr['instruction'] != 'FROM':
continue

old_image, stage = image_from(instr['value'])
old_image, stage = image_name_from(instr['value'])
if old_image is None:
continue # broken FROM, fixing would just confuse things
if not parents:
Expand All @@ -393,7 +395,7 @@ def parent_images(self, parents):

lines = self.lines
for instr in reversed(change_instrs):
lines[instr['startline']:instr['endline']+1] = [instr['content']]
lines[instr['startline']:instr['endline'] + 1] = [instr['content']]

self.lines = lines

Expand All @@ -416,7 +418,7 @@ def baseimage(self, new_image):
images = []
for instr in self.structure:
if instr['instruction'] == 'FROM':
image, _ = image_from(instr['value'])
image, _ = image_name_from(instr['value'])
if image is not None:
images.append(image)
if not images:
Expand Down Expand Up @@ -762,7 +764,7 @@ def add_lines(self, *lines, **kwargs):
for stage in range(len(froms)-2, -1, -1): # e.g. 0 for single or 2, 1, 0 for 3 stages
start, finish = froms[stage], froms[stage+1]
linenum = start['endline'] + 1 if at_start else finish['startline']
image, _ = image_from(froms[stage].get('value') or '')
image, _ = image_name_from(froms[stage].get('value') or '')
if skip_scratch and image == 'scratch':
continue
df_lines[linenum:linenum] = lines
Expand Down Expand Up @@ -862,6 +864,16 @@ def context_structure(self):


def image_from(from_value):
"""
:param from_value: string like "image:tag" or "image:tag AS name"
:return: tuple of the image and stage name, e.g. ("image:tag", None)
"""
warnings.warn("Use image_name_from instead.", DeprecationWarning)
image, name = image_name_from(from_value)
return str(image) if image else None, name


def image_name_from(from_value):
"""
:param from_value: string like "image:tag" or "image:tag AS name"
:return: tuple of the image and stage name, e.g. ("image:tag", None)
Expand All @@ -877,7 +889,9 @@ def image_from(from_value):
)?
""")
match = re.match(regex, from_value)
return match.group('image', 'name') if match else (None, None)
image = ImageName.parse(match.group('image')) if match else None
name = match.group('name') if match else None
return image, name
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we are breaking BW compatibility here, I'm not sure if we can do this

Copy link
Contributor Author

@tim-vk tim-vk Dec 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as i'm aware it shouldn't break backwards compatibility:

Old version: Image was a string
New version: Image is the new ImageName class which has a str representation (which is idential to the original string of the previous version).

This should be backwards compatible (the unit tests reflect this: there are only tests added, not changed).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addition:

Just realized what I typed above is not 100% true: See the explicit str(s) in util.py (line 51).
impact should still be minimal.

Alternative solution would be keeping the old function.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, explicit casting to str() is required, that's breaking change

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO would be safer to add new function and add deprecation warning to this one

Copy link
Contributor Author

@tim-vk tim-vk Jan 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!
I could not find any examples within the containerbuildsystem project with deprecations so i opted to just add a simple DeprecationWarning. Let me know if an external package or other method is preferred.



def _endline(line):
Expand Down
120 changes: 119 additions & 1 deletion dockerfile_parse/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def __init__(self, s, args=None, envs=None):
:param envs: dict, environment variables to use; if None, do not
attempt substitution
"""
self.stream = StringIO(s)
self.stream = StringIO(str(s))
self.args = args
self.envs = envs

Expand Down Expand Up @@ -326,3 +326,121 @@ def get_values(self, context_type):
if context_type.upper() == "LABEL":
return self.labels
raise ValueError("Unexpected context type: " + context_type)


class ImageName(object):
"""Represent an image.
Naming Conventions
==================
registry.somewhere/namespace/image_name:tag
|-----------------| registry, reg_uri
|---------| namespace
|--------------------------------------| repository
|--------------------| image name
|--| tag
|------------------------| image
|------------------------------------------| image
"""

def __init__(self, registry=None, namespace=None, repo=None, tag=None):
self.registry = registry
self.namespace = namespace
self.repo = repo
self.tag = tag

@classmethod
def parse(cls, image_name):
result = cls()

if not image_name or str(image_name).isspace():
return ImageName()

if isinstance(image_name, cls):
return image_name

# registry.org/namespace/repo:tag
s = image_name.split('/', 2)

if len(s) == 2:
if '.' in s[0] or ':' in s[0]:
result.registry = s[0] if s[0] else None
else:
result.namespace = s[0]
elif len(s) == 3:
result.registry = s[0] if s[0] else None
result.namespace = s[1]
result.repo = s[-1]

for sep in '@:':
try:
result.repo, result.tag = result.repo.rsplit(sep, 1)
except ValueError:
continue
break

return result

def to_str(self, registry=True, tag=True, explicit_tag=False,
explicit_namespace=False):
if self.repo is None:
raise RuntimeError('No image repository specified')

result = self.get_repo(explicit_namespace)

if tag and self.tag and ':' in self.tag:
result = '{0}@{1}'.format(result, self.tag)
elif tag and self.tag:
result = '{0}:{1}'.format(result, self.tag)
elif tag and explicit_tag:
result = '{0}:{1}'.format(result, 'latest')

if registry and self.registry:
result = '{0}/{1}'.format(self.registry, result)

return result

def get_repo(self, explicit_namespace=False):
result = self.repo
if self.namespace:
result = '{0}/{1}'.format(self.namespace, result)
elif explicit_namespace:
result = '{0}/{1}'.format('library', result)
return result

def enclose(self, organization):
if self.namespace == organization:
return

repo_parts = [self.repo]
if self.namespace:
repo_parts.insert(0, self.namespace)

self.namespace = organization
self.repo = '-'.join(repo_parts)

def __str__(self):
return self.to_str(registry=True, tag=True)

def __repr__(self):
return (
"ImageName(registry={s.registry!r}, namespace={s.namespace!r},"
" repo={s.repo!r}, tag={s.tag!r})"
).format(s=self)

def __eq__(self, other):
if isinstance(other, str):
return self.__str__() == other
elif isinstance(other, ImageName):
return self.__dict__ == other.__dict__
else:
return NotImplemented

def __hash__(self):
return hash(self.to_str())

def copy(self):
return ImageName(
registry=self.registry,
namespace=self.namespace,
repo=self.repo,
tag=self.tag)
File renamed without changes.
125 changes: 117 additions & 8 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,121 @@
from textwrap import dedent

from dockerfile_parse import DockerfileParser
from dockerfile_parse.parser import image_from
from dockerfile_parse.parser import image_from, image_name_from
from dockerfile_parse.constants import COMMENT_INSTRUCTION
from dockerfile_parse.util import b2u, u2b, Context
from tests.fixtures import dfparser, instruction
from dockerfile_parse.util import b2u, u2b, Context, ImageName

NON_ASCII = "žluťoučký"
# flake8 does not understand fixtures:
dfparser = dfparser # pylint: disable=self-assigning-variable
instruction = instruction # pylint: disable=self-assigning-variable


@pytest.mark.parametrize(('image_string', 'dictionary'), [
(
" ",
{"namespace": None, "registry": None, "tag": None, "repo": None},
), (
"registry.org/namespace/repo:tag",
{"namespace": "namespace", "registry": "registry.org", "tag": "tag", "repo": "repo"},
), (
"/namespace/repo:tag",
{"namespace": "namespace", "registry": None, "tag": "tag", "repo": "repo"},
), (
"registry.org/repo:tag",
{"namespace": None, "registry": "registry.org", "tag": "tag", "repo": "repo"},
), (
"registry.org/repo",
{"namespace": None, "registry": "registry.org", "tag": None, "repo": "repo"},
), (
"registry.org/repo@sha256:hash",
{"namespace": None, "registry": "registry.org", "tag": "sha256:hash", "repo": "repo"},
)
])
class TestImageName(object):
def test_util_image_name_parse(self, image_string, dictionary):
image = ImageName.parse(image_string)
assert image.namespace == dictionary["namespace"]
assert image.registry == dictionary["registry"]
assert image.tag == dictionary["tag"]
assert image.repo == dictionary["repo"]

def test_util_image_name_get_repo(self, image_string, dictionary):
image = ImageName.parse(image_string)
repo = "/".join(filter(None, (dictionary["namespace"], dictionary["repo"])))
assert image.get_repo() == (repo if repo != "" else None)
assert image.get_repo(explicit_namespace=True) == "{0}/{1}".format(
dictionary["namespace"] if dictionary["namespace"] else "library", dictionary["repo"])

def test_util_image_name_to_str(self, image_string, dictionary):
image = ImageName.parse(image_string)
if dictionary["repo"] is None:
with pytest.raises(RuntimeError):
image.to_str()
else:
assert image.to_str() == image_string.lstrip('/')
assert image.to_str() == (image_string.lstrip('/') if image_string else None)
assert image.to_str(explicit_tag=True) == \
image_string.lstrip('/') + (':latest' if dictionary["tag"] is None else "")

def test_image_name_hash(self, image_string, dictionary):
image = ImageName.parse(image_string)
if dictionary["repo"] is None:
with pytest.raises(RuntimeError):
hash(image)
else:
hash(image)

def test_image_name_repr(self, image_string, dictionary):
# so linter won't trip on unused argument
del dictionary
image = ImageName.parse(image_string)
repr(image)

def test_image_name_comparison(self, image_string, dictionary):
# make sure that "==" is implemented correctly on both Python major releases
i1 = ImageName.parse(image_string)
i2 = ImageName(registry=dictionary["registry"], namespace=dictionary["namespace"],
repo=dictionary["repo"],
tag=dictionary["tag"])
assert i1 == i2

i2 = ImageName(registry='foo.com', namespace='spam', repo='bar', tag='2')
# pylint: disable=unneeded-not
assert not i1 == i2

i1 = ImageName.parse(i2)
assert i1 == i2

i1 = i2.copy()
assert i1 == i2


@pytest.mark.parametrize(('repo', 'organization', 'enclosed_repo'), (
('fedora', 'spam', 'spam/fedora'),
('spam/fedora', 'spam', 'spam/fedora'),
('spam/fedora', 'maps', 'maps/spam-fedora'),
))
@pytest.mark.parametrize('registry', (
'example.registry.com',
'example.registry.com:8888',
None,
))
@pytest.mark.parametrize('tag', ('bacon', None))
def test_image_name_enclose(repo, organization, enclosed_repo, registry, tag):
reference = repo
if tag:
reference = '{}:{}'.format(repo, tag)
if registry:
reference = '{}/{}'.format(registry, reference)

image_name = ImageName.parse(reference)
assert image_name.get_repo() == repo
assert image_name.registry == registry
assert image_name.tag == tag

image_name.enclose(organization)
assert image_name.get_repo() == enclosed_repo
# Verify that registry and tag are unaffected
assert image_name.registry == registry
assert image_name.tag == tag


class TestDockerfileParser(object):
Expand Down Expand Up @@ -467,9 +573,12 @@ def test_get_instructions_from_df(self, dfparser, instruction, instr_value,
('registry.example.com:5000/foo/bar:baz', None),
)
])
def test_image_from(self, from_value, expect):
result = image_from(from_value)
def test_image_name_from(self, from_value, expect):
result = image_name_from(from_value)
# image_from is deprecated. But we still want to test it.
deprecated_result = image_from(from_value)
assert result == expect
assert deprecated_result == expect

def test_parent_images(self, dfparser):
FROM = ('my-builder:latest', 'rhel7:7.5')
Expand Down