Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Custom inline cache implementation #60

Merged
merged 1 commit into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions cache/remotecache/import.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,9 @@ type image struct {
Rootfs struct {
DiffIDs []digest.Digest `json:"diff_ids"`
} `json:"rootfs"`
Cache []byte `json:"moby.buildkit.cache.v0"`
History []struct {
Cache []byte `json:"moby.buildkit.cache.v0"`
EarthlyInlineCache []byte `json:"earthly.inlinecache.v0"`
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This field is added to an image's "config" and can be viewed using something like crane config. The map of cache keys to registry layer digests is stored here.

History []struct {
Created *time.Time `json:"created,omitempty"`
CreatedBy string `json:"created_by,omitempty"`
EmptyLayer bool `json:"empty_layer,omitempty"`
Expand Down
253 changes: 253 additions & 0 deletions cache/remotecache/inline.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
package remotecache

import (
"context"
"encoding/json"
"fmt"

"github.com/containerd/containerd/content"
"github.com/containerd/containerd/images"
"github.com/containerd/containerd/labels"
v1 "github.com/moby/buildkit/cache/remotecache/v1"
"github.com/moby/buildkit/solver"
"github.com/moby/buildkit/util/bklog"
"github.com/moby/buildkit/util/imageutil"
"github.com/moby/buildkit/util/progress"
"github.com/moby/buildkit/worker"
digest "github.com/opencontainers/go-digest"
ocispecs "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
)

// earthlyInlineCacheItem stores a relation between a simple solver cache key &
// a remote image descriptor. Used for inline caching.
type earthlyInlineCacheItem struct {
Key digest.Digest `json:"cacheKey"`
Descriptor digest.Digest `json:"descriptor"`
}

// EarthlyInlineCacheRemotes produces a map of cache keys to remote sources by
// parsing inline-cache metadata from a remote image's config data.
func EarthlyInlineCacheRemotes(ctx context.Context, provider content.Provider, desc ocispecs.Descriptor, w worker.Worker) (map[digest.Digest]*solver.Remote, error) {
dt, err := readBlob(ctx, provider, desc)
if err != nil {
return nil, err
}

manifestType, err := imageutil.DetectManifestBlobMediaType(dt)
if err != nil {
return nil, err
}

layerDone := progress.OneOff(ctx, fmt.Sprintf("inferred cache manifest type: %s", manifestType))
layerDone(nil)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing this is intentional since I see it done in cache/remotecache/azblob/importer.go, but as someone who's new to the OneOff, this seems confusing. but since it's a pattern that's already used in buildkit maybe it's fine.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Much of this code was taken from the preexisting inline cache code: https://github.com/earthly/buildkit/blob/earthly-main/cache/remotecache/import.go#L58


configDesc, err := configDescriptor(dt, manifestType)
if err != nil {
return nil, err
}

if configDesc.Digest != "" {
return nil, errors.New("expected empty digest value")
}

m := map[digest.Digest][]byte{}

if err := allDistributionManifests(ctx, provider, dt, m); err != nil {
return nil, err
}

remotes := map[digest.Digest]*solver.Remote{}

for _, dt := range m {
var m ocispecs.Manifest

if err := json.Unmarshal(dt, &m); err != nil {
return nil, errors.Wrap(err, "failed to unmarshal manifest")
}

if m.Config.Digest == "" || len(m.Layers) == 0 {
continue
}

p, err := content.ReadBlob(ctx, provider, m.Config)
if err != nil {
return nil, errors.Wrap(err, "failed to read blob")
}

var img image

if err := json.Unmarshal(p, &img); err != nil {
return nil, errors.Wrap(err, "failed to unmarshal image")
}

if len(img.Rootfs.DiffIDs) != len(m.Layers) {
bklog.G(ctx).Warnf("invalid image with mismatching manifest and config")
continue
}

if img.EarthlyInlineCache == nil {
continue
}

cacheItems := []earthlyInlineCacheItem{}
if err := json.Unmarshal(img.EarthlyInlineCache, &cacheItems); err != nil {
return nil, errors.Wrap(err, "failed to unmarshal cache items")
}

layers, err := preprocessLayers(img, m)
if err != nil {
return nil, err
}

found := extractRemotes(provider, cacheItems, layers)
for key, remote := range found {
remotes[key] = remote
}
}

return remotes, nil
}

// extractRemotes constructs a list of descriptors--which represent the layer
// chain for the given digest--for each of the items discovered in the inline
// metadata.
func extractRemotes(provider content.Provider, cacheItems []earthlyInlineCacheItem, layers []ocispecs.Descriptor) map[digest.Digest]*solver.Remote {

remotes := map[digest.Digest]*solver.Remote{}

for _, cacheItem := range cacheItems {
descs := []ocispecs.Descriptor{}

found := false
for _, layer := range layers {
descs = append(descs, layer)
if layer.Digest == cacheItem.Descriptor {
found = true
break
}
}

if found {
remote := &solver.Remote{
Descriptors: descs,
Provider: provider,
}

remotes[cacheItem.Key] = remote
}
}

return remotes
}

// preprocessLayers adds custom annotations which are used later when
// reconstructing the ref.
func preprocessLayers(img image, m ocispecs.Manifest) ([]ocispecs.Descriptor, error) {
createdDates, createdMsg, err := parseCreatedLayerInfo(img)
if err != nil {
return nil, err
}

n := len(m.Layers)

if len(createdDates) != n {
return nil, errors.New("unexpected creation dates length")
}

if len(createdMsg) != n {
return nil, errors.New("unexpected creation messages length")
}

if len(img.Rootfs.DiffIDs) != n {
return nil, errors.New("unexpected rootfs diff IDs")
}

ret := []ocispecs.Descriptor{}

for i, layer := range m.Layers {
if layer.Annotations == nil {
layer.Annotations = map[string]string{}
}

if createdAt := createdDates[i]; createdAt != "" {
mikejholly marked this conversation as resolved.
Show resolved Hide resolved
layer.Annotations["buildkit/createdat"] = createdAt
}

if createdBy := createdMsg[i]; createdBy != "" {
mikejholly marked this conversation as resolved.
Show resolved Hide resolved
layer.Annotations["buildkit/description"] = createdBy
}

layer.Annotations[labels.LabelUncompressed] = img.Rootfs.DiffIDs[i].String()
mikejholly marked this conversation as resolved.
Show resolved Hide resolved

ret = append(ret, layer)
}

return ret, nil
}

// configDescriptor parses and returns the correct manifest for the given manifest type.
func configDescriptor(dt []byte, manifestType string) (ocispecs.Descriptor, error) {
var configDesc ocispecs.Descriptor

switch manifestType {
case images.MediaTypeDockerSchema2ManifestList, ocispecs.MediaTypeImageIndex:
var mfst ocispecs.Index
if err := json.Unmarshal(dt, &mfst); err != nil {
return ocispecs.Descriptor{}, err
}

for _, m := range mfst.Manifests {
if m.MediaType == v1.CacheConfigMediaTypeV0 {
configDesc = m
continue
}
}
case images.MediaTypeDockerSchema2Manifest, ocispecs.MediaTypeImageManifest:
var mfst ocispecs.Manifest
if err := json.Unmarshal(dt, &mfst); err != nil {
return ocispecs.Descriptor{}, err
}

if mfst.Config.MediaType == v1.CacheConfigMediaTypeV0 {
configDesc = mfst.Config
}
default:
return ocispecs.Descriptor{}, errors.Errorf("unsupported or uninferrable manifest type %s", manifestType)
}

return configDesc, nil
}

// allDistributionManifests pulls all manifest data & linked manifests using the provider.
func allDistributionManifests(ctx context.Context, provider content.Provider, dt []byte, m map[digest.Digest][]byte) error {
mt, err := imageutil.DetectManifestBlobMediaType(dt)
if err != nil {
return err
}

switch mt {
case images.MediaTypeDockerSchema2Manifest, ocispecs.MediaTypeImageManifest:
m[digest.FromBytes(dt)] = dt
case images.MediaTypeDockerSchema2ManifestList, ocispecs.MediaTypeImageIndex:
var index ocispecs.Index
if err := json.Unmarshal(dt, &index); err != nil {
return errors.WithStack(err)
}

for _, d := range index.Manifests {
if _, ok := m[d.Digest]; ok {
continue
}
p, err := content.ReadBlob(ctx, provider, d)
if err != nil {
return errors.WithStack(err)
}
if err := allDistributionManifests(ctx, provider, p, m); err != nil {
return err
}
}
}

return nil
}
58 changes: 58 additions & 0 deletions cache/remotecache/registry/inline.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package registry

import (
"context"
"strconv"

"github.com/containerd/containerd/remotes/docker"
"github.com/moby/buildkit/cache/remotecache"
"github.com/moby/buildkit/session"
"github.com/moby/buildkit/solver"
"github.com/moby/buildkit/util/contentutil"
"github.com/moby/buildkit/util/resolver"
"github.com/moby/buildkit/util/resolver/limited"
"github.com/moby/buildkit/worker"
digest "github.com/opencontainers/go-digest"
"github.com/pkg/errors"
)

// EarthlyInlineCacheRemotes fetches a group of remote sources based on values
// discovered in a remote image's inline-cache metadata field.
func EarthlyInlineCacheRemotes(ctx context.Context, sm *session.Manager, w worker.Worker, hosts docker.RegistryHosts, g session.Group, attrs map[string]string) (map[digest.Digest]*solver.Remote, error) {
ref, err := canonicalizeRef(attrs[attrRef])
if err != nil {
return nil, err
}

refString := ref.String()

insecure := false
if v, ok := attrs[attrInsecure]; ok {
val, err := strconv.ParseBool(v)
if err != nil {
return nil, errors.Wrapf(err, "failed to parse %s", attrInsecure)
}
insecure = val
}

scope, hosts := registryConfig(hosts, ref, "pull", insecure)
remote := resolver.DefaultPool.GetResolver(hosts, refString, scope, sm, g)

xref, desc, err := remote.Resolve(ctx, refString)
if err != nil {
return nil, err
}

fetcher, err := remote.Fetcher(ctx, xref)
if err != nil {
return nil, err
}

src := &withDistributionSourceLabel{
Provider: contentutil.FromFetcher(limited.Default.WrapFetcher(fetcher, refString)),
ref: refString,
source: w.ContentStore(),
}

return remotecache.EarthlyInlineCacheRemotes(ctx, src, desc, w)
}
1 change: 1 addition & 0 deletions cmd/buildkitd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -844,6 +844,7 @@ func newController(c *cli.Context, cfg *config.Config, shutdownCh chan struct{})
ContentStore: w.ContentStore(),
HistoryConfig: cfg.History,
RootDir: cfg.Root,
RegistryHosts: resolverFn,
})
}

Expand Down
3 changes: 3 additions & 0 deletions control/control.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

contentapi "github.com/containerd/containerd/api/services/content/v1"
"github.com/containerd/containerd/content"
"github.com/containerd/containerd/remotes/docker"
"github.com/containerd/containerd/services/content/contentserver"
"github.com/distribution/reference"
"github.com/hashicorp/go-multierror"
Expand Down Expand Up @@ -69,6 +70,7 @@ type Opt struct {
ContentStore *containerdsnapshot.Store
HistoryConfig *config.HistoryConfig
RootDir string
RegistryHosts docker.RegistryHosts
}

type Controller struct { // TODO: ControlService
Expand Down Expand Up @@ -107,6 +109,7 @@ func NewController(opt Opt) (*Controller, error) {
Entitlements: opt.Entitlements,
HistoryQueue: hq,
RootDir: opt.RootDir,
RegistryHosts: opt.RegistryHosts,
})
if err != nil {
return nil, errors.Wrap(err, "failed to create solver")
Expand Down
2 changes: 1 addition & 1 deletion executor/runcexecutor/monitor_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func (w *runcExecutor) monitorContainerStats(ctx context.Context, id string, sam
for {
select {
case <-ctx.Done():
bklog.G(ctx).Infof("stats collection context done: %v", ctx.Err())
bklog.G(ctx).Debugf("stats collection context done: %v", ctx.Err())
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This helps with log noise.

return
case <-timer.C: // Initial sleep will give container the chance to start.
stats, err := w.runc.Stats(ctx, id)
Expand Down
1 change: 1 addition & 0 deletions exporter/containerimage/exptypes/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ const (
ExporterImageConfigDigestKey = "containerimage.config.digest"
ExporterImageDescriptorKey = "containerimage.descriptor"
ExporterInlineCache = "containerimage.inlinecache"
EarthlyInlineCache = "earthly.inlinecache"
ExporterPlatformsKey = "refs.platforms"
)

Expand Down
Loading
Loading