releng: support multiarch docker images

If we don't want to have separate registry tags by architecture (EWWWW),
we need to be able to build multiarch docker images. This is pretty
simple, and just requires making a manifest pointing to each of the
component images.

I was *going* to just do this API prodding with manifest-tool, but it
doesn't support putting metadata on the outer manifest, which is
actually kind of a problem because it then doesn't render the metadata
on github. So I guess we get a simple little containers API
implementation that is 90% auth code.

Change-Id: I8bdd118d4cbc13b23224f2fb174b232432686bea
This commit is contained in:
jade 2024-06-09 00:27:06 -07:00
parent 4392d89eea
commit 9aeb314e6a
7 changed files with 527 additions and 26 deletions

View file

@ -1,5 +1,7 @@
{
pkgs ? import <nixpkgs> { },
# Git commit ID, if available
lixRevision ? null,
nix2container,
lib ? pkgs.lib,
name ? "lix",
@ -353,6 +355,23 @@ let
"NIX_SSL_CERT_FILE=/nix/var/nix/profiles/default/etc/ssl/certs/ca-bundle.crt"
"NIX_PATH=/nix/var/nix/profiles/per-user/root/channels:/root/.nix-defexpr/channels"
];
Labels = {
"org.opencontainers.image.title" = "Lix";
"org.opencontainers.image.source" = "https://git.lix.systems/lix-project/lix";
"org.opencontainers.image.vendor" = "Lix project";
"org.opencontainers.image.version" = pkgs.nix.version;
"org.opencontainers.image.description" = "Minimal Lix container image, with some batteries included.";
} // lib.optionalAttrs (lixRevision != null) { "org.opencontainers.image.revision" = lixRevision; };
};
meta = {
description = "Docker image for Lix. This is built with nix2container; see that project's README for details";
longDescription = ''
Docker image for Lix, built with nix2container.
To copy it to your docker daemon, nix run .#dockerImage.copyToDockerDaemon
To copy it to podman, nix run .#dockerImage.copyTo containers-storage:lix
'';
};
};
in
@ -379,12 +398,4 @@ image
gzip $image
echo "file binary-dist $image" >> $out/nix-support/hydra-build-products
'';
meta = image.meta // {
description = "Docker image for Lix. This is built with nix2container; see that project's README for details";
longDescription = ''
Docker image for Lix, built with nix2container.
To copy it to your docker daemon, nix run .#dockerImage.copyToDockerDaemon
To copy it to podman, nix run .#dockerImage.copyTo containers-storage:lix
'';
};
}

View file

@ -417,6 +417,8 @@ stdenv.mkDerivation (finalAttrs: {
p: [
p.yapf
p.python-frontmatter
p.requests
p.xdg-base-dirs
(p.toPythonModule xonsh-unwrapped)
]
);

View file

@ -2,12 +2,29 @@ from xonsh.main import setup
setup()
del setup
import logging
from . import environment
from . import create_release
from . import keys
from . import version
from . import cli
from . import docker
from . import docker_assemble
# Logging setup for the package: both the root logger and this module's own
# logger are opened up to DEBUG.
rootLogger = logging.getLogger()
rootLogger.setLevel(logging.DEBUG)
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)
# '{'-style format: "<time> <level> <logger name>: <message>"
fmt = logging.Formatter('{asctime} {levelname} {name}: {message}',
                        datefmt='%b %d %H:%M:%S',
                        style='{')
# Only attach a stream handler when the root logger has none yet
# (presumably so that running this module again does not duplicate output).
if not any(isinstance(h, logging.StreamHandler) for h in rootLogger.handlers):
    hand = logging.StreamHandler()
    hand.setFormatter(fmt)
    rootLogger.addHandler(hand)
def reload():
import importlib
@ -17,3 +34,4 @@ def reload():
importlib.reload(version)
importlib.reload(cli)
importlib.reload(docker)
importlib.reload(docker_assemble)

View file

@ -258,15 +258,16 @@ def upload_artifacts(env: RelengEnvironment, noconfirm=False, no_check_git=False
'I want to release this'
)
docker_images = list((ARTIFACTS / f'lix/lix-{VERSION}').glob(f'lix-{VERSION}-docker-image-*.tar.gz'))
assert docker_images
print('[+] Upload to cache')
with open(DRVS_TXT) as fh:
upload_drv_paths_and_outputs(env, [x.strip() for x in fh.readlines() if x])
docker_images = (ARTIFACTS / f'lix/lix-{VERSION}').glob(f'lix-{VERSION}-docker-image-*.tar.gz')
print('[+] Upload docker images')
for image in docker_images:
for target in env.docker_targets:
docker.upload_docker_image(target, image)
for target in env.docker_targets:
docker.upload_docker_images(target, docker_images)
print('[+] Upload to release bucket')
aws s3 cp --recursive @(ARTIFACTS)/ @(env.releases_bucket)/
@ -321,7 +322,8 @@ def build_artifacts(no_check_git=False):
build_manual(eval_result)
with open(DRVS_TXT, 'w') as fh:
fh.write('\n'.join(drv_paths))
# don't bother putting the release tarballs themselves because they are duplicate and huge
fh.write('\n'.join(x['drvPath'] for x in eval_result if x['attr'] != 'lix-release-tarballs'))
make_artifacts_dir(eval_result, ARTIFACTS)
print(f'[+] Done! See {ARTIFACTS}')

View file

@ -1,6 +1,18 @@
from .environment import DockerTarget, RelengEnvironment
from .version import VERSION
import json
import logging
from pathlib import Path
import tempfile
import requests
from .environment import DockerTarget, RelengEnvironment
from .version import VERSION, MAJOR
from . import gitutils
from .docker_assemble import Registry, OCIIndex, OCIIndexItem
from . import docker_assemble
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
def check_all_logins(env: RelengEnvironment):
for target in env.docker_targets:
@ -9,5 +21,54 @@ def check_all_logins(env: RelengEnvironment):
def check_login(target: DockerTarget):
    # Runs `skopeo login` for the registry named by target.registry_name().
    # This is xonsh subprocess syntax; @(...) splices in the Python value.
    skopeo login @(target.registry_name())
def upload_docker_image(target: DockerTarget, path: Path):
skopeo --insecure-policy copy docker-archive:@(path) docker://@(target.resolve(version=VERSION))
def upload_docker_images(target: DockerTarget, paths: list[Path]):
    """
    Uploads the image archives in ``paths`` to ``target`` as one multiarch image.

    Every archive is pushed with skopeo to ``<target.registry_path>:temp``, its
    digest and size are collected, and finally one OCI index referring to all
    of the pushed images is uploaded under each resolved tag of ``target``
    (plus ``latest`` when HEAD is not a maintenance branch).

    Does nothing when ``paths`` is empty.
    """
    if not paths: return

    sess = requests.Session()
    sess.headers['User-Agent'] = 'lix-releng'

    # Expand the {version}/{major} templates in the target's tags
    tag_names = [DockerTarget.resolve(tag, version=VERSION, major=MAJOR) for tag in target.tags]

    # latest only gets tagged for the current release branch of Lix
    if not gitutils.is_maintenance_branch('HEAD'):
        tag_names.append('latest')

    meta = {}

    reg = docker_assemble.Registry(sess)
    manifests = []

    # The temporary directory only holds the digest files written by skopeo
    with tempfile.TemporaryDirectory() as tmp:
        tmp = Path(tmp)

        for path in paths:
            digest_file = tmp / (path.name + '.digest')
            inspection = json.loads($(skopeo inspect docker-archive:@(path)))

            docker_arch = inspection['Architecture']
            docker_os = inspection['Os']
            # Overwritten on every iteration: the labels of the last archive
            # inspected become the annotations of the index.
            meta = inspection['Labels']

            log.info('Pushing image %s for %s', path, docker_arch)

            # insecure-policy: we don't have any signature policy, we are just uploading an image
            # We upload to a junk tag, because otherwise it will upload to `latest`, which is undesirable
            skopeo --insecure-policy copy --format oci --digestfile @(digest_file) docker-archive:@(path) docker://@(target.registry_path):temp

            digest = digest_file.read_text().strip()

            # skopeo doesn't give us the manifest size directly, so we just ask the registry
            metadata = reg.image_info(target.registry_path, digest)

            manifests.append(OCIIndexItem(metadata=metadata, architecture=docker_arch, os=docker_os))
    # delete the temp tag, which we only have to create because of skopeo
    # limitations anyhow (it seems to not have a way to say "don't tag it, find
    # your checksum and put it there")
    # FIXME: this is not possible because GitHub only has a proprietary API for it. amazing. 11/10.
    # reg.delete_tag(target.registry_path, 'temp')

    log.info('Pushed images, building a bigger and more menacing manifest from %r with metadata %r', manifests, meta)
    # send the multiarch manifest to each tag
    index = OCIIndex(manifests=manifests, annotations=meta)
    for tag in tag_names:
        reg.upload_index(target.registry_path, tag, index)

399
releng/docker_assemble.py Normal file
View file

@ -0,0 +1,399 @@
from typing import Any, Literal, Optional
import re
from pathlib import Path
import json
import dataclasses
import time
from urllib.parse import unquote
import urllib.request
import logging
import requests.auth
import requests
import xdg_base_dirs
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
# Flip to True to see urllib3's debug log (connections, request lines and
# response statuses) while debugging registry traffic.
DEBUG_REQUESTS = False
if DEBUG_REQUESTS:
    # requests no longer vendors urllib3, so the records are emitted by loggers
    # named 'urllib3.*' (e.g. 'urllib3.connectionpool'); a logger named
    # 'requests.packages.urllib3' never receives anything.
    urllib3_logger = logging.getLogger('urllib3')
    urllib3_logger.setLevel(logging.DEBUG)
    urllib3_logger.propagate = True
# So, there is a bunch of confusing stuff happening in this file. The gist of why it's Like This is:
#
# nix2container does not concern itself with tags (reasonably enough):
# https://github.com/nlewo/nix2container/issues/59
#
# This is fine. But then we noticed: docker images don't play nice if you have
# multiple architectures you want to abstract over if you don't do special
# things. Those special things are images with manifests containing multiple
# images.
#
# Docker has a data model vaguely analogous to git: you have higher level
# objects referring to a bunch of content-addressed blobs.
#
# A multiarch image is more or less just a manifest that refers to more
# manifests; in OCI it is an Index.
#
# See the API spec here: https://github.com/opencontainers/distribution-spec/blob/v1.0.1/spec.md#definitions
# And the Index spec here: https://github.com/opencontainers/image-spec/blob/v1.0.1/image-index.md
#
# skopeo doesn't *know* how to make multiarch *manifests*:
# https://github.com/containers/skopeo/issues/1136
#
# There is a tool called manifest-tool that is supposed to do this
# (https://github.com/estesp/manifest-tool) but it doesn't support putting in
# annotations on the outer image, and I *really* didn't want to write golang to
# fix that. Thus, a little bit of homebrew containers code.
#
# Essentially what we are doing in here is splatting a bunch of images into the
# registry without tagging them (except as "temp", due to skopeo limitations),
# then simply sending a new composite manifest ourselves.
# Architecture names as they appear in the `platform.architecture` field of an
# index entry.
DockerArchitecture = Literal['amd64'] | Literal['arm64']
# Media type of a single image manifest: used for the entries of an index and
# as the Accept header when asking the registry about an image.
MANIFEST_MIME = 'application/vnd.oci.image.manifest.v1+json'
# Media type of an OCI image index; sent as the Content-Type of index uploads.
INDEX_MIME = 'application/vnd.oci.image.index.v1+json'
@dataclasses.dataclass(order=True, frozen=True)
class ImageMetadata:
    # Size in bytes of the image manifest
    size: int

    # Digest of the image manifest, in the form sha256:SOMEHEX
    digest: str
@dataclasses.dataclass(order=True, frozen=True)
class OCIIndexItem:
    """One entry of an index: an image that was uploaded without a tag."""

    # Size and digest of the uploaded image's manifest
    metadata: ImageMetadata
    architecture: DockerArchitecture
    os: str = 'linux'

    def serialize(self):
        """Returns this entry as a JSON-ready dict for an index's `manifests` list."""
        platform = {
            'architecture': self.architecture,
            'os': self.os,
        }
        descriptor = {
            'mediaType': MANIFEST_MIME,
            'size': self.metadata.size,
            'digest': self.metadata.digest,
            'platform': platform,
        }
        return descriptor
@dataclasses.dataclass(frozen=True)
class OCIIndex:
    """A multiarch manifest: a list of per-platform images plus annotations."""

    manifests: list[OCIIndexItem]
    annotations: dict[str, str]

    def serialize(self):
        """Returns the index as a JSON-ready dict, with its entries in sorted order."""
        entries = [entry.serialize() for entry in sorted(self.manifests)]
        document = {
            'schemaVersion': 2,
            'manifests': entries,
            'annotations': self.annotations,
        }
        return document
def docker_architecture_from_nix_system(system: str) -> DockerArchitecture:
    """
    Translates a Nix system string into the matching docker architecture name.

    Raises KeyError for systems other than x86_64-linux and aarch64-linux.
    """
    docker_arch_by_system = {
        'x86_64-linux': 'amd64',
        'aarch64-linux': 'arm64',
    }
    arch = docker_arch_by_system[system]
    return arch  # type: ignore
@dataclasses.dataclass
class TaggingOperation:
    """An index paired with the tags it is to be published under."""

    manifest: OCIIndex

    # Tags this image is uploaded under
    tags: list[str]
runtime_dir = xdg_base_dirs.xdg_runtime_dir()
config_dir = xdg_base_dirs.xdg_config_home()

# Credential files to consult, in order: the containers auth.json in the XDG
# runtime dir (only when there is a runtime dir), the one in the XDG config
# dir, and finally docker's own config.
AUTH_FILES = [
    *([runtime_dir / 'containers/auth.json'] if runtime_dir else []),
    config_dir / 'containers/auth.json',
    Path.home() / '.docker/config.json',
]
# Copied from Werkzeug https://github.com/pallets/werkzeug/blob/62e3ea45846d06576199a2f8470be7fe44c867c1/src/werkzeug/http.py#L300-L325
def parse_list_header(value: str) -> list[str]:
    """Split a comma separated header value into its items.

    Splitting is done by :func:`urllib.request.parse_http_list`, so commas
    inside quoted strings do not split. Items that are entirely wrapped in
    double quotes have that one pair of quotes removed.

    .. code-block:: python

        parse_list_header('token, "quoted value"')
        ['token', 'quoted value']

    :param value: The header value to parse.
    """

    def strip_quotes(entry: str) -> str:
        # A lone '"' is too short to be a quoted string and is kept as is.
        if len(entry) >= 2 and entry[0] == entry[-1] == '"':
            return entry[1:-1]
        return entry

    return [
        strip_quotes(entry)
        for entry in urllib.request.parse_http_list(value)
    ]
# Matches an extended parameter value of the form charset'language'value and
# captures the charset (group 1) and the still percent-encoded value (group 2).
# https://www.rfc-editor.org/rfc/rfc2231#section-4
_charset_value_re = re.compile(
    r"""
([\w!#$%&*+\-.^`|~]*)' # charset part, could be empty
[\w!#$%&*+\-.^`|~]*' # don't care about language part, usually empty
([\w!#$%&'*+\-.^`|~]+) # one or more token chars with percent encoding
""",
    re.ASCII | re.VERBOSE,
)
# Copied from: https://github.com/pallets/werkzeug/blob/62e3ea45846d06576199a2f8470be7fe44c867c1/src/werkzeug/http.py#L327-L394
def parse_dict_header(value: str) -> dict[str, str | None]:
    """Parse a comma separated header of ``key=value`` items into a dict.

    The header is first split with :func:`parse_list_header`; each item is then
    split at its first ``=``. Items without a ``=`` map to ``None``, and one
    pair of surrounding double quotes is removed from values.

    .. code-block:: python

        parse_dict_header('a=b, c="d, e", f')
        {"a": "b", "c": "d, e", "f": None}

    Items of the form ``key*=charset'language'value``
    (`RFC 2231 <https://www.rfc-editor.org/rfc/rfc2231#section-3>`__) are
    stored under ``key``. Their value is percent-decoded only for the ASCII,
    UTF-8 and ISO-8859-1 charsets; for any other charset it is left
    percent-encoded.

    :param value: The header value to parse.
    """
    # A safe list of encodings. Modern clients should only send ASCII or UTF-8.
    decodable_charsets = {"ascii", "us-ascii", "utf-8", "iso-8859-1"}
    parsed: dict[str, str | None] = {}

    for entry in parse_list_header(value):
        name, separator, raw = entry.partition("=")
        name = name.strip()

        if not separator:
            parsed[name] = None
            continue

        raw = raw.strip()

        if name[-1] == "*":
            # key*=charset''value becomes key=value, where value is percent encoded
            name = name[:-1]
            charset: str | None = None
            found = _charset_value_re.match(raw)

            if found:
                # Split the charset marker off the value.
                charset, raw = found.groups()
                assert charset
                charset = charset.lower()

            if charset in decodable_charsets:
                # invalid bytes are replaced during unquoting
                raw = unquote(raw, encoding=charset)

        if len(raw) >= 2 and raw[0] == raw[-1] == '"':
            raw = raw[1:-1]

        parsed[name] = raw

    return parsed
def parse_www_authenticate(www_authenticate):
    """
    Parses the parameters of a WWW-Authenticate header value into a dict.

    Everything up to the first space (the auth scheme) is dropped; the rest is
    stripped of surrounding whitespace and trailing '=' characters and handed
    to parse_dict_header.
    """
    _scheme, _, params = www_authenticate.partition(' ')
    params = params.strip().rstrip('=')
    return parse_dict_header(params)
class AuthState:
    """
    Credentials and bearer tokens used to talk to container registries.

    Credentials come from containers/docker style auth files. Bearer tokens
    obtained from a registry's token endpoint are cached per registry host for
    the lifetime of this object.
    """

    def __init__(self, auth_files: list[Path] = AUTH_FILES):
        # Maps a registry host (or image path prefix) to the credential blob
        # that is sent after 'Basic ' when requesting a token.
        self.auth_map: dict[str, str] = {}
        for f in auth_files:
            # Entries of later files replace those of earlier ones
            self.auth_map.update(AuthState.load_auth_file(f))
        # Maps a registry host (the part of an image path before the first
        # '/') to the bearer token obtained for it.
        self.token_cache: dict[str, str] = {}

    @staticmethod
    def load_auth_file(path: Path) -> dict[str, str]:
        """
        Reads the credentials out of the ``auths`` section of one auth file.

        Entries without an ``auth`` value are skipped rather than invalidating
        the whole file, and a file without an ``auths`` section simply has no
        credentials. A missing file yields an empty dict; so does a file that
        is not valid JSON, which is additionally logged.
        """
        if not path.exists():
            return {}

        with path.open() as fh:
            try:
                json_obj = json.load(fh)
                auths = json_obj.get('auths') or {}
                return {
                    k: v['auth']
                    for k, v in auths.items()
                    if isinstance(v, dict) and 'auth' in v
                }
            except (json.JSONDecodeError, KeyError) as e:
                log.exception('JSON decode error in %s', path, exc_info=e)
        return {}

    def get_token(self, hostname: str) -> Optional[str]:
        """Returns the cached bearer token for ``hostname``, or None."""
        return self.token_cache.get(hostname)

    def obtain_token(self, session: requests.Session, token_endpoint: str,
                     scope: str, service: str, image_path: str) -> str:
        """
        Returns a bearer token for the registry hosting ``image_path``.

        A cached token for the registry host is reused when there is one.
        Otherwise one is requested from ``token_endpoint`` for ``scope`` and
        ``service`` using the stored credentials, and cached.

        Raises ValueError when no credentials are known for ``image_path`` and
        requests.HTTPError when the token endpoint answers with an error.
        """
        authority, _, _ = image_path.partition('/')
        if tok := self.get_token(authority):
            return tok

        creds = self.find_credential_for(image_path)
        if not creds:
            raise ValueError('No credentials available for ' + image_path)

        resp = session.get(token_endpoint,
                           params={
                               'client_id': 'lix-releng',
                               'scope': scope,
                               'service': service,
                           },
                           headers={
                               'Authorization': 'Basic ' + creds
                           })
        # Fail with the HTTP error itself instead of a KeyError on 'token'
        resp.raise_for_status()
        token = resp.json()['token']
        # Must be stored under the same key get_token() is asked for (the
        # registry host); `service` is an arbitrary string chosen by the
        # registry and need not be equal to it.
        self.token_cache[authority] = token
        return token

    def find_credential_for(self, image_path: str) -> Optional[str]:
        """
        Returns the credential for the longest '/'-separated prefix of
        ``image_path`` that has one, or None when no prefix matches.
        """
        trails = image_path.split('/')
        # Longest prefix first: host/org/repo, then host/org, then host
        for end in range(len(trails), 0, -1):
            prefix = '/'.join(trails[:end])
            if prefix in self.auth_map:
                return self.auth_map[prefix]

        return None
class RegistryAuthenticator(requests.auth.AuthBase):
    """requests auth handler doing bearer token authentication against an OCI compliant registry"""

    def __init__(self, auth_state: AuthState, session: requests.Session,
                 image: str):
        self.auth_state = auth_state
        self.session = session
        self.image = image
        self.auth_map: dict[str, str] = {}

    def response_hook(self, r: requests.Response,
                      **kwargs: Any) -> requests.Response:
        """
        Answers a 401 by obtaining a token as described by the response's
        WWW-Authenticate header and re-sending the request with it; any other
        response is passed through untouched.
        """
        if r.status_code != 401:
            return r

        challenge = parse_www_authenticate(
            r.headers.get('www-authenticate', '').lower())
        assert challenge

        bearer = self.auth_state.obtain_token(
            self.session,
            challenge['realm'],  # type: ignore
            challenge['scope'],  # type: ignore
            challenge['service'],  # type: ignore
            self.image)

        # NOTE(review): the copied request appears to keep this hook, so a
        # second 401 would re-enter it - confirm.
        retried = r.request.copy()
        retried.headers['Authorization'] = 'Bearer ' + bearer
        return self.session.send(retried)

    def __call__(self,
                 r: requests.PreparedRequest) -> requests.PreparedRequest:
        """Attaches an already cached token, if any, and installs the 401 hook."""
        host = self.image.partition('/')[0]
        cached = self.auth_state.get_token(host)
        if cached:
            r.headers['Authorization'] = 'Bearer ' + cached

        r.register_hook('response', self.response_hook)
        return r
class Registry:
    """
    Small client for the manifest endpoints of a container registry.

    Image paths are given as ``host/path`` without a tag; requests go to
    ``https://host/v2/path/manifests/...``.
    """

    def __init__(self, session: requests.Session):
        self.auth_state = AuthState()
        self.session = session

    def image_info(self, image_path: str, manifest_id: str) -> ImageMetadata:
        """
        Asks the registry (with a HEAD request) for the size and digest of the
        manifest ``manifest_id`` of ``image_path``.

        Raises requests.HTTPError when the registry answers with an error.
        """
        authority, _, path = image_path.partition('/')
        resp = self.session.head(
            f'https://{authority}/v2/{path}/manifests/{manifest_id}',
            headers={'Accept': MANIFEST_MIME},
            auth=RegistryAuthenticator(self.auth_state, self.session,
                                       image_path))
        resp.raise_for_status()
        return ImageMetadata(int(resp.headers['content-length']),
                             resp.headers['docker-content-digest'])

    def delete_tag(self, image_path: str, tag: str):
        """
        Sends a DELETE for the manifest at ``tag`` of ``image_path``.

        Raises requests.HTTPError when the registry answers with an error.
        """
        authority, _, path = image_path.partition('/')
        resp = self.session.delete(
            f'https://{authority}/v2/{path}/manifests/{tag}',
            headers={'Content-Type': INDEX_MIME},
            auth=RegistryAuthenticator(self.auth_state, self.session,
                                       image_path))
        resp.raise_for_status()

    def _upload_index(self, image_path: str, tag: str, index: OCIIndex):
        """
        PUTs ``index`` as the manifest at ``tag`` of ``image_path``, once.

        Returns the Location header of the response; raises
        requests.HTTPError when the registry answers with an error.
        """
        authority, _, path = image_path.partition('/')
        # Compact separators and sorted keys make the body deterministic
        body = json.dumps(index.serialize(),
                          separators=(',', ':'),
                          sort_keys=True)

        resp = self.session.put(
            f'https://{authority}/v2/{path}/manifests/{tag}',
            data=body,
            headers={'Content-Type': INDEX_MIME},
            auth=RegistryAuthenticator(self.auth_state, self.session,
                                       image_path))
        resp.raise_for_status()
        return resp.headers['Location']

    def upload_index(self,
                     image_path: str,
                     tag: str,
                     index: OCIIndex,
                     retries=20,
                     retry_delay=1):
        """
        Uploads ``index`` under ``tag``, retrying while the registry answers 404.

        Makes up to ``retries`` attempts, ``retry_delay`` seconds apart, and
        returns the Location header of the successful upload. Any error other
        than a 404 is raised immediately; if the last attempt is still a 404,
        that error is raised instead of being silently dropped.
        """
        last_error: Optional[requests.HTTPError] = None
        # eventual consistency lmao
        for attempt in range(retries):
            try:
                return self._upload_index(image_path, tag, index)
            except requests.HTTPError as e:
                if e.response is None or e.response.status_code != 404:
                    raise
                last_error = e

            # No point in waiting once there is no attempt left
            if attempt + 1 < retries:
                time.sleep(retry_delay)

        if last_error is not None:
            raise last_error

View file

@ -1,5 +1,5 @@
import dataclasses
import urllib.parse
import dataclasses
S3_HOST = 's3.lix.systems'
S3_ENDPOINT = 'https://s3.lix.systems'
@ -19,12 +19,19 @@ DEFAULT_STORE_URI_BITS = {
@dataclasses.dataclass
class DockerTarget:
registry_path: str
"""Registry path without the tag, e.g. ghcr.io/lix-project/lix"""
def resolve(self, version: str) -> str:
"""Applies templates:
- version: the Lix version
tags: list[str]
"""List of tags this image should take. There must be at least one."""
@staticmethod
def resolve(item: str, version: str, major: str) -> str:
"""
return self.registry_path.format(version=version)
Applies templates:
- version: the Lix version e.g. 2.90.0
- major: the major Lix version e.g. 2.90
"""
return item.format(version=version, major=major)
def registry_name(self) -> str:
[a, _, _] = self.registry_path.partition('/')
@ -57,10 +64,11 @@ STAGING = RelengEnvironment(
releases_bucket='s3://staging-releases',
git_repo='ssh://git@git.lix.systems/lix-project/lix-releng-staging',
docker_targets=[
DockerTarget(
'git.lix.systems/lix-project/lix-releng-staging:{version}'),
DockerTarget(
'ghcr.io/lix-project/lix-releng-staging:{version}'),
# FIXME: how do we make sure that latest gets the latest of the *most recent* branch?
DockerTarget('git.lix.systems/lix-project/lix-releng-staging',
tags=['{version}', '{major}']),
DockerTarget('ghcr.io/lix-project/lix-releng-staging',
tags=['{version}', '{major}']),
],
)