From 9aeb314e6a8276d3bd29f968c2baa44d5d19ca37 Mon Sep 17 00:00:00 2001 From: Jade Lovelace Date: Sun, 9 Jun 2024 00:27:06 -0700 Subject: [PATCH] releng: support multiarch docker images If we don't want to have separate registry tags by architecture (EWWWW), we need to be able to build multiarch docker images. This is pretty simple, and just requires making a manifest pointing to each of the component images. I was *going* to just do this API prodding with manifest-tool, but it doesn't support putting metadata on the outer manifest, which is actually kind of a problem because it then doesn't render the metadata on github. So I guess we get a simple little containers API implementation that is 90% auth code. Change-Id: I8bdd118d4cbc13b23224f2fb174b232432686bea --- docker.nix | 27 ++- package.nix | 2 + releng/__init__.py | 18 ++ releng/create_release.xsh | 12 +- releng/docker.xsh | 69 ++++++- releng/docker_assemble.py | 399 ++++++++++++++++++++++++++++++++++++++ releng/environment.py | 26 ++- 7 files changed, 527 insertions(+), 26 deletions(-) create mode 100644 releng/docker_assemble.py diff --git a/docker.nix b/docker.nix index a60fd5763..b23a9637f 100644 --- a/docker.nix +++ b/docker.nix @@ -1,5 +1,7 @@ { pkgs ? import { }, + # Git commit ID, if available + lixRevision ? null, nix2container, lib ? pkgs.lib, name ? "lix", @@ -353,6 +355,23 @@ let "NIX_SSL_CERT_FILE=/nix/var/nix/profiles/default/etc/ssl/certs/ca-bundle.crt" "NIX_PATH=/nix/var/nix/profiles/per-user/root/channels:/root/.nix-defexpr/channels" ]; + + Labels = { + "org.opencontainers.image.title" = "Lix"; + "org.opencontainers.image.source" = "https://git.lix.systems/lix-project/lix"; + "org.opencontainers.image.vendor" = "Lix project"; + "org.opencontainers.image.version" = pkgs.nix.version; + "org.opencontainers.image.description" = "Minimal Lix container image, with some batteries included."; + } // lib.optionalAttrs (lixRevision != null) { "org.opencontainers.image.revision" = lixRevision; }; + }; + + meta = { + description = "Docker image for Lix. This is built with nix2container; see that project's README for details"; + longDescription = '' + Docker image for Lix, built with nix2container. + To copy it to your docker daemon, nix run .#dockerImage.copyToDockerDaemon + To copy it to podman, nix run .#dockerImage.copyTo containers-storage:lix + ''; }; }; in @@ -379,12 +398,4 @@ image gzip $image echo "file binary-dist $image" >> $out/nix-support/hydra-build-products ''; - meta = image.meta // { - description = "Docker image for Lix. This is built with nix2container; see that project's README for details"; - longDescription = '' - Docker image for Lix, built with nix2container. - To copy it to your docker daemon, nix run .#dockerImage.copyToDockerDaemon - To copy it to podman, nix run .#dockerImage.copyTo containers-storage:lix - ''; - }; } diff --git a/package.nix b/package.nix index 1ac54dab6..6c0201c7f 100644 --- a/package.nix +++ b/package.nix @@ -417,6 +417,8 @@ stdenv.mkDerivation (finalAttrs: { p: [ p.yapf p.python-frontmatter + p.requests + p.xdg-base-dirs (p.toPythonModule xonsh-unwrapped) ] ); diff --git a/releng/__init__.py b/releng/__init__.py index 39d2beb51..fc23c52f3 100644 --- a/releng/__init__.py +++ b/releng/__init__.py @@ -2,12 +2,29 @@ from xonsh.main import setup setup() del setup +import logging + from . import environment from . import create_release from . import keys from . import version from . import cli from . import docker +from . import docker_assemble + +rootLogger = logging.getLogger() +rootLogger.setLevel(logging.DEBUG) +log = logging.getLogger(__name__) +log.setLevel(logging.DEBUG) + +fmt = logging.Formatter('{asctime} {levelname} {name}: {message}', + datefmt='%b %d %H:%M:%S', + style='{') + +if not any(isinstance(h, logging.StreamHandler) for h in rootLogger.handlers): + hand = logging.StreamHandler() + hand.setFormatter(fmt) + rootLogger.addHandler(hand) def reload(): import importlib @@ -17,3 +34,4 @@ def reload(): importlib.reload(version) importlib.reload(cli) importlib.reload(docker) + importlib.reload(docker_assemble) diff --git a/releng/create_release.xsh b/releng/create_release.xsh index 128edb63f..6f4df2142 100644 --- a/releng/create_release.xsh +++ b/releng/create_release.xsh @@ -258,15 +258,16 @@ def upload_artifacts(env: RelengEnvironment, noconfirm=False, no_check_git=False 'I want to release this' ) + docker_images = list((ARTIFACTS / f'lix/lix-{VERSION}').glob(f'lix-{VERSION}-docker-image-*.tar.gz')) + assert docker_images + print('[+] Upload to cache') with open(DRVS_TXT) as fh: upload_drv_paths_and_outputs(env, [x.strip() for x in fh.readlines() if x]) - docker_images = (ARTIFACTS / f'lix/lix-{VERSION}').glob(f'lix-{VERSION}-docker-image-*.tar.gz') print('[+] Upload docker images') - for image in docker_images: - for target in env.docker_targets: - docker.upload_docker_image(target, image) + for target in env.docker_targets: + docker.upload_docker_images(target, docker_images) print('[+] Upload to release bucket') aws s3 cp --recursive @(ARTIFACTS)/ @(env.releases_bucket)/ @@ -321,7 +322,8 @@ def build_artifacts(no_check_git=False): build_manual(eval_result) with open(DRVS_TXT, 'w') as fh: - fh.write('\n'.join(drv_paths)) + # don't bother putting the release tarballs themselves because they are duplicate and huge + fh.write('\n'.join(x['drvPath'] for x in eval_result if x['attr'] != 'lix-release-tarballs')) make_artifacts_dir(eval_result, ARTIFACTS) print(f'[+] Done! See {ARTIFACTS}') diff --git a/releng/docker.xsh b/releng/docker.xsh index 1ed2330cf..f45a69d27 100644 --- a/releng/docker.xsh +++ b/releng/docker.xsh @@ -1,6 +1,18 @@ -from .environment import DockerTarget, RelengEnvironment -from .version import VERSION +import json +import logging from pathlib import Path +import tempfile + +import requests + +from .environment import DockerTarget, RelengEnvironment +from .version import VERSION, MAJOR +from . import gitutils +from .docker_assemble import Registry, OCIIndex, OCIIndexItem +from . import docker_assemble + +log = logging.getLogger(__name__) +log.setLevel(logging.INFO) def check_all_logins(env: RelengEnvironment): for target in env.docker_targets: @@ -9,5 +21,54 @@ def check_all_logins(env: RelengEnvironment): def check_login(target: DockerTarget): skopeo login @(target.registry_name()) -def upload_docker_image(target: DockerTarget, path: Path): - skopeo --insecure-policy copy docker-archive:@(path) docker://@(target.resolve(version=VERSION)) +def upload_docker_images(target: DockerTarget, paths: list[Path]): + if not paths: return + + sess = requests.Session() + sess.headers['User-Agent'] = 'lix-releng' + + tag_names = [DockerTarget.resolve(tag, version=VERSION, major=MAJOR) for tag in target.tags] + + # latest only gets tagged for the current release branch of Lix + if not gitutils.is_maintenance_branch('HEAD'): + tag_names.append('latest') + + meta = {} + + reg = docker_assemble.Registry(sess) + manifests = [] + + with tempfile.TemporaryDirectory() as tmp: + tmp = Path(tmp) + + for path in paths: + digest_file = tmp / (path.name + '.digest') + inspection = json.loads($(skopeo inspect docker-archive:@(path))) + + docker_arch = inspection['Architecture'] + docker_os = inspection['Os'] + meta = inspection['Labels'] + + log.info('Pushing image %s for %s', path, docker_arch) + + # insecure-policy: we don't have any signature policy, we are just uploading an image + # We upload to a junk tag, because otherwise it will upload to `latest`, which is undesirable + skopeo --insecure-policy copy --format oci --digestfile @(digest_file) docker-archive:@(path) docker://@(target.registry_path):temp + + digest = digest_file.read_text().strip() + + # skopeo doesn't give us the manifest size directly, so we just ask the registry + metadata = reg.image_info(target.registry_path, digest) + + manifests.append(OCIIndexItem(metadata=metadata, architecture=docker_arch, os=docker_os)) + # delete the temp tag, which we only have to create because of skopeo + # limitations anyhow (it seems to not have a way to say "don't tag it, find + # your checksum and put it there") + # FIXME: this is not possible because GitHub only has a proprietary API for it. amazing. 11/10. + # reg.delete_tag(target.registry_path, 'temp') + + log.info('Pushed images, building a bigger and more menacing manifest from %r with metadata %r', manifests, meta) + # send the multiarch manifest to each tag + index = OCIIndex(manifests=manifests, annotations=meta) + for tag in tag_names: + reg.upload_index(target.registry_path, tag, index) diff --git a/releng/docker_assemble.py b/releng/docker_assemble.py new file mode 100644 index 000000000..ef1d8c4e6 --- /dev/null +++ b/releng/docker_assemble.py @@ -0,0 +1,399 @@ +from typing import Any, Literal, Optional +import re +from pathlib import Path +import json +import dataclasses +import time +from urllib.parse import unquote +import urllib.request +import logging + +import requests.auth +import requests +import xdg_base_dirs + +log = logging.getLogger(__name__) +log.setLevel(logging.INFO) + +DEBUG_REQUESTS = False +if DEBUG_REQUESTS: + urllib3_logger = logging.getLogger('requests.packages.urllib3') + urllib3_logger.setLevel(logging.DEBUG) + urllib3_logger.propagate = True + +# So, there is a bunch of confusing stuff happening in this file. The gist of why it's Like This is: +# +# nix2container does not concern itself with tags (reasonably enough): +# https://github.com/nlewo/nix2container/issues/59 +# +# This is fine. But then we noticed: docker images don't play nice if you have +# multiple architectures you want to abstract over if you don't do special +# things. Those special things are images with manifests containing multiple +# images. +# +# Docker has a data model vaguely analogous to git: you have higher level +# objects referring to a bunch of content-addressed blobs. +# +# A multiarch image is more or less just a manifest that refers to more +# manifests; in OCI it is an Index. +# +# See the API spec here: https://github.com/opencontainers/distribution-spec/blob/v1.0.1/spec.md#definitions +# And the Index spec here: https://github.com/opencontainers/image-spec/blob/v1.0.1/image-index.md +# +# skopeo doesn't *know* how to make multiarch *manifests*: +# https://github.com/containers/skopeo/issues/1136 +# +# There is a tool called manifest-tool that is supposed to do this +# (https://github.com/estesp/manifest-tool) but it doesn't support putting in +# annotations on the outer image, and I *really* didn't want to write golang to +# fix that. Thus, a little bit of homebrew containers code. +# +# Essentially what we are doing in here is splatting a bunch of images into the +# registry without tagging them (except as "temp", due to podman issues), then +# simply sending a new composite manifest ourselves. + +DockerArchitecture = Literal['amd64'] | Literal['arm64'] +MANIFEST_MIME = 'application/vnd.oci.image.manifest.v1+json' +INDEX_MIME = 'application/vnd.oci.image.index.v1+json' + + +@dataclasses.dataclass(frozen=True, order=True) +class ImageMetadata: + size: int + digest: str + """sha256:SOMEHEX""" + + +@dataclasses.dataclass(frozen=True, order=True) +class OCIIndexItem: + """Information about an untagged uploaded image.""" + + metadata: ImageMetadata + + architecture: DockerArchitecture + + os: str = 'linux' + + def serialize(self): + return { + 'mediaType': MANIFEST_MIME, + 'size': self.metadata.size, + 'digest': self.metadata.digest, + 'platform': { + 'architecture': self.architecture, + 'os': self.os, + } + } + + +@dataclasses.dataclass(frozen=True) +class OCIIndex: + manifests: list[OCIIndexItem] + + annotations: dict[str, str] + + def serialize(self): + return { + 'schemaVersion': 2, + 'manifests': [item.serialize() for item in sorted(self.manifests)], + 'annotations': self.annotations + } + + +def docker_architecture_from_nix_system(system: str) -> DockerArchitecture: + MAP = { + 'x86_64-linux': 'amd64', + 'aarch64-linux': 'arm64', + } + return MAP[system] # type: ignore + + +@dataclasses.dataclass +class TaggingOperation: + manifest: OCIIndex + tags: list[str] + """Tags this image is uploaded under""" + + +runtime_dir = xdg_base_dirs.xdg_runtime_dir() +config_dir = xdg_base_dirs.xdg_config_home() + +AUTH_FILES = ([runtime_dir / 'containers/auth.json'] if runtime_dir else []) + \ + [config_dir / 'containers/auth.json', Path.home() / '.docker/config.json'] + + +# Copied from Werkzeug https://github.com/pallets/werkzeug/blob/62e3ea45846d06576199a2f8470be7fe44c867c1/src/werkzeug/http.py#L300-L325 +def parse_list_header(value: str) -> list[str]: + """Parse a header value that consists of a list of comma separated items according + to `RFC 9110 `__. + + This extends :func:`urllib.request.parse_http_list` to remove surrounding quotes + from values. + + .. code-block:: python + + parse_list_header('token, "quoted value"') + ['token', 'quoted value'] + + This is the reverse of :func:`dump_header`. + + :param value: The header value to parse. + """ + result = [] + + for item in urllib.request.parse_http_list(value): + if len(item) >= 2 and item[0] == item[-1] == '"': + item = item[1:-1] + + result.append(item) + + return result + + +# https://www.rfc-editor.org/rfc/rfc2231#section-4 +_charset_value_re = re.compile( + r""" + ([\w!#$%&*+\-.^`|~]*)' # charset part, could be empty + [\w!#$%&*+\-.^`|~]*' # don't care about language part, usually empty + ([\w!#$%&'*+\-.^`|~]+) # one or more token chars with percent encoding + """, + re.ASCII | re.VERBOSE, +) + + +# Copied from: https://github.com/pallets/werkzeug/blob/62e3ea45846d06576199a2f8470be7fe44c867c1/src/werkzeug/http.py#L327-L394 +def parse_dict_header(value: str) -> dict[str, str | None]: + """Parse a list header using :func:`parse_list_header`, then parse each item as a + ``key=value`` pair. + + .. code-block:: python + + parse_dict_header('a=b, c="d, e", f') + {"a": "b", "c": "d, e", "f": None} + + This is the reverse of :func:`dump_header`. + + If a key does not have a value, it is ``None``. + + This handles charsets for values as described in + `RFC 2231 `__. Only ASCII, UTF-8, + and ISO-8859-1 charsets are accepted, otherwise the value remains quoted. + + :param value: The header value to parse. + + .. versionchanged:: 3.0 + Passing bytes is not supported. + + .. versionchanged:: 3.0 + The ``cls`` argument is removed. + + .. versionchanged:: 2.3 + Added support for ``key*=charset''value`` encoded items. + + .. versionchanged:: 0.9 + The ``cls`` argument was added. + """ + result: dict[str, str | None] = {} + + for item in parse_list_header(value): + key, has_value, value = item.partition("=") + key = key.strip() + + if not has_value: + result[key] = None + continue + + value = value.strip() + encoding: str | None = None + + if key[-1] == "*": + # key*=charset''value becomes key=value, where value is percent encoded + # adapted from parse_options_header, without the continuation handling + key = key[:-1] + match = _charset_value_re.match(value) + + if match: + # If there is a charset marker in the value, split it off. + encoding, value = match.groups() + assert encoding + encoding = encoding.lower() + + # A safe list of encodings. Modern clients should only send ASCII or UTF-8. + # This list will not be extended further. An invalid encoding will leave the + # value quoted. + if encoding in {"ascii", "us-ascii", "utf-8", "iso-8859-1"}: + # invalid bytes are replaced during unquoting + value = unquote(value, encoding=encoding) + + if len(value) >= 2 and value[0] == value[-1] == '"': + value = value[1:-1] + + result[key] = value + + return result + + +def parse_www_authenticate(www_authenticate): + scheme, _, rest = www_authenticate.partition(' ') + scheme = scheme.lower() + rest = rest.strip() + + parsed = parse_dict_header(rest.rstrip('=')) + return parsed + + +class AuthState: + + def __init__(self, auth_files: list[Path] = AUTH_FILES): + self.auth_map: dict[str, str] = {} + for f in auth_files: + self.auth_map.update(AuthState.load_auth_file(f)) + self.token_cache: dict[str, str] = {} + + @staticmethod + def load_auth_file(path: Path) -> dict[str, str]: + if path.exists(): + with path.open() as fh: + try: + json_obj = json.load(fh) + return {k: v['auth'] for k, v in json_obj['auths'].items()} + except (json.JSONDecodeError, KeyError) as e: + log.exception('JSON decode error in %s', path, exc_info=e) + return {} + + def get_token(self, hostname: str) -> Optional[str]: + return self.token_cache.get(hostname) + + def obtain_token(self, session: requests.Session, token_endpoint: str, + scope: str, service: str, image_path: str) -> str: + authority, _, _ = image_path.partition('/') + if tok := self.get_token(authority): + return tok + + creds = self.find_credential_for(image_path) + if not creds: + raise ValueError('No credentials available for ' + image_path) + + resp = session.get(token_endpoint, + params={ + 'client_id': 'lix-releng', + 'scope': scope, + 'service': service, + }, + headers={ + 'Authorization': 'Basic ' + creds + }).json() + token = resp['token'] + self.token_cache[service] = token + return token + + def find_credential_for(self, image_path: str): + trails = image_path.split('/') + for i in range(len(trails)): + prefix = '/'.join(trails[:len(trails) - i]) + if prefix in self.auth_map: + return self.auth_map[prefix] + + return None + + +class RegistryAuthenticator(requests.auth.AuthBase): + """Authenticates to an OCI compliant registry""" + + def __init__(self, auth_state: AuthState, session: requests.Session, + image: str): + self.auth_map: dict[str, str] = {} + self.image = image + self.session = session + self.auth_state = auth_state + + def response_hook(self, r: requests.Response, + **kwargs: Any) -> requests.Response: + if r.status_code == 401: + www_authenticate = r.headers.get('www-authenticate', '').lower() + parsed = parse_www_authenticate(www_authenticate) + assert parsed + + tok = self.auth_state.obtain_token( + self.session, + parsed['realm'], # type: ignore + parsed['scope'], # type: ignore + parsed['service'], # type: ignore + self.image) + + new_req = r.request.copy() + new_req.headers['Authorization'] = 'Bearer ' + tok + + return self.session.send(new_req) + else: + return r + + def __call__(self, + r: requests.PreparedRequest) -> requests.PreparedRequest: + authority, _, _ = self.image.partition('/') + auth_may = self.auth_state.get_token(authority) + + if auth_may: + r.headers['Authorization'] = 'Bearer ' + auth_may + + r.register_hook('response', self.response_hook) + return r + + +class Registry: + + def __init__(self, session: requests.Session): + self.auth_state = AuthState() + self.session = session + + def image_info(self, image_path: str, manifest_id: str) -> ImageMetadata: + authority, _, path = image_path.partition('/') + resp = self.session.head( + f'https://{authority}/v2/{path}/manifests/{manifest_id}', + headers={'Accept': MANIFEST_MIME}, + auth=RegistryAuthenticator(self.auth_state, self.session, + image_path)) + resp.raise_for_status() + return ImageMetadata(int(resp.headers['content-length']), + resp.headers['docker-content-digest']) + + def delete_tag(self, image_path: str, tag: str): + authority, _, path = image_path.partition('/') + resp = self.session.delete( + f'https://{authority}/v2/{path}/manifests/{tag}', + headers={'Content-Type': INDEX_MIME}, + auth=RegistryAuthenticator(self.auth_state, self.session, + image_path)) + resp.raise_for_status() + + def _upload_index(self, image_path: str, tag: str, index: OCIIndex): + authority, _, path = image_path.partition('/') + body = json.dumps(index.serialize(), + separators=(',', ':'), + sort_keys=True) + + resp = self.session.put( + f'https://{authority}/v2/{path}/manifests/{tag}', + data=body, + headers={'Content-Type': INDEX_MIME}, + auth=RegistryAuthenticator(self.auth_state, self.session, + image_path)) + resp.raise_for_status() + + return resp.headers['Location'] + + def upload_index(self, + image_path: str, + tag: str, + index: OCIIndex, + retries=20, + retry_delay=1): + # eventual consistency lmao + for _ in range(retries): + try: + return self._upload_index(image_path, tag, index) + except requests.HTTPError as e: + if e.response.status_code != 404: + raise + + time.sleep(retry_delay) diff --git a/releng/environment.py b/releng/environment.py index e8e7e771e..3d65b2799 100644 --- a/releng/environment.py +++ b/releng/environment.py @@ -1,5 +1,5 @@ -import dataclasses import urllib.parse +import dataclasses S3_HOST = 's3.lix.systems' S3_ENDPOINT = 'https://s3.lix.systems' @@ -19,12 +19,19 @@ DEFAULT_STORE_URI_BITS = { @dataclasses.dataclass class DockerTarget: registry_path: str + """Registry path without the tag, e.g. ghcr.io/lix-project/lix""" - def resolve(self, version: str) -> str: - """Applies templates: - - version: the Lix version + tags: list[str] + """List of tags this image should take. There must be at least one.""" + + @staticmethod + def resolve(item: str, version: str, major: str) -> str: """ - return self.registry_path.format(version=version) + Applies templates: + - version: the Lix version e.g. 2.90.0 + - major: the major Lix version e.g. 2.90 + """ + return item.format(version=version, major=major) def registry_name(self) -> str: [a, _, _] = self.registry_path.partition('/') @@ -57,10 +64,11 @@ STAGING = RelengEnvironment( releases_bucket='s3://staging-releases', git_repo='ssh://git@git.lix.systems/lix-project/lix-releng-staging', docker_targets=[ - DockerTarget( - 'git.lix.systems/lix-project/lix-releng-staging:{version}'), - DockerTarget( - 'ghcr.io/lix-project/lix-releng-staging:{version}'), + # FIXME: how do we make sure that latest gets the latest of the *most recent* branch? + DockerTarget('git.lix.systems/lix-project/lix-releng-staging', + tags=['{version}', '{major}']), + DockerTarget('ghcr.io/lix-project/lix-releng-staging', + tags=['{version}', '{major}']), ], )