diff --git a/scripts/aws_image_tools.py b/scripts/aws_image_tools.py index 18785291b7..f9f8660901 100644 --- a/scripts/aws_image_tools.py +++ b/scripts/aws_image_tools.py @@ -124,6 +124,7 @@ class DeleteAmisCommandArgs: regions: List[str] before: datetime name_prefix: str + name_contains: Optional[str] keep_latest: int yes: bool @@ -146,6 +147,11 @@ def setup_parser(cls, parser: ArgumentParser) -> None: default=DEFAULT_NAME_PREFIX, help=f"Only consider AMIs whose name starts with this (default: {DEFAULT_NAME_PREFIX!r})", ) + parser.add_argument( + "--name-contains", + help="Further restrict to AMIs whose name contains this substring " + "(case-insensitive), e.g. a version like 0.18", + ) parser.add_argument( "--keep-latest", type=int, @@ -172,6 +178,7 @@ def from_namespace(args: Namespace) -> "DeleteAmisCommandArgs": regions=args.regions or list(PROD_REGIONS), before=before, name_prefix=args.name_prefix, + name_contains=args.name_contains, keep_latest=args.keep_latest, yes=args.yes, ) @@ -312,7 +319,7 @@ def delete_amis_command(args: DeleteAmisCommandArgs) -> None: total_deleted = 0 for region in args.regions: ec2 = boto3.client("ec2", region_name=region) - images = _find_self_owned_images(ec2, args.name_prefix) + images = _find_self_owned_images(ec2, args.name_prefix, args.name_contains) # Sort newest first so --keep-latest preserves the most recent images. images.sort(key=lambda img: img["_created"], reverse=True) @@ -359,12 +366,15 @@ def delete_amis_command(args: DeleteAmisCommandArgs) -> None: logger.info("Deleted %d AMIs.", total_deleted) -def _find_self_owned_images(ec2, name_prefix: str) -> List[dict]: +def _find_self_owned_images(ec2, name_prefix: str, name_contains: Optional[str]) -> List[dict]: resp = ec2.describe_images( Owners=["self"], Filters=[{"Name": "name", "Values": [f"{name_prefix}*"]}], ) images = resp["Images"] + if name_contains: + needle = name_contains.lower() + images = [img for img in images if needle in img["Name"].lower()] for image in images: image["_created"] = datetime.strptime( image["CreationDate"], "%Y-%m-%dT%H:%M:%S.%fZ" diff --git a/scripts/oci_image_tools.py b/scripts/oci_image_tools.py index 90b9491573..07021fbf86 100644 --- a/scripts/oci_image_tools.py +++ b/scripts/oci_image_tools.py @@ -3,8 +3,8 @@ import time from argparse import ArgumentParser, Namespace from dataclasses import dataclass -from datetime import datetime, timedelta -from typing import Dict, Iterable, List, Mapping +from datetime import datetime, timedelta, timezone +from typing import Callable, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple, TypeVar import oci from oci.object_storage.models import Bucket @@ -129,6 +129,128 @@ def from_namespace(args: Namespace) -> "CheckCommandArgs": ) +@dataclass +class DeletePublicationsCommandArgs: + compartment_id: str + regions: List[str] + before: datetime + name_contains: Optional[str] + keep_latest: int + yes: bool + + @classmethod + def setup_parser(cls, parser: ArgumentParser) -> None: + parser.add_argument("--compartment", dest="compartment_id", required=True) + parser.add_argument("--regions", metavar="REGION_NAME", nargs="*") + _add_cleanup_filter_arguments(parser) + parser.set_defaults(to_struct=cls.from_namespace, run_command=delete_publications_command) + + @staticmethod + def from_namespace(args: Namespace) -> "DeletePublicationsCommandArgs": + return DeletePublicationsCommandArgs( + compartment_id=args.compartment_id, + regions=args.regions or [], + before=_parse_before(args.before), + name_contains=args.name_contains, + keep_latest=_validate_keep_latest(args.keep_latest), + yes=args.yes, + ) + + +@dataclass +class DeleteImagesCommandArgs: + compartment_id: str + regions: List[str] + before: datetime + name_contains: Optional[str] + keep_latest: int + yes: bool + + @classmethod + def setup_parser(cls, parser: ArgumentParser) -> None: + parser.add_argument("--compartment", dest="compartment_id", required=True) + parser.add_argument("--regions", metavar="REGION_NAME", nargs="*") + _add_cleanup_filter_arguments(parser) + parser.set_defaults(to_struct=cls.from_namespace, run_command=delete_images_command) + + @staticmethod + def from_namespace(args: Namespace) -> "DeleteImagesCommandArgs": + return DeleteImagesCommandArgs( + compartment_id=args.compartment_id, + regions=args.regions or [], + before=_parse_before(args.before), + name_contains=args.name_contains, + keep_latest=_validate_keep_latest(args.keep_latest), + yes=args.yes, + ) + + +@dataclass +class DeleteBucketsCommandArgs: + compartment_id: str + regions: List[str] + before: datetime + name_contains: Optional[str] + keep_latest: int + yes: bool + + @classmethod + def setup_parser(cls, parser: ArgumentParser) -> None: + parser.add_argument("--compartment", dest="compartment_id", required=True) + parser.add_argument("--regions", metavar="REGION_NAME", nargs="*") + _add_cleanup_filter_arguments(parser) + parser.set_defaults(to_struct=cls.from_namespace, run_command=delete_buckets_command) + + @staticmethod + def from_namespace(args: Namespace) -> "DeleteBucketsCommandArgs": + return DeleteBucketsCommandArgs( + compartment_id=args.compartment_id, + regions=args.regions or [], + before=_parse_before(args.before), + name_contains=args.name_contains, + keep_latest=_validate_keep_latest(args.keep_latest), + yes=args.yes, + ) + + +def _add_cleanup_filter_arguments(parser: ArgumentParser) -> None: + parser.add_argument( + "--before", + required=True, + metavar="YYYY-MM-DD", + help="Delete resources created strictly before this date (UTC)", + ) + parser.add_argument( + "--name-contains", + help="Only consider resources whose name contains this substring (case-insensitive)", + ) + parser.add_argument( + "--keep-latest", + type=int, + default=0, + help="Always keep this many newest matching resources per region, " + "regardless of --before (default: 0)", + ) + parser.add_argument( + "--yes", + action="store_true", + help="Actually delete (default: preview only)", + ) + + +def _parse_before(value: str) -> datetime: + try: + return datetime.strptime(value, "%Y-%m-%d").replace(tzinfo=timezone.utc) + except ValueError: + raise ScriptError(f"Invalid --before date {value!r}, expected YYYY-MM-DD") + + +def _validate_keep_latest(value: int) -> int: + if value < 0: + raise ScriptError("--keep-latest must be >= 0") + return value + + def main() -> None: parser = ArgumentParser(description="Tools for delivering OCI images") subparsers = parser.add_subparsers() @@ -161,7 +283,44 @@ def main() -> None: ) CheckCommandArgs.setup_parser(check_parser) + delete_publications_parser = subparsers.add_parser( + name="delete-publications", + description=( + "Delete OCI Marketplace community publications (a.k.a. Community " + "Applications) older than a date to free up the marketplace quota. " + "Dry-run by default; pass --yes to actually delete. Run this before " + "delete-images, since an image cannot be deleted while a publication " + "still references it." + ), + ) + DeletePublicationsCommandArgs.setup_parser(delete_publications_parser) + + delete_images_parser = subparsers.add_parser( + name="delete-images", + description=( + "Delete Custom Images older than a date in the given compartment and " + "regions. Dry-run by default; pass --yes to actually delete." + ), + ) + DeleteImagesCommandArgs.setup_parser(delete_images_parser) + + delete_buckets_parser = subparsers.add_parser( + name="delete-buckets", + description=( + "Delete Object Storage buckets older than a date in the given compartment " + "and regions, along with their contents (objects, pre-authenticated " + "requests, in-progress uploads). The copy command creates a bucket named " + "after the image to transfer it between regions and normally deletes it; " + "use this to clean up buckets left over by interrupted copies. " + "Dry-run by default; pass --yes to actually delete." + ), + ) + DeleteBucketsCommandArgs.setup_parser(delete_buckets_parser) + args = parser.parse_args() + if not hasattr(args, "run_command"): + parser.print_help() + sys.exit(1) args.run_command(args.to_struct(args)) @@ -241,6 +400,176 @@ def check_command(args: CheckCommandArgs) -> None: ) +def delete_publications_command(args: DeletePublicationsCommandArgs) -> None: + region_clients = get_region_clients(required_regions=args.regions) + regions_to_clean = args.regions or list(region_clients) + total_deleted = 0 + + for region in sorted(regions_to_clean): + client = region_clients[region].marketplace_client + publications = list_community_publications(args.compartment_id, client) + publications = _filter_by_name(publications, lambda p: p.name, args.name_contains) + keep, to_delete = _partition_for_deletion( + publications, lambda p: p.time_created, args.before, args.keep_latest + ) + _report_selection(region, "publications", keep, to_delete, lambda p: (p.name, p.id)) + + if args.yes: + for publication in to_delete: + client.delete_publication(publication.id) + logging.info( + "[%s] deleted publication %s (%s)", region, publication.name, publication.id + ) + total_deleted += 1 + + _report_outcome(args.yes, "publications", total_deleted) + + +def delete_images_command(args: DeleteImagesCommandArgs) -> None: + region_clients = get_region_clients(required_regions=args.regions) + regions_to_clean = args.regions or list(region_clients) + total_deleted = 0 + + for region in sorted(regions_to_clean): + client = region_clients[region].compute_client + images = list_compartment_images(args.compartment_id, client) + images = _filter_by_name(images, lambda i: i.display_name, args.name_contains) + keep, to_delete = _partition_for_deletion( + images, lambda i: i.time_created, args.before, args.keep_latest + ) + _report_selection(region, "images", keep, to_delete, lambda i: (i.display_name, i.id)) + + if args.yes: + for image in to_delete: + client.delete_image(image.id) + logging.info("[%s] deleted image %s (%s)", region, image.display_name, image.id) + total_deleted += 1 + + _report_outcome(args.yes, "images", total_deleted) + + +def delete_buckets_command(args: DeleteBucketsCommandArgs) -> None: + region_clients = get_region_clients(required_regions=args.regions) + regions_to_clean = args.regions or list(region_clients) + total_deleted = 0 + + for region in sorted(regions_to_clean): + client = region_clients[region].object_storage_client + namespace: str = client.get_namespace().data + buckets = list_compartment_buckets(namespace, args.compartment_id, client) + buckets = _filter_by_name(buckets, lambda b: b.name, args.name_contains) + keep, to_delete = _partition_for_deletion( + buckets, lambda b: b.time_created, args.before, args.keep_latest + ) + _report_selection(region, "buckets", keep, to_delete, lambda b: (b.name, namespace)) + + if args.yes: + for bucket in to_delete: + resources.delete_bucket(namespace, bucket.name, client) + logging.info("[%s] deleted bucket %s", region, bucket.name) + total_deleted += 1 + + _report_outcome(args.yes, "buckets", total_deleted) + + +def list_community_publications( + compartment_id: str, client: oci.marketplace.MarketplaceClient +) -> List[oci.marketplace.models.PublicationSummary]: + """ + List community publications (a.k.a. "Community Applications") created in + `compartment_id`. These are the publisher-side counterparts of marketplace + listings and count against the marketplace "Community Applications" quota. + """ + return list( + resources.chain_paginated_responses( + client.list_publications, + compartment_id=compartment_id, + listing_type=oci.marketplace.models.PublicationSummary.LISTING_TYPE_COMMUNITY, + ) + ) + + +def list_compartment_images( + compartment_id: str, client: oci.core.ComputeClient +) -> List[oci.core.models.Image]: + """ + List Custom Images owned by `compartment_id`. `list_images` also returns + Oracle platform images (with no compartment), which must never be deleted, + so they are filtered out here. + """ + images = resources.chain_paginated_responses(client.list_images, compartment_id=compartment_id) + return [image for image in images if image.compartment_id == compartment_id] + + +def list_compartment_buckets( + namespace: str, compartment_id: str, client: oci.object_storage.ObjectStorageClient +) -> List[oci.object_storage.models.BucketSummary]: + return list( + resources.chain_paginated_responses( + client.list_buckets, namespace_name=namespace, compartment_id=compartment_id + ) + ) + + +T = TypeVar("T") + + +def _filter_by_name( + items: Iterable[T], get_name: Callable[[T], str], name_contains: Optional[str] +) -> List[T]: + if not name_contains: + return list(items) + needle = name_contains.lower() + return [item for item in items if needle in get_name(item).lower()] + + +def _partition_for_deletion( + items: Iterable[T], + get_time: Callable[[T], datetime], + before: datetime, + keep_latest: int, +) -> Tuple[List[T], List[T]]: + # Sort newest first so --keep-latest preserves the most recent resources. + ordered = sorted(items, key=get_time, reverse=True) + keep, to_delete = [], [] + for index, item in enumerate(ordered): + if index < keep_latest or get_time(item) >= before: + keep.append(item) + else: + to_delete.append(item) + return keep, to_delete + + +def _report_selection( + region: str, + kind: str, + keep: Sequence[T], + to_delete: Sequence[T], + describe: Callable[[T], Tuple[str, str]], +) -> None: + logging.info( + "[%s] %d matching %s: %d to delete, %d to keep", + region, + len(keep) + len(to_delete), + kind, + len(to_delete), + len(keep), + ) + for item in keep: + name, ocid = describe(item) + logging.info("[%s] KEEP %s (%s)", region, name, ocid) + for item in to_delete: + name, ocid = describe(item) + logging.info("[%s] DELETE %s (%s)", region, name, ocid) + + +def _report_outcome(deleted_for_real: bool, kind: str, total_deleted: int) -> None: + if not deleted_for_real: + logging.info("Preview only. Re-run with --yes to delete the %s.", kind) + else: + logging.info("Deleted %d %s.", total_deleted, kind) + + def get_region_clients( required_regions: Iterable[str] = frozenset(), ) -> Dict[str, OCIRegionClient]: diff --git a/scripts/packer/provisioners/kernel/apt-packages.sh b/scripts/packer/provisioners/kernel/apt-packages.sh index 8e3978e4d9..3b53230bb0 100644 --- a/scripts/packer/provisioners/kernel/apt-packages.sh +++ b/scripts/packer/provisioners/kernel/apt-packages.sh @@ -2,8 +2,6 @@ set -e -sudo apt-get update - # Common packages across all versions DEPS=" net-tools @@ -25,11 +23,14 @@ DEPS=" python3-boto3 " +# No `apt-get update` here on purpose: the apt package indexes are already +# refreshed by apt-upgrade.sh, which always runs right before this script. + # Install basic packages for dep in $DEPS; do if ! dpkg -s $dep > /dev/null 2>&1; then echo "Attempting installation of missing package: $dep" - sudo DEBIAN_FRONTEND=noninteractive apt-get install -y -q $dep + sudo DEBIAN_FRONTEND=noninteractive apt-get -o DPkg::Lock::Timeout=60 install -y -q $dep fi done @@ -37,9 +38,9 @@ done sudo snap remove amazon-ssm-agent || true # Uninstall snapd, which is not used by us. -sudo apt-get purge -y snapd +sudo apt-get -o DPkg::Lock::Timeout=60 purge -y snapd # Uninstall ec2-instance-connect, which is not used by us. # This resolves ec2-instance-connect.service failure during boot, # which causes "systemctl status" in "degraded" state. -sudo apt-get purge -y --auto-remove ec2-instance-connect +sudo apt-get -o DPkg::Lock::Timeout=60 purge -y --auto-remove ec2-instance-connect diff --git a/src/dstack/version.py b/src/dstack/version.py index 08924a235c..b912a6c2df 100644 --- a/src/dstack/version.py +++ b/src/dstack/version.py @@ -5,6 +5,6 @@ __version__ = "0.0.0" __is_release__ = False -docker_base_image = "0.13" -docker_base_image_ubuntu_version = "22.04" -vm_base_image = "0.13" +docker_base_image = "0.14" +docker_base_image_ubuntu_version = "24.04" +vm_base_image = "0.14" diff --git a/src/tests/_internal/server/routers/test_runs.py b/src/tests/_internal/server/routers/test_runs.py index 0e5d821655..bea20a9830 100644 --- a/src/tests/_internal/server/routers/test_runs.py +++ b/src/tests/_internal/server/routers/test_runs.py @@ -11,7 +11,6 @@ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from dstack._internal import settings from dstack._internal.core.errors import GatewayError from dstack._internal.core.models.backends.base import BackendType from dstack._internal.core.models.common import ApplyAction, EntityReference @@ -152,7 +151,7 @@ def get_dev_env_run_plan_dict( " && tail -f /dev/null" ), ] - image_name = f"dstackai/base:{settings.DSTACK_DOCKER_BASE_IMAGE_VERSION}-base-ubuntu{settings.DSTACK_DOCKER_BASE_IMAGE_UBUNTU_VERSION}" + image_name = "dstackai/base:0.14-base-ubuntu24.04" run_spec = { "configuration": { @@ -390,7 +389,7 @@ def get_dev_env_run_dict( " && tail -f /dev/null" ), ] - image_name = "dstackai/base:0.13-base-ubuntu22.04" + image_name = "dstackai/base:0.14-base-ubuntu24.04" return { "id": run_id,