From 51f073089d93a478af69787b6f6f61954ec6aac0 Mon Sep 17 00:00:00 2001 From: Denys Fedoryshchenko Date: Sat, 16 May 2026 13:22:13 +0300 Subject: [PATCH 1/3] console: Add console output logging Signed-off-by: Denys Fedoryshchenko --- examples/aws/alltests-config.json | 3 +- examples/aws/config-arm64.json | 3 +- examples/aws/config.json | 3 +- src/kernel_ci_cloud_labs/launch_vm.py | 40 ++++++++++++++++++++++++++- 4 files changed, 45 insertions(+), 4 deletions(-) diff --git a/examples/aws/alltests-config.json b/examples/aws/alltests-config.json index 47ada1e..374f9f0 100644 --- a/examples/aws/alltests-config.json +++ b/examples/aws/alltests-config.json @@ -84,7 +84,8 @@ "ec2:DescribeImages", "ec2:DescribeSubnets", "ec2:DescribeSecurityGroups", - "ec2:DescribeVpcs" + "ec2:DescribeVpcs", + "ec2:GetConsoleOutput" ], "Resource": "*" } diff --git a/examples/aws/config-arm64.json b/examples/aws/config-arm64.json index 3e934f6..208f5fd 100644 --- a/examples/aws/config-arm64.json +++ b/examples/aws/config-arm64.json @@ -84,7 +84,8 @@ "ec2:DescribeImages", "ec2:DescribeSubnets", "ec2:DescribeSecurityGroups", - "ec2:DescribeVpcs" + "ec2:DescribeVpcs", + "ec2:GetConsoleOutput" ], "Resource": "*" } diff --git a/examples/aws/config.json b/examples/aws/config.json index fe9c435..7348f26 100644 --- a/examples/aws/config.json +++ b/examples/aws/config.json @@ -84,7 +84,8 @@ "ec2:DescribeImages", "ec2:DescribeSubnets", "ec2:DescribeSecurityGroups", - "ec2:DescribeVpcs" + "ec2:DescribeVpcs", + "ec2:GetConsoleOutput" ], "Resource": "*" } diff --git a/src/kernel_ci_cloud_labs/launch_vm.py b/src/kernel_ci_cloud_labs/launch_vm.py index a9e2ffa..137dfec 100644 --- a/src/kernel_ci_cloud_labs/launch_vm.py +++ b/src/kernel_ci_cloud_labs/launch_vm.py @@ -5,6 +5,7 @@ # SPDX-License-Identifier: Apache-2.0 +import base64 import json import shlex import sys @@ -345,8 +346,45 @@ def check_test_result(self): log_error(f"✗ Failed to read result.txt: {e}") return False + def 
capture_console_output(self): + """Fetch EC2 serial console output (kernel boot log) and upload to S3.""" + if not self.instance_id: + return + + log_not("\n=== Capturing console output ===") + try: + resp = self.ec2.get_console_output(InstanceId=self.instance_id, Latest=True) + except Exception as e: + log_not(f" Failed to fetch console output: {e}") + return + + # botocore's after-call handler for GetConsoleOutput has already + # base64-decoded "Output" into text. Decoding it a second time either + # fails on padding or, worse, "succeeds" and uploads garbage, so the + # value is used as returned. + output = resp.get("Output", "") + if not output: + log_not(" No console output available yet") + return + if isinstance(output, bytes): + output = output.decode("utf-8", errors="replace") + + s3_key = f"{self.run_prefix}/test_{self.test}/output/{self.instance_id}/console-output.log" + try: + self.s3.put_object( + Bucket=self.s3_bucket, + Key=s3_key, + Body=output.encode("utf-8"), + ContentType="text/plain; charset=utf-8", + ) + log_not(f"✓ Console output uploaded ({len(output)} bytes) to s3://{self.s3_bucket}/{s3_key}") + except Exception as e: + log_not(f" Failed to upload console output: {e}") + def cleanup(self): - """Terminate instance.""" + """Capture console output, then terminate instance.""" + self.capture_console_output() + if self.instance_id: log_not(f"\n=== Terminating instance {self.instance_id} ===") try: From 7edd350ef7db5dbd4cb5d2b2ac8af1b3f2452ca0 Mon Sep 17 00:00:00 2001 From: Denys Fedoryshchenko Date: Sat, 16 May 2026 13:44:43 +0300 Subject: [PATCH 2/3] setup: Add validation and auto-fix Signed-off-by: Denys Fedoryshchenko --- QUICKSTART.md | 10 + README.md | 26 ++ src/kernel_ci_cloud_labs/cli.py | 26 ++ src/kernel_ci_cloud_labs/setup_validate.py | 264 +++++++++++++++++++++ 4 files changed, 326 insertions(+) create mode 100644 src/kernel_ci_cloud_labs/setup_validate.py diff --git a/QUICKSTART.md b/QUICKSTART.md index 2854137..fde9063 100644 --- a/QUICKSTART.md +++ b/QUICKSTART.md @@ -46,6 +46,16 @@ pipeline can run a job end-to-end. 
The full walkthrough lives in Look for `VMs: 2/2 spawned, 2 successful, 0 failed`. If this passes, the AWS side is ready and you can proceed below. + For a fast pre-flight check (no VMs spawned) of AWS permissions, + the results bucket, and KernelCI/KCIDB tokens, run: + ```bash + kernel-ci-cloud-runner aws setup validate \ + --bucket kernel-ci-$USER-results --role kernel-ci-$USER-vm-role + ``` + See [README → Validate setup](README.md#validate-setup-optional). The + `KERNELCI_API_TOKEN` / `KCIDB_JWT` / `UNIFIED_TOKEN` env vars set in + section 3 below are picked up automatically. + If your jobs install custom kernels, also follow [README 4. Upload kernel RPMs](README.md#4-upload-kernel-rpms-required-for-kernel-install-tests). diff --git a/README.md b/README.md index 26a24ff..4d08aec 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,32 @@ Actually writing a configuration file for a given setup can be done with an expl kernel-ci-cloud-runner aws setup configure --prefix kernel-ci-$USER- --region us-west-2 --output my-config.config ``` +### Validate setup (optional) + +Before launching real VMs, run a pre-flight check of AWS permissions, IAM resources, the results bucket, and KernelCI/KCIDB tokens. The command is read-only by default — pass `--fix` to create the S3 bucket if it doesn't exist yet. 
+ +```bash +kernel-ci-cloud-runner aws setup validate \ + --bucket kernel-ci-$USER-results \ + --role kernel-ci-$USER-vm-role \ + --region us-west-2 +``` + +What it checks: + +| Check | What it does | +| --- | --- | +| `aws_credentials` | `sts:GetCallerIdentity` — prints account + principal ARN | +| `ec2_describe` | confirms `ec2:DescribeInstances` works | +| `ec2_console_output` | probes `ec2:GetConsoleOutput` (needed to capture kernel boot logs) | +| `ssm` | `ssm:DescribeInstanceInformation` — needed to drive the test client | +| `iam_role` / `instance_profile` | only when `--role` is given — verifies trust policy and attached managed policies | +| `s3_bucket` | `head_bucket`; with `--fix`, creates the bucket (region-aware) and enables Block Public Access | +| `kernelci_api_token` | `GET /whoami` with `Bearer` from `KERNELCI_API_TOKEN` or `UNIFIED_TOKEN` | +| `kcidb_jwt` | decodes the JWT payload (no signature verification) and reports `exp`, `iss`, `sub`; sources the token from `KCIDB_JWT`, `KCIDB_REST=https://<token>@host/path`, or `UNIFIED_TOKEN` | + +Exits non-zero if any check fails. Useful when iterating on IAM policies, rotating tokens, or onboarding a new AWS account. + ### 3. Run integration test to verify setup The integration test uses only `basic-test` and `example-reboot-test` — no kernel RPMs needed. This is the fastest way to verify everything works. The test will fail if you do not provide your configuration. 
diff --git a/src/kernel_ci_cloud_labs/cli.py b/src/kernel_ci_cloud_labs/cli.py index 735b887..2187bce 100644 --- a/src/kernel_ci_cloud_labs/cli.py +++ b/src/kernel_ci_cloud_labs/cli.py @@ -7,6 +7,7 @@ kernel-ci-cloud-runner aws setup upload-rpms --bucket BUCKET --local-rpms DIR [--region REGION] kernel-ci-cloud-runner aws setup upload-tests --bucket BUCKET [--test-dir DIR] [--region REGION] kernel-ci-cloud-runner aws setup cleanup --prefix PREFIX [--region REGION] [--delete] + kernel-ci-cloud-runner aws setup validate [--bucket BUCKET] [--role ROLE] [--region REGION] [--fix] """ __authors__ = ["Max Hubmann ", "Norbert Manthey "] @@ -167,6 +168,19 @@ def cmd_setup_upload_tests(args): sys.exit(1) +def cmd_setup_validate(args): + """Validate AWS setup and KernelCI/KCIDB tokens; optionally create missing resources.""" + from kernel_ci_cloud_labs.setup_validate import validate + + sys.exit(validate( + bucket=args.bucket, + role_name=args.role, + region=args.region, + api_base_uri=args.api_url, + fix=args.fix, + )) + + def cmd_analyze(args): """Download and analyze benchmark results from a previous pipeline run.""" try: @@ -270,6 +284,18 @@ def main(): test_parser.add_argument("--region", default="us-west-2", help="AWS region") test_parser.set_defaults(func=cmd_setup_upload_tests) + # aws setup validate + val_parser = setup_sub.add_parser( + "validate", + help="Validate AWS setup and tokens (read-only; use --fix to create missing resources)", + ) + val_parser.add_argument("--bucket", help="S3 bucket to verify (and create with --fix)") + val_parser.add_argument("--role", help="IAM role name used by VM instance profiles") + val_parser.add_argument("--region", default="us-west-2", help="AWS region (default: us-west-2)") + val_parser.add_argument("--api-url", help=f"KernelCI API base URI (overrides $KERNELCI_API_BASE_URI)") + val_parser.add_argument("--fix", action="store_true", help="Create missing resources (S3 bucket) instead of just reporting them") + 
val_parser.set_defaults(func=cmd_setup_validate) + args = parser.parse_args() if not hasattr(args, "func"): diff --git a/src/kernel_ci_cloud_labs/setup_validate.py b/src/kernel_ci_cloud_labs/setup_validate.py new file mode 100644 index 0000000..678c29d --- /dev/null +++ b/src/kernel_ci_cloud_labs/setup_validate.py @@ -0,0 +1,264 @@ +"""Validate AWS setup, KernelCI/KCIDB tokens, and optionally fix missing resources. + +Wired into the CLI as `aws setup validate [--fix]`. Read-only by default; +`--fix` is allowed to create missing resources (currently: the S3 bucket). + +__authors__ = ["Max Hubmann ", "Norbert Manthey "] +__copyright__ = "Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved." +SPDX-License-Identifier: Apache-2.0 +""" + +import base64 +import json +import os +import time +import urllib.error +import urllib.request +from typing import Optional, Tuple + +import boto3 +from botocore.exceptions import ClientError + +from kernel_ci_cloud_labs.debug_aws_setup import ( + check_aws_credentials, + check_ec2_permissions, + check_iam_role, + check_instance_profile, + check_ssm_permissions, +) + + +# Env var names mirror the ones used by pull_labs_poller. Kept here as plain +# strings rather than imported to avoid a circular dependency on the poller. +ENV_API_BASE_URI = "KERNELCI_API_BASE_URI" +ENV_API_TOKEN = "KERNELCI_API_TOKEN" +ENV_KCIDB_URL = "KCIDB_SUBMIT_URL" +ENV_KCIDB_JWT = "KCIDB_JWT" +ENV_KCIDB_REST = "KCIDB_REST" +ENV_UNIFIED_TOKEN = "UNIFIED_TOKEN" + + +def check_s3_bucket(bucket_name: str, region: str, fix: bool = False) -> bool: + """Check S3 bucket exists. 
With fix=True, create it if missing.""" + print(f"\n=== Checking S3 Bucket: {bucket_name} ===") + s3 = boto3.client("s3", region_name=region) + try: + s3.head_bucket(Bucket=bucket_name) + print(f"✓ S3 bucket accessible: {bucket_name}") + return True + except ClientError as e: + code = e.response.get("Error", {}).get("Code", "") + # 404 = bucket does not exist; 403 = exists but no access (different owner) + if code in ("404", "NoSuchBucket"): + print(f"✗ S3 bucket does not exist: {bucket_name}") + if not fix: + print(" (pass --fix to create)") + return False + return _create_s3_bucket(s3, bucket_name, region) + print(f"✗ S3 bucket check failed ({code}): {e}") + return False + + +def _create_s3_bucket(s3, bucket_name: str, region: str) -> bool: + """Create an S3 bucket honoring region constraints (us-east-1 quirk).""" + print(f" Creating bucket in {region}...") + try: + if region == "us-east-1": + s3.create_bucket(Bucket=bucket_name) + else: + s3.create_bucket( + Bucket=bucket_name, + CreateBucketConfiguration={"LocationConstraint": region}, + ) + # Default-on: block public access; modern best practice. + s3.put_public_access_block( + Bucket=bucket_name, + PublicAccessBlockConfiguration={ + "BlockPublicAcls": True, + "IgnorePublicAcls": True, + "BlockPublicPolicy": True, + "RestrictPublicBuckets": True, + }, + ) + print(f"✓ Created S3 bucket: {bucket_name}") + return True + except ClientError as e: + print(f"✗ Failed to create bucket: {e}") + return False + + +def check_console_output_permission() -> bool: + """Probe ec2:GetConsoleOutput. 
Uses a non-existent instance id so the only + permission we test is the IAM action itself.""" + print("\n=== Checking ec2:GetConsoleOutput permission ===") + ec2 = boto3.client("ec2") + try: + ec2.get_console_output(InstanceId="i-0000000000000000f") + print("✓ Call accepted (unexpected; permission OK)") + return True + except ClientError as e: + code = e.response.get("Error", {}).get("Code", "") + if code == "UnauthorizedOperation" or "AccessDenied" in str(e): + print(f"✗ Missing ec2:GetConsoleOutput permission ({code})") + return False + # InvalidInstanceID.NotFound / Malformed means the call was authorized. + print(f"✓ ec2:GetConsoleOutput permitted (saw expected {code or 'error'})") + return True + + +def check_kernelci_api_token(api_base_uri: Optional[str] = None, + token: Optional[str] = None) -> bool: + """Probe the KernelCI API with the configured token.""" + print("\n=== Checking KernelCI API token ===") + + api_base_uri = api_base_uri or os.getenv(ENV_API_BASE_URI) + token = token or os.getenv(ENV_API_TOKEN) or os.getenv(ENV_UNIFIED_TOKEN) + + if not api_base_uri: + print(f"✗ {ENV_API_BASE_URI} not set; cannot probe") + return False + if not token: + print(f"✗ Neither {ENV_API_TOKEN} nor {ENV_UNIFIED_TOKEN} is set") + return False + + base = api_base_uri.rstrip("/") + whoami_url = f"{base}/whoami" + print(f" GET {whoami_url}") + req = urllib.request.Request( + whoami_url, + headers={"Authorization": f"Bearer {token}", "Accept": "application/json"}, + ) + try: + with urllib.request.urlopen(req, timeout=15) as resp: + body = resp.read().decode("utf-8", errors="replace") + print(f"✓ API accepted token (HTTP {resp.status})") + try: + data = json.loads(body) + user = data.get("username") or data.get("email") or data.get("id") + if user: + print(f" Authenticated as: {user}") + except (json.JSONDecodeError, AttributeError): + pass + return True + except urllib.error.HTTPError as e: + if e.code in (401, 403): + print(f"✗ Token rejected (HTTP {e.code})") + else: + 
print(f"✗ Unexpected HTTP {e.code} from {whoami_url}") + return False + except urllib.error.URLError as e: + print(f"✗ Could not reach KernelCI API: {e.reason}") + return False + + +def _parse_kcidb_rest(rest_url: str) -> Tuple[Optional[str], Optional[str]]: + """Parse https://<token>@<host>/path into (submit_url_without_token, token).""" + from urllib.parse import urlparse, urlunparse + + parsed = urlparse(rest_url) + token = parsed.username + if not token: + return rest_url, None + netloc = parsed.hostname or "" + if parsed.port: + netloc = f"{netloc}:{parsed.port}" + clean = urlunparse(parsed._replace(netloc=netloc)) + return clean, token + + +def check_kcidb_jwt() -> bool: + """Decode the KCIDB JWT (no signature verification) and check expiry.""" + print("\n=== Checking KCIDB JWT ===") + + jwt = os.getenv(ENV_KCIDB_JWT) + submit_url = os.getenv(ENV_KCIDB_URL) + rest = os.getenv(ENV_KCIDB_REST) + unified = os.getenv(ENV_UNIFIED_TOKEN) + + if not jwt and rest: + submit_url, jwt = _parse_kcidb_rest(rest) + if not jwt and unified: + jwt = unified + + if not jwt: + print(f"✗ No JWT found ({ENV_KCIDB_JWT}, {ENV_KCIDB_REST}, or {ENV_UNIFIED_TOKEN})") + return False + if submit_url: + print(f" Submit URL: {submit_url}") + + parts = jwt.split(".") + if len(parts) != 3: + print(f"✗ Token is not a JWT (expected 3 dot-separated parts, got {len(parts)})") + return False + + try: + # JWT payload is base64url; pad to a multiple of 4 before decoding. 
+ payload_b64 = parts[1] + "=" * (-len(parts[1]) % 4) + payload = json.loads(base64.urlsafe_b64decode(payload_b64)) + except (ValueError, json.JSONDecodeError) as e: + print(f"✗ JWT payload not decodable: {e}") + return False + + exp = payload.get("exp") + if exp is None: + print("✓ JWT decoded; no expiry claim") + else: + remaining = int(exp - time.time()) + if remaining <= 0: + print(f"✗ JWT expired {-remaining}s ago") + return False + days = remaining // 86400 + print(f"✓ JWT valid for {days}d ({remaining}s remaining)") + + issuer = payload.get("iss") + sub = payload.get("sub") or payload.get("email") + if issuer: + print(f" Issuer: {issuer}") + if sub: + print(f" Subject: {sub}") + return True + + +def validate(bucket: Optional[str] = None, + role_name: Optional[str] = None, + region: str = "us-west-2", + api_base_uri: Optional[str] = None, + api_token: Optional[str] = None, + fix: bool = False) -> int: + """Run all validation checks; return process exit code.""" + print("=" * 60) + print("Setup validation") + print(f" Region: {region}") + print(f" Bucket: {bucket or '(not provided)'}") + print(f" Role: {role_name or '(not provided)'}") + print(f" Fix: {'yes' if fix else 'no (read-only)'}") + print("=" * 60) + + results = {} + + results["aws_credentials"] = check_aws_credentials() + results["ec2_describe"] = check_ec2_permissions() + results["ec2_console_output"] = check_console_output_permission() + results["ssm"] = check_ssm_permissions() + + if role_name: + results["iam_role"] = check_iam_role(role_name) + results["instance_profile"] = check_instance_profile(role_name) + + if bucket: + results["s3_bucket"] = check_s3_bucket(bucket, region, fix=fix) + + results["kernelci_api_token"] = check_kernelci_api_token(api_base_uri, api_token) + results["kcidb_jwt"] = check_kcidb_jwt() + + print("\n" + "=" * 60) + print("Summary") + print("=" * 60) + for name, ok in results.items(): + mark = "✓" if ok else "✗" + print(f" {mark} {name}") + passed = sum(1 for v in 
results.values() if v) + total = len(results) + print(f"\n{passed}/{total} checks passed") + + return 0 if all(results.values()) else 1 From 864498be6198ae4cb9a6d07063d9963304ddbbff Mon Sep 17 00:00:00 2001 From: Denys Fedoryshchenko Date: Thu, 21 May 2026 19:53:48 +0300 Subject: [PATCH 3/3] kcidb: Fix test names, add normalize/validate functions, fix few tests KCIDB requires test names to follow certain rules, and a specific name to classify tests as boot tests. This also fixes one failing test. I am avoiding adding kcidb as a dependency and am instead adding the critical filtering/normalization manually. Signed-off-by: Denys Fedoryshchenko --- src/kernel_ci_cloud_labs/debug_aws_setup.py | 16 ++--- src/kernel_ci_cloud_labs/kcidb_submit.py | 63 +++++++++++++++++++- src/kernel_ci_cloud_labs/pull_labs_poller.py | 31 ++++++++-- src/kernel_ci_cloud_labs/setup_validate.py | 10 ++-- tests/test_kcidb_submit.py | 52 +++++++++++++++- tests/test_pull_labs_poller.py | 38 +++++++++++- 6 files changed, 187 insertions(+), 23 deletions(-) diff --git a/src/kernel_ci_cloud_labs/debug_aws_setup.py b/src/kernel_ci_cloud_labs/debug_aws_setup.py index 4a9e6ab..9c10842 100644 --- a/src/kernel_ci_cloud_labs/debug_aws_setup.py +++ b/src/kernel_ci_cloud_labs/debug_aws_setup.py @@ -109,11 +109,11 @@ def check_s3_bucket(bucket_name): return False -def check_ec2_permissions(): - """Check if we can describe EC2 instances.""" +def check_ec2_permissions(region=None): + """Check if we can describe EC2 instances in the given region.""" print("\n=== Checking EC2 Permissions ===") try: - ec2 = boto3.client("ec2") + ec2 = boto3.client("ec2", region_name=region) _ = ec2.describe_instances(MaxResults=5) print("✓ Can describe EC2 instances") return True @@ -122,11 +122,11 @@ def check_ec2_permissions(): return False -def check_ssm_permissions(): - """Check if we can use SSM.""" +def check_ssm_permissions(region=None): + """Check if we can use SSM in the given region.""" print("\n=== Checking SSM Permissions ===") try: - ssm 
= boto3.client("ssm", region_name=region) response = ssm.describe_instance_information(MaxResults=5) print("✓ Can describe SSM instances") count = len(response.get("InstanceInformationList", [])) @@ -181,8 +181,8 @@ def main(): if bucket_name: results.append(check_s3_bucket(bucket_name)) - results.append(check_ec2_permissions()) - results.append(check_ssm_permissions()) + results.append(check_ec2_permissions(config["region"])) + results.append(check_ssm_permissions(config["region"])) # Summary print("\n" + "=" * 60) diff --git a/src/kernel_ci_cloud_labs/kcidb_submit.py b/src/kernel_ci_cloud_labs/kcidb_submit.py index 1bd940c..1f8bed4 100644 --- a/src/kernel_ci_cloud_labs/kcidb_submit.py +++ b/src/kernel_ci_cloud_labs/kcidb_submit.py @@ -17,6 +17,7 @@ import json import logging +import re import urllib.error import urllib.request from typing import Any, Dict, Iterable, List, Optional @@ -50,6 +51,54 @@ def to_kcidb_status(raw: Optional[str]) -> str: return STATUS_MAP.get(str(raw).strip().lower(), "ERROR") +# KCIDB v5.3 field constraints (see kcidb_io.schema.V5_3). pullab_cloud +# constructs these fields itself, so each value is verified before submission: +# an invalid one would otherwise make the *whole* submission fail at the +# ingester, with a far less obvious error than a local raise. +# tests[*].path -- dot-separated segments of [A-Za-z0-9_-], or empty +# (kcidb_io.schema.V5_3.test_path_re) +# *.origin -- [a-z0-9_]+ (both end in \Z, not $: "$" would accept a trailing newline) +_KCIDB_PATH_RE = re.compile(r"^([a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)*)?\Z") +_KCIDB_ORIGIN_RE = re.compile(r"^[a-z0-9_]+\Z") + + +def validate_test_path(path: str) -> str: + """Verify *path* is a KCIDB v5.3-compliant test path; return it unchanged. + + KCIDB v5.3 restricts ``tests[*].path`` to dot-separated segments of + ``[A-Za-z0-9_-]`` (the empty string is allowed -- it denotes the test + tree root). 
A path with a space, slash or other punctuation makes the + entire submission fail schema validation at the ingester, so it is + rejected here, at the producer, rather than silently rewritten. + + Raises: + ValueError: if *path* is not KCIDB v5.3-compliant. + """ + if not isinstance(path, str) or not _KCIDB_PATH_RE.match(path): + raise ValueError( + f"invalid KCIDB test path {path!r}: must be dot-separated " + "segments of [A-Za-z0-9_-] (KCIDB v5.3)" + ) + return path + + +def validate_origin(origin: str) -> str: + """Verify *origin* is a KCIDB v5.3-compliant origin; return it unchanged. + + KCIDB v5.3 restricts every object's ``origin`` to ``[a-z0-9_]+`` + (lowercase letters, digits and underscores). + + Raises: + ValueError: if *origin* is not KCIDB v5.3-compliant. + """ + if not isinstance(origin, str) or not _KCIDB_ORIGIN_RE.match(origin): + raise ValueError( + f"invalid KCIDB origin {origin!r}: must match [a-z0-9_]+ " + "(lowercase letters, digits and underscores; KCIDB v5.3)" + ) + return origin + + def build_test_row( *, origin: str, @@ -64,14 +113,24 @@ def build_test_row( ) -> Dict[str, Any]: """Build a single KCIDB tests[*] row. - Required fields per KCIDB IO schema: id, build_id, origin, path, status. + Required fields per the KCIDB v5.3 IO schema: id, build_id, origin. + `path` and `status` are optional in the schema, but always emitted here. Optional fields: duration, log_url, output_files, misc, comment. + + `origin` and `path` are verified against the KCIDB v5.3 constraints; + an invalid value raises ValueError instead of being submitted, so a bad + test name is caught here rather than failing the whole submission at the + ingester. + + Raises: + ValueError: if `origin` or `path` is not KCIDB v5.3-compliant. 
""" + origin = validate_origin(origin) row: Dict[str, Any] = { "id": f"{origin}:{test_id}", "build_id": build_id, "origin": origin, - "path": path, + "path": validate_test_path(path), "status": to_kcidb_status(status), } if duration_ms is not None: diff --git a/src/kernel_ci_cloud_labs/pull_labs_poller.py b/src/kernel_ci_cloud_labs/pull_labs_poller.py index 47366aa..572fd7d 100644 --- a/src/kernel_ci_cloud_labs/pull_labs_poller.py +++ b/src/kernel_ci_cloud_labs/pull_labs_poller.py @@ -234,6 +234,26 @@ def _default_job_executor(run_config: Dict[str, Any]) -> Tuple[List[Dict[str, An return _extract_test_results(summary or {}) +# Pipeline/PULL_LABS test names that denote a kernel boot test. The dashboard +# classifies a KCIDB test as a "boot" (rather than a generic test) only when +# its path is exactly "boot" or starts with "boot." -- see is_boot() in the +# kernelci-dashboard backend (kernelCI_app/utils.py). Every pullab_cloud job is +# a url-kernel-boot job, so these names are remapped to the "boot" path on +# submission. "baseline" is the PULL_LABS test type; "url-kernel-boot" is the +# vm-tests directory name it translates to and which appears in pipeline logs. +_BOOT_TEST_NAMES = frozenset({"baseline", "url-kernel-boot", "boot"}) + + +def _test_name_to_path(name: str) -> str: + """Map a pipeline test name to a KCIDB test path. + + Boot tests are remapped to the "boot" path so the dashboard classifies + them as boots; every other name passes through unchanged (build_test_row + then verifies it is a KCIDB-valid path and raises if it is not). + """ + return "boot" if name.strip().lower() in _BOOT_TEST_NAMES else name + + def _extract_test_results(summary: Dict[str, Any]) -> Tuple[List[Dict[str, Any]], Optional[str]]: """Pull per-test status out of the summary dict returned by run_pipeline. 
@@ -246,7 +266,7 @@ def _extract_test_results(summary: Dict[str, Any]) -> Tuple[List[Dict[str, Any]] failed_by_test = vms.get("failed_by_test") or {} for name in test_names: status = "FAIL" if failed_by_test.get(name) else "PASS" - rows.append({"name": name, "status": status}) + rows.append({"name": _test_name_to_path(name), "status": status}) return rows, None @@ -470,8 +490,9 @@ def process_event(self, event: Dict[str, Any]) -> bool: per_test, log_url = self.job_executor(run_config) except Exception as e: # pylint: disable=broad-exception-caught logger.error("Job execution failed for node %s: %s", node_id, e, exc_info=True) - # Submit an ERROR row so KCIDB sees we picked it up. - per_test = [{"name": "infrastructure", "status": "ERROR"}] + # Submit an ERROR row so KCIDB sees we picked it up. The boot. + # prefix makes the dashboard classify it as a (failed) boot test. + per_test = [{"name": "boot.infrastructure", "status": "ERROR"}] log_url = None test_rows = [ @@ -493,7 +514,9 @@ def process_event(self, event: Dict[str, Any]) -> bool: origin=self.kcidb_origin, build_id=build_id, test_id=f"{node_id}.0", - path="pullab_cloud", + # "boot" path => the dashboard classifies this as a boot + # test (is_boot() in kernelCI_app/utils.py). + path="boot", status="ERROR", log_url=log_url, misc={ diff --git a/src/kernel_ci_cloud_labs/setup_validate.py b/src/kernel_ci_cloud_labs/setup_validate.py index 678c29d..a158e42 100644 --- a/src/kernel_ci_cloud_labs/setup_validate.py +++ b/src/kernel_ci_cloud_labs/setup_validate.py @@ -87,11 +87,11 @@ def _create_s3_bucket(s3, bucket_name: str, region: str) -> bool: return False -def check_console_output_permission() -> bool: +def check_console_output_permission(region: Optional[str] = None) -> bool: """Probe ec2:GetConsoleOutput. 
Uses a non-existent instance id so the only permission we test is the IAM action itself.""" print("\n=== Checking ec2:GetConsoleOutput permission ===") - ec2 = boto3.client("ec2") + ec2 = boto3.client("ec2", region_name=region) try: ec2.get_console_output(InstanceId="i-0000000000000000f") print("✓ Call accepted (unexpected; permission OK)") @@ -237,9 +237,9 @@ def validate(bucket: Optional[str] = None, results = {} results["aws_credentials"] = check_aws_credentials() - results["ec2_describe"] = check_ec2_permissions() - results["ec2_console_output"] = check_console_output_permission() - results["ssm"] = check_ssm_permissions() + results["ec2_describe"] = check_ec2_permissions(region) + results["ec2_console_output"] = check_console_output_permission(region) + results["ssm"] = check_ssm_permissions(region) if role_name: results["iam_role"] = check_iam_role(role_name) diff --git a/tests/test_kcidb_submit.py b/tests/test_kcidb_submit.py index 52d85c8..5c2a8d2 100644 --- a/tests/test_kcidb_submit.py +++ b/tests/test_kcidb_submit.py @@ -18,6 +18,8 @@ submit_revision, submit_tests, to_kcidb_status, + validate_origin, + validate_test_path, ) @@ -60,6 +62,37 @@ def test_status_map_keys_are_lowercase(self): assert k == k.lower() +class TestValidation: + """validate_test_path() / validate_origin() verify, never rewrite.""" + + @pytest.mark.parametrize( + "path", + ["", "boot", "boot.infrastructure", "ltp.syscalls", "kunit-test_03"], + ) + def test_valid_paths_pass_through(self, path): + assert validate_test_path(path) == path + + @pytest.mark.parametrize( + "path", + ["boot test", "ltp/syscalls", "fs\\ext4", "100% pass", "café", + "a..b", ".leading", "trailing.", "tab\there"], + ) + def test_invalid_paths_raise(self, path): + with pytest.raises(ValueError, match="invalid KCIDB test path"): + validate_test_path(path) + + @pytest.mark.parametrize("origin", ["o", "pullab_cloud_aws", "lab1", "x_y_2"]) + def test_valid_origins_pass_through(self, origin): + assert 
validate_origin(origin) == origin + + @pytest.mark.parametrize( + "origin", ["pullab-cloud", "PullLab", "lab 1", "café", ""], + ) + def test_invalid_origins_raise(self, origin): + with pytest.raises(ValueError, match="invalid KCIDB origin"): + validate_origin(origin) + + class TestBuildTestRow: """build_test_row() shape.""" @@ -68,17 +101,32 @@ def test_required_fields(self): origin="pullab_cloud_aws", build_id="maestro:b1", test_id="node-1.0", - path="ltp/syscalls", + path="ltp.syscalls", status="pass", ) assert row == { "id": "pullab_cloud_aws:node-1.0", "build_id": "maestro:b1", "origin": "pullab_cloud_aws", - "path": "ltp/syscalls", + "path": "ltp.syscalls", "status": "PASS", } + def test_rejects_invalid_path(self): + # An invalid test name fails loudly here, not silently at the ingester. + with pytest.raises(ValueError, match="invalid KCIDB test path"): + build_test_row( + origin="o", build_id="b", test_id="t1", + path="boot test", status="pass", + ) + + def test_rejects_invalid_origin(self): + with pytest.raises(ValueError, match="invalid KCIDB origin"): + build_test_row( + origin="bad-origin", build_id="b", test_id="t1", + path="boot", status="pass", + ) + def test_optional_duration_converted_to_seconds(self): row = build_test_row( origin="o", build_id="b", test_id="t1", diff --git a/tests/test_pull_labs_poller.py b/tests/test_pull_labs_poller.py index 2962d31..d349d58 100644 --- a/tests/test_pull_labs_poller.py +++ b/tests/test_pull_labs_poller.py @@ -19,6 +19,7 @@ PullLabsPoller, _extract_test_results, _parse_kcidb_rest, + _test_name_to_path, ) # Capture the real validator at import time so a specific test can restore it @@ -187,12 +188,12 @@ def test_missing_timestamp_key_falls_back(self, tmp_path): class TestEventHelpers: def test_matches_runtime_true(self): p = PullLabsPoller(_minimal_kc()) - ev = {"node": {"data": {"data": {"runtime": "pull-labs-aws-ec2"}}}} + ev = {"node": {"data": {"runtime": "pull-labs-aws-ec2"}}} assert p._matches_runtime(ev) def 
test_matches_runtime_false(self): p = PullLabsPoller(_minimal_kc()) - ev = {"node": {"data": {"data": {"runtime": "lava-collabora"}}}} + ev = {"node": {"data": {"runtime": "lava-collabora"}}} assert not p._matches_runtime(ev) def test_matches_runtime_missing_keys(self): @@ -280,6 +281,39 @@ def test_empty_summary(self): assert rows == [] assert log is None + def test_boot_test_names_remapped_to_boot_path(self): + # Boot tests must use the "boot" path so the dashboard's is_boot() + # classifies them as boots rather than generic tests. + summary = {"vms": { + "test_names": ["baseline", "url-kernel-boot", "ltp"], + "failed_by_test": {}, + }} + rows, _ = _extract_test_results(summary) + names = sorted(r["name"] for r in rows) + assert names == ["boot", "boot", "ltp"] + + def test_boot_remap_preserves_failure_status(self): + # The failed_by_test lookup must still use the original test name. + summary = {"vms": { + "test_names": ["baseline"], + "failed_by_test": {"baseline": ["i-123"]}, + }} + rows, _ = _extract_test_results(summary) + assert rows == [{"name": "boot", "status": "FAIL"}] + + +class TestTestNameToPath: + """_test_name_to_path() remaps boot test names to the 'boot' path.""" + + @pytest.mark.parametrize("name", ["baseline", "url-kernel-boot", "boot", + "Baseline", " BOOT "]) + def test_boot_names_map_to_boot(self, name): + assert _test_name_to_path(name) == "boot" + + @pytest.mark.parametrize("name", ["ltp", "unixbench", "kselftest", "a"]) + def test_other_names_pass_through(self, name): + assert _test_name_to_path(name) == name + # --------------------------------------------------------------------------- # Default-executor dependency validation