kernelci · nuclearcat · May 21, 2026 · May 16, 2026 · May 16, 2026 · May 21, 2026
diff --git a/QUICKSTART.md b/QUICKSTART.md
@@ -46,6 +46,16 @@ pipeline can run a job end-to-end. The full walkthrough lives in
    Look for `VMs: 2/2 spawned, 2 successful, 0 failed`. If this passes,
    the AWS side is ready and you can proceed below.
 
+   For a fast pre-flight check (no VMs spawned) of AWS permissions,
+   the results bucket, and KernelCI/KCIDB tokens, run:
+   ```bash
+   kernel-ci-cloud-runner aws setup validate \
+     --bucket kernel-ci-$USER-results --role kernel-ci-$USER-vm-role
+   ```
+   See [README → Validate setup](README.md#validate-setup-optional). The
+   `KERNELCI_API_TOKEN` / `KCIDB_JWT` / `UNIFIED_TOKEN` env vars set in
+   section 3 below are picked up automatically.
+
 If your jobs install custom kernels, also follow
 [README 4. Upload kernel RPMs](README.md#4-upload-kernel-rpms-required-for-kernel-install-tests).
 

diff --git a/README.md b/README.md
@@ -93,6 +93,32 @@ Actually writing a configuration file for a given setup can be done with an expl
 kernel-ci-cloud-runner aws setup configure --prefix kernel-ci-$USER- --region us-west-2 --output my-config.config
 ```
 
+### Validate setup (optional)
+
+Before launching real VMs, run a pre-flight check of AWS permissions, IAM resources, the results bucket, and KernelCI/KCIDB tokens. The command is read-only by default — pass `--fix` to create the S3 bucket if it doesn't exist yet.
+
+```bash
+kernel-ci-cloud-runner aws setup validate \
+  --bucket kernel-ci-$USER-results \
+  --role kernel-ci-$USER-vm-role \
+  --region us-west-2
+```
+
+What it checks:
+
+| Check | What it does |
+| --- | --- |
+| `aws_credentials` | `sts:GetCallerIdentity` — prints account + principal ARN |
+| `ec2_describe` | confirms `ec2:DescribeInstances` works |
+| `ec2_console_output` | probes `ec2:GetConsoleOutput` (needed to capture kernel boot logs) |
+| `ssm` | `ssm:DescribeInstanceInformation` — needed to drive the test client |
+| `iam_role` / `instance_profile` | only when `--role` is given — verifies trust policy and attached managed policies |
+| `s3_bucket` | `head_bucket`; with `--fix`, creates the bucket (region-aware) and enables Block Public Access |
+| `kernelci_api_token` | `GET <api_base_uri>/whoami` with `Bearer` from `KERNELCI_API_TOKEN` or `UNIFIED_TOKEN` |
+| `kcidb_jwt` | decodes the JWT payload (no signature verification) and reports `exp`, `iss`, `sub`; sources the token from `KCIDB_JWT`, `KCIDB_REST=https://<jwt>@host/path`, or `UNIFIED_TOKEN` |
+
+Exits non-zero if any check fails. Useful when iterating on IAM policies, rotating tokens, or onboarding a new AWS account.
+
 ### 3. Run integration test to verify setup
 
 The integration test uses only `basic-test` and `example-reboot-test` — no kernel RPMs needed. This is the fastest way to verify everything works. The test will fail if you do not provide your configuration.

diff --git a/examples/aws/alltests-config.json b/examples/aws/alltests-config.json
@@ -84,7 +84,8 @@
                 "ec2:DescribeImages",
                 "ec2:DescribeSubnets",
                 "ec2:DescribeSecurityGroups",
-                "ec2:DescribeVpcs"
+                "ec2:DescribeVpcs",
+                "ec2:GetConsoleOutput"
               ],
               "Resource": "*"
             }

diff --git a/examples/aws/config-arm64.json b/examples/aws/config-arm64.json
@@ -84,7 +84,8 @@
                 "ec2:DescribeImages",
                 "ec2:DescribeSubnets",
                 "ec2:DescribeSecurityGroups",
-                "ec2:DescribeVpcs"
+                "ec2:DescribeVpcs",
+                "ec2:GetConsoleOutput"
               ],
               "Resource": "*"
             }

diff --git a/examples/aws/config.json b/examples/aws/config.json
@@ -84,7 +84,8 @@
                 "ec2:DescribeImages",
                 "ec2:DescribeSubnets",
                 "ec2:DescribeSecurityGroups",
-                "ec2:DescribeVpcs"
+                "ec2:DescribeVpcs",
+                "ec2:GetConsoleOutput"
               ],
               "Resource": "*"
             }

diff --git a/src/kernel_ci_cloud_labs/cli.py b/src/kernel_ci_cloud_labs/cli.py
@@ -7,6 +7,7 @@
     kernel-ci-cloud-runner aws setup upload-rpms --bucket BUCKET --local-rpms DIR [--region REGION]
     kernel-ci-cloud-runner aws setup upload-tests --bucket BUCKET [--test-dir DIR] [--region REGION]
     kernel-ci-cloud-runner aws setup cleanup --prefix PREFIX [--region REGION] [--delete]
+    kernel-ci-cloud-runner aws setup validate [--bucket BUCKET] [--role ROLE] [--region REGION] [--fix]
 """
 
 __authors__ = ["Max Hubmann <mxhbm@amazon.de>", "Norbert Manthey <nmanthey@amazon.de>"]
@@ -167,6 +168,19 @@ def cmd_setup_upload_tests(args):
         sys.exit(1)
 
 
+def cmd_setup_validate(args):
+    """Run the `aws setup validate` checks and exit with the status code they return."""
+    from kernel_ci_cloud_labs.setup_validate import validate
+
+    exit_code = validate(
+        bucket=args.bucket,
+        role_name=args.role,
+        region=args.region,
+        api_base_uri=args.api_url,
+        fix=args.fix,
+    )
+    sys.exit(exit_code)
+
+
 def cmd_analyze(args):
     """Download and analyze benchmark results from a previous pipeline run."""
     try:
@@ -270,6 +284,18 @@ def main():
     test_parser.add_argument("--region", default="us-west-2", help="AWS region")
     test_parser.set_defaults(func=cmd_setup_upload_tests)
 
+    # aws setup validate: pre-flight checks, dispatched to cmd_setup_validate
+    val_parser = setup_sub.add_parser(
+        "validate",
+        help="Validate AWS setup and tokens (read-only; use --fix to create missing resources)",
+    )
+    val_parser.add_argument("--bucket", help="S3 bucket to verify (and create with --fix)")
+    val_parser.add_argument("--role", help="IAM role name used by VM instance profiles")
+    val_parser.add_argument("--region", default="us-west-2", help="AWS region (default: us-west-2)")
+    val_parser.add_argument("--api-url", help="KernelCI API base URI (overrides $KERNELCI_API_BASE_URI)")
+    val_parser.add_argument("--fix", action="store_true", help="Create missing resources (S3 bucket) instead of just reporting them")
+    val_parser.set_defaults(func=cmd_setup_validate)
+
     args = parser.parse_args()
 
     if not hasattr(args, "func"):

diff --git a/src/kernel_ci_cloud_labs/debug_aws_setup.py b/src/kernel_ci_cloud_labs/debug_aws_setup.py
@@ -109,11 +109,11 @@ def check_s3_bucket(bucket_name):
         return False
 
 
-def check_ec2_permissions():
-    """Check if we can describe EC2 instances."""
+def check_ec2_permissions(region=None):
+    """Check if we can describe EC2 instances in the given region."""
     print("\n=== Checking EC2 Permissions ===")
     try:
-        ec2 = boto3.client("ec2")
+        ec2 = boto3.client("ec2", region_name=region)
         _ = ec2.describe_instances(MaxResults=5)
         print("✓ Can describe EC2 instances")
         return True
@@ -122,11 +122,11 @@ def check_ec2_permissions():
         return False
 
 
-def check_ssm_permissions():
-    """Check if we can use SSM."""
+def check_ssm_permissions(region=None):
+    """Check if we can use SSM in the given region."""
     print("\n=== Checking SSM Permissions ===")
     try:
-        ssm = boto3.client("ssm")
+        ssm = boto3.client("ssm", region_name=region)
         response = ssm.describe_instance_information(MaxResults=5)
         print("✓ Can describe SSM instances")
         count = len(response.get("InstanceInformationList", []))
@@ -181,8 +181,8 @@ def main():
     if bucket_name:
         results.append(check_s3_bucket(bucket_name))
 
-    results.append(check_ec2_permissions())
-    results.append(check_ssm_permissions())
+    results.append(check_ec2_permissions(config["region"]))
+    results.append(check_ssm_permissions(config["region"]))
 
     # Summary
     print("\n" + "=" * 60)

diff --git a/src/kernel_ci_cloud_labs/kcidb_submit.py b/src/kernel_ci_cloud_labs/kcidb_submit.py
@@ -17,6 +17,7 @@
 
 import json
 import logging
+import re
 import urllib.error
 import urllib.request
 from typing import Any, Dict, Iterable, List, Optional
@@ -50,6 +51,54 @@ def to_kcidb_status(raw: Optional[str]) -> str:
     return STATUS_MAP.get(str(raw).strip().lower(), "ERROR")
 
 
+# KCIDB v5.3 field constraints (see kcidb_io.schema.V5_3). pullab_cloud
+# constructs these fields itself, so each value is verified before submission:
+# an invalid one would otherwise make the *whole* submission fail at the
+# ingester, with a far less obvious error than a local raise.
+#   tests[*].path -- dot-separated [A-Za-z0-9_-] segments, or empty (V5_3.test_path_re)
+#   *.origin      -- [a-z0-9_]+
+# Both end in \Z rather than $, because $ also matches before a trailing newline.
+_KCIDB_PATH_RE = re.compile(r"^([a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)*)?\Z")
+_KCIDB_ORIGIN_RE = re.compile(r"^[a-z0-9_]+\Z")
+
+
+def validate_test_path(path: str) -> str:
+    """Verify *path* is a KCIDB v5.3-compliant test path; return it unchanged.
+
+    KCIDB v5.3 restricts ``tests[*].path`` to dot-separated segments of
+    ``[A-Za-z0-9_-]`` (the empty string is allowed -- it denotes the test
+    tree root). Anything else, including a trailing newline, is rejected
+    here, at the producer, rather than silently rewritten.
+
+    Raises:
+        ValueError: if *path* is not a str or is not KCIDB v5.3-compliant.
+    """
+    # fullmatch, not match: "$" also matches just before a trailing "\n".
+    if not isinstance(path, str) or not _KCIDB_PATH_RE.fullmatch(path):
+        raise ValueError(
+            f"invalid KCIDB test path {path!r}: must be dot-separated "
+            "segments of [A-Za-z0-9_-] (KCIDB v5.3)"
+        )
+    return path
+
+
+def validate_origin(origin: str) -> str:
+    """Verify *origin* is a KCIDB v5.3-compliant origin; return it unchanged.
+
+    KCIDB v5.3 restricts every object's ``origin`` to ``[a-z0-9_]+``.
+
+    Raises:
+        ValueError: if *origin* is not a str or is not KCIDB v5.3-compliant.
+    """
+    # fullmatch, not match: "$" also matches just before a trailing "\n".
+    if not isinstance(origin, str) or not _KCIDB_ORIGIN_RE.fullmatch(origin):
+        raise ValueError(
+            f"invalid KCIDB origin {origin!r}: must match [a-z0-9_]+ "
+            "(lowercase letters, digits and underscores; KCIDB v5.3)"
+        )
+    return origin
+
+
 def build_test_row(
     *,
     origin: str,
@@ -64,14 +113,24 @@ def build_test_row(
 ) -> Dict[str, Any]:
     """Build a single KCIDB tests[*] row.
 
-    Required fields per KCIDB IO schema: id, build_id, origin, path, status.
+    Required fields per the KCIDB v5.3 IO schema: id, build_id, origin.
+    `path` and `status` are optional in the schema, but always emitted here.
     Optional fields: duration, log_url, output_files, misc, comment.
+
+    `origin` and `path` are verified against the KCIDB v5.3 constraints;
+    an invalid value raises ValueError instead of being submitted, so a bad
+    test name is caught here rather than failing the whole submission at the
+    ingester.
+
+    Raises:
+        ValueError: if `origin` or `path` is not KCIDB v5.3-compliant.
     """
+    origin = validate_origin(origin)
     row: Dict[str, Any] = {
         "id": f"{origin}:{test_id}",
         "build_id": build_id,
         "origin": origin,
-        "path": path,
+        "path": validate_test_path(path),
         "status": to_kcidb_status(status),
     }
     if duration_ms is not None:

diff --git a/src/kernel_ci_cloud_labs/launch_vm.py b/src/kernel_ci_cloud_labs/launch_vm.py
@@ -5,6 +5,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 
+import base64
 import json
 import shlex
 import sys
@@ -345,8 +346,45 @@ def check_test_result(self):
             log_error(f"✗ Failed to read result.txt: {e}")
             return False
 
+    def capture_console_output(self):
+        """Fetch the EC2 console output for this instance and upload it to S3.
+
+        Best-effort: does nothing without an instance id, and a failed fetch,
+        an empty console buffer or a failed upload is only logged, never
+        raised.
+        """
+        if not self.instance_id:
+            return
+
+        log_not("\n=== Capturing console output ===")
+        try:
+            resp = self.ec2.get_console_output(InstanceId=self.instance_id, Latest=True)
+        except Exception as e:
+            log_not(f"  Failed to fetch console output: {e}")
+            return
+
+        # botocore already base64-decodes "Output" for GetConsoleOutput; decoding
+        # it again would corrupt the log, so the text is uploaded as returned.
+        output = resp.get("Output", "")
+        if not output:
+            log_not("  No console output available yet")
+            return
+
+        s3_key = f"{self.run_prefix}/test_{self.test}/output/{self.instance_id}/console-output.log"
+        try:
+            payload = output.encode("utf-8")
+            self.s3.put_object(
+                Bucket=self.s3_bucket, Key=s3_key, Body=payload,
+                ContentType="text/plain; charset=utf-8",
+            )
+            log_not(f"✓ Console output uploaded ({len(payload)} bytes) to s3://{self.s3_bucket}/{s3_key}")
+        except Exception as e:
+            log_not(f"  Failed to upload console output: {e}")
+
     def cleanup(self):
-        """Terminate instance."""
+        """Capture console output, then terminate instance."""
+        self.capture_console_output()
+
         if self.instance_id:
             log_not(f"\n=== Terminating instance {self.instance_id} ===")
             try:

diff --git a/src/kernel_ci_cloud_labs/pull_labs_poller.py b/src/kernel_ci_cloud_labs/pull_labs_poller.py
@@ -234,6 +234,26 @@ def _default_job_executor(run_config: Dict[str, Any]) -> Tuple[List[Dict[str, An
     return _extract_test_results(summary or {})
 
 
+# Pipeline/PULL_LABS test names that denote a kernel boot test. The dashboard
+# classifies a KCIDB test as a "boot" (rather than a generic test) only when
+# its path is exactly "boot" or starts with "boot." -- see is_boot() in the
+# kernelci-dashboard backend (kernelCI_app/utils.py). Every pullab_cloud job is
+# a url-kernel-boot job, so these names are remapped to the "boot" path on
+# submission. "baseline" is the PULL_LABS test type; "url-kernel-boot" is the
+# vm-tests directory name it translates to and which appears in pipeline logs.
+_BOOT_TEST_NAMES = frozenset({"baseline", "url-kernel-boot", "boot"})
+
+
+def _test_name_to_path(name: str) -> str:
+    """Translate a pipeline test name into the KCIDB path to report it under.
+
+    Names listed in _BOOT_TEST_NAMES (compared stripped and lower-cased)
+    become "boot"; any other name is returned exactly as given.
+    """
+    normalized = name.strip().lower()
+    return name if normalized not in _BOOT_TEST_NAMES else "boot"
+
+
 def _extract_test_results(summary: Dict[str, Any]) -> Tuple[List[Dict[str, Any]], Optional[str]]:
     """Pull per-test status out of the summary dict returned by run_pipeline.
 
@@ -246,7 +266,7 @@ def _extract_test_results(summary: Dict[str, Any]) -> Tuple[List[Dict[str, Any]]
     failed_by_test = vms.get("failed_by_test") or {}
     for name in test_names:
         status = "FAIL" if failed_by_test.get(name) else "PASS"
-        rows.append({"name": name, "status": status})
+        rows.append({"name": _test_name_to_path(name), "status": status})
     return rows, None
 
 
@@ -470,8 +490,9 @@ def process_event(self, event: Dict[str, Any]) -> bool:
             per_test, log_url = self.job_executor(run_config)
         except Exception as e:  # pylint: disable=broad-exception-caught
             logger.error("Job execution failed for node %s: %s", node_id, e, exc_info=True)
-            # Submit an ERROR row so KCIDB sees we picked it up.
-            per_test = [{"name": "infrastructure", "status": "ERROR"}]
+            # Submit an ERROR row so KCIDB sees we picked it up. The "boot."
+            # prefix makes the dashboard classify it as a (failed) boot test.
+            per_test = [{"name": "boot.infrastructure", "status": "ERROR"}]
             log_url = None
 
         test_rows = [
@@ -493,7 +514,9 @@ def process_event(self, event: Dict[str, Any]) -> bool:
                     origin=self.kcidb_origin,
                     build_id=build_id,
                     test_id=f"{node_id}.0",
-                    path="pullab_cloud",
+                    # "boot" path => the dashboard classifies this as a boot
+                    # test (is_boot() in kernelCI_app/utils.py).
+                    path="boot",
                     status="ERROR",
                     log_url=log_url,
                     misc={