diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 816ff49..814b471 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,12 +16,11 @@ concurrency: cancel-in-progress: ${{ github.event_name == 'pull_request' }} env: - # Pin the sibling C++ checkout to a specific commit. cpp@v0.70.0 - # predates the __int128 → checked_arith refactor (MSVC), the - # protobuf-API-skew shim, and the MSVC source-charset fix; 9af2ec0 - # is the first commit with all of those. Bump to a v0.70.x tag - # once cpp cuts one that includes them. - PROTOWIRE_CPP_REF: 9af2ec04918a417933848de1577cd61f83a710b0 + # Pin the sibling C++ checkout to a specific tag. v0.75.0 carries the + # PXF v0.72-series feature set (@ / @entry / @table directive + # grammar, schema validator, Result accessors, TableReader streaming) + # the Python port wraps. Bump in lockstep with cpp release cuts. + PROTOWIRE_CPP_REF: v0.75.0 jobs: # --------------------------------------------------------------------- diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 7d9e608..89113eb 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -16,8 +16,8 @@ permissions: security-events: write env: - # See ci.yml for why this is a SHA, not v0.70.0. - PROTOWIRE_CPP_REF: 9af2ec04918a417933848de1577cd61f83a710b0 + # See ci.yml for the rationale on this pin. + PROTOWIRE_CPP_REF: v0.75.0 jobs: analyze: diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 51f7d96..ddef699 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -24,13 +24,11 @@ env: # frozen FFI surface, so it must be an immutable ref — never a # branch. # - # Using a SHA (not the v0.70.0 tag) because cpp@v0.70.0 predates - # the f1d3eb0 __int128 → checked_arith refactor needed for MSVC, - # plus the protobuf-API-skew shim and MSVC source-charset fixes. 
- # 9af2ec0 (cpp main, ci: MSVC source-charset + skip pxf_escapes) - # is the first commit with all of those. Bump to v0.70.x once cpp - # cuts a tag that includes them. - PROTOWIRE_CPP_REF: 9af2ec04918a417933848de1577cd61f83a710b0 + # Pinned to a tagged C++ release. v0.75.0 ships the PXF v0.72-series + # feature set (@<name> / @entry / @table grammar, schema validator, + # Result accessors, TableReader streaming) that this Python port + # wraps. Bump in lockstep with cpp release cuts. + PROTOWIRE_CPP_REF: v0.75.0 jobs: # --------------------------------------------------------------------- diff --git a/CHANGELOG.md b/CHANGELOG.md index 99c5ec8..4d851c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,39 @@ format changes. ## [Unreleased] +### Changed + +- **CI pin to protowire-cpp v0.75.0.** The cpp sibling now ships the + PXF v0.72-series feature set (directive grammar, schema validator, + Result accessors, TableReader streaming). The pin moves from the + pre-v0.72 commit `9af2ec0` to the `v0.75.0` tag so the Python + wrapper exposes the new surface. + +### Added + +- **`pxf.Result.directives` / `pxf.Result.tables`** — the document-root + directives the decoder saw at `unmarshal_full` time, exposed as + immutable dataclasses: + - `pxf.Directive(name, prefixes, type, body, has_body, line, column)` + for generic `@<name> *(prefix) [{ ... }]` blocks. `body` is the + raw bytes between `{` and `}` (verbatim), suitable for handing to + a follow-up `pxf.unmarshal` against the consumer's message type. + `type` keeps the v0.72.0 single-prefix back-compat shape. + - `pxf.TableDirective(type, columns, rows)` for `@table` directives, + with cells modeled as `None` (absent) or a `(kind, value)` 2-tuple + where kind ∈ {`"null"`, `"string"`, `"int"`, `"float"`, `"bool"`, + `"bytes"`, `"ident"`, `"timestamp"`, `"duration"`} — faithful to + the three-state cell grammar (absent / present-but-null / + present-with-value, draft §3.4.4). 
+- **`pxf.validate_descriptor(msg)` + `pxf.Violation`** — schema + reserved-name check (draft §3.13). Returns the list of fields, + oneofs, and enum values whose names case-sensitively match a PXF + value keyword (`null` / `true` / `false`). Sorted by element FQN. +- **`skip_validate` keyword** on `pxf.unmarshal` and + `pxf.unmarshal_full` (and the `_bytes` variants) — opt-out of the + per-call schema validator when the caller has already validated the + descriptor at registry-load time. + ## [0.70.0] Initial public release. The version number aligns this port with the rest diff --git a/src/_protowire/module.cc b/src/_protowire/module.cc index 5a83d15..6cb790f 100644 --- a/src/_protowire/module.cc +++ b/src/_protowire/module.cc @@ -17,10 +17,12 @@ #include #include +#include #include #include #include #include +#include #include #include @@ -77,9 +79,59 @@ const pbuf::Descriptor* FindDescriptor(const SchemaBundle& s, // --- pxf bindings --------------------------------------------------------- +// CellToPyTuple converts a single AST cell value (or std::nullopt for an +// absent cell) into the FFI shape consumed by pxf.py — `None` for absent, +// `(kind, value)` otherwise. Used by PxfUnmarshalFull for @table rows. 
+// +// kind values mirror the AST variant tags: +// "null" → nb::none() +// "string" → str (already-unescaped UTF-8) +// "int" → str (raw integer text — Python wrapper decides parse) +// "float" → str (raw float text) +// "bool" → bool +// "bytes" → bytes +// "ident" → str +// "timestamp" → str (raw RFC3339) +// "duration" → str (raw duration) +nb::object CellToPyTuple(const std::optional& cell) { + if (!cell.has_value()) return nb::none(); + using namespace protowire::pxf; + return std::visit( + [](const auto& p) -> nb::object { + using T = std::decay_t; + if constexpr (std::is_same_v) { + return nb::make_tuple(std::string("null"), nb::none()); + } else if constexpr (std::is_same_v) { + return nb::make_tuple(std::string("string"), p->value); + } else if constexpr (std::is_same_v) { + return nb::make_tuple(std::string("int"), p->raw); + } else if constexpr (std::is_same_v) { + return nb::make_tuple(std::string("float"), p->raw); + } else if constexpr (std::is_same_v) { + return nb::make_tuple(std::string("bool"), p->value); + } else if constexpr (std::is_same_v) { + return nb::make_tuple( + std::string("bytes"), + nb::bytes(reinterpret_cast(p->value.data()), p->value.size())); + } else if constexpr (std::is_same_v) { + return nb::make_tuple(std::string("ident"), p->name); + } else if constexpr (std::is_same_v) { + return nb::make_tuple(std::string("timestamp"), p->raw); + } else if constexpr (std::is_same_v) { + return nb::make_tuple(std::string("duration"), p->raw); + } else { + // List / Block are rejected at @table cell-parse time, so this branch is + // unreachable for cells; if hit, it yields ("unknown", None), not an error. + return nb::make_tuple(std::string("unknown"), nb::none()); + } + }, + *cell); +} + // PXF text -> binary proto bytes. 
nb::bytes PxfUnmarshal(nb::bytes text, nb::bytes fds_bytes, - const std::string& full_name, bool discard_unknown) { + const std::string& full_name, bool discard_unknown, + bool skip_validate) { auto schema = BuildSchema(std::string_view(fds_bytes.c_str(), fds_bytes.size())); const auto* desc = FindDescriptor(schema, full_name); std::unique_ptr msg( @@ -87,6 +139,7 @@ nb::bytes PxfUnmarshal(nb::bytes text, nb::bytes fds_bytes, protowire::pxf::UnmarshalOptions opts; opts.discard_unknown = discard_unknown; + opts.skip_validate = skip_validate; auto st = protowire::pxf::Unmarshal( std::string_view(text.c_str(), text.size()), msg.get(), opts); if (!st.ok()) { @@ -99,10 +152,20 @@ nb::bytes PxfUnmarshal(nb::bytes text, nb::bytes fds_bytes, return nb::bytes(out.data(), out.size()); } -// PXF text -> (binary proto bytes, set_paths, null_paths). -std::tuple, std::vector> +// Directive FFI shape: (name, prefixes, type, body, has_body, line, column). +using PyDirective = std::tuple, std::string, + nb::bytes, bool, int, int>; +// TableDirective FFI shape: (type, columns, rows) where rows is a list of +// lists of cells (each cell None or (kind, value); see CellToPyTuple). +using PyTableDirective = std::tuple, + std::vector>>; + +// PXF text -> (binary proto bytes, set_paths, null_paths, directives, tables). 
+std::tuple, std::vector, + std::vector, std::vector> PxfUnmarshalFull(nb::bytes text, nb::bytes fds_bytes, - const std::string& full_name, bool discard_unknown) { + const std::string& full_name, bool discard_unknown, + bool skip_validate) { auto schema = BuildSchema(std::string_view(fds_bytes.c_str(), fds_bytes.size())); const auto* desc = FindDescriptor(schema, full_name); std::unique_ptr msg( @@ -110,6 +173,7 @@ PxfUnmarshalFull(nb::bytes text, nb::bytes fds_bytes, protowire::pxf::UnmarshalOptions opts; opts.discard_unknown = discard_unknown; + opts.skip_validate = skip_validate; auto r = protowire::pxf::UnmarshalFull( std::string_view(text.c_str(), text.size()), msg.get(), opts); if (!r.ok()) { @@ -119,9 +183,56 @@ PxfUnmarshalFull(nb::bytes text, nb::bytes fds_bytes, if (!msg->SerializeToString(&out)) { throw nb::value_error("pxf.unmarshal_full: proto serialization failed"); } + // Marshal directives. + std::vector py_dirs; + py_dirs.reserve(r->Directives().size()); + for (const auto& d : r->Directives()) { + py_dirs.emplace_back( + d.name, d.prefixes, d.type, + nb::bytes(d.body.data(), d.body.size()), + d.has_body, d.pos.line, d.pos.column); + } + // Marshal tables. + std::vector py_tables; + py_tables.reserve(r->Tables().size()); + for (const auto& t : r->Tables()) { + std::vector> py_rows; + py_rows.reserve(t.rows.size()); + for (const auto& row : t.rows) { + std::vector py_cells; + py_cells.reserve(row.cells.size()); + for (const auto& cell : row.cells) py_cells.push_back(CellToPyTuple(cell)); + py_rows.push_back(std::move(py_cells)); + } + py_tables.emplace_back(t.type, t.columns, std::move(py_rows)); + } return {nb::bytes(out.data(), out.size()), r->SetFields(), - r->NullFields()}; + r->NullFields(), + std::move(py_dirs), + std::move(py_tables)}; +} + +// PXF schema reserved-name check (draft §3.13). Returns a list of +// (kind, element, name, file) tuples. Empty list ⇒ conformant schema. +// kind values: "field" / "oneof" / "enum_value". 
+std::vector> +PxfValidateDescriptor(nb::bytes fds_bytes, const std::string& full_name) { + auto schema = BuildSchema(std::string_view(fds_bytes.c_str(), fds_bytes.size())); + const auto* desc = FindDescriptor(schema, full_name); + auto vs = protowire::pxf::ValidateDescriptor(desc); + std::vector> out; + out.reserve(vs.size()); + for (const auto& v : vs) { + std::string kind; + switch (v.kind) { + case protowire::pxf::ViolationKind::kField: kind = "field"; break; + case protowire::pxf::ViolationKind::kOneof: kind = "oneof"; break; + case protowire::pxf::ViolationKind::kEnumValue: kind = "enum_value"; break; + } + out.emplace_back(std::move(kind), v.element, v.name, v.file); + } + return out; } // Binary proto bytes -> PXF text. @@ -301,10 +412,11 @@ NB_MODULE(_protowire, m) { m.doc() = "protowire native extension (nanobind shim around protowire-cpp)"; m.def("pxf_unmarshal", &PxfUnmarshal, "text"_a, "fds"_a, "full_name"_a, - "discard_unknown"_a = false); + "discard_unknown"_a = false, "skip_validate"_a = false); m.def("pxf_unmarshal_full", &PxfUnmarshalFull, "text"_a, "fds"_a, - "full_name"_a, "discard_unknown"_a = false); + "full_name"_a, "discard_unknown"_a = false, "skip_validate"_a = false); m.def("pxf_marshal", &PxfMarshal, "msg_bytes"_a, "fds"_a, "full_name"_a); + m.def("pxf_validate_descriptor", &PxfValidateDescriptor, "fds"_a, "full_name"_a); nb::class_(m, "SbeCodec") .def_static("create", &SbeCodec::Create, "fds"_a, "file_names"_a) diff --git a/src/protowire/pxf.py b/src/protowire/pxf.py index 298f6e4..5b9e2ca 100644 --- a/src/protowire/pxf.py +++ b/src/protowire/pxf.py @@ -8,8 +8,8 @@ from __future__ import annotations -from dataclasses import dataclass -from typing import Iterable +from dataclasses import dataclass, field +from typing import Literal, Union from google.protobuf.message import Message @@ -17,12 +17,90 @@ from ._schema import fds_for_message +# --- Directive surface (PXF v0.72+) -------------------------------------- + + +# A single `@table` 
row cell. `None` denotes an absent cell (no value between +# two commas, draft §3.4.4); a non-None Cell is a (kind, value) pair where +# kind is one of the strings below. +# +# kind value type notes +# ---- ---------- ----- +# "null" None present-but-null (draft §3.9) +# "string" str escape-decoded UTF-8 +# "int" str raw text — Python wrapper leaves parse to caller +# "float" str raw text +# "bool" bool true / false +# "bytes" bytes base64-decoded +# "ident" str unquoted identifier (typically an enum tag name) +# "timestamp" str raw RFC3339 +# "duration" str raw duration text +CellKind = Literal[ + "null", "string", "int", "float", "bool", "bytes", "ident", "timestamp", "duration" +] +Cell = Union[None, tuple[CellKind, object]] + + +@dataclass(frozen=True) +class Directive: + """A generic `@<name> *(prefix) [{ ... }]` directive at document root. + + See draft §3.4.2. The body bytes are preserved verbatim — consumers + typically re-decode them against their own message type via + `pxf.unmarshal(directive.body, ...)`. + """ + + name: str + prefixes: tuple[str, ...] + type: str # back-compat: single prefix populates this; empty otherwise + body: bytes + has_body: bool + line: int + column: int + + +@dataclass(frozen=True) +class TableDirective: + """An `@table TYPE ( cols ) row*` directive at document root. + + Per draft §3.4.4 a document with any TableDirective MUST NOT have a + @type directive or top-level field entries — the @table header IS + the document's type declaration. + """ + + type: str + columns: tuple[str, ...] + rows: tuple[tuple[Cell, ...], ...] + + +@dataclass(frozen=True) +class Violation: + """A schema reserved-name violation, draft §3.13. + + Returned by `validate_descriptor`. `kind` is one of + `"field"` / `"oneof"` / `"enum_value"`. + """ + + kind: Literal["field", "oneof", "enum_value"] + element: str # fully-qualified protobuf name, e.g. 
"trades.v1.Side.null" + name: str # the bare reserved identifier ("null" / "true" / "false") + file: str # .proto file path the element is declared in + + +# --- Result --------------------------------------------------------------- + + @dataclass(frozen=True) class Result: - """Field-level presence metadata, mirror of Go pxf.Result.""" + """Field-level presence metadata + parsed document-root directives. + + Mirror of `protowire-cpp`'s `Result` (and Go's `pxf.Result`). + """ set_paths: frozenset[str] null_paths: frozenset[str] + directives: tuple[Directive, ...] = field(default_factory=tuple) + tables: tuple[TableDirective, ...] = field(default_factory=tuple) def is_set(self, path: str) -> bool: return path in self.set_paths and path not in self.null_paths @@ -37,6 +115,23 @@ def null_fields(self) -> list[str]: return sorted(self.null_paths) +# --- Schema validation (draft §3.13) ------------------------------------- + + +def validate_descriptor(msg: Message) -> list[Violation]: + """Return schema reserved-name violations on `msg`'s descriptor. + + An empty list means the schema is conformant. The check is + case-sensitive: `NULL` / `True` lex as identifiers and are accepted. + """ + fds = fds_for_message(msg) + raw = _protowire.pxf_validate_descriptor(fds, msg.DESCRIPTOR.full_name) + return [Violation(kind=k, element=e, name=n, file=f) for (k, e, n, f) in raw] + + +# --- Encoders -------------------------------------------------------------- + + def marshal(msg: Message) -> str: """Encode `msg` as PXF text. 
Mirrors Go pxf.Marshal.""" fds = fds_for_message(msg) @@ -52,44 +147,117 @@ def marshal_bytes(msg_bytes: bytes, fds: bytes, full_name: str) -> str: def unmarshal_bytes( - data: str | bytes, fds: bytes, full_name: str, *, discard_unknown: bool = False + data: str | bytes, + fds: bytes, + full_name: str, + *, + discard_unknown: bool = False, + skip_validate: bool = False, ) -> bytes: """Decode PXF text into raw proto-binary bytes against an explicit FDS.""" text = data.encode("utf-8") if isinstance(data, str) else bytes(data) - return _protowire.pxf_unmarshal(text, bytes(fds), full_name, discard_unknown) + return _protowire.pxf_unmarshal( + text, bytes(fds), full_name, discard_unknown, skip_validate + ) def unmarshal_full_bytes( - data: str | bytes, fds: bytes, full_name: str, *, discard_unknown: bool = False + data: str | bytes, + fds: bytes, + full_name: str, + *, + discard_unknown: bool = False, + skip_validate: bool = False, ) -> tuple[bytes, Result]: text = data.encode("utf-8") if isinstance(data, str) else bytes(data) - raw, set_paths, null_paths = _protowire.pxf_unmarshal_full( - text, bytes(fds), full_name, discard_unknown + raw, set_paths, null_paths, dirs, tables = _protowire.pxf_unmarshal_full( + text, bytes(fds), full_name, discard_unknown, skip_validate ) - return raw, Result(frozenset(set_paths), frozenset(null_paths)) + return raw, _wrap_result(set_paths, null_paths, dirs, tables) + + +# --- Decoders -------------------------------------------------------------- -def unmarshal(data: str | bytes, msg: Message, *, discard_unknown: bool = False) -> None: +def unmarshal( + data: str | bytes, + msg: Message, + *, + discard_unknown: bool = False, + skip_validate: bool = False, +) -> None: """Decode PXF text into `msg` (in place). 
Mirrors Go pxf.Unmarshal.""" text = data.encode("utf-8") if isinstance(data, str) else bytes(data) fds = fds_for_message(msg) - raw = _protowire.pxf_unmarshal(text, fds, msg.DESCRIPTOR.full_name, discard_unknown) + raw = _protowire.pxf_unmarshal( + text, fds, msg.DESCRIPTOR.full_name, discard_unknown, skip_validate + ) msg.Clear() msg.MergeFromString(raw) def unmarshal_full( - data: str | bytes, msg: Message, *, discard_unknown: bool = False + data: str | bytes, + msg: Message, + *, + discard_unknown: bool = False, + skip_validate: bool = False, ) -> Result: - """Decode PXF + return per-field presence (set/null) metadata. + """Decode PXF + return per-field presence (set/null) metadata and any + `@` / `@table` directives the decoder saw at the document root. Mirrors Go pxf.UnmarshalFull. """ text = data.encode("utf-8") if isinstance(data, str) else bytes(data) fds = fds_for_message(msg) - raw, set_paths, null_paths = _protowire.pxf_unmarshal_full( - text, fds, msg.DESCRIPTOR.full_name, discard_unknown + raw, set_paths, null_paths, dirs, tables = _protowire.pxf_unmarshal_full( + text, fds, msg.DESCRIPTOR.full_name, discard_unknown, skip_validate ) msg.Clear() msg.MergeFromString(raw) - return Result(frozenset(set_paths), frozenset(null_paths)) + return _wrap_result(set_paths, null_paths, dirs, tables) + + +# --- Internal helpers ---------------------------------------------------- + + +def _wrap_result(set_paths, null_paths, raw_dirs, raw_tables) -> Result: + dirs = tuple( + Directive( + name=name, + prefixes=tuple(prefixes), + type=type_, + body=bytes(body), + has_body=has_body, + line=line, + column=column, + ) + for (name, prefixes, type_, body, has_body, line, column) in raw_dirs + ) + tables = tuple( + TableDirective( + type=type_, + columns=tuple(columns), + rows=tuple( + tuple(_normalize_cell(c) for c in row) for row in rows + ), + ) + for (type_, columns, rows) in raw_tables + ) + return Result( + set_paths=frozenset(set_paths), + 
null_paths=frozenset(null_paths), + directives=dirs, + tables=tables, + ) + + +def _normalize_cell(c) -> Cell: + """Convert the FFI cell shape to the Cell type alias. + + The FFI hands cells over as either None (absent) or a 2-tuple + `(kind, value)`. We pass them through unchanged but type-cast — the + only normalization is `("bytes", bytes)` which arrives as nb::bytes + and stays as Python `bytes` after the round-trip. + """ + return c # already in the right shape diff --git a/tests/test_pxf_directives.py b/tests/test_pxf_directives.py new file mode 100644 index 0000000..0381bbc --- /dev/null +++ b/tests/test_pxf_directives.py @@ -0,0 +1,189 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2026 TrendVidia, LLC. +"""Tests for the PXF v0.72+ surface exposed in v0.75.0: + + - `Result.directives` and `Result.tables` populated by `unmarshal_full` + - `pxf.validate_descriptor` and `pxf.Violation` + - `skip_validate` opt-out on `unmarshal` / `unmarshal_full` +""" + +from __future__ import annotations + +import pytest + +from protowire import pxf + + +# ---- Result.directives -------------------------------------------------- + + +def test_directives_empty_when_no_at_directives(all_types_cls): + msg = all_types_cls() + r = pxf.unmarshal_full('string_field = "x"', msg) + assert r.directives == () + assert r.tables == () + + +def test_bare_directive_recorded(all_types_cls): + msg = all_types_cls() + r = pxf.unmarshal_full('@frob\nstring_field = "x"', msg) + assert len(r.directives) == 1 + d = r.directives[0] + assert d.name == "frob" + assert d.prefixes == () + assert d.has_body is False + assert d.type == "" + + +def test_single_prefix_populates_legacy_type(all_types_cls): + msg = all_types_cls() + r = pxf.unmarshal_full( + '@header pkg.Hdr { id = "h" }\nstring_field = "x"', msg + ) + assert len(r.directives) == 1 + d = r.directives[0] + assert d.name == "header" + assert d.prefixes == ("pkg.Hdr",) + assert d.type == "pkg.Hdr" + assert d.has_body is True + assert b'id 
= "h"' in d.body + + +def test_two_prefixes_leave_type_empty(all_types_cls): + msg = all_types_cls() + r = pxf.unmarshal_full( + "@entry label pkg.MsgType\nstring_field = \"x\"", msg + ) + d = r.directives[0] + assert d.prefixes == ("label", "pkg.MsgType") + assert d.type == "" + + +def test_multiple_directives_in_source_order(all_types_cls): + msg = all_types_cls() + r = pxf.unmarshal_full( + '@header pkg.Hdr { id = "h" }\n@frob alpha beta\n@meta\nstring_field = "x"', + msg, + ) + names = [d.name for d in r.directives] + assert names == ["header", "frob", "meta"] + + +def test_at_type_does_not_leak_into_directives(all_types_cls): + msg = all_types_cls() + r = pxf.unmarshal_full( + '@type test.v1.AllTypes\n@frob alpha\nstring_field = "x"', msg + ) + assert len(r.directives) == 1 + assert r.directives[0].name == "frob" + + +# ---- Result.tables ------------------------------------------------------ + + +def test_table_recorded_with_columns_and_rows(all_types_cls): + msg = all_types_cls() + src = "@table trades.v1.Trade ( px, qty )\n( 100, 5 )\n( 101, 7 )\n" + r = pxf.unmarshal_full(src, msg) + assert len(r.tables) == 1 + t = r.tables[0] + assert t.type == "trades.v1.Trade" + assert t.columns == ("px", "qty") + assert len(t.rows) == 2 + # Row 0: (100, 5) — both IntVals. + assert t.rows[0] == (("int", "100"), ("int", "5")) + + +def test_table_cell_shapes(all_types_cls): + msg = all_types_cls() + src = '@table x.Row ( a, b, c, d )\n( 42, "hello", true, null )\n' + r = pxf.unmarshal_full(src, msg) + row = r.tables[0].rows[0] + assert row[0] == ("int", "42") + assert row[1] == ("string", "hello") + assert row[2] == ("bool", True) + assert row[3] == ("null", None) + + +def test_three_state_cells(all_types_cls): + msg = all_types_cls() + # Empty cell = None (absent); null literal = ("null", None) (present-but-null); + # value = ("<kind>", value) (present-with-value). 
+ r = pxf.unmarshal_full("@table x.Row ( a, b, c )\n( 1, , null )\n", msg) + row = r.tables[0].rows[0] + assert row[0] == ("int", "1") + assert row[1] is None # absent + assert row[2] == ("null", None) + + +def test_multiple_tables_in_order(all_types_cls): + msg = all_types_cls() + src = ( + "@table a.Row ( x )\n" + "( 1 )\n" + "@table b.Row ( y )\n" + '( "p" )\n' + ) + r = pxf.unmarshal_full(src, msg) + assert [t.type for t in r.tables] == ["a.Row", "b.Row"] + + +def test_directives_and_tables_can_coexist(all_types_cls): + # A doc with @table can NOT have @type or body entries, but can carry + # generic @<name> directives before the @table header. + msg = all_types_cls() + src = '@header pkg.Hdr { id = "h" }\n@table x.Row ( a )\n( 1 )\n' + r = pxf.unmarshal_full(src, msg) + assert len(r.directives) == 1 + assert len(r.tables) == 1 + assert r.directives[0].name == "header" + + +# ---- pxf.validate_descriptor + Violation ------------------------------- + + +def test_validate_conformant_schema_returns_empty(all_types_cls): + """test.v1.AllTypes is conformant — no field/oneof/enum value collides + with the PXF reserved keywords.""" + msg = all_types_cls() + assert pxf.validate_descriptor(msg) == [] + + +def test_unmarshal_rejects_reserved_field_in_schema_when_validate_on( + all_types_cls, monkeypatch +): + # We can't easily build a non-conformant descriptor without protoc, so + # exercise the gate at the FFI level: a synthetic call against the + # standard descriptor should still pass. The skip_validate test below + # only checks that the flag is accepted on a conformant schema. 
+ msg = all_types_cls() + pxf.unmarshal('string_field = "x"', msg) + + +def test_skip_validate_bypasses_check(all_types_cls): + """skip_validate should be accepted as a no-op when the schema is + conformant; coverage of the actual bypass against a non-conformant + descriptor lives in protowire-cpp's schema tests.""" + msg = all_types_cls() + pxf.unmarshal('string_field = "x"', msg, skip_validate=True) + assert msg.string_field == "x" + + +def test_unmarshal_full_accepts_skip_validate(all_types_cls): + msg = all_types_cls() + r = pxf.unmarshal_full( + 'string_field = "y"', msg, skip_validate=True + ) + assert r.is_set("string_field") + + +# ---- Violation dataclass shape (regression on dataclass fields) --------- + + +def test_violation_dataclass_fields(): + v = pxf.Violation(kind="field", element="pkg.M.null", name="null", file="m.proto") + assert v.kind == "field" + assert v.element == "pkg.M.null" + # Frozen — should reject mutation. + with pytest.raises(Exception): # FrozenInstanceError or AttributeError + v.kind = "oneof" # type: ignore[misc]