msgpack_tagged/
lib.rs

1//! Tagged-map serialization format for Noir bytecode.
2//!
3//! Design: [issue #12554](https://github.com/noir-lang/noir/issues/12554).
4//!
5//! This crate currently provides:
6//! - The [`MsgpackTagged`] trait — metadata-only, exposing each type's wire
7//!   shape via [`Tagged`] plus a hook for building a [`TagRegistry`].
//! - [`TagRegistry`] / [`Entry`] — the runtime data structure populated by
//!   recursive [`MsgpackTagged::register_into`] calls and consulted by the
//!   wrapper [`Serializer`] / [`Deserializer`] (see the [`serializer`] and
//!   [`deserializer`] modules).
11
12// `msgpack_tagged_derive`'s `MsgpackTagged` proc-macro emits
13// `::msgpack_tagged::...` paths to remain hygienic at every call site. From
14// inside this crate that absolute path doesn't resolve unless we tell rustc
15// the current crate also goes by that name.
16extern crate self as msgpack_tagged;
17
18mod containers;
19mod primitives;
20mod registry;
21
22pub mod deserializer;
23pub mod serializer;
24
25pub use deserializer::{Deserializer, msgpack_tagged_deserialize};
26pub use serializer::{Serializer, msgpack_tagged_serialize};
27
28pub use msgpack_tagged_derive::MsgpackTagged;
29pub use registry::{
30    Entry, Product, Sum, TagRegistry, Tagged, Variant, VariantKind, type_name_basename,
31};
32
/// Wire layout used for product types — structs, tuple structs, and the
/// payloads of enum variants.
///
/// The strategy is picked per-type on the [`Serializer`] (see
/// [`Serializer::new`] and [`Serializer::with_strategy`]) and governs struct
/// shape only: under `MsgpackTagged`, enum variants are *always* int-keyed,
/// whatever the strategy.
///
/// # Strategies
///
/// * [`EncodingStrategy::Tagged`] (the default) emits an int-keyed `fixmap`,
///   `{0: a, 1: b, …}`. Fields are identified by tag, which makes the shape
///   friendly to schema evolution: fields may be added, removed (via
///   `#[tagged(reserved(...))]`), or reordered freely. The price is one
///   extra byte per field for the tag.
/// * [`EncodingStrategy::Array`] emits a positional `fixarray`, `[a, b, …]`,
///   with fields written in tag-ascending order. This is the
///   minimum-overhead shape. Fields are identified by position, so only
///   *trailing* changes are evolvable:
///   - **Appending a trailing field** is backward-compatible provided the
///     field is marked `#[serde(default)]`: a shorter V1 wire decodes into
///     the V2 type and the default fills the new position.
///   - **Dropping a trailing field** is forward-compatible provided the
///     type opts into `#[tagged(allow_unknown_tags)]`: a longer V2 wire
///     decodes into the V1 type and the extra trailing position is
///     ignored.
///   - Every other change (inserting or removing in the middle,
///     reordering, changing a type) is wire-breaking. Choose this strategy
///     for small leaf types where size wins over flexibility and
///     middle-of-shape edits are unlikely.
///
/// # Auto-downgrade to `Tagged`
///
/// When a type declares `#[tagged(reserved(N))]` with `N` falling *between*
/// (or before) its active tags, encoding it as `Array` would corrupt
/// round-trips. V2's positional wire carries only active values, whereas the
/// decoder walks a merged-sorted layout of `(active + reserved)` tags; at
/// the reserved slot it would drain a wire byte intended for a later active
/// field. The encoder detects this case and silently switches to `Tagged`
/// for that product only; other types in the same serializer keep their
/// configured strategy.
///
/// Reserved tags that are strictly trailing (every reserved tag is greater
/// than every active tag) keep `Array`: the decoder hits
/// `wire_remaining == 0` before reaching the trailing reserved slot, so
/// positional alignment holds. The migration guide in the crate README
/// walks through both cases with examples.
///
/// # Decoding
///
/// The decoder probes the wire shape (`fixmap` vs. `fixarray`) of each
/// struct at decode time, so a single buffer can freely mix both strategies
/// across nested types.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum EncodingStrategy {
    /// Int-keyed map. The default, and the most backward/forward compatible.
    #[default]
    Tagged,
    /// Positional array. Smaller, but evolvable only through trailing
    /// changes (see the type-level documentation).
    Array,
}
82
/// The integer tag used as a wire-level identifier for struct fields and enum
/// variants.
///
/// Using `u8` bounds tags to 0–255, so a `#[tag(N)]` annotation with
/// `N > 255` is rejected at compile time. Tags inside msgpack's positive
/// `fixint` range (0–127) get the 1-byte-per-tag encoding.
///
/// NOTE(review): `u8` by itself still admits 128–255, which the msgpack spec
/// encodes as a two-byte `uint 8` rather than a `fixint` — confirm whether
/// the derive macro restricts tags to 0–127 or whether the wider encoding is
/// accepted for larger tags.
pub type Tag = u8;
88
/// A type that participates in the tagged-map wire format.
///
/// Implementations are typically generated by `#[derive(MsgpackTagged)]` from the
/// `msgpack_tagged_derive` crate, but can also be hand-written for primitives,
/// container types, or shadow-DTO public types via `#[tagged(via(WireType))]`.
///
/// The trait is metadata-only: it does *not* replace [`serde::Serialize`] /
/// [`serde::Deserialize`]. It sits alongside them and exposes the type's wire
/// shape plus a recursive registry-build hook.
///
/// Both items are associated with the type rather than a value (neither takes
/// a `self` receiver), so the metadata is reachable from the type alone, e.g.
/// `<T as MsgpackTagged>::TAGGED`.
///
/// NOTE(review): the `'static` supertrait bound presumably exists so the
/// registry can key entries by type identity (`reg.try_insert::<Self>(...)`)
/// — confirm against [`TagRegistry`].
#[diagnostic::on_unimplemented(
    note = "use `#[derive(MsgpackTagged)]` on the type, or `#[tagged(via(WireType))]` on a shadow-DTO public type that delegates to a wire companion",
    note = "for container fields, use `BTreeMap` / `BTreeSet` — `HashMap` / `HashSet` are deliberately unsupported on the wire because their iteration order is non-deterministic"
)]
pub trait MsgpackTagged: 'static {
    /// The wire shape of this type — either a [`Product`] (struct/tuple
    /// struct) or a [`Sum`] (enum).
    ///
    /// The derive macro emits this from `#[tag(N)]` annotations. Primitives
    /// and container types use a `Tagged::Product` with empty `fields`,
    /// signalling they don't appear directly on the wire as a registry entry
    /// but still satisfy the bound.
    const TAGGED: Tagged;

    /// Recursively register this type and every tagged field type into a registry.
    ///
    /// The macro emits the body: it calls `reg.try_insert::<Self>(...)` and, on
    /// first insert, recurses into each generic and tagged-field type via their
    /// own `register_into`. Idempotent — re-registering a type is a no-op.
    ///
    /// `reg` is the registry being populated; it is mutated in place and
    /// nothing is returned.
    fn register_into(reg: &mut TagRegistry);
}
117
#[cfg(test)]
mod tests {
    use super::*;

    /// Hand-written struct-shaped impl exercising every `Product` field.
    ///
    /// Each field is set to a recognizable value so that
    /// `foo_constants_match_what_was_written` can read all of them back.
    struct Foo;
    impl MsgpackTagged for Foo {
        const TAGGED: Tagged = Tagged::Product(Product {
            fields: &[(0, "a"), (1, "b")],
            reserved: &[3],
            allow_unknown_tags: true,
            tag_order_matches_source: true,
        });
        // Nothing nested to register for this fixture.
        fn register_into(_reg: &mut TagRegistry) {}
    }

    /// Minimal impl supplying a `TAGGED` with `Product` extras blank —
    /// proves the empty shape compiles and reads back as expected.
    struct Bar;
    impl MsgpackTagged for Bar {
        const TAGGED: Tagged = Tagged::empty_product();
        // Nothing nested to register for this fixture.
        fn register_into(_reg: &mut TagRegistry) {}
    }

    // The asserted flag is read out of an associated `const`, so the allow
    // silences clippy's objection to asserting on a constant value.
    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn bar_disallows_unknown_by_default() {
        let p = <Bar as MsgpackTagged>::TAGGED.as_product().unwrap();
        assert!(!p.allow_unknown_tags);
    }

    // Same reasoning: `reserved` comes from an associated `const`, so the
    // allow silences clippy's objection to `is_empty()` on a constant.
    #[test]
    #[allow(clippy::const_is_empty)]
    fn bar_has_nothing_reserved() {
        let p = <Bar as MsgpackTagged>::TAGGED.as_product().unwrap();
        assert!(p.reserved.is_empty());
    }

    /// Reads back every field written in `Foo::TAGGED`, including
    /// `tag_order_matches_source`, so no `Product` field goes unchecked.
    #[test]
    fn foo_constants_match_what_was_written() {
        let p = <Foo as MsgpackTagged>::TAGGED.as_product().unwrap();
        assert_eq!(p.fields, &[(0, "a"), (1, "b")]);
        assert_eq!(p.reserved, &[3]);
        assert!(p.allow_unknown_tags);
        assert!(p.tag_order_matches_source);
    }
}