msgpack_tagged/lib.rs
1//! Tagged-map serialization format for Noir bytecode.
2//!
3//! Design: [issue #12554](https://github.com/noir-lang/noir/issues/12554).
4//!
5//! This crate currently provides:
6//! - The [`MsgpackTagged`] trait — metadata-only, exposing each type's wire
7//! shape via [`Tagged`] plus a hook for building a [`TagRegistry`].
//! - [`TagRegistry`] / [`Entry`] — the runtime data structure populated by
//!   recursive [`MsgpackTagged::register_into`] calls and consulted by the
//!   wrapper [`Serializer`] / [`Deserializer`] re-exported below.
11
12// `msgpack_tagged_derive`'s `MsgpackTagged` proc-macro emits
13// `::msgpack_tagged::...` paths to remain hygienic at every call site. From
14// inside this crate that absolute path doesn't resolve unless we tell rustc
15// the current crate also goes by that name.
16extern crate self as msgpack_tagged;
17
18mod containers;
19mod primitives;
20mod registry;
21
22pub mod deserializer;
23pub mod serializer;
24
25pub use deserializer::{Deserializer, msgpack_tagged_deserialize};
26pub use serializer::{Serializer, msgpack_tagged_serialize};
27
28pub use msgpack_tagged_derive::MsgpackTagged;
29pub use registry::{
30 Entry, Product, Sum, TagRegistry, Tagged, Variant, VariantKind, type_name_basename,
31};
32
/// Selects the on-wire layout of product types: structs, tuple structs, and
/// the payloads of enum variants.
///
/// The choice is made per-type on the [`Serializer`] (see
/// [`Serializer::new`] / [`Serializer::with_strategy`]). Under
/// `MsgpackTagged`, enum variants are *always* int-keyed whatever strategy is
/// active — only the shape of structs changes.
///
/// # Strategies
///
/// * [`EncodingStrategy::Tagged`] (default) — an int-keyed `fixmap`
///   `{0: a, 1: b, …}`. Values are identified by tag, which makes the format
///   friendly to schema evolution: fields may be added, removed (via
///   `#[tagged(reserved(...))]`), or reordered freely. The price is one byte
///   per field for the tag.
/// * [`EncodingStrategy::Array`] — a positional `fixarray` `[a, b, …]` with
///   fields emitted in tag-ascending order. This is the minimum-overhead
///   form. Values are identified by position, so only *trailing* changes are
///   survivable:
///   - **Adding a trailing field** is backward-compatible when the field is
///     marked `#[serde(default)]` — a (shorter) V1 wire decodes into the V2
///     type and the default fills the new position.
///   - **Removing a trailing field** is forward-compatible when the type
///     opts into `#[tagged(allow_unknown_tags)]` — a (longer) V2 wire
///     decodes into the V1 type and the extra trailing position is ignored.
///   - Every other edit (inserting or removing in the middle, reordering,
///     changing a type) breaks the wire. Choose this strategy for small
///     leaf types where size matters more than flexibility and
///     middle-of-shape edits are unlikely.
///
/// # Auto-downgrade to `Tagged`
///
/// When a type carries `#[tagged(reserved(N))]` with `N` *between* (or
/// before) its active tags, encoding it as `Array` would corrupt
/// round-trips: the positional V2 wire carries only active values, whereas
/// the decoder walks a merged, sorted layout of `(active + reserved)` tags
/// and would consume, at the reserved slot, a wire byte meant for a later
/// active field. The encoder detects this and silently falls back to
/// `Tagged` for that product alone — every other type in the same
/// serializer keeps its configured strategy.
///
/// Strictly-trailing reserved tags (each reserved tag greater than every
/// active tag) stay on `Array`: the decoder reaches `wire_remaining == 0`
/// before it gets to the trailing reserved slot, so positions remain
/// aligned. The migration guide in the crate README works through both
/// cases with examples.
///
/// # Decoding
///
/// The decoder inspects the wire shape (`fixmap` vs. `fixarray`) of each
/// struct as it decodes, so one buffer may freely mix both strategies across
/// nested types.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum EncodingStrategy {
    /// Int-keyed map `{0: a, 1: b, …}`. The default, and the most
    /// backward/forward compatible of the two.
    #[default]
    Tagged,
    /// Positional array `[a, b, …]`. Smaller on the wire, but only trailing
    /// fields can evolve.
    Array,
}
82
/// The integer tag used as a wire-level identifier for struct fields and enum
/// variants.
///
/// Backing the tag with `u8` caps it at 255, so `#[tag(N)]` annotations with
/// `N > 255` are rejected at compile time. Note that only tags 0–127 fall in
/// msgpack's positive `fixint` range and get the 1-byte-per-tag encoding;
/// `u8` by itself does not rule out 128–255, which msgpack cannot express as
/// a `fixint`.
pub type Tag = u8;
88
/// A type that participates in the tagged-map wire format.
///
/// Implementations are typically generated by `#[derive(MsgpackTagged)]` from the
/// `msgpack_tagged_derive` crate, but can also be hand-written for primitives,
/// container types, or shadow-DTO public types via `#[tagged(via(WireType))]`.
///
/// The trait is metadata-only: it does *not* replace [`serde::Serialize`] /
/// [`serde::Deserialize`]. It sits alongside them and exposes the type's wire
/// shape plus a recursive registry-build hook.
///
/// Both members are associated items without a `self` receiver, so the
/// metadata can be read from the type alone — no value is required.
// The notes below are attached to the compiler's "trait bound not satisfied"
// error so users are pointed at the derive and at the supported containers.
#[diagnostic::on_unimplemented(
    note = "use `#[derive(MsgpackTagged)]` on the type, or `#[tagged(via(WireType))]` on a shadow-DTO public type that delegates to a wire companion",
    note = "for container fields, use `BTreeMap` / `BTreeSet` — `HashMap` / `HashSet` are deliberately unsupported on the wire because their iteration order is non-deterministic"
)]
pub trait MsgpackTagged: 'static {
    /// The wire shape of this type — either a [`Product`] (struct/tuple
    /// struct) or a [`Sum`] (enum). The derive macro emits this from
    /// `#[tag(N)]` annotations; primitives and container types use a
    /// `Tagged::Product` with empty `fields`, signalling they don't appear
    /// directly on the wire as a registry entry but still satisfy the bound.
    const TAGGED: Tagged;

    /// Recursively register this type and every tagged field type into a registry.
    ///
    /// `reg` is the registry being populated; it is mutated in place.
    ///
    /// The macro emits the body: it calls `reg.try_insert::<Self>(...)` and, on
    /// first insert, recurses into each generic and tagged-field type via their
    /// own `register_into`. Idempotent — re-registering a type is a no-op.
    fn register_into(reg: &mut TagRegistry);
}
117
#[cfg(test)]
mod tests {
    use super::*;

    /// Hand-written struct-shaped impl exercising every `Product` field.
    struct Foo;
    impl MsgpackTagged for Foo {
        const TAGGED: Tagged = Tagged::Product(Product {
            fields: &[(0, "a"), (1, "b")],
            reserved: &[3],
            allow_unknown_tags: true,
            tag_order_matches_source: true,
        });
        // Registry population is not under test here, so this is a no-op.
        fn register_into(_reg: &mut TagRegistry) {}
    }

    /// Minimal impl supplying a `TAGGED` with `Product` extras blank —
    /// proves the empty shape compiles and reads back as expected.
    struct Bar;
    impl MsgpackTagged for Bar {
        const TAGGED: Tagged = Tagged::empty_product();
        // Registry population is not under test here, so this is a no-op.
        fn register_into(_reg: &mut TagRegistry) {}
    }

    #[test]
    // The value is read from an associated const, so clippy can treat the
    // assertion as constant; it is kept on purpose to pin what
    // `Tagged::empty_product()` yields.
    #[allow(clippy::assertions_on_constants)]
    fn bar_disallows_unknown_by_default() {
        let p = <Bar as MsgpackTagged>::TAGGED.as_product().unwrap();
        assert!(!p.allow_unknown_tags);
    }

    #[test]
    // Same reasoning as above: `reserved` comes from a const, and the
    // emptiness check is the point of the test.
    #[allow(clippy::const_is_empty)]
    fn bar_has_nothing_reserved() {
        let p = <Bar as MsgpackTagged>::TAGGED.as_product().unwrap();
        assert!(p.reserved.is_empty());
    }

    /// Reads back every field written in `Foo::TAGGED`, so a change to any
    /// one of them is caught.
    #[test]
    fn foo_constants_match_what_was_written() {
        let p = <Foo as MsgpackTagged>::TAGGED.as_product().unwrap();
        assert_eq!(p.fields, &[(0, "a"), (1, "b")]);
        assert_eq!(p.reserved, &[3]);
        assert!(p.allow_unknown_tags);
        // Previously unchecked even though `Foo` sets it explicitly.
        assert!(p.tag_order_matches_source);
    }
}
163}