@@ -21,6 +21,7 @@ use rdkafka::message::{Header, OwnedHeaders};
2121use rdkafka:: producer:: FutureRecord ;
2222use serde:: de:: DeserializeOwned ;
2323use tokio:: fs;
24+ use uuid:: Uuid ;
2425
2526use crate :: action:: { self , ControlFlow , State } ;
2627use crate :: format:: avro:: { self , Schema } ;
@@ -160,6 +161,10 @@ enum Format {
160161 Avro {
161162 schema : String ,
162163 confluent_wire_format : bool ,
164+ /// If set, override the wire format with AWS Glue Schema Registry framing
165+ /// using the given schema-version UUID. Mutually exclusive with
166+ /// `confluent_wire_format=true`.
167+ glue_schema_version_id : Option < Uuid > ,
163168 /// Schema references (subject names) for Confluent Schema Registry
164169 references : Vec < String > ,
165170 } ,
@@ -183,6 +188,10 @@ enum Transcoder {
183188 schema : Schema ,
184189 schema_id : i32 ,
185190 } ,
191+ GlueAvro {
192+ schema : Schema ,
193+ schema_version_id : Uuid ,
194+ } ,
186195 Protobuf {
187196 message : MessageDescriptor ,
188197 confluent_wire_format : bool ,
@@ -230,6 +239,26 @@ impl Transcoder {
230239 Ok ( None )
231240 }
232241 }
242+ Transcoder :: GlueAvro {
243+ schema,
244+ schema_version_id,
245+ } => {
246+ if let Some ( val) = Self :: decode_json ( row) ? {
247+ let val = avro:: from_json ( & val, schema. top_node ( ) ) ?;
248+ let mut out = vec ! [ ] ;
249+ // AWS Glue Schema Registry wire format:
250+ // byte 0 = 0x03 (header version)
251+ // byte 1 = compression byte (0x00 = none)
252+ // bytes 2..18 = 16-byte schema-version UUID
253+ out. write_u8 ( 0x03 ) . unwrap ( ) ;
254+ out. write_u8 ( 0x00 ) . unwrap ( ) ;
255+ out. extend_from_slice ( schema_version_id. as_bytes ( ) ) ;
256+ out. extend ( avro:: to_avro_datum ( schema, val) ?) ;
257+ Ok ( Some ( out) )
258+ } else {
259+ Ok ( None )
260+ }
261+ }
233262 Transcoder :: PlainAvro { schema } => {
234263 if let Some ( val) = Self :: decode_json ( row) ? {
235264 let val = avro:: from_json ( & val, schema. top_node ( ) ) ?;
@@ -303,16 +332,31 @@ pub async fn run_ingest(
303332 let schema_id_var = cmd. args . opt_parse ( "set-schema-id-var" ) ?;
304333 let key_schema_id_var = cmd. args . opt_parse ( "set-key-schema-id-var" ) ?;
305334 let format = match cmd. args . string ( "format" ) ?. as_str ( ) {
306- "avro" => Format :: Avro {
307- schema : cmd. args . string ( "schema" ) ?,
308- confluent_wire_format : cmd. args . opt_bool ( "confluent-wire-format" ) ?. unwrap_or ( true ) ,
309- // TODO (maz): update README!
310- references : cmd
335+ "avro" => {
336+ let glue_schema_version_id = cmd
311337 . args
312- . opt_string ( "references" )
313- . map ( |s| s. split ( ',' ) . map ( |s| s. to_string ( ) ) . collect ( ) )
314- . unwrap_or_default ( ) ,
315- } ,
338+ . opt_string ( "glue-schema-version-id" )
339+ . map ( |s| Uuid :: parse_str ( & s) . context ( "parsing glue-schema-version-id as UUID" ) )
340+ . transpose ( ) ?;
341+ let confluent_wire_format_explicit = cmd. args . opt_bool ( "confluent-wire-format" ) ?;
342+ if glue_schema_version_id. is_some ( ) && confluent_wire_format_explicit == Some ( true ) {
343+ bail ! ( "confluent-wire-format=true is incompatible with glue-schema-version-id" ) ;
344+ }
345+ // Default: confluent unless Glue framing is requested.
346+ let confluent_wire_format =
347+ confluent_wire_format_explicit. unwrap_or_else ( || glue_schema_version_id. is_none ( ) ) ;
348+ Format :: Avro {
349+ schema : cmd. args . string ( "schema" ) ?,
350+ confluent_wire_format,
351+ glue_schema_version_id,
352+ // TODO (maz): update README!
353+ references : cmd
354+ . args
355+ . opt_string ( "references" )
356+ . map ( |s| s. split ( ',' ) . map ( |s| s. to_string ( ) ) . collect ( ) )
357+ . unwrap_or_default ( ) ,
358+ }
359+ }
316360 "protobuf" => {
317361 let descriptor_file = cmd. args . string ( "descriptor-file" ) ?;
318362 let message = cmd. args . string ( "message" ) ?;
@@ -336,6 +380,7 @@ pub async fn run_ingest(
336380 anyhow ! ( "key-schema parameter required when key-format is present" )
337381 } ) ?,
338382 confluent_wire_format : cmd. args . opt_bool ( "confluent-wire-format" ) ?. unwrap_or ( true ) ,
383+ glue_schema_version_id : None ,
339384 references : cmd
340385 . args
341386 . opt_string ( "key-references" )
@@ -425,8 +470,16 @@ pub async fn run_ingest(
425470 match fmt {
426471 Format :: Avro {
427472 confluent_wire_format,
473+ glue_schema_version_id,
428474 ..
429- } => Some ( * confluent_wire_format) ,
475+ } => {
476+ // Glue framing is its own wire format — don't compare against CSR.
477+ if glue_schema_version_id. is_some ( ) {
478+ None
479+ } else {
480+ Some ( * confluent_wire_format)
481+ }
482+ }
430483 Format :: Protobuf {
431484 confluent_wire_format,
432485 ..
@@ -550,8 +603,20 @@ async fn make_transcoder(
550603 Format :: Avro {
551604 schema,
552605 confluent_wire_format,
606+ glue_schema_version_id,
553607 references,
554608 } => {
609+ if let Some ( schema_version_id) = glue_schema_version_id {
610+ if !references. is_empty ( ) {
611+ bail ! ( "schema references are not supported with glue-schema-version-id" ) ;
612+ }
613+ let schema = avro:: parse_schema ( & schema, & [ ] )
614+ . with_context ( || format ! ( "parsing avro schema: {}" , schema) ) ?;
615+ return Ok ( Transcoder :: GlueAvro {
616+ schema,
617+ schema_version_id,
618+ } ) ;
619+ }
555620 if confluent_wire_format {
556621 // Build references list by fetching each subject from the registry.
557622 // Start with immediate references and automatically resolve transitive ones.
0 commit comments