@@ -275,10 +275,13 @@ fn doc_lines_to_tokens(text: &str) -> TokenStream {
275275 }
276276 } else if line. is_empty ( ) {
277277 lines. push ( String :: new ( ) ) ;
278- } else if line. starts_with ( ' ' ) {
279- lines. push ( line. to_string ( ) ) ;
280278 } else {
281- lines. push ( format ! ( " {line}" ) ) ;
279+ let sanitized = sanitize_line ( line) ;
280+ if sanitized. starts_with ( ' ' ) {
281+ lines. push ( sanitized) ;
282+ } else {
283+ lines. push ( format ! ( " {sanitized}" ) ) ;
284+ }
282285 }
283286 }
284287
@@ -291,6 +294,195 @@ fn doc_lines_to_tokens(text: &str) -> TokenStream {
291294 }
292295}
293296
297+ /// Escape one prose line of proto comment text for rustdoc.
298+ ///
299+ /// Proto comments are written for a cross-language audience and frequently
300+ /// contain constructs that rustdoc misparses:
301+ ///
302+ /// - `[foo]` / `[foo][]` — treated as intra-doc links; an error under
303+ /// `deny(rustdoc::broken_intra_doc_links)` when `foo` resembles a Rust
304+ /// path. Escaped to `\[foo\]`.
305+ /// - Bare `http(s)://…` — triggers `rustdoc::bare_urls`. Wrapped in `<…>`.
306+ /// - `Option<T>` — treated as raw HTML; triggers
307+ /// `rustdoc::invalid_html_tags`. Escaped to `Option\<T\>`.
308+ ///
309+ /// Left intact:
310+ /// - Single-line inline links `[text](url)`.
311+ /// - Existing autolinks `<http(s)://…>`.
312+ /// - Content inside `` `…` `` backtick code spans.
313+ /// - Already-escaped `\[`, `\]`, `\<`, `\>`.
314+ ///
315+ /// This is a per-line pass invoked from [`doc_lines_to_tokens`] on prose
316+ /// lines only — code blocks are left untouched. Multi-line markdown links
317+ /// are conservatively escaped; the link degrades to literal text plus a
318+ /// clickable autolink, which is preferable to a docs.rs build failure.
319+ fn sanitize_line ( line : & str ) -> String {
320+ let bytes = line. as_bytes ( ) ;
321+ let mut out = String :: with_capacity ( line. len ( ) ) ;
322+ let mut i = 0 ;
323+
324+ while i < bytes. len ( ) {
325+ let b = bytes[ i] ;
326+
327+ if b == b'`' {
328+ // CommonMark code span: a run of N backticks opens, the next run
329+ // of exactly N closes. Emit the whole span verbatim. If no
330+ // closer exists on this line, the run is literal (not a span).
331+ let run_start = i;
332+ while i < bytes. len ( ) && bytes[ i] == b'`' {
333+ i += 1 ;
334+ }
335+ let run_len = i - run_start;
336+ if let Some ( close_end) = find_backtick_closer ( bytes, i, run_len) {
337+ out. push_str ( & line[ run_start..close_end] ) ;
338+ i = close_end;
339+ } else {
340+ out. push_str ( & line[ run_start..i] ) ;
341+ }
342+ continue ;
343+ }
344+
345+ match b {
346+ b'\\' => {
347+ out. push ( '\\' ) ;
348+ i += 1 ;
349+ if i < bytes. len ( ) {
350+ i += push_char_at ( & mut out, line, i) ;
351+ }
352+ }
353+ b'[' => {
354+ if let Some ( end) = find_inline_link_end ( bytes, i) {
355+ out. push_str ( & line[ i..=end] ) ;
356+ i = end + 1 ;
357+ } else {
358+ out. push_str ( "\\ [" ) ;
359+ i += 1 ;
360+ }
361+ }
362+ b']' => {
363+ out. push_str ( "\\ ]" ) ;
364+ i += 1 ;
365+ }
366+ b'<' => {
367+ if let Some ( end) = find_autolink_end ( bytes, i) {
368+ out. push_str ( & line[ i..=end] ) ;
369+ i = end + 1 ;
370+ } else {
371+ out. push_str ( "\\ <" ) ;
372+ i += 1 ;
373+ }
374+ }
375+ b'>' => {
376+ out. push_str ( "\\ >" ) ;
377+ i += 1 ;
378+ }
379+ b'h' => {
380+ if let Some ( end) = find_bare_url_end ( bytes, i) {
381+ out. push ( '<' ) ;
382+ out. push_str ( & line[ i..end] ) ;
383+ out. push ( '>' ) ;
384+ i = end;
385+ } else {
386+ out. push ( 'h' ) ;
387+ i += 1 ;
388+ }
389+ }
390+ _ => {
391+ i += push_char_at ( & mut out, line, i) ;
392+ }
393+ }
394+ }
395+ out
396+ }
397+
398+ /// Push the UTF-8 char at byte index `i` of `s` into `out`, returning its
399+ /// byte length. `i` must be a char boundary and `< s.len()`.
400+ fn push_char_at ( out : & mut String , s : & str , i : usize ) -> usize {
401+ let ch = s[ i..]
402+ . chars ( )
403+ . next ( )
404+ . expect ( "i is in bounds and on a char boundary" ) ;
405+ out. push ( ch) ;
406+ ch. len_utf8 ( )
407+ }
408+
409+ /// Starting at `from` (just past an opening run of `run_len` backticks),
410+ /// return the past-the-end index of the matching closing run, or `None`.
411+ fn find_backtick_closer ( bytes : & [ u8 ] , from : usize , run_len : usize ) -> Option < usize > {
412+ let mut i = from;
413+ while i < bytes. len ( ) {
414+ if bytes[ i] == b'`' {
415+ let start = i;
416+ while i < bytes. len ( ) && bytes[ i] == b'`' {
417+ i += 1 ;
418+ }
419+ if i - start == run_len {
420+ return Some ( i) ;
421+ }
422+ } else {
423+ i += 1 ;
424+ }
425+ }
426+ None
427+ }
428+
429+ /// If `bytes[start..]` is a complete `[text](url)`, return the index of the
430+ /// closing `)`. Nested `(`/`)` inside the URL are balanced one level deep so
431+ /// fragments like `…#method()` survive.
432+ fn find_inline_link_end ( bytes : & [ u8 ] , start : usize ) -> Option < usize > {
433+ debug_assert_eq ! ( bytes[ start] , b'[' ) ;
434+ let mut j = start + 1 ;
435+ while j < bytes. len ( ) && bytes[ j] != b']' {
436+ if bytes[ j] == b'[' {
437+ return None ;
438+ }
439+ j += 1 ;
440+ }
441+ if j + 1 >= bytes. len ( ) || bytes[ j + 1 ] != b'(' {
442+ return None ;
443+ }
444+ let mut depth = 1i32 ;
445+ let mut k = j + 2 ;
446+ while k < bytes. len ( ) {
447+ match bytes[ k] {
448+ b'(' => depth += 1 ,
449+ b')' => {
450+ depth -= 1 ;
451+ if depth == 0 {
452+ return Some ( k) ;
453+ }
454+ }
455+ _ => { }
456+ }
457+ k += 1 ;
458+ }
459+ None
460+ }
461+
462+ /// If `bytes[start..]` is `<http(s)://…>`, return the index of the `>`.
463+ fn find_autolink_end ( bytes : & [ u8 ] , start : usize ) -> Option < usize > {
464+ debug_assert_eq ! ( bytes[ start] , b'<' ) ;
465+ let rest = & bytes[ start + 1 ..] ;
466+ if !( rest. starts_with ( b"http://" ) || rest. starts_with ( b"https://" ) ) {
467+ return None ;
468+ }
469+ rest. iter ( ) . position ( |& b| b == b'>' ) . map ( |p| start + 1 + p)
470+ }
471+
472+ /// If `bytes[start..]` begins a bare `http(s)://` URL, return the
473+ /// past-the-end byte index. The URL ends at whitespace or `)`.
474+ fn find_bare_url_end ( bytes : & [ u8 ] , start : usize ) -> Option < usize > {
475+ let rest = & bytes[ start..] ;
476+ if !( rest. starts_with ( b"http://" ) || rest. starts_with ( b"https://" ) ) {
477+ return None ;
478+ }
479+ let mut j = start;
480+ while j < bytes. len ( ) && !bytes[ j] . is_ascii_whitespace ( ) && bytes[ j] != b')' {
481+ j += 1 ;
482+ }
483+ Some ( j)
484+ }
485+
294486/// Format a `SourceCodeInfo.Location` into a doc-comment string.
295487///
296488/// Combines leading detached comments, leading comments, and trailing
@@ -744,4 +936,92 @@ mod tests {
744936 } ;
745937 assert_eq ! ( format_comment( & loc) . as_deref( ) , Some ( " hello" ) ) ;
746938 }
939+
940+ // --- sanitize_line ------------------------------------------------------
941+
942+ #[ test]
943+ fn test_sanitize_line ( ) {
944+ let cases: & [ ( & str , & str , & str ) ] = & [
945+ ( "plain text" , "plain text" , "plain" ) ,
946+ ( "hello world" , "hello world" , "h_not_url" ) ,
947+ // Brackets
948+ (
949+ "see [google.protobuf.Duration][]" ,
950+ r"see \[google.protobuf.Duration\]\[\]" ,
951+ "collapsed_ref_link" ,
952+ ) ,
953+ ( "a [foo] b" , r"a \[foo\] b" , "shortcut_link" ) ,
954+ ( "[foo][bar]" , r"\[foo\]\[bar\]" , "full_ref_link" ) ,
955+ ( "[.{frac_sec}]Z" , r"\[.{frac_sec}\]Z" , "format_string" ) ,
956+ // Inline links preserved
957+ (
958+ "[RFC 3339](https://ietf.org/rfc/rfc3339.txt)" ,
959+ "[RFC 3339](https://ietf.org/rfc/rfc3339.txt)" ,
960+ "inline_link" ,
961+ ) ,
962+ (
963+ "[m()](https://e.com/#m())" ,
964+ "[m()](https://e.com/#m())" ,
965+ "inline_link_nested_parens" ,
966+ ) ,
967+ // Already escaped
968+ ( r"\[foo\]" , r"\[foo\]" , "pre_escaped" ) ,
969+ // Backtick spans untouched
970+ ( "`[foo]` bar" , "`[foo]` bar" , "backtick_brackets" ) ,
971+ ( "`Option<T>` bar" , "`Option<T>` bar" , "backtick_generics" ) ,
972+ ( "``[foo]``" , "``[foo]``" , "double_backtick" ) ,
973+ ( "`` `<T>` ``" , "`` `<T>` ``" , "double_backtick_inner" ) ,
974+ ( "``` x ` y ```" , "``` x ` y ```" , "triple_backtick" ) ,
975+ ( "`` no closer" , "`` no closer" , "unclosed_backticks" ) ,
976+ (
977+ "[résumé](http://e.com)" ,
978+ "[résumé](http://e.com)" ,
979+ "utf8_link_text" ,
980+ ) ,
981+ // Bare URLs wrapped
982+ (
983+ "see https://example.com/x for details" ,
984+ "see <https://example.com/x> for details" ,
985+ "bare_url" ,
986+ ) ,
987+ (
988+ "(https://example.com)" ,
989+ "(<https://example.com>)" ,
990+ "bare_url_in_parens" ,
991+ ) ,
992+ // Existing autolinks preserved
993+ ( "<https://example.com>" , "<https://example.com>" , "autolink" ) ,
994+ // Angle brackets escaped
995+ ( "Option<T>" , r"Option\<T\>" , "generics" ) ,
996+ ( "HashMap<K, V>" , r"HashMap\<K, V\>" , "generics_multi" ) ,
997+ // UTF-8 passthrough
998+ ( "café — ok" , "café — ok" , "utf8" ) ,
999+ ( "`café` [x]" , r"`café` \[x\]" , "utf8_backtick" ) ,
1000+ ] ;
1001+ for ( input, want, name) in cases {
1002+ assert_eq ! ( sanitize_line( input) , * want, "case: {name}" ) ;
1003+ }
1004+ }
1005+
1006+ #[ test]
1007+ fn test_sanitize_line_unbalanced ( ) {
1008+ // Unmatched delimiters are escaped, not crashed on.
1009+ assert_eq ! ( sanitize_line( "[foo" ) , r"\[foo" ) ;
1010+ assert_eq ! ( sanitize_line( "foo]" ) , r"foo\]" ) ;
1011+ assert_eq ! ( sanitize_line( "[foo](" ) , r"\[foo\](" ) ;
1012+ assert_eq ! ( sanitize_line( "<http://x" ) , r"\<<http://x>" ) ;
1013+ assert_eq ! ( sanitize_line( "a > b" ) , r"a \> b" ) ;
1014+ assert_eq ! ( sanitize_line( "trailing \\ " ) , "trailing \\ " ) ;
1015+ }
1016+
1017+ #[ test]
1018+ fn test_doc_tokens_sanitizes_prose_not_code ( ) {
1019+ // Indented code block content must NOT be sanitized.
1020+ let out = doc_tokens ( "Prose [foo].\n code [bar]\n More." ) ;
1021+ assert ! ( out. contains( r"\\[foo\\]" ) , "prose escaped: {out}" ) ;
1022+ assert ! ( out. contains( "code [bar]" ) , "code untouched: {out}" ) ;
1023+ // User-written fence content must NOT be sanitized.
1024+ let out = doc_tokens ( "```\n [x](y)\n Option<T>\n ```" ) ;
1025+ assert ! ( out. contains( "Option<T>" ) , "fence untouched: {out}" ) ;
1026+ }
7471027}
0 commit comments