@@ -30,6 +30,19 @@ fn inline_text_to_plain_text(nodes: &[Node]) -> String {
3030 nodes. iter ( ) . map ( ast_to_plain_text) . collect ( )
3131}
3232
/// Escapes the characters that are reserved in XML character data.
///
/// `&` becomes `&amp;`, `<` becomes `&lt;` and `>` becomes `&gt;`; every
/// other character is copied through unchanged. The input is walked once,
/// so an ampersand that belongs to an entity emitted here is never
/// escaped a second time.
///
/// Returns a newly allocated `String`; the input is not modified.
fn escape_xml_text(input: &str) -> String {
    // The output is at least as long as the input; reserve that up front.
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
39+
/// Escapes a string for use inside a quoted XML attribute value.
///
/// Handles the three characters reserved in character data (`&` to
/// `&amp;`, `<` to `&lt;`, `>` to `&gt;`) and additionally both quote
/// characters (`"` to `&quot;`, `'` to `&apos;`), so the result is safe
/// whichever quote style delimits the attribute.
///
/// The input is walked once, which guarantees that the ampersands of the
/// entities produced here are not themselves re-escaped.
///
/// Returns a newly allocated `String`; the input is not modified.
fn escape_xml_attr(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
45+
3346fn looks_like_email ( text : & str ) -> bool {
3447 regex:: Regex :: new ( r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$" )
3548 . unwrap ( )
@@ -680,16 +693,16 @@ pub fn plain_text_to_ast(text: &str) -> Node {
680693pub fn plain_text_ast_to_jats ( node : & Node ) -> String {
681694 fn render_plain_text_inline ( node : & Node ) -> String {
682695 match node {
683- Node :: Text ( text) => text . clone ( ) ,
696+ Node :: Text ( text) => escape_xml_text ( text ) ,
684697 Node :: Break => "<break/>" . to_string ( ) ,
685698 Node :: InlineFormula ( tex) => {
686699 format ! (
687700 "<inline-formula><tex-math>{}</tex-math></inline-formula>" ,
688- tex
701+ escape_xml_text ( tex)
689702 )
690703 }
691- Node :: Email ( email) => format ! ( "<email>{}</email>" , email) ,
692- Node :: Uri ( uri) => format ! ( "<uri>{}</uri>" , uri) ,
704+ Node :: Email ( email) => format ! ( "<email>{}</email>" , escape_xml_text ( email) ) ,
705+ Node :: Uri ( uri) => format ! ( "<uri>{}</uri>" , escape_xml_text ( uri) ) ,
693706 other => ast_to_jats ( other) ,
694707 }
695708 }
@@ -712,16 +725,16 @@ pub fn plain_text_ast_to_jats(node: &Node) -> String {
712725 let inner: String = children. iter ( ) . map ( render_plain_text_inline) . collect ( ) ;
713726 format ! ( "<p>{}</p>" , inner)
714727 }
715- Node :: Text ( text) => format ! ( "<p>{}</p>" , text) ,
728+ Node :: Text ( text) => format ! ( "<p>{}</p>" , escape_xml_text ( text) ) ,
716729 Node :: Break => "<p><break/></p>" . to_string ( ) ,
717730 Node :: InlineFormula ( tex) => {
718731 format ! (
719732 "<p><inline-formula><tex-math>{}</tex-math></inline-formula></p>" ,
720- tex
733+ escape_xml_text ( tex)
721734 )
722735 }
723- Node :: Email ( email) => format ! ( "<p><email>{}</email></p>" , email) ,
724- Node :: Uri ( uri) => format ! ( "<p><uri>{}</uri></p>" , uri) ,
736+ Node :: Email ( email) => format ! ( "<p><email>{}</email></p>" , escape_xml_text ( email) ) ,
737+ Node :: Uri ( uri) => format ! ( "<p><uri>{}</uri></p>" , escape_xml_text ( uri) ) ,
725738 _ => {
726739 // For other nodes, use regular ast_to_jats
727740 ast_to_jats ( node)
@@ -780,17 +793,21 @@ pub fn ast_to_jats(node: &Node) -> String {
780793 }
781794 Node :: Link { url, text } => {
782795 let inner: String = text. iter ( ) . map ( ast_to_jats) . collect ( ) ;
783- format ! ( r#"<ext-link xlink:href="{}">{}</ext-link>"# , url, inner)
796+ format ! (
797+ r#"<ext-link xlink:href="{}">{}</ext-link>"# ,
798+ escape_xml_attr( url) ,
799+ inner
800+ )
784801 }
785802 Node :: InlineFormula ( tex) => {
786803 format ! (
787804 "<inline-formula><tex-math>{}</tex-math></inline-formula>" ,
788- tex
805+ escape_xml_text ( tex)
789806 )
790807 }
791- Node :: Email ( email) => format ! ( "<email>{}</email>" , email) ,
792- Node :: Uri ( uri) => format ! ( "<uri>{}</uri>" , uri) ,
793- Node :: Text ( text) => text . clone ( ) ,
808+ Node :: Email ( email) => format ! ( "<email>{}</email>" , escape_xml_text ( email) ) ,
809+ Node :: Uri ( uri) => format ! ( "<uri>{}</uri>" , escape_xml_text ( uri) ) ,
810+ Node :: Text ( text) => escape_xml_text ( text ) ,
794811 }
795812}
796813
@@ -1997,6 +2014,64 @@ mod tests {
19972014 ) ;
19982015 }
19992016
/// Reserved XML characters in text, formula, e-mail and URI nodes must be
/// emitted as entities, while the generated JATS tags stay intact.
#[test]
fn test_ast_to_jats_escapes_reserved_xml_chars() {
    let ast = Node::Paragraph(vec![
        Node::Text("x < y & z > w ".to_string()),
        Node::InlineFormula("a < b & c".to_string()),
        Node::Text(" ".to_string()),
        Node::Email("user@example.org".to_string()),
        Node::Text(" ".to_string()),
        Node::Uri("https://example.org?a=1&b=2".to_string()),
    ]);

    let jats = ast_to_jats(&ast);
    // Every `<`, `>` and `&` that came from node content appears escaped.
    assert_eq!(
        jats,
        "<p>x &lt; y &amp; z &gt; w <inline-formula><tex-math>a &lt; b &amp; c</tex-math></inline-formula> <email>user@example.org</email> <uri>https://example.org?a=1&amp;b=2</uri></p>"
    );
}
2034+
/// The URL of a link is written into the `xlink:href` attribute, so an
/// ampersand in its query string must come out as `&amp;`.
#[test]
fn test_ast_to_jats_escapes_link_url_attribute() {
    let ast = Node::Link {
        url: "https://example.com?a=1&b=2".to_string(),
        text: vec![Node::Text("Link text".to_string())],
    };

    let jats = ast_to_jats(&ast);
    assert_eq!(
        jats,
        r#"<ext-link xlink:href="https://example.com?a=1&amp;b=2">Link text</ext-link>"#
    );
}
2048+
/// Tags produced by the renderer itself must not be escaped: only node
/// content goes through XML escaping.
#[test]
fn test_ast_to_jats_preserves_generated_tags() {
    let ast = Node::Paragraph(vec![
        Node::Bold(vec![Node::Text("Bold".to_string())]),
        Node::Text(" and ".to_string()),
        Node::Italic(vec![Node::Text("italic".to_string())]),
    ]);

    let jats = ast_to_jats(&ast);
    assert_eq!(jats, "<p><bold>Bold</bold> and <italic>italic</italic></p>");
    // Guard against double escaping of the generated markup.
    assert!(!jats.contains("&lt;bold&gt;"));
}
2061+
/// Generated tags stay literal while reserved characters inside the
/// nested text nodes are escaped.
#[test]
fn test_ast_to_jats_preserves_generated_tags_and_escapes() {
    let ast = Node::Paragraph(vec![
        Node::Bold(vec![Node::Text("Bo<ld".to_string())]),
        Node::Text(" & ".to_string()),
        Node::Italic(vec![Node::Text("ita>lic".to_string())]),
    ]);

    let jats = ast_to_jats(&ast);
    assert_eq!(
        jats,
        "<p><bold>Bo&lt;ld</bold> &amp; <italic>ita&gt;lic</italic></p>"
    );
    // The wrapper tags themselves must not have been escaped.
    assert!(!jats.contains("&lt;bold&gt;"));
}
2074+
20002075 #[ test]
20012076 fn test_ast_to_jats_break_formula_email_and_uri ( ) {
20022077 let ast = Node :: Paragraph ( vec ! [
0 commit comments