@@ -9,17 +9,39 @@ var Color = require('../components/color');
99var xmlnsNamespaces = require ( '../constants/xmlns_namespaces' ) ;
var DOUBLEQUOTE_REGEX = /"/g;
var DUMMY_SUB = 'TOBESTRIPPED';
// Match TOBESTRIPPED adjacent to either a literal " or its entity form &quot;.
// XMLSerializer escapes inner double-quotes to &quot; inside "-delimited
// attributes, and htmlEntityDecode preserves that entity for safety.
const DUMMY_REGEX = new RegExp(
    `("${DUMMY_SUB})|(${DUMMY_SUB}")|(&quot;${DUMMY_SUB})|(${DUMMY_SUB}&quot;)`,
    'g'
);

// Entities for & " ' - decoding these in attribute context is an XSS vector,
// so preserve them as-is. List includes named, decimal, and hex numeric forms.
const PRESERVED_ENTITIES = ['&amp;', '&#38;', '&#x26;', '&quot;', '&#34;', '&#x22;', '&apos;', '&#39;', '&#x27;'];
// Entities for < and > - normalize to numeric so downstream passes treat them
// uniformly regardless of which form the serializer emitted.
const LESS_THAN_ENTITIES = ['&lt;', '&#60;', '&#x3c;'];
const GREATER_THAN_ENTITIES = ['&gt;', '&#62;', '&#x3e;'];

// Decimal (&#NN;) or hex (&#xHH;) character reference, already lower-cased.
const NUMERIC_ENTITY_REGEX = /^&#(?:x([0-9a-f]+)|([0-9]+));$/;
// Characters that must never appear raw in the decoded output.
const STRUCTURAL_CHAR_REGEX = /[&"'<>]/g;

/**
 * Resolve a lower-cased numeric character reference to its code point.
 *
 * Lets leading-zero spellings such as &#0034; or &#x0003c; be classified the
 * same way as the canonical forms listed in the entity tables above.
 *
 * @param {string} lower - lower-cased entity text, e.g. '&#x3c;'
 * @returns {number|null} the code point, or null if not a numeric reference
 */
function numericEntityCodePoint(lower) {
    const match = NUMERIC_ENTITY_REGEX.exec(lower);
    if (!match) return null;

    return match[1] !== undefined ? Number.parseInt(match[1], 16) : Number.parseInt(match[2], 10);
}

/**
 * Decode non-structural entities to Unicode for non-browser SVG renderers,
 * keeping & " ' < > entity-encoded to prevent attribute-context escape (XSS).
 *
 * Every `&...;` run in the input is handled as follows:
 *  - references to & " ' are returned untouched;
 *  - references to < and > are rewritten to &#60; and &#62;;
 *  - runs that themselves contain a raw < or > are dropped;
 *  - anything else is decoded by a temporary hidden <div>, and any
 *    & " ' < > present in the decoded text is re-escaped numerically.
 *
 * @param {string} s - serialized SVG string
 * @returns {string} entity-normalized SVG string
 */
function htmlEntityDecode(s) {
    const hiddenDiv = d3.select('body').append('div').style({display: 'none'}).html('');

    try {
        return s.replace(/(&[^;]*;)/gi, (d) => {
            const lower = d.toLowerCase();
            const codePoint = numericEntityCodePoint(lower);

            // 38 = &, 34 = ", 39 = '
            if (PRESERVED_ENTITIES.includes(lower) || codePoint === 38 || codePoint === 34 || codePoint === 39) {
                return d;
            }
            if (LESS_THAN_ENTITIES.includes(lower) || codePoint === 60) return '&#60;';
            if (GREATER_THAN_ENTITIES.includes(lower) || codePoint === 62) return '&#62;';
            if (d.includes('<') || d.includes('>')) return '';

            // everything else, let the browser decode it to unicode
            const decoded = hiddenDiv.html(d).text();

            // Safety net: never let a structural character through raw.
            return decoded.replace(STRUCTURAL_CHAR_REGEX, (c) => `&#${c.charCodeAt(0)};`);
        });
    } finally {
        // Always detach the scratch node, even if decoding throws.
        hiddenDiv.remove();
    }
}
2547
@@ -48,29 +70,29 @@ module.exports = function toSVG(gd, format, scale) {
4870 // which notably add the contents of the gl-container
4971 // into the main svg node
5072 var basePlotModules = fullLayout . _basePlotModules || [ ] ;
51- for ( i = 0 ; i < basePlotModules . length ; i ++ ) {
73+ for ( i = 0 ; i < basePlotModules . length ; i ++ ) {
5274 var _module = basePlotModules [ i ] ;
5375
54- if ( _module . toSVG ) _module . toSVG ( gd ) ;
76+ if ( _module . toSVG ) _module . toSVG ( gd ) ;
5577 }
5678
5779 // add top items above them assumes everything in toppaper is either
5880 // a group or a defs, and if it's empty (like hoverlayer) we can ignore it.
59- if ( toppaper ) {
81+ if ( toppaper ) {
6082 var nodes = toppaper . node ( ) . childNodes ;
6183
6284 // make copy of nodes as childNodes prop gets mutated in loop below
6385 var topGroups = Array . prototype . slice . call ( nodes ) ;
6486
65- for ( i = 0 ; i < topGroups . length ; i ++ ) {
87+ for ( i = 0 ; i < topGroups . length ; i ++ ) {
6688 var topGroup = topGroups [ i ] ;
6789
68- if ( topGroup . childNodes . length ) svg . node ( ) . appendChild ( topGroup ) ;
90+ if ( topGroup . childNodes . length ) svg . node ( ) . appendChild ( topGroup ) ;
6991 }
7092 }
7193
7294 // remove draglayer for Adobe Illustrator compatibility
73- if ( fullLayout . _draggers ) {
95+ if ( fullLayout . _draggers ) {
7496 fullLayout . _draggers . remove ( ) ;
7597 }
7698
@@ -80,81 +102,82 @@ module.exports = function toSVG(gd, format, scale) {
80102 svg . node ( ) . style . background = '' ;
81103
82104 svg . selectAll ( 'text' )
83- . attr ( { 'data-unformatted' : null , 'data-math' : null } )
84- . each ( function ( ) {
105+ . attr ( { 'data-unformatted' : null , 'data-math' : null } )
106+ . each ( function ( ) {
85107 var txt = d3 . select ( this ) ;
86108
87109 // hidden text is pre-formatting mathjax, the browser ignores it
88110 // but in a static plot it's useless and it can confuse batik
89111 // we've tried to standardize on display:none but make sure we still
90112 // catch visibility:hidden if it ever arises
91- if ( this . style . visibility === 'hidden' || this . style . display === 'none' ) {
113+ if ( this . style . visibility === 'hidden' || this . style . display === 'none' ) {
92114 txt . remove ( ) ;
93115 return ;
94116 } else {
95117 // clear other visibility/display values to default
96118 // to not potentially confuse non-browser SVG implementations
97- txt . style ( { visibility : null , display : null } ) ;
119+ txt . style ( { visibility : null , display : null } ) ;
98120 }
99121
100122 // Font family styles break things because of quotation marks,
101123 // so we must remove them *after* the SVG DOM has been serialized
102124 // to a string (browsers convert singles back)
103125 var ff = this . style . fontFamily ;
104- if ( ff && ff . indexOf ( '"' ) !== - 1 ) {
126+ if ( ff && ff . indexOf ( '"' ) !== - 1 ) {
105127 txt . style ( 'font-family' , ff . replace ( DOUBLEQUOTE_REGEX , DUMMY_SUB ) ) ;
106128 }
107129
108130 // Drop normal font-weight, font-style and font-variant to reduce the size
109131 var fw = this . style . fontWeight ;
110- if ( fw && ( fw === 'normal' || fw === '400' ) ) { // font-weight 400 is similar to normal
132+ if ( fw && ( fw === 'normal' || fw === '400' ) ) {
133+ // font-weight 400 is similar to normal
111134 txt . style ( 'font-weight' , undefined ) ;
112135 }
113136 var fs = this . style . fontStyle ;
114- if ( fs && fs === 'normal' ) {
137+ if ( fs && fs === 'normal' ) {
115138 txt . style ( 'font-style' , undefined ) ;
116139 }
117140 var fv = this . style . fontVariant ;
118- if ( fv && fv === 'normal' ) {
141+ if ( fv && fv === 'normal' ) {
119142 txt . style ( 'font-variant' , undefined ) ;
120143 }
121144 } ) ;
122145
123- svg . selectAll ( '.gradient_filled,.pattern_filled' ) . each ( function ( ) {
146+ svg . selectAll ( '.gradient_filled,.pattern_filled' ) . each ( function ( ) {
124147 var pt = d3 . select ( this ) ;
125148
126149 // similar to font family styles above,
127150 // we must remove " after the SVG DOM has been serialized
128151 var fill = this . style . fill ;
129- if ( fill && fill . indexOf ( 'url(' ) !== - 1 ) {
152+ if ( fill && fill . indexOf ( 'url(' ) !== - 1 ) {
130153 pt . style ( 'fill' , fill . replace ( DOUBLEQUOTE_REGEX , DUMMY_SUB ) ) ;
131154 }
132155
133156 var stroke = this . style . stroke ;
134- if ( stroke && stroke . indexOf ( 'url(' ) !== - 1 ) {
157+ if ( stroke && stroke . indexOf ( 'url(' ) !== - 1 ) {
135158 pt . style ( 'stroke' , stroke . replace ( DOUBLEQUOTE_REGEX , DUMMY_SUB ) ) ;
136159 }
137160 } ) ;
138161
139- if ( format === 'pdf' || format === 'eps' ) {
162+ if ( format === 'pdf' || format === 'eps' ) {
140163 // these formats make the extra line MathJax adds around symbols look super thick in some cases
141164 // it looks better if this is removed entirely.
142- svg . selectAll ( '#MathJax_SVG_glyphs path' )
143- . attr ( 'stroke-width' , 0 ) ;
165+ svg . selectAll ( '#MathJax_SVG_glyphs path' ) . attr ( 'stroke-width' , 0 ) ;
144166 }
145167
146- if ( format === 'svg' && scale ) {
168+ if ( format === 'svg' && scale ) {
147169 svg . attr ( 'width' , scale * width ) ;
148170 svg . attr ( 'height' , scale * height ) ;
149171 svg . attr ( 'viewBox' , '0 0 ' + width + ' ' + height ) ;
150172 }
151173
152174 var s = new window . XMLSerializer ( ) . serializeToString ( svg . node ( ) ) ;
175+ // Decode numeric refs to Unicode so non-browser renderers (Batik, Illustrator) render them correctly.
153176 s = htmlEntityDecode ( s ) ;
154177 s = xmlEntityEncode ( s ) ;
155178
156179 // Fix quotations around font strings and gradient URLs
157- s = s . replace ( DUMMY_REGEX , '\'' ) ;
180+ s = s . replace ( DUMMY_REGEX , "'" ) ;
158181
159182 return s ;
160183} ;
0 commit comments