WordPress · sirreal · Dec 15, 2025 · Dec 15, 2025 · Dec 15, 2025 · Dec 15, 2025
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -3812,28 +3812,29 @@ public function set_modifiable_text( string $plaintext_content ): bool {
 		switch ( $this->get_tag() ) {
 			case 'SCRIPT':
 				/**
-				 * This is over-protective, but ensures the update doesn't break
-				 * the HTML structure of the SCRIPT element.
+				 * Identify risky script contents to escape when possible or reject otherwise:
 				 *
-				 * More thorough analysis could track the HTML tokenizer states
-				 * and to ensure that the SCRIPT element closes at the expected
-				 * SCRIPT close tag as is done in {@see ::skip_script_data()}.
+				 * - "</script" could close the SCRIPT element prematurely.
+				 * - "<script" could enter the “script data double escaped state” and prevent the
+				 *   SCRIPT element from closing as expected.
 				 *
-				 * A SCRIPT element could be closed prematurely by contents
-				 * like `</script>`. A SCRIPT element could be prevented from
-				 * closing by contents like `<!--<script>`.
-				 *
-				 * The following strings are essential for dangerous content,
-				 * although they are insufficient on their own. This trade-off
-				 * prevents dangerous scripts from being sent to the browser.
-				 * It is also unlikely to produce HTML that may confuse more
-				 * basic HTML tooling.
+				 * @see WP_HTML_Tag_Processor::escape_javascript_script_contents()
 				 */
-				if (
+				$needs_escaping =
 					false !== stripos( $plaintext_content, '</script' ) ||
-					false !== stripos( $plaintext_content, '<script' )
-				) {
-					return false;
+					false !== stripos( $plaintext_content, '<script' );
+				if ( $needs_escaping ) {
+					if ( $this->is_javascript_script_tag() ) {
+						$plaintext_content = $this->escape_javascript_script_contents( $plaintext_content );
+					} elseif ( $this->is_json_script_tag() ) {
+						$plaintext_content = $this->escape_json_script_contents( $plaintext_content );
+					} else {
+						/*
+						 * Other types of script tags cannot be escaped safely because there is
+						 * no general escaping mechanism for arbitrary types of content.
+						 */
+						return false;
+					}
 				}
 
 				$this->lexical_updates['modifiable text'] = new WP_HTML_Text_Replacement(
@@ -3891,6 +3892,293 @@ static function ( $tag_match ) {
 		return false;
 	}
 
+	/**
+	 * Indicates if the currently matched tag is a JavaScript script tag.
+	 *
+	 * Note that this does not parse a MIME type. This behavior is well-documented in
+	 * the HTML standard and uses string comparisons, *not* actual MIME Types.
+	 *
+	 * @see https://html.spec.whatwg.org/multipage/scripting.html#prepare-the-script-element
+	 *
+	 * @ignore
+	 * @todo Consider a public API that is clear and general.
+	 *
+	 * @since 7.0.0
+	 *
+	 * @return bool True if the script tag will be evaluated as JavaScript.
+	 */
+	private function is_javascript_script_tag(): bool {
+		// Only a SCRIPT element in the HTML namespace can qualify.
+		$is_html_script = 'SCRIPT' === $this->get_tag() && 'html' === $this->get_namespace();
+		if ( ! $is_html_script ) {
+			return false;
+		}
+
+		/*
+		 * Besides string values, the checks below compare each attribute against `null`
+		 * (treated as "el has no such attribute") and `true` (treated like an empty value).
+		 */
+		$type     = $this->get_attribute( 'type' );
+		$language = $this->get_attribute( 'language' );
+
+		/*
+		 * > If any of the following are true:
+		 * >   - el has a type attribute whose value is the empty string;
+		 * >   - el has no type attribute but it has a language attribute and that attribute's
+		 * >     value is the empty string; or
+		 * >   - el has neither a type attribute nor a language attribute,
+		 * > then let the script block's type string for this script element be "text/javascript".
+		 *
+		 * "text/javascript" is a JavaScript MIME type, so these cases are settled right away.
+		 */
+		$type_is_blank     = true === $type || '' === $type;
+		$language_is_blank = null === $language || true === $language || '' === $language;
+		if ( $type_is_blank || ( null === $type && $language_is_blank ) ) {
+			return true;
+		}
+
+		/*
+		 * > Otherwise, if el has a type attribute, then let the script block's type string be
+		 * > the value of that attribute with leading and trailing ASCII whitespace stripped.
+		 * > Otherwise, el has a non-empty language attribute; let the script block's type string
+		 * > be the concatenation of "text/" and the value of el's language attribute.
+		 */
+		if ( null === $type ) {
+			$type_string = 'text/' . $language;
+		} else {
+			$type_string = trim( $type, " \t\f\r\n" );
+		}
+
+		/*
+		 * > If the script block's type string is a JavaScript MIME type essence match, then
+		 * > set el's type to "classic".
+		 *
+		 * > A string is a JavaScript MIME type essence match if it is an ASCII case-insensitive
+		 * > match for one of the JavaScript MIME type essence strings.
+		 *
+		 * > Otherwise, if the script block's type string is an ASCII case-insensitive match for
+		 * > the string "module", then set el's type to "module".
+		 *
+		 * Classic scripts and modules are both evaluated as JavaScript, so they share one list.
+		 * Every entry is lowercase because the type string is lowercased before the lookup.
+		 *
+		 * @see https://mimesniff.spec.whatwg.org/#javascript-mime-type-essence-match
+		 * @see https://mimesniff.spec.whatwg.org/#javascript-mime-type
+		 */
+		$javascript_type_strings = array(
+			// JavaScript MIME type essences under "application/".
+			'application/ecmascript',
+			'application/javascript',
+			'application/x-ecmascript',
+			'application/x-javascript',
+
+			// JavaScript MIME type essences under "text/".
+			'text/ecmascript',
+			'text/javascript',
+			'text/javascript1.0',
+			'text/javascript1.1',
+			'text/javascript1.2',
+			'text/javascript1.3',
+			'text/javascript1.4',
+			'text/javascript1.5',
+			'text/jscript',
+			'text/livescript',
+			'text/x-ecmascript',
+			'text/x-javascript',
+
+			// Module scripts.
+			'module',
+		);
+
+		$normalized_type_string = strtolower( $type_string );
+
+		/*
+		 * > Otherwise, if the script block's type string is an ASCII case-insensitive match for
+		 * > the string "importmap", then set el's type to "importmap".
+		 * > Otherwise, if the script block's type string is an ASCII case-insensitive match for
+		 * > the string "speculationrules", then set el's type to "speculationrules".
+		 *
+		 * These conditions indicate JSON content, which is not JavaScript.
+		 *
+		 * > Otherwise, return. (No script is executed, and el's type is left as null.)
+		 *
+		 * Any type string absent from the list above therefore yields `false`.
+		 */
+		return in_array(
+			$normalized_type_string,
+			$javascript_type_strings,
+			true
+		);
+	}
+
+	/**
+	 * Indicates if the currently matched tag is a JSON script tag.
+	 *
+	 * @ignore
+	 * @todo Consider a public API that is clear and general.
+	 * @todo Use a MIME type parser when available.
+	 *
+	 * @since 7.0.0
+	 *
+	 * @return bool True if the script tag should be treated as JSON.
+	 */
+	private function is_json_script_tag(): bool {
+		$is_html_script = 'SCRIPT' === $this->get_tag() && 'html' === $this->get_namespace();
+		if ( ! $is_html_script ) {
+			return false;
+		}
+
+		// A `type` attribute that is `null`, `true`, or the empty string never designates JSON.
+		$type_attr = $this->get_attribute( 'type' );
+		if ( '' === $type_attr || true === $type_attr || null === $type_attr ) {
+			return false;
+		}
+
+		/*
+		 * Two groups of type strings are recognized as JSON, compared with leading and
+		 * trailing ASCII whitespace stripped and after lowercasing:
+		 *
+		 * 1. The "importmap" and "speculationrules" script types, which the HTML standard
+		 *    matches ASCII case-insensitively against the script block's type string.
+		 *
+		 * 2. The exact strings "application/json" and "text/json". MIME types carrying
+		 *    parameters or a "+json" subtype suffix are not recognized here.
+		 *
+		 * @todo Leverage MIME type parsing here once it is available.
+		 *
+		 * @see https://html.spec.whatwg.org/#script-processing-model
+		 * @see https://mimesniff.spec.whatwg.org/#json-mime-type
+		 */
+		$normalized_type = strtolower( trim( $type_attr, " \t\f\r\n" ) );
+
+		return in_array(
+			$normalized_type,
+			array( 'importmap', 'speculationrules', 'application/json', 'text/json' ),
+			true
+		);
+	}
+
+	/**
+	 * Escape JavaScript script tag contents.
+	 *
+	 * Prevent JavaScript text from modifying the HTML structure of a document and
+	 * ensure that it's contained within its enclosing SCRIPT tag as intended.
+	 *
+	 * JavaScript can be safely escaped with a few exceptions. This is achieved by
+	 * replacing dangerous sequences like "<script" and "</script" with a form
+	 * using a Unicode escape sequence: "<\u0073cript" and "</\u0073cript".
+	 *
+	 * This text may appear in the JavaScript in limited ways, all of which support
+	 * the use of Unicode escape sequences on the "s" character. The escaping is safe
+	 * to perform in all JavaScript and the modified JavaScript maintains identical
+	 * behavior with a few exceptions:
+	 *
+	 * - Comments.
+	 * - Tagged templates like `String.raw()` that access “raw” strings.
+	 * - The `source` property of a RegExp object.
+	 *
+	 * For example, this input JavaScript:
+	 *
+	 *     // A comment: "</script>"
+	 *
+	 *     console.log( String.raw`</script>` );
+	 *
+	 *     const regex = /<script>/;
+	 *     console.log( regex.source );
+	 *
+	 * Is transformed to:
+	 *
+	 *     // A comment: "</\u0073cript>"
+	 *
+	 *     console.log( String.raw`</\u0073cript>` );
+	 *
+	 *     const regex = /<\u0073cript>/;
+	 *     console.log( regex.source );
+	 *
+	 * Note that the RegExp's matching behavior is equivalent, meaning that
+	 * `regex.test( '<script>' ) === true` in both the unescaped and
+	 * escaped versions.
+	 *
+	 * JavaScript that relies on behavior affected by this escaping must provide
+	 * safe script contents in order to avoid this escaping. For example, a raw string
+	 * may be split up to make its contents safe or avoided altogether:
+	 *
+	 *     console.log( String.raw`</script>` );                // !!UNSAFE!! Will be escaped.
+	 *     console.log( String.raw`</\u0073cript>` );           // "</\u0073cript>"
+	 *     console.log( String.raw`</scr` + String.raw`ipt>` ); // "</script>"
+	 *     console.log( String.raw`</${"script"}>` );           // "</script>"
+	 *     console.log( "\x3C/script>" );                       // "</script>"
+	 *     console.log( "<\/script>" );                         // "</script>"
+	 *
+	 * The following graph is a simplified interpretation of how HTML interprets the contents
+	 * of a SCRIPT tag and identifies the closing tag. It is useful to understand what text
+	 * is dangerous inside of a SCRIPT tag and why different approaches to escaping work.
+	 *
+	 *                              Open script
+	 *                                  │
+	 *                                  │
+	 *                                  ▼
+	 *               ╔═════════════════════════════════════════╗   <!--(…)>
+	 *               ║                                         ║   (all dashes)
+	 *               ║                 script                  ║ ───────────────┐
+	 *               ║                  data                   ║                │
+	 *   ┌────────── ║                                         ║ ◀──────────────┘
+	 *   │           ╚═════════════════════════════════════════╝
+	 *   │             │               ▲                    ▲
+	 *   │             │ <!--          │ -->                └─────┐
+	 *   │             ▼               │                          │
+	 *   │           ┌─────────────────────────────────────────┐  │
+	 *   │ </script† │                 escaped                 │  │
+	 *   │           └─────────────────────────────────────────┘  │
+	 *   │             │               ▲             │            │ -->
+	 *   │             │ </script†     │ </script†   │ <script†   │
+	 *   │             ▼               │             ▼            │
+	 *   │           ╔══════════════╗  │           ┌───────────┐  │
+	 *   │           ║ Close script ║  │           │  double   │  │
+	 *   └─────────▶ ║              ║  └────────── │  escaped  │ ─┘
+	 *               ╚══════════════╝              └───────────┘
+	 *
+	 *        † = Case insensitive 'script' followed by one of ' \t\f\r\n/>'
+	 *
+	 * The original source of this graph is included at the bottom of this file.
+	 *
+	 * @see https://html.spec.whatwg.org/#restrictions-for-contents-of-script-elements
+	 */
+	private function escape_javascript_script_contents( string $text ): string {
+		// Match "<script" / "</script" only when a tag-name terminator follows.
+		$pattern = '~(?P<HEAD></?)(?P<S_CHAR>s)(?P<TAIL>cript[ \\t\\f\\r\\n/>])~i';
+
+		// Swap the "s" for its Unicode escape, keeping the original letter case.
+		$escape_s = static function ( $found ) {
+			$unicode_s = 's' === $found['S_CHAR'] ? '\\u0073' : '\\u0053';
+			return $found['HEAD'] . $unicode_s . $found['TAIL'];
+		};
+
+		return preg_replace_callback( $pattern, $escape_s, $text );
+	}
+
+	/**
+	 * Escape JSON script tag contents.
+	 *
+	 * Prevent JSON text from modifying the HTML structure of a document and
+	 * ensure that it's contained within its enclosing SCRIPT tag as intended.
+	 *
+	 * JSON can be escaped simply by replacing "<" with its Unicode escape
+	 * sequence "\u003C". "<" is not part of the JSON syntax and only appears
+	 * in JSON strings, so it's always safe to escape. Furthermore, JSON does
+	 * not allow backslash escaping of "<", so there's no need to consider
+	 * whether the "<" is preceded by an escaping backslash.
+	 *
+	 * For more details, see {@see WP_HTML_Tag_Processor::escape_javascript_script_contents()}.
+	 * @see https://www.json.org/json-en.html
+	 */
+	private function escape_json_script_contents( string $text ): string {
+		// "<" only occurs inside JSON strings, where "\u003C" decodes identically.
+		$less_than_escape = '\\u003C';
+
+		return str_replace( '<', $less_than_escape, $text );
+	}
+
 	/**
 	 * Updates or creates a new attribute on the currently matched tag with the passed value.
 	 *
@@ -4681,3 +4969,40 @@ public function get_doctype_info(): ?WP_HTML_Doctype_Info {
 	 */
 	const TEXT_IS_WHITESPACE = 'TEXT_IS_WHITESPACE';
 }
+
+/*
+# This is the original Graphviz source for the SCRIPT tag
+# parsing behavior. It's used in the documentation for
+# `WP_HTML_Tag_Processor::escape_javascript_script_contents()`.
+# ====
+digraph {
+	rankdir=TB;
+
+	// Entry point
+	entry [shape=plaintext label="Open script"];
+	entry -> script_data;
+
+	// Double-circle states arranged more compactly
+	data [shape=doublecircle label="Close script"];
+	script_data [shape=doublecircle color=blue label="script\ndata"];
+	script_data_escaped [shape=circle color=orange label="escaped"];
+	script_data_double_escaped [shape=circle color=red label="double\nescaped"];
+
+	// Group related nodes on same ranks where possible
+	{rank=same; script_data script_data_escaped script_data_double_escaped}
+
+	script_data -> script_data [label="<!--(…)>\n(all dashes)"];
+	script_data -> script_data_escaped [label="<!--"];
+	script_data -> data [label="</script†"];
+
+	script_data_escaped -> script_data [label="-->"];
+	script_data_escaped -> script_data_double_escaped [label="<script†"];
+	script_data_escaped -> data [label="</script†"];
+
+	script_data_double_escaped -> script_data [label="-->"];
+	script_data_double_escaped -> script_data_escaped [label="</script†"];
+
+	label="† = Case insensitive 'script' followed by one of ' \\t\\f\\r\\n/>'";
+	labelloc=b;
+}
+*/