Index: src/wp-includes/formatting.php
===================================================================
--- src/wp-includes/formatting.php	(revision 30135)
+++ src/wp-includes/formatting.php	(working copy)
@@ -205,69 +205,34 @@
 
 	// Look for shortcodes and HTML elements.
 
-	$comment_regex =
-		  '!'           // Start of comment, after the <.
-		. '(?:'         // Unroll the loop: Consume everything until --> is found.
-		.     '-(?!->)' // Dash not followed by end of comment.
-		.     '[^\-]*+' // Consume non-dashes.
-		. ')*+'         // Loop possessively.
-		. '(?:-->)?';   // End of comment. If not found, match all input.
+	$chunks = wptexturize_parse( $text );
+	$textarr = array();
 
-	$shortcode_regex =
-		  '\['          // Find start of shortcode.
-		. '[\/\[]?'     // Shortcodes may begin with [/ or [[
-		. '[^\s\/\[\]]' // No whitespace before name.
-		. '[^\[\]]*+'   // Shortcodes do not contain other shortcodes. Possessive critical.
-		. '\]'          // Find end of shortcode.
-		. '\]?';        // Shortcodes may end with ]]
+	foreach ( $chunks as $chunk ) {
+		// Only call _wptexturize_pushpop_element if $chunk is a tag.
 
-	$regex =
-		  '/('                   // Capture the entire match.
-		.     '<'                // Find start of element.
-		.     '(?(?=!--)'        // Is this a comment?
-		.         $comment_regex // Find end of comment.
-		.     '|'
-		.         '[^>]+>'       // Find end of element.
-		.     ')'
-		. '|'
-		.     $shortcode_regex   // Find shortcodes.
-		. ')/s';
+		if ( $chunk[5] ) {
+			if ( '<!--' !== substr( $chunk[0], 0, 4 ) ) {
+				// This is an HTML element, not a comment.
+				_wptexturize_pushpop_element( $chunk[0], $no_texturize_tags_stack, $no_texturize_tags );
+			}
+			$textarr[] = $chunk[0];
 
-	$textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
+		} elseif ( $chunk[4] ) {
+			if ( '[[' !== substr( $chunk[0], 0, 2 ) && ']]' !== substr( $chunk[0], -2 ) ) {
+				// Looks like a normal, non-escaped shortcode.
+				_wptexturize_pushpop_element( $chunk[0], $no_texturize_shortcodes_stack, $no_texturize_shortcodes );
+			}
+			$textarr[] = $chunk[0];
 
-	foreach ( $textarr as &$curl ) {
-		// Only call _wptexturize_pushpop_element if $curl is a delimiter.
-		$first = $curl[0];
-		if ( '<' === $first && '<!--' === substr( $curl, 0, 4 ) ) {
-			// This is an HTML comment delimeter.
-
-			continue;
-
-		} elseif ( '<' === $first && '>' === substr( $curl, -1 ) ) {
-			// This is an HTML element delimiter.
-
-			_wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
-
-		} elseif ( '' === trim( $curl ) ) {
+		} elseif ( '' === trim( $chunk[0] ) ) {
 			// This is a newline between delimiters.  Performance improves when we check this.
+			$textarr[] = $chunk[0];
 
-			continue;
-
-		} elseif ( '[' === $first && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) {
-			// This is a shortcode delimiter.
-
-			if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) {
-				// Looks like a normal shortcode.
-				_wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes );
-			} else {
-				// Looks like an escaped shortcode.
-				continue;
-			}
-
 		} elseif ( empty( $no_texturize_shortcodes_stack ) && empty( $no_texturize_tags_stack ) ) {
 			// This is neither a delimiter, nor is this content inside of no_texturize pairs.  Do texturize.
 
-			$curl = str_replace( $static_characters, $static_replacements, $curl );
+			$curl = str_replace( $static_characters, $static_replacements, $chunk[0] );
 
 			if ( false !== strpos( $curl, "'" ) ) {
 				$curl = preg_replace( $dynamic_characters['apos'], $dynamic_replacements['apos'], $curl );
@@ -284,6 +249,10 @@
 				// Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one!
 				$curl = preg_replace( '/\b(\d(?(?<=0)[\d\.,]+|[\d\.,]*))x(-?\d[\d\.,]*)\b/', '$1&#215;$2', $curl );
 			}
+
+			$textarr[] = $curl;
+		} else {
+			$textarr[] = $chunk[0];
 		}
 	}
 	$text = implode( '', $textarr );
@@ -349,6 +318,300 @@
 }
 
 /**
+ * Parse any string into separate chunks of plain text, HTML, and shortcodes.
+ *
+ * The return value is a 2-dimensional array of strings and metadata ordered
+ * by input, similar to a split string.  Each node has these keys and values:
+ *
+ * key- type  - value
+ * ------------------
+ * 0 - string - The text that was split into this node from the input.
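+ * 1 - int    - Start position: byte offset of the node's first character in the input.
+ * 2 - int    - End position: byte offset of the node's last character (inclusive).
+ * 3 - int    - Length of text in bytes.  Always equal to End - Start + 1.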
+ * 4 - bool   - Shortcode flag.
+ * 5 - bool   - HTML flag.
+ * 6 - Reserved for future use to indicate the node is inline or block.
+ *
+ * The string in key 0 is plain text when keys 4 and 5 are both false.
+ *
+ * @since 4.0.1
+ * @param string $text The user input that needs to be texturized.
+ * @return array Structured version of $text with its HTML and shortcodes separated.
+ */
+function wptexturize_parse( $text ) {
+	$results = array();  // Chunks from the shortcode pass; refined by the HTML pass and ultimately returned.
+	$results2 = array(); // HTML and shortcode-like matches from the second pass; read-only once filled.
+
+	// First pass: find shortcodes with the pattern from get_shortcode_regex().
+	$regex = '/' . get_shortcode_regex() . '/s';
+
+	preg_match_all( $regex, $text, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER );
+
+	$pos = 0; // Offset of the first input byte not yet assigned to a chunk.
+	foreach ( $matches as $data ) {
+		// Collect data for this tag.
+		$tag = array();
+		$tag[1] = $data[0][1];                       // Start position
+		$tag[2] = strpos( $text, ']', $tag[1] );     // End position (first "]" at or after the start)
+		$tag[3] = $tag[2] - $tag[1] + 1;             // Length
+		$tag[0] = substr( $text, $tag[1], $tag[3] ); // Tag text
+		$tag[4] = true;                              // Is it a shortcode?
+		$tag[5] = false;                             // Is it HTML?
+
+		// Was there any text before this tag?
+		if ( $tag[1] > $pos ) {
+			$plain = array();
+			$plain[1] = $pos;
+			$plain[2] = $tag[1] - 1;
+			$plain[3] = $plain[2] - $plain[1] + 1;
+			$plain[0] = substr( $text, $plain[1], $plain[3] );
+			$plain[4] = false;
+			$plain[5] = false;
+			$results[] = $plain;
+		}
+
+		$results[] = $tag;
+
+		// Is this an enclosing tag?  (True whenever the full match extends past the first "]".)
+		if ( strlen( $data[0][0] ) > $tag[3] ) {
+			$close = array();
+			// Test against '' instead of empty(): enclosed content of "0" is still real text.
+			if ( isset( $data[5][0] ) && '' !== $data[5][0] ) {
+				$plain = array();
+				$plain[0] = $data[5][0];
+				$plain[1] = $data[5][1];
+				$plain[2] = $plain[1] + strlen( $plain[0] ) - 1;
+				$plain[3] = $plain[2] - $plain[1] + 1;
+				$plain[4] = false;
+				$plain[5] = false;
+				$results[] = $plain;
+
+				$close[1] = $plain[2] + 1;
+			} else {
+				$close[1] = $tag[2] + 1;
+			}
+
+			$close[2] = $tag[1] + strlen( $data[0][0] ) - 1;
+			$close[3] = $close[2] - $close[1] + 1;
+			$close[0] = substr( $text, $close[1], $close[3] );
+			$close[4] = true;
+			$close[5] = false;
+			$results[] = $close;
+
+			$pos = $close[2] + 1;
+		} else {
+			$pos = $tag[2] + 1;
+		}
+	}
+
+	// Was there any text after the last tag?
+	if ( $pos < strlen( $text ) ) {
+		$plain = array();
+		$plain[0] = substr( $text, $pos );
+		$plain[1] = $pos;
+		$plain[3] = strlen( $plain[0] );
+		$plain[2] = $plain[1] + $plain[3] - 1;
+		$plain[4] = false;
+		$plain[5] = false;
+		$results[] = $plain;
+	}
+
+	// Now remove the shortcodes so we can look for the HTML.
+	$html = array();
+	foreach ( $results as $chunk ) {
+		// Is this chunk a shortcode tag?  Mask it with spaces so offsets stay aligned with $text.
+		if ( $chunk[4] ) {
+			$html[] = str_repeat( ' ', $chunk[3] );
+		} else {
+			$html[] = $chunk[0];
+		}
+	}
+	$html = implode( '', $html );
+
+	// Now look for HTML.  If there are any nested shortcodes, avoid them,
+	// but do not allow HTML inside the attributes of nested shortcodes.
+	// As in the Shortcode API, there is no recursion by default.
+
+	$comment_regex =
+		  '!'           // Start of comment, after the <.
+		. '(?:'         // Unroll the loop: Consume everything until --> is found.
+		.     '-(?!->)' // Dash not followed by end of comment.
+		.     '[^\-]*+' // Consume non-dashes.
+		. ')*+'         // Loop possessively.
+		. '(?:-->)?';   // End of comment. If not found, match all input.
+
+	$shortcode_regex =
+		  '\['            // Find start of shortcode.
+		. '[\/\[]?'       // Shortcodes may begin with [/ or [[
+		. '[^\s\/\[\]<>]' // No whitespace before name.
+		. '[^\[\]<>]*+'   // Shortcodes do not contain other shortcodes. Possessive critical.
+		. '\]'            // Find end of shortcode.
+		. '\]?';          // Shortcodes may end with ]]
+
+	$regex =
+		  '/('                   // Capture HTML.
+		.     '<'                // Find start of element.
+		.     '(?(?=!--)'        // Is this a comment?
+		.         $comment_regex // Find end of comment.
+		.     '|'
+		.         '[^>]+>'       // Find end of element.
+		.     ')'
+		. ')|('                  // Capture shortcodes.
+		.     $shortcode_regex   // Find shortcodes.
+		. ')/s';
+
+	preg_match_all( $regex, $html, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER );
+
+	foreach ( $matches as $data ) {
+		// Collect data for this tag.
+		$tag = array();
+		$tag[0] = $data[0][0];           // Tag text
+		$tag[1] = $data[0][1];           // Start position
+		$tag[3] = strlen( $tag[0] );     // Length
+		$tag[2] = $tag[1] + $tag[3] - 1; // End position
+		$tag[4] = empty( $data[1][0] );  // Is it a shortcode?
+		$tag[5] = ! $tag[4];             // Is it HTML?
+		$results2[] = $tag;
+	}
+
+	// Look for standalone HTML tags.
+	// Look for HTML tags surrounding shortcodes.
+	// Look for shortcodes enclosed within shortcodes.
+
+	foreach ( $results2 as $tag ) {
+		// $tag is a by-value copy of the $results2 entry.  Its text came from the masked $html,
+		// so $tag[0] is re-read from $text below before the node is stored in $results.
+
+		// Is it HTML?
+		if ( $tag[5] ) {
+			// Now loop through the shortcodes to see if any of them are inside an HTML element.
+			// Texturization has been a left-to-right process, and when inside HTML,
+			// the $no_texturize_tags list gets tested but the $no_texturize_shortcodes list does not.
+			for ( $i = 0; $i < count( $results ); $i++ ) {
+				// Test for intersection of HTML in $results2 and shortcode chunks in $results.
+				$chunk_start = $results[$i][1];
+				$chunk_end = $results[$i][2];
+
+				if ( $chunk_start > $tag[2] ) {
+					break;
+				}
+
+				$html_starts_in_chunk = $tag[1] >= $chunk_start && $tag[1] <= $chunk_end;
+				$html_ends_in_chunk   = $tag[2] >= $chunk_start && $tag[2] <= $chunk_end;
+				$chunk_starts_in_html = $chunk_start >= $tag[1] && $chunk_start <= $tag[2];
+				$chunk_ends_in_html   = $chunk_end >= $tag[1] && $chunk_end <= $tag[2];
+
+				if ( ! $html_starts_in_chunk && ! $html_ends_in_chunk && ! $chunk_starts_in_html && ! $chunk_ends_in_html ) {
+					continue;
+				}
+
+				// Up here, check for plain text items for start of HTML.
+
+				// Is the chunk a shortcode?
+				if ( $results[$i][4] ) {
+
+					// Our regexp never looks for HTML inside of shortcodes,
+					// so the above tests are adequate to determine this chunk
+					// is a shortcode contained within an HTML element.
+
+					// Delete the shortcode node.  It's getting merged into the HTML node.
+					array_splice( $results, $i--, 1 );
+
+				// Deal with the plain text chunk(s) that needs to be marked as HTML.
+				} elseif ( $html_starts_in_chunk ) {
+					if ( $tag[2] < $chunk_end ) {
+						// We're going to need at least one extra node, so figure that out first.
+						$plain = array();
+						$plain[1] = $tag[2] + 1;
+						$plain[2] = $chunk_end;
+						$plain[3] = $plain[2] - $plain[1] + 1;
+						$plain[0] = substr( $results[$i][0], -$plain[3] );
+						$plain[4] = false;
+						$plain[5] = false;
+						array_splice( $results, $i + 1, 0, array( $plain ) );
+					}
+					if ( $tag[1] > $chunk_start ) {
+						// Truncate the plain text.
+						$results[$i][2] = $tag[1] - 1;
+						$results[$i][3] = $results[$i][2] - $results[$i][1] + 1;
+						$results[$i][0] = substr( $results[$i][0], 0, $results[$i][3] );
+
+						// Insert the HTML node after.
+						$tag[0] = substr( $text, $tag[1], $tag[3] );
+						array_splice( $results, ++$i, 0, array( $tag ) );
+					} else {
+						// The tag and the chunk could be identical, but just overwrite the chunk for simplicity.
+						$tag[0] = substr( $text, $tag[1], $tag[3] );
+						$results[$i] = $tag;
+					}
+				} elseif ( $html_ends_in_chunk ) {
+					if ( $tag[2] < $chunk_end ) {
+						// Truncate the plain text.
+						$results[$i][1] = $tag[2] + 1;
+						$results[$i][3] = $results[$i][2] - $results[$i][1] + 1;
+						$results[$i][0] = substr( $results[$i][0], -$results[$i][3] );
+					} else {
+						// This chunk just duplicates the end of the tag.  Remove it now.
+						array_splice( $results, $i--, 1 );
+					}
+				} else {
+					// Any other chunks (text between shortcodes, inside HTML) can be removed now.
+					array_splice( $results, $i--, 1 );
+				}
+			}
+		} else {
+			// It's a shortcode tag.
+			// Now loop through the plain text chunks to see if any of them look like shortcodes enclosed in shortcodes.
+			// We still want to avoid texturizing shortcodes, but we do not intend to run the full regexp recursively.
+			for ( $i = 0; $i < count( $results ); $i++ ) {
+				// Test for intersection of shortcode-like tags in $results2 and plain text chunks in $results.
+				$chunk_start = $results[$i][1];
+				$chunk_end = $results[$i][2];
+
+				if ( $chunk_start > $tag[2] ) {
+					break;
+				}
+
+				$tag_starts_in_chunk = $tag[1] >= $chunk_start && $tag[1] <= $chunk_end;
+				$tag_ends_in_chunk   = $tag[2] >= $chunk_start && $tag[2] <= $chunk_end;
+
+				if ( ! $tag_starts_in_chunk || ! $tag_ends_in_chunk ) {
+					continue;
+				}
+
+				if ( $tag[2] < $chunk_end ) {
+					// We're going to need at least one extra node, so figure that out first.
+					$plain = array();
+					$plain[1] = $tag[2] + 1;
+					$plain[2] = $chunk_end;
+					$plain[3] = $plain[2] - $plain[1] + 1;
+					$plain[0] = substr( $results[$i][0], -$plain[3] );
+					$plain[4] = false;
+					$plain[5] = false;
+					array_splice( $results, $i + 1, 0, array( $plain ) );
+				}
+				if ( $tag[1] > $chunk_start ) {
+					// Truncate the plain text.
+					$results[$i][2] = $tag[1] - 1;
+					$results[$i][3] = $results[$i][2] - $results[$i][1] + 1;
+					$results[$i][0] = substr( $results[$i][0], 0, $results[$i][3] );
+
+					// Insert the shortcode node after.
+					$tag[0] = substr( $text, $tag[1], $tag[3] );
+					array_splice( $results, ++$i, 0, array( $tag ) );
+				} else {
+					// The tag and the chunk could be identical, but just overwrite the chunk for simplicity.
+					$tag[0] = substr( $text, $tag[1], $tag[3] );
+					$results[$i] = $tag;
+				}
+			}
+		}
+	}
+	return $results;
+}
+
+/**
  * Replaces double line-breaks with paragraph elements.
  *
  * A group of regex replaces used to identify text formatted with newlines and
Index: tests/phpunit/tests/formatting/WPTexturize.php
===================================================================
--- tests/phpunit/tests/formatting/WPTexturize.php	(revision 30135)
+++ tests/phpunit/tests/formatting/WPTexturize.php	(working copy)
@@ -1197,7 +1197,7 @@
 			),
 			array(
 				'[is it wise to <a title="allow user content ] here? hmm"> maybe </a> ]', // HTML corruption is a known bug.  See tickets #12690 and #29557.
-				'[is it wise to <a title="allow user content ] here? hmm&#8221;> maybe </a> ]',
+				'[is it wise to <a title="allow user content ] here? hmm"> maybe </a> ]',
 			),
 			array(
 				'[caption - is it wise to <a title="allow user content ] here? hmm"> maybe </a> ]',
@@ -1377,7 +1377,7 @@
 			),
 			array(
 				'[Let\'s get crazy<input>[caption code="<a href=\'?a[]=100\'>hello</a>"]</input>world]', // caption shortcode is invalid here because it contains [] chars.
-				'[Let&#8217;s get crazy<input>[caption code=&#8221;<a href=\'?a[]=100\'>hello</a>&#8220;]</input>world]',
+				'[Let&#8217;s get crazy<input>[caption code="<a href=\'?a[]=100&#8217;>hello</a>&#8220;]</input>world]',
 			),
 		);
 	}
@@ -1779,7 +1779,7 @@
 			),
 			array(
 				'[gal>ery ...]',
-				'[gal>ery ...]',
+				'[gal>ery &#8230;]',
 			),
 			array(
 				'[randomthing param="test"]',
