Index: src/wp-includes/formatting.php
===================================================================
--- src/wp-includes/formatting.php	(revision 30135)
+++ src/wp-includes/formatting.php	(working copy)
@@ -205,69 +205,34 @@
 
 	// Look for shortcodes and HTML elements.
 
-	$comment_regex =
-		  '!'           // Start of comment, after the <.
-		. '(?:'         // Unroll the loop: Consume everything until --> is found.
-		.     '-(?!->)' // Dash not followed by end of comment.
-		.     '[^\-]*+' // Consume non-dashes.
-		. ')*+'         // Loop possessively.
-		. '(?:-->)?';   // End of comment. If not found, match all input.
+	$chunks = wptexturize_parse( $text );
+	$textarr = array();
 
-	$shortcode_regex =
-		  '\['          // Find start of shortcode.
-		. '[\/\[]?'     // Shortcodes may begin with [/ or [[
-		. '[^\s\/\[\]]' // No whitespace before name.
-		. '[^\[\]]*+'   // Shortcodes do not contain other shortcodes. Possessive critical.
-		. '\]'          // Find end of shortcode.
-		. '\]?';        // Shortcodes may end with ]]
+	foreach ( $chunks as $chunk ) {
+		// Only call _wptexturize_pushpop_element if $chunk is a tag.
 
-	$regex =
-		  '/('                   // Capture the entire match.
-		.     '<'                // Find start of element.
-		.     '(?(?=!--)'        // Is this a comment?
-		.         $comment_regex // Find end of comment.
-		.     '|'
-		.         '[^>]+>'       // Find end of element.
-		.     ')'
-		. '|'
-		.     $shortcode_regex   // Find shortcodes.
-		. ')/s';
+		if ( $chunk[5] ) {
+			if ( '<!--' !== substr( $chunk[0], 0, 4 ) ) {
+				// This is an HTML element, not a comment.
+				_wptexturize_pushpop_element( $chunk[0], $no_texturize_tags_stack, $no_texturize_tags );
+			}
+			$textarr[] = $chunk[0];
 
-	$textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
+		} elseif ( $chunk[4] ) {
+			if ( '[[' !== substr( $chunk[0], 0, 2 ) && ']]' !== substr( $chunk[0], -2 ) ) {
+				// Looks like a normal, non-escaped shortcode.
+				_wptexturize_pushpop_element( $chunk[0], $no_texturize_shortcodes_stack, $no_texturize_shortcodes );
+			}
+			$textarr[] = $chunk[0];
 
-	foreach ( $textarr as &$curl ) {
-		// Only call _wptexturize_pushpop_element if $curl is a delimiter.
-		$first = $curl[0];
-		if ( '<' === $first && '<!--' === substr( $curl, 0, 4 ) ) {
-			// This is an HTML comment delimeter.
-
-			continue;
-
-		} elseif ( '<' === $first && '>' === substr( $curl, -1 ) ) {
-			// This is an HTML element delimiter.
-
-			_wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
-
-		} elseif ( '' === trim( $curl ) ) {
+		} elseif ( '' === trim( $chunk[0] ) ) {
 			// This is a newline between delimiters.  Performance improves when we check this.
+			$textarr[] = $chunk[0];
 
-			continue;
-
-		} elseif ( '[' === $first && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) {
-			// This is a shortcode delimiter.
-
-			if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) {
-				// Looks like a normal shortcode.
-				_wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes );
-			} else {
-				// Looks like an escaped shortcode.
-				continue;
-			}
-
 		} elseif ( empty( $no_texturize_shortcodes_stack ) && empty( $no_texturize_tags_stack ) ) {
 			// This is neither a delimiter, nor is this content inside of no_texturize pairs.  Do texturize.
 
-			$curl = str_replace( $static_characters, $static_replacements, $curl );
+			$curl = str_replace( $static_characters, $static_replacements, $chunk[0] );
 
 			if ( false !== strpos( $curl, "'" ) ) {
 				$curl = preg_replace( $dynamic_characters['apos'], $dynamic_replacements['apos'], $curl );
@@ -284,6 +249,10 @@
 				// Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one!
 				$curl = preg_replace( '/\b(\d(?(?<=0)[\d\.,]+|[\d\.,]*))x(-?\d[\d\.,]*)\b/', '$1&#215;$2', $curl );
 			}
+
+			$textarr[] = $curl;
+		} else {
+			$textarr[] = $chunk[0];
 		}
 	}
 	$text = implode( '', $textarr );
@@ -349,6 +318,342 @@
 }
 
 /**
+ * Parse any string into separate chunks of plain text, HTML, and shortcodes.
+ *
+ * The return value is a 2-dimensional array of strings and metadata ordered
+ * by input, similar to a split string.  Each node has these keys and values:
+ *
+ * key - type  - value
+ * -------------------
+ * 0 - string - The text that was split into this node from the input.
+ * 1 - int    - Start position in the input (byte offset).
+ * 2 - int    - End position in the input (byte offset, inclusive).
+ * 3 - int    - Length of text.  Always equal to End - Start + 1.
+ * 4 - bool   - Shortcode flag.
+ * 5 - bool   - HTML flag.
+ * 6 - Reserved for future use to indicate the node is inline or block.
+ *
+ * The string in key 0 is plain text when keys 4 and 5 are both false.
+ *
+ * @since 4.0.1
+ * @param string $text The user input that needs to be texturized.
+ * @return array Structured version of $text with its HTML and shortcodes separated.
+ */
+function wptexturize_parse( $text ) {
+	$results = array();  // First pass: shortcodes matched by get_shortcode_regex() and the text around them.
+	$results2 = array(); // Second pass: HTML and shortcode-like matches; read-only once filled.
+	$final = array();    // Stores the output of the parser.
+
+	// Find shortcodes
+	$regex = '/' . get_shortcode_regex() . '/s';
+
+	preg_match_all( $regex, $text, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER );
+
+	$pos = 0; // Offset of the first byte of $text not yet stored in $results.
+	foreach ( $matches as $data ) {
+		// Collect data for this tag.
+		$tag = array();
+		$tag[1] = $data[0][1];                       // Start position
+		$tag[2] = strpos( $text, ']', $tag[1] );     // End position: first ] after the start
+		$tag[3] = $tag[2] - $tag[1] + 1;             // Length
+		if ( strlen( $data[0][0] ) === $tag[3] + 1 && ! empty( $data[6][0] ) ) {
+			// This is an escaped, non-enclosing shortcode.
+			$tag[2]++;
+			$tag[3]++;
+		}
+		$tag[0] = substr( $text, $tag[1], $tag[3] ); // Tag text
+		$tag[4] = true;                              // Is it a shortcode?
+		$tag[5] = false;                             // Is it HTML?
+
+		// Was there any text before this tag?
+		if ( $tag[1] > $pos ) {
+			$plain = array();
+			$plain[1] = $pos;
+			$plain[2] = $tag[1] - 1;
+			$plain[3] = $plain[2] - $plain[1] + 1;
+			$plain[0] = substr( $text, $plain[1], $plain[3] );
+			$plain[4] = false;
+			$plain[5] = false;
+			$results[] = $plain;
+		}
+
+		$results[] = $tag;
+
+		// Is this an enclosing tag?  (The full match is longer than the opening tag.)
+		if ( strlen( $data[0][0] ) > $tag[3] ) {
+			$close = array();
+
+			if ( ! empty( $data[5][0] ) ) {
+				$plain = array();
+				$plain[0] = $data[5][0];
+				$plain[1] = $data[5][1];
+				$plain[2] = $plain[1] + strlen( $plain[0] ) - 1;
+				$plain[3] = $plain[2] - $plain[1] + 1;
+				$plain[4] = false;
+				$plain[5] = false;
+				$results[] = $plain;
+
+				$close[1] = $plain[2] + 1;
+			} else {
+				$close[1] = $tag[2] + 1;
+			}
+
+			$close[2] = $tag[1] + strlen( $data[0][0] ) - 1;
+			$close[3] = $close[2] - $close[1] + 1;
+			$close[0] = substr( $text, $close[1], $close[3] );
+			$close[4] = true;
+			$close[5] = false;
+			$results[] = $close;
+
+			$pos = $close[2] + 1;
+		} else {
+			$pos = $tag[2] + 1;
+		}
+	}
+
+	// Was there any text after the last tag?
+	if ( $pos < strlen( $text ) ) {
+		$plain = array();
+		$plain[0] = substr( $text, $pos );
+		$plain[1] = $pos;
+		$plain[3] = strlen( $plain[0] );
+		$plain[2] = $plain[1] + $plain[3] - 1;
+		$plain[4] = false;
+		$plain[5] = false;
+		$results[] = $plain;
+	}
+
+	// Now blank out the shortcodes (same length, so offsets still match $text) so we can look for the HTML.
+	$html = array();
+	foreach ( $results as &$chunk ) {
+		// Is this chunk a shortcode tag?
+		if ( $chunk[4] ) {
+			$html[] = str_repeat( ' ', $chunk[3] );
+		} else {
+			$html[] = $chunk[0];
+		}
+	}
+	unset( $chunk );
+	$html = implode( '', $html );
+
+	// Now look for HTML.  If there are any nested shortcodes, avoid them,
+	// but do not allow HTML inside the attributes of nested shortcodes.
+	// As in the Shortcode API, there is no recursion by default.
+
+	$comment_regex =
+		  '!'           // Start of comment, after the <.
+		. '(?:'         // Unroll the loop: Consume everything until --> is found.
+		.     '-(?!->)' // Dash not followed by end of comment.
+		.     '[^\-]*+' // Consume non-dashes.
+		. ')*+'         // Loop possessively.
+		. '(?:-->)?';   // End of comment. If not found, match all input.
+
+	$shortcode_regex =
+		  '\['            // Find start of shortcode.
+		. '[\/\[]?'       // Shortcodes may begin with [/ or [[
+		. '[^\s\/\[\]<>]' // No whitespace before name.
+		. '[^\[\]<>]*+'   // Shortcodes do not contain other shortcodes. Possessive critical.
+		. '\]'            // Find end of shortcode.
+		. '\]?';          // Shortcodes may end with ]]
+
+	$regex =
+		  '/('                   // Capture HTML.
+		.     '<'                // Find start of element.
+		.     '(?(?=!--)'        // Is this a comment?
+		.         $comment_regex // Find end of comment.
+		.     '|'
+		.         '[^>]+>'       // Find end of element.
+		.     ')'
+		. ')|('                  // Capture shortcodes.
+		.     $shortcode_regex   // Find shortcodes.
+		. ')/s';
+
+	preg_match_all( $regex, $html, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER );
+	unset( $html );
+
+	foreach ( $matches as $data ) {
+		// Collect data for this tag.
+		$tag = array();
+		$tag[1] = $data[0][1];                       // Start position
+		$tag[3] = strlen( $data[0][0] );             // Length
+		$tag[2] = $tag[1] + $tag[3] - 1;             // End position
+		$tag[0] = substr( $text, $tag[1], $tag[3] ); // Tag text - Using $text to capture original input.
+		$tag[4] = empty( $data[1][0] );              // Is it a shortcode?
+		$tag[5] = ! $tag[4];                         // Is it HTML?
+		$results2[] = $tag;
+	}
+
+	// Look for standalone HTML tags.
+	// Look for HTML tags surrounding shortcodes.
+	// Look for shortcodes enclosed within shortcodes.
+
+	$next_i = 0; // When inner-looping, there is no need to visit the same nodes every time, so keep track.
+	$next_start = 0; // Keep track of how much output has been saved.
+
+	foreach ( $results2 as $tag ) {
+		$tag_start_found = false;
+
+		// Is it HTML?
+		if ( $tag[5] ) {
+			// Now loop through the shortcodes to see if any of them are inside an HTML element.
+			// Texturization has been a left-to-right process, and when inside HTML,
+			// the $no_texturize_tags list gets tested but the $no_texturize_shortcodes list does not.
+			for ( $i = $next_i; $i < count( $results ); $i++ ) {
+				// Test for intersection of HTML in $results2 and shortcode chunks in $results.
+				$chunk_start = $results[$i][1];
+				$chunk_end = $results[$i][2];
+
+				if ( $chunk_start > $tag[2] ) {
+					break;
+				}
+
+				$html_starts_in_chunk = $tag[1] >= $chunk_start && $tag[1] <= $chunk_end;
+				$html_ends_in_chunk   = $tag[2] >= $chunk_start && $tag[2] <= $chunk_end;
+				$chunk_starts_in_html = $chunk_start >= $tag[1] && $chunk_start <= $tag[2];
+				$chunk_ends_in_html   = $chunk_end >= $tag[1] && $chunk_end <= $tag[2];
+
+				if ( ! $html_starts_in_chunk && ! $html_ends_in_chunk && ! $chunk_starts_in_html && ! $chunk_ends_in_html ) {
+					if ( ! $tag_start_found ) {
+						$chunk = $results[$i];
+						if ( $next_start > $chunk[1] ) {
+							$chunk[1] = $next_start;
+							$chunk[3] = $chunk[2] - $chunk[1] + 1;
+							$chunk[0] = substr( $text, $chunk[1], $chunk[3] );
+						}
+						$final[] = $chunk;
+						$next_i = $i + 1;
+						$next_start = $results[$i][2] + 1;
+					}
+					continue;
+				}
+
+				// Is the chunk a shortcode?
+				if ( $results[$i][4] ) {
+
+					// Our regexp never looks for HTML inside of shortcodes,
+					// so the above tests are adequate to determine this chunk
+					// is a shortcode contained within an HTML element.
+
+					// Ignore the shortcode node.  It's getting merged into the HTML node.
+					$next_i = $i + 1;
+					$next_start = $results[$i][2] + 1;
+
+				// Deal with the plain text chunk(s) that needs to be marked as HTML.
+				} elseif ( $html_starts_in_chunk ) {
+					$tag_start_found = true;
+					if ( $tag[1] > $chunk_start ) {
+						// Truncate the plain text.
+						$plain = array();
+						$plain[1] = $next_start;
+						$plain[2] = $tag[1] - 1;
+						$plain[3] = $plain[2] - $plain[1] + 1;
+						$plain[0] = substr( $text, $plain[1], $plain[3] );
+						$plain[4] = false;
+						$plain[5] = false;
+						$final[] = $plain;
+					}
+					$final[] = $tag;
+					if ( $tag[2] < $chunk_end ) {
+						// Text remains after the tag, so visit this chunk again for the next tag.
+						$next_i = $i;
+						$next_start = $tag[2] + 1;
+						break;
+					} else {
+						$next_i = $i + 1;
+						$next_start = $chunk_end + 1;
+					}
+				} elseif ( $html_ends_in_chunk ) {
+					if ( $tag[2] < $chunk_end ) {
+						$next_i = $i; // Text remains after the tag, so visit this chunk again for the next tag.
+						$next_start = $tag[2] + 1;
+						break; // Required: repeating this iteration for the same tag would never terminate.
+					} else {
+						// This chunk just duplicates the end of the tag.  Ignore it.
+						$next_i = $i + 1;
+						$next_start = $tag[2] + 1;
+					}
+				} else {
+					// Any other chunks (text between shortcodes, inside HTML) can be ignored also.
+					$next_i = $i + 1;
+					$next_start = $chunk_end + 1;
+				}
+			}
+		} else {
+			// It's a shortcode tag.
+			// Now loop through the plain text chunks to see if any of them look like shortcodes enclosed in shortcodes.
+			// We still want to avoid texturizing shortcodes, but we do not intend to run the full regexp recursively.
+			for ( $i = $next_i; $i < count( $results ); $i++ ) {
+				// Test for intersection of shortcode-like tags in $results2 and plain text chunks in $results.
+				$chunk_start = $results[$i][1];
+				$chunk_end = $results[$i][2];
+
+				if ( $chunk_start > $tag[2] ) {
+					break;
+				}
+
+				$tag_starts_in_chunk = $tag[1] >= $chunk_start && $tag[1] <= $chunk_end;
+				$tag_ends_in_chunk   = $tag[2] >= $chunk_start && $tag[2] <= $chunk_end;
+
+				if ( ! $tag_starts_in_chunk || ! $tag_ends_in_chunk ) {
+					if ( ! $tag_start_found ) {
+						$chunk = $results[$i];
+						if ( $next_start > $chunk[1] ) {
+							$chunk[1] = $next_start;
+							$chunk[3] = $chunk[2] - $chunk[1] + 1;
+							$chunk[0] = substr( $text, $chunk[1], $chunk[3] );
+						}
+						$final[] = $chunk;
+						$next_i = $i + 1;
+						$next_start = $results[$i][2] + 1;
+					}
+					continue;
+				}
+
+				$tag_start_found = true;
+
+				if ( $tag[1] > $chunk_start ) {
+					// Truncate the plain text.
+					$plain = array();
+					$plain[1] = $next_start;
+					$plain[2] = $tag[1] - 1;
+					$plain[3] = $plain[2] - $plain[1] + 1;
+					$plain[0] = substr( $text, $plain[1], $plain[3] );
+					$plain[4] = false;
+					$plain[5] = false;
+					$final[] = $plain;
+				}
+				$final[] = $tag;
+				if ( $tag[2] < $chunk_end ) {
+					$next_i = $i; // Text remains after the tag, so visit this chunk again for the next tag.
+					$next_start = $tag[2] + 1;
+					break;
+				} else {
+					$next_i = $i + 1;
+					$next_start = $chunk_end + 1;
+				}
+			}
+		}
+	}
+
+	// Now emit whatever follows the last tag: the unsaved tail of a partly consumed chunk, then the rest.
+	for ( $i = $next_i; $i < count( $results ); $i++ ) {
+		$chunk_start = $results[$i][1];
+		$chunk_end = $results[$i][2];
+		if ( $next_start > $chunk_start ) {
+			$plain = $results[$i];
+			$plain[1] = $next_start;
+			$plain[3] = $plain[2] - $plain[1] + 1;
+			$plain[0] = substr( $text, $plain[1], $plain[3] );
+			$final[] = $plain;
+		} else {
+			$final[] = $results[$i];
+		}
+	}
+
+	return $final;
+}
+
+/**
  * Replaces double line-breaks with paragraph elements.
  *
  * A group of regex replaces used to identify text formatted with newlines and
Index: src/wp-includes/shortcodes.php
===================================================================
--- src/wp-includes/shortcodes.php	(revision 30135)
+++ src/wp-includes/shortcodes.php	(working copy)
@@ -229,7 +229,7 @@
 	$tagnames = array_keys($shortcode_tags);
 	$tagregexp = join( '|', array_map('preg_quote', $tagnames) );
 
-	// WARNING! Do not change this regex without changing do_shortcode_tag() and strip_shortcode_tag()
+	// WARNING! Do not change this regex without changing do_shortcode_tag() and strip_shortcode_tag() and wptexturize_parse()
 	// Also, see shortcode_unautop() and shortcode.js.
 	return
 		  '\\['                              // Opening bracket
Index: tests/phpunit/tests/formatting/WPTexturize.php
===================================================================
--- tests/phpunit/tests/formatting/WPTexturize.php	(revision 30135)
+++ tests/phpunit/tests/formatting/WPTexturize.php	(working copy)
@@ -1197,7 +1197,7 @@
 			),
 			array(
 				'[is it wise to <a title="allow user content ] here? hmm"> maybe </a> ]', // HTML corruption is a known bug.  See tickets #12690 and #29557.
-				'[is it wise to <a title="allow user content ] here? hmm&#8221;> maybe </a> ]',
+				'[is it wise to <a title="allow user content ] here? hmm"> maybe </a> ]',
 			),
 			array(
 				'[caption - is it wise to <a title="allow user content ] here? hmm"> maybe </a> ]',
@@ -1377,7 +1377,7 @@
 			),
 			array(
 				'[Let\'s get crazy<input>[caption code="<a href=\'?a[]=100\'>hello</a>"]</input>world]', // caption shortcode is invalid here because it contains [] chars.
-				'[Let&#8217;s get crazy<input>[caption code=&#8221;<a href=\'?a[]=100\'>hello</a>&#8220;]</input>world]',
+				'[Let&#8217;s get crazy<input>[caption code="<a href=\'?a[]=100&#8217;>hello</a>&#8220;]</input>world]',
 			),
 		);
 	}
@@ -1779,7 +1779,7 @@
 			),
 			array(
 				'[gal>ery ...]',
-				'[gal>ery ...]',
+				'[gal>ery &#8230;]',
 			),
 			array(
 				'[randomthing param="test"]',
