| 208 | | $comment_regex = |
| 209 | | '!' // Start of comment, after the <. |
| 210 | | . '(?:' // Unroll the loop: Consume everything until --> is found. |
| 211 | | . '-(?!->)' // Dash not followed by end of comment. |
| 212 | | . '[^\-]*+' // Consume non-dashes. |
| 213 | | . ')*+' // Loop possessively. |
| 214 | | . '(?:-->)?'; // End of comment. If not found, match all input. |
| 215 | | |
| 216 | | $shortcode_regex = |
| 217 | | '\[' // Find start of shortcode. |
| 218 | | . '[\/\[]?' // Shortcodes may begin with [/ or [[ |
| 219 | | . '[^\s\/\[\]]' // No whitespace before name. |
| 220 | | . '[^\[\]]*+' // Shortcodes do not contain other shortcodes. Possessive critical. |
| 221 | | . '\]' // Find end of shortcode. |
| 222 | | . '\]?'; // Shortcodes may end with ]] |
| 223 | | |
| 224 | | $regex = |
| 225 | | '/(' // Capture the entire match. |
| 226 | | . '<' // Find start of element. |
| 227 | | . '(?(?=!--)' // Is this a comment? |
| 228 | | . $comment_regex // Find end of comment. |
| 229 | | . '|' |
| 230 | | . '[^>]+>' // Find end of element. |
| 231 | | . ')' |
| 232 | | . '|' |
| 233 | | . $shortcode_regex // Find shortcodes. |
| 234 | | . ')/s'; |
| | 209 | foreach ( $textarr as $i => &$curl ) { |
| | 210 | $type = $typearr[ $i ]; |
| | 274 | * Look for 1. HTML comments, 2. then possible shortcodes, 3. then HTML tags (not comments). |
| | 275 | * |
| | 276 | * @access private |
| | 277 | * |
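| | 278 | * @param string $text    Text to split. Passed by reference and modified while segments are extracted. |
| | 279 | * @param array  $textarr Receives the extracted segments, appended in document order. |
| | 280 | * @param array  $typearr Receives one type label per segment ('comment', 'shortcode', 'tag' or 'text'), parallel to $textarr. |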
| | 281 | */ |
function _wptexturize_split_extract_all( &$text, &$textarr, &$typearr ) {

	// Peel one comment off the front of $text per pass. Every pass shortens
	// $text (or leaves the loop), so the scan always terminates.
	while ( false !== ( $open = strpos( $text, '<!--' ) ) ) {

		if ( $open > 0 ) {
			// Anything ahead of the comment may still hold shortcodes and tags.
			$before = substr( $text, 0, $open );
			_wptexturize_split_extract_shortcodes_and_tags( $before, $textarr, $typearr );
			// $text now begins with '<!--'.
			$text = substr( $text, $open );
		}

		// The terminator search starts at the opener itself, so '<!-->' counts
		// as the shortest complete comment.
		$close = strpos( $text, '-->' );

		if ( false === $close ) {
			// Unclosed comment: the whole remainder is recorded as a comment.
			// (Unclosed tags, by contrast, are recorded as text by the tag splitter.)
			$textarr[] = $text;
			$typearr[] = 'comment';
			$text      = '';
			break;
		}

		// Record the comment, terminator included, and step past it.
		$end       = $close + 3;
		$textarr[] = substr( $text, 0, $end );
		$typearr[] = 'comment';
		$text      = substr( $text, $end );
	}

	// Whatever follows the last comment still needs shortcode and tag splitting.
	if ( '' != $text ) {
		_wptexturize_split_extract_shortcodes_and_tags( $text, $textarr, $typearr );
	}
}
| | 319 | |
/**
 * Split text into possible shortcodes, HTML tags and plain text.
 *
 * Shortcode-like spans are located first and appended as 'shortcode' segments.
 * The text lying between (and after) them is handed to
 * _wptexturize_split_extract_tags(), which appends the 'tag' and 'text' segments.
 *
 * HTML comments are not recognised here; the text must have comments already
 * stripped.
 *
 * @access private
 *
 * @param string $text    Text to split. Passed by reference and overwritten as the scan advances.
 * @param array  $textarr Receives the extracted segments, appended in document order.
 * @param array  $typearr Receives one type label per segment ('shortcode', 'tag' or 'text'), parallel to $textarr.
 */
function _wptexturize_split_extract_shortcodes_and_tags( &$text, &$textarr, &$typearr ) {

	$shortcode_regex =
		'/'
		. '\['              // Find start of shortcode.
		. '[\/\[]?'         // Shortcodes may begin with [/ or [[
		. '[^\s\/\[\]]'     // No whitespace before name.
		. '[^\[\]]*+'       // Shortcodes do not contain other shortcodes. Possessive critical.
		. '\]'              // Find end of shortcode.
		. '\]?'             // Shortcodes may end with ]]
		. '/';

	// Byte offset of the first character that has not been emitted yet.
	$cursor = 0;

	// Cheap pre-check: without a '[' there is nothing for the regex to find.
	if ( false !== strpos( $text, '[' ) ) {

		// Resume matching at $cursor instead of re-copying the unprocessed tail
		// for every shortcode. The pattern has no anchors or lookbehind, so
		// matching from an offset finds exactly what matching the tail would.
		while ( preg_match( $shortcode_regex, $text, $matches, PREG_OFFSET_CAPTURE, $cursor ) ) {
			list( $shortcode, $start ) = $matches[0];

			if ( $start > $cursor ) {
				// Text between the previous shortcode and this one may hold tags.
				$html = substr( $text, $cursor, $start - $cursor );
				_wptexturize_split_extract_tags( $html, $textarr, $typearr );
			}

			$textarr[] = $shortcode;
			$typearr[] = 'shortcode';

			// A match is never empty, so the cursor always moves forward.
			$cursor = $start + strlen( $shortcode );
		}
	}

	// Drop everything already emitted; only the tail after the last shortcode is left.
	if ( $cursor > 0 ) {
		$text = substr( $text, $cursor );
	}

	if ( '' !== $text ) {
		_wptexturize_split_extract_tags( $text, $textarr, $typearr );
	}
}
| | 371 | |
/**
 * Split text into HTML tags and plain text.
 *
 * Scans left to right. Everything from a '<' through the next '>' is appended
 * as a 'tag' segment (the shortest possible tag is '<>'); everything else is
 * appended as a 'text' segment. A '<' with no '>' after it is not a tag: the
 * rest of the input from that '<' onward becomes one final 'text' segment.
 *
 * HTML comments get no special treatment here; the text must have comments
 * already stripped.
 *
 * @access private
 *
 * @param string $text    Text to split. Passed by reference. On return it holds the plain text that
 *                        followed the last tag (the whole input when there was no tag), or '' when
 *                        the input ended in an unclosed tag.
 * @param array  $textarr Receives the extracted segments, appended in document order.
 * @param array  $typearr Receives one type label per segment ('tag' or 'text'), parallel to $textarr.
 */
function _wptexturize_split_extract_tags( &$text, &$textarr, &$typearr ) {

	// Byte offset of the first character that has not been emitted yet.
	// Walking the string by offset avoids re-copying the unprocessed tail
	// for every tag.
	$cursor = 0;

	while ( false !== ( $open = strpos( $text, '<', $cursor ) ) ) {

		if ( $open > $cursor ) {
			// Plain text sitting in front of the '<'.
			$textarr[] = substr( $text, $cursor, $open - $cursor );
			$typearr[] = 'text';
		}

		$close = strpos( $text, '>', $open );

		if ( false === $close ) {
			// Unclosed tag. Treat the remainder as text, not as a tag.
			// (Unclosed comments, by contrast, are kept as comments by the comment splitter.)
			$textarr[] = substr( $text, $open );
			$typearr[] = 'text';
			$text      = '';
			return;
		}

		// Emit the tag including its closing '>' and continue right after it.
		$cursor    = $close + 1;
		$textarr[] = substr( $text, $open, $cursor - $open );
		$typearr[] = 'tag';
	}

	// Drop everything already emitted; only the text after the last tag is left.
	if ( $cursor > 0 ) {
		$text = substr( $text, $cursor );
	}

	if ( '' !== $text ) {
		$textarr[] = $text;
		$typearr[] = 'text';
	}
}
| | 422 | |
| | 423 | /** |