208 | | $comment_regex = |
209 | | '!' // Start of comment, after the <. |
210 | | . '(?:' // Unroll the loop: Consume everything until --> is found. |
211 | | . '-(?!->)' // Dash not followed by end of comment. |
212 | | . '[^\-]*+' // Consume non-dashes. |
213 | | . ')*+' // Loop possessively. |
214 | | . '(?:-->)?'; // End of comment. If not found, match all input. |
215 | | |
216 | | $shortcode_regex = |
217 | | '\[' // Find start of shortcode. |
218 | | . '[\/\[]?' // Shortcodes may begin with [/ or [[ |
219 | | . '[^\s\/\[\]]' // No whitespace before name. |
220 | | . '[^\[\]]*+' // Shortcodes do not contain other shortcodes. Possessive critical. |
221 | | . '\]' // Find end of shortcode. |
222 | | . '\]?'; // Shortcodes may end with ]] |
223 | | |
224 | | $regex = |
225 | | '/(' // Capture the entire match. |
226 | | . '<' // Find start of element. |
227 | | . '(?(?=!--)' // Is this a comment? |
228 | | . $comment_regex // Find end of comment. |
229 | | . '|' |
230 | | . '[^>]+>' // Find end of element. |
231 | | . ')' |
232 | | . '|' |
233 | | . $shortcode_regex // Find shortcodes. |
234 | | . ')/s'; |
| 209 | foreach ( $textarr as $i => &$curl ) { |
| 210 | $type = $typearr[ $i ]; |
| 274 | * Look for 1. HTML comments, 2. then possible shortcodes, 3. then HTML tags (not comments). |
| 275 | * |
| 276 | * @access private |
| 277 | * |
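| 278 | * @param string $text    Input to split; passed by reference and consumed as pieces are extracted. |
| 279 | * @param array  $textarr Passed by reference; receives each extracted piece, in document order. |
| 280 | * @param array  $typearr Passed by reference; receives the type of each piece ('comment', 'shortcode', 'tag' or 'text'). |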
| 281 | */ |
function _wptexturize_split_extract_all( &$text, &$textarr, &$typearr ) {

	// Walk the input by offset instead of re-slicing the remainder after every
	// comment; the cursor always advances, so the loop cannot run away.
	$cursor = 0;

	while ( is_int( $open = strpos( $text, '<!--', $cursor ) ) ) {

		if ( $open > $cursor ) {
			// Whatever sits between the previous comment and this one is
			// handed on to the shortcode/tag splitter.
			$html = substr( $text, $cursor, $open - $cursor );
			_wptexturize_split_extract_shortcodes_and_tags( $html, $textarr, $typearr );
		}

		// Search from the opener itself so the minimal comment '<!-->' is found.
		$close = strpos( $text, '-->', $open );

		if ( ! is_int( $close ) ) {
			// Unclosed comment: the rest of the input is one comment.
			// Note the tag splitter treats unclosed tags as text, not tags.
			$textarr[] = substr( $text, $open );
			$typearr[] = 'comment';
			$text      = '';
			return;
		}

		// Push the comment, including its terminator, and move past it.
		$cursor    = $close + 3;
		$textarr[] = substr( $text, $open, $cursor - $open );
		$typearr[] = 'comment';
	}

	if ( $cursor > 0 ) {
		// Before PHP 8.0 substr() returns false when the offset equals the
		// string length; the cast keeps the by-reference $text a string.
		$text = (string) substr( $text, $cursor );
	}

	// Process text after the last comment.
	if ( '' != $text ) {
		_wptexturize_split_extract_shortcodes_and_tags( $text, $textarr, $typearr );
	}
}
| 319 | |
/**
 * Look for 1. possible shortcodes, 2. then HTML tags (not comments).
 *
 * Each shortcode-like match is appended to $textarr with type 'shortcode'.
 * The text around the matches is passed to _wptexturize_split_extract_tags(),
 * which appends its own 'tag' and 'text' pieces.
 *
 * Text must have comments already stripped.
 *
 * @access private
 *
 * @param string $text    Input to split; passed by reference and consumed as pieces are extracted.
 * @param array  $textarr Passed by reference; receives each extracted piece, in document order.
 * @param array  $typearr Passed by reference; receives the type of each piece in $textarr.
 */
function _wptexturize_split_extract_shortcodes_and_tags( &$text, &$textarr, &$typearr ) {

	$shortcode_regex =
		'/'
		. '\['           // Find start of shortcode.
		. '[\/\[]?'      // Shortcodes may begin with [/ or [[
		. '[^\s\/\[\]]'  // No whitespace before name.
		. '[^\[\]]*+'    // Shortcodes do not contain other shortcodes. Possessive critical.
		. '\]'           // Find end of shortcode.
		. '\]?'          // Shortcodes may end with ]]
		. '/';

	// Cheap pre-check: only run the regex when a '[' is present at all.
	// The pattern has no anchors or look-behind and never matches the empty
	// string, so one preg_match_all() pass finds the same matches as
	// repeatedly matching against the shrinking remainder.
	if ( is_int( strpos( $text, '[' ) )
		&& preg_match_all( $shortcode_regex, $text, $matches, PREG_OFFSET_CAPTURE ) ) {

		$cursor = 0;

		foreach ( $matches[0] as $match ) {
			list( $shortcode, $offset ) = $match;

			if ( $offset > $cursor ) {
				// Process text before the match.
				$html = substr( $text, $cursor, $offset - $cursor );
				_wptexturize_split_extract_tags( $html, $textarr, $typearr );
			}

			// Push the match and advance past it.
			$textarr[] = $shortcode;
			$typearr[] = 'shortcode';
			$cursor    = $offset + strlen( $shortcode );
		}

		// Before PHP 8.0 substr() returns false when the offset equals the
		// string length; the cast keeps the by-reference $text a string.
		$text = (string) substr( $text, $cursor );
	}

	// Process text after the last shortcode.
	if ( '' != $text ) {
		_wptexturize_split_extract_tags( $text, $textarr, $typearr );
	}
}
| 371 | |
/**
 * Look for HTML tags (not comments).
 *
 * Everything from a '<' up to and including the next '>' is appended to
 * $textarr with type 'tag' (minimally '<>'); everything else is appended
 * with type 'text'. A '<' with no following '>' is not a tag: the rest of
 * the input from that '<' onward is appended as 'text'.
 *
 * Text must have comments already stripped.
 *
 * @access private
 *
 * @param string $text    Input to split; passed by reference. On return it holds the text
 *                        after the last tag, or '' if the input ended in a tag or unclosed tag.
 * @param array  $textarr Passed by reference; receives each extracted piece, in document order.
 * @param array  $typearr Passed by reference; receives 'tag' or 'text' for each piece in $textarr.
 */
function _wptexturize_split_extract_tags( &$text, &$textarr, &$typearr ) {

	// Walk the input by offset instead of re-slicing the remainder after every
	// tag; the cursor always advances, so the loop cannot run away.
	$cursor = 0;

	while ( is_int( $open = strpos( $text, '<', $cursor ) ) ) {

		if ( $open > $cursor ) {
			// Push the text before the '<'.
			$textarr[] = substr( $text, $cursor, $open - $cursor );
			$typearr[] = 'text';
		}

		$close = strpos( $text, '>', $open );

		if ( ! is_int( $close ) ) {
			// Unclosed tag. Treat as text, not tag.
			// Note the comment splitter treats unclosed comments as comments.
			$textarr[] = substr( $text, $open );
			$typearr[] = 'text';
			$text      = '';
			return;
		}

		// Push the tag, including its closing '>', and move past it.
		$cursor    = $close + 1;
		$textarr[] = substr( $text, $open, $cursor - $open );
		$typearr[] = 'tag';
	}

	if ( $cursor > 0 ) {
		// Before PHP 8.0 substr() returns false when the offset equals the
		// string length; the cast keeps the by-reference $text a string.
		$text = (string) substr( $text, $cursor );
	}

	// Push text after the last tag.
	if ( '' != $text ) {
		$textarr[] = $text;
		$typearr[] = 'text';
	}
}
| 422 | |
| 423 | /** |