Make WordPress Core

Changeset 31423


Ignore:
Timestamp:
02/11/2015 10:17:27 PM (10 years ago)
Author:
wonderboymusic
Message:

Use a regular expression instead of `DOMDocument` when protecting `<pre>` tags in `WP_oEmbed::_strip_newlines()`. It is incredibly difficult to preserve character encoding and whitespace when parsing via `DOMDocument`.

See #31214.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/wp-includes/class-oembed.php

    r31416 r31423  
    564 564        }
    565 565
    566         $pre = array();
    567         $tokens = array();
    568         if ( class_exists( 'DOMDocument' ) ) {
    569             $token = '__PRE__';
    570             $replace = array();
    571             $count = 1;
    572 
    573             $dom = new DOMDocument();
    574             $dom->loadHTML( $html );
    575             $tags = $dom->getElementsByTagName( 'pre' );
    576             foreach ( $tags as $i => $tag ) {
    577                 $tag_html = $dom->saveHTML( $tag );
    578                 $tag_token = $token . $i;
    579                 $replace[ $tag_token ] = $tag_html;
    580 
    581                 $html = str_replace( $tag_html, $tag_token, $html, $count );
    582             }
    583             $pre = array_values( $replace );
    584             $tokens = array_keys( $replace );
    585         }
    586 
    587         $stripped = str_replace( array( "\r\n", "\n" ), '', $html );
     566        $count = 1;
     567        $found = array();
     568        $token = '__PRE__';
     569        $search = array( "\t", "\n", "\r", ' ' );
     570        $replace = array( '__TAB__', '__NL__', '__CR__', '__SPACE__' );
     571        $tokenized = str_replace( $search, $replace, $html );
     572
     573        preg_match_all( '#(<pre[^>]*>.+?</pre>)#i', $tokenized, $matches, PREG_SET_ORDER );
     574        foreach ( $matches as $i => $match ) {
     575            $tag_html = str_replace( $replace, $search, $match[0] );
     576            $tag_token = $token . $i;
     577
     578            $found[ $tag_token ] = $tag_html;
     579            $html = str_replace( $tag_html, $tag_token, $html, $count );
     580        }
     581
     582        $replaced = str_replace( $replace, $search, $html );
     583        $stripped = str_replace( array( "\r\n", "\n" ), '', $replaced );
     584        $pre = array_values( $found );
     585        $tokens = array_keys( $found );
     586
    588 587        return str_replace( $tokens, $pre, $stripped );
    589 588    }
Note: See TracChangeset for help on using the changeset viewer.