Make WordPress Core

Ticket #25856: formatting.diff

File formatting.diff, 9.7 KB (added by mdbitz, 11 years ago)

diff of formatting.php function file with wpautop enhancements

  • formatting.php

     
    172172}
    173173
    174174/**
    175  * Replaces double line-breaks with paragraph elements.
     175 * Formats content to contain proper blocking of elements. The core logic
     176 * is a group of regex replaces to identify block elements and place paragraph
     177 * tags around non-blocked content.
    176178 *
    177  * A group of regex replaces used to identify text formatted with newlines and
    178  * replace double line-breaks with HTML paragraph tags. The remaining
    179  * line-breaks after conversion become <<br />> tags, unless $br is set to '0'
    180  * or 'false'.
     179 * Shortcode tags are treated as normal text (wpautop is unaware of them).
    181180 *
     181 * Details ::
     182 *   object, video, audio and pre tags are excluded from formatting
     183 *   script, style, math, select and svg tags are excluded from line-break conversions
     184 *   Double line breaks are replaced with HTML paragraph tags.
     185 *   Line-breaks after conversion become <<br />> tags, unless $br is set to '0'
     186 *   or 'false'.
     187 *
    182188 * @since 0.71
    183189 *
    184  * @param string $pee The text which has to be formatted.
     190 * @param string $content The text which has to be formatted.
    185191 * @param bool $br Optional. If set, this will convert all remaining line-breaks after paragraphing. Default true.
    186192 * @return string Text which has been converted into correct paragraph tags.
    187193 */
    188 function wpautop($pee, $br = true) {
    189         $pre_tags = array();
    190 
    191         if ( trim($pee) === '' )
     194function wpautop($content, $br = true) {
     195       
     196        if ( trim($content) === '' )
    192197                return '';
    193198
    194         $pee = $pee . "\n"; // just to make things a little easier, pad the end
     199        $save_tags = array();
     200        $no_format_tags = array( 'pre', 'audio', 'video', 'object');
     201        foreach( $no_format_tags as $no_format_tag ) {
     202       
     203                // swap each <pre|audio|video|object> element for a placeholder so it is excluded from formatting
     204                if ( strpos($content, '<' . $no_format_tag) !== false ) {
     205                        $content_parts = explode( '</' . $no_format_tag . '>', $content );
     206                        $last_content = array_pop($content_parts);
     207                        $content = '';
     208                        $i = 0;
    195209
    196         if ( strpos($pee, '<pre') !== false ) {
    197                 $pee_parts = explode( '</pre>', $pee );
    198                 $last_pee = array_pop($pee_parts);
    199                 $pee = '';
    200                 $i = 0;
     210                        foreach ( $content_parts as $content_part ) {
     211                                $start = strpos( $content_part, '<' . $no_format_tag );
    201212
    202                 foreach ( $pee_parts as $pee_part ) {
    203                         $start = strpos($pee_part, '<pre');
     213                                // Malformed html?
     214                                if ( $start === false ) {
     215                                        $content .= $content_part;
     216                                        continue;
     217                                }
    204218
    205                         // Malformed html?
    206                         if ( $start === false ) {
    207                                 $pee .= $pee_part;
    208                                 continue;
     219                                $name = "<" . $no_format_tag ." wp-no-format-tag-$i></" . $no_format_tag . ">";
     220                                $save_tags[$name] = substr( $content_part, $start ) . '</' . $no_format_tag . '>';
     221
     222                                $content .= substr( $content_part, 0, $start ) . $name;
     223                                $i++;
    209224                        }
    210225
    211                         $name = "<pre wp-pre-tag-$i></pre>";
    212                         $pre_tags[$name] = substr( $pee_part, $start ) . '</pre>';
    213 
    214                         $pee .= substr( $pee_part, 0, $start ) . $name;
    215                         $i++;
     226                        $content .= $last_content;
    216227                }
    217 
    218                 $pee .= $last_pee;
    219228        }
    220229
    221         $pee = preg_replace('|<br />\s*<br />|', "\n\n", $pee);
    222         // Space things out a little
    223         $allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|option|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|noscript|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)';
    224         $pee = preg_replace('!(<' . $allblocks . '[^>]*>)!', "\n$1", $pee);
    225         $pee = preg_replace('!(</' . $allblocks . '>)!', "$1\n\n", $pee);
    226         $pee = str_replace(array("\r\n", "\r"), "\n", $pee); // cross-platform newlines
    227         if ( strpos($pee, '<object') !== false ) {
    228                 $pee = preg_replace('|\s*<param([^>]*)>\s*|', "<param$1>", $pee); // no pee inside object/embed
    229                 $pee = preg_replace('|\s*</embed>\s*|', '</embed>', $pee);
     230        $content = preg_replace('|<br />\s*<br />|', "\n\n", $content);
     231       
     232        // skip formatting of predefined tags
     233        $content = preg_replace_callback('/<(script|style|math|select|svg).*?<\/\\1>/si', '_autop_newline_preservation_helper', $content);
     234       
     235        // skip new lines of comments
     236        $content = preg_replace_callback('/<!--.*?-->\s*/', '_autop_newline_preservation_helper', $content);
     237       
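     238        // List of block-level elements. NOTE(review): meant to omit p (it is appended for $allblocks below), but the list still contains 'p' - confirm.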
     239        $blocklist = "table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|noscript|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary";
     240       
     241        $allblocks = '(?:' . $blocklist . '|p)';
     242        $allblocks_p = '(?:' . $blocklist . ')';
     243       
     244        $content = preg_replace('!(<' . $allblocks . '[^>]*>)!i', "\n\n$1", $content);
     245        $content = preg_replace('!(<' . $allblocks . '[^>]*\/>)!i', "$1\n\n", $content);
     246        $content = preg_replace('!(</' . $allblocks . '>)!i', "$1\n\n", $content);
     247        $content = str_replace(array("\r\n", "\r"), "\n", $content); // cross-platform newlines
     248       
     249        while ( preg_match("/(<[^>\n]*)\n+/", $content) ) {
     250                $content = preg_replace("/(<[^>\n]*)\n+/", "$1 ", $content); // eliminate carriage returns in tags
    230251        }
    231         $pee = preg_replace("/\n\n+/", "\n\n", $pee); // take care of duplicates
     252       
     253        $content = preg_replace("/\n\n+/", "\n\n", $content); // take care of duplicates       
     254       
    232255        // make paragraphs, including one at the end
    233         $pees = preg_split('/\n\s*\n/', $pee, -1, PREG_SPLIT_NO_EMPTY);
    234         $pee = '';
    235         foreach ( $pees as $tinkle )
    236                 $pee .= '<p>' . trim($tinkle, "\n") . "</p>\n";
    237         $pee = preg_replace('|<p>\s*</p>|', '', $pee); // under certain strange conditions it could create a P of entirely whitespace
    238         $pee = preg_replace('!<p>([^<]+)</(div|address|form)>!', "<p>$1</p></$2>", $pee);
    239         $pee = preg_replace('!<p>\s*(</?' . $allblocks . '[^>]*>)\s*</p>!', "$1", $pee); // don't pee all over a tag
    240         $pee = preg_replace("|<p>(<li.+?)</p>|", "$1", $pee); // problem with nested lists
    241         $pee = preg_replace('|<p><blockquote([^>]*)>|i', "<blockquote$1><p>", $pee);
    242         $pee = str_replace('</blockquote></p>', '</p></blockquote>', $pee);
    243         $pee = preg_replace('!<p>\s*(</?' . $allblocks . '[^>]*>)!', "$1", $pee);
    244         $pee = preg_replace('!(</?' . $allblocks . '[^>]*>)\s*</p>!', "$1", $pee);
     256        $paragraphs = preg_split('/\n\s*\n/', $content, -1, PREG_SPLIT_NO_EMPTY);
     257        $content = '';
     258        foreach ( $paragraphs as $paragraph ) {
     259                // Test whether the segment contains opening or closing block-level elements; if so, add the necessary paragraph tags.
     260               
     261                // current content segment does not contain an open and closing blocking element
     262                if( 0 == preg_match('/<' . $allblocks_p . '[^>]*>.*?<\/' . $allblocks_p . '[^>]*>/si', $paragraph) ) {
     263               
     264                        // current content contains an open blocking element
     265                        if( 1 == preg_match('/<' . $allblocks_p .'[^>]*>/si', $paragraph) && 0 == preg_match('/<' . $allblocks_p .'[^>]*\/>/si', $paragraph) ) {
     266                                $content .= preg_replace('/(<' . $allblocks_p . '[^>]*>)/si', "$1\n<p>", trim($paragraph, "\n") );
     267                                $content .= "</p>\n";
     268                               
     269                        // current content contains a closing blocking element
     270                        } else if ( 1 == preg_match('/(<\/' . $allblocks_p .'[^>]*>)/si', $paragraph) ) {
     271                                $content .= '<p>' . preg_replace('/(<\/' . $allblocks_p . '[^>]*>)/si', "</p>\n$1\n", trim($paragraph, "\n") );
     272                       
     273                        } else {
     274                       
     275                                // current content is not wrapped in <p> tag and is not self closing tag
     276                                if( 0 == preg_match( '/<p[^>]*>.*?<\/p[^>]*>/si', $paragraph ) && 0 == preg_match('/<' . $allblocks .'[^>]*\/>/si', $paragraph ) ) {
     277                                        $content .= '<p>' . trim($paragraph, "\n") . "</p>\n";
     278                                       
     279                                } else {
     280                                        $content .= trim($paragraph, "\n") . "\n";
     281                                }
     282                        }
     283                       
     284                // current content contains an open and closing blocking element
     285                } else {
     286                        $content .= trim($paragraph, "\n") . "\n";
     287                }
     288        }
     289        $content = preg_replace('|<p>\s*</p>|', '', $content); // under certain strange conditions it could create a P of entirely whitespace
    245290        if ( $br ) {
    246                 $pee = preg_replace_callback('/<(script|style).*?<\/\\1>/s', '_autop_newline_preservation_helper', $pee);
    247                 $pee = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $pee); // optionally make line breaks
    248                 $pee = str_replace('<WPPreserveNewline />', "\n", $pee);
     291                $content = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $content); // optionally make line breaks
    249292        }
    250         $pee = preg_replace('!(</?' . $allblocks . '[^>]*>)\s*<br />!', "$1", $pee);
    251         $pee = preg_replace('!<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)!', '$1', $pee);
    252         $pee = preg_replace( "|\n</p>$|", '</p>', $pee );
     293        $content = str_replace('<WPPreserveNewline />', "\n", $content);
     294        $content = preg_replace('!(</?' . $allblocks . '[^>]*>)\s*<br />!', "$1", $content);
     295        $content = preg_replace('!<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)!', '$1', $content);
     296        $content = preg_replace( "|\n</p>$|", '</p>', $content );
    253297
    254         if ( !empty($pre_tags) )
    255                 $pee = str_replace(array_keys($pre_tags), array_values($pre_tags), $pee);
     298        if ( !empty($save_tags) )
     299                $content = str_replace(array_keys($save_tags), array_values($save_tags), $content);
    256300
    257         return $pee;
     301        return trim($content);
    258302}
    259303
    260304/**
     
    277321 *
    278322 * @since 2.9.0
    279323 *
    280  * @param string $pee The content.
     324 * @param string $content The content.
    281325 * @return string The filtered content.
    282326 */
    283 function shortcode_unautop( $pee ) {
     327function shortcode_unautop( $content ) {
    284328        global $shortcode_tags;
    285329
    286330        if ( empty( $shortcode_tags ) || !is_array( $shortcode_tags ) ) {
    287                 return $pee;
     331                return $content;
    288332        }
    289333
    290334        $tagregexp = join( '|', array_map( 'preg_quote', array_keys( $shortcode_tags ) ) );
     
    321365                . '<\\/p>'                           // closing paragraph
    322366                . '/s';
    323367
    324         return preg_replace( $pattern, '$1', $pee );
     368        return preg_replace( $pattern, '$1', $content );
    325369}
    326370
    327371/**