Make WordPress Core

Ticket #45435: 45435-wpremovep.diff

File 45435-wpremovep.diff, 15.7 KB (added by gaveline, 6 years ago)
  • src/wp-includes/formatting.php

    diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php
    index a0438e9c53..7b0d9a89bc 100644
    a b function wpautop( $pee, $br = true ) { 
    609609        return $pee;
    610610}
    611611
     612/**
     613 * Replaces <p> tags with two line breaks. "Opposite" of autop().
     614 *
     615 * Replaces <p> tags with two line breaks except where the <p> has attributes.
     616 * Unifies whitespace. Indents <li>, <dt> and <dd> for better readability.
     617 *
     618 * @param  String html The content from the editor.
     619 * @return String      The content with stripped paragraph tags.
     620 */
     621function wpremovep( $html ) {
     622    $blocklist = 'blockquote|ul|ol|li|dl|dt|dd|table|thead|tbody|tfoot|tr|th|td|h[1-6]|fieldset|figure';
     623    $blocklist1 = $blocklist . '|div|p';
     624    $blocklist2 = $blocklist . '|pre';
     625
     626    $preserve = [];
     627    $preserveLinebreaks = false;
     628    $preserveBr = false;
     629
     630        if ( !$html ) {
     631        return '';
     632    }
     633
     634    // Protect script and style tags.
     635    if ( strpos($html, "<script") !== false || strpos($html, "<style") !== false) {
     636        $html = preg_replace_callback( '/<(script|style)[^>]*>[\s\S]*?<\/\1>/', function( $match ) use (&$preserve) {
     637            array_push($preserve, $match[0]);
     638            return '<wp-preserve>';
     639        }, $html );
     640        }
     641
     642    // Protect pre tags.
     643    if ( strpos($html, "<pre") !== false ) {
     644        $preserveLinebreaks = true;
     645        $html = preg_replace_callback( '/<pre[^>]*>[\s\S]+?<\/pre>/', function( $matches ) {
     646            $match = $matches[0];
     647            $match = preg_replace( '/<b'.'r ?\/?>(\r\n|\n)?/', '<wp-line-break>', $match );
     648            $match = preg_replace( '/<\/?p( [^>]*)?>(\r\n|\n)?/', '<wp-line-break>', $match );
     649            $match = preg_replace( '/\r?\n/', '<wp-line-break>', $match );
     650
     651            return $match;
     652        }, $html );
     653    }
     654
     655    // Remove line breaks but keep <br> tags inside image captions.
     656    if ( strpos($html, "[caption") !== false ) {
     657        $preserveBr = true;
     658        $html = preg_replace_callback( '/\[caption[\s\S]+?\[\/caption\]/', function( $matches ) {
     659            $match = $matches[0];
     660            $match = preg_replace( '/<br([^>]*)>/', "<wp-temp-br$1>", $match );
     661            $match = preg_replace( '/[\r\n\t]+/', '', $match );
     662
     663            return $match;
     664        }, $html );
     665    }
     666
     667    // Normalize white space characters before and after block tags.
     668    $html = preg_replace( '/\s*<\/(' . $blocklist1 . ')>\s*/' , "</$1>\n", $html );
     669    $html = preg_replace( '/\s*<((?:' . $blocklist1 . ')(?: [^>]*)?)>/', "\n<$1>", $html );
     670
     671    // Mark </p> if it has any attributes.
     672    // weird '<"."p' thing to avois IDE bugs trying to parse html
     673    $html = preg_replace( "/(<"."p [^>]+>.*?)<\/p>/", '$1</p#>', $html );
     674
     675    // Preserve the first <p> inside a <div>.
     676    $html = preg_replace( '/<div( [^>]*)?>\s*<p>/i', "<div$1>\n\n", $html );
     677
     678    // Remove paragraph tags.
     679    $html = preg_replace( '/\s*<p>/i', '', $html);
     680    $html = preg_replace( '/\s*<\/p>\s*/i', "\n\n", $html);
     681
     682    // Normalize white space chars and remove multiple line breaks.
     683    $html = preg_replace( '/\n[\s\n]+\n/', "\n\n", $html);
     684
     685    // Replace <br> tags with line breaks.
     686    $html = preg_replace_callback( '/(\s*)<br\s?\/?>\s*/i', function( $match ) {
     687        return ( strpos($match[1], "\n") !== false ) ? "\n\n" : "\n";
     688    }, $html);
     689
     690    // Fix line breaks around <div>.
     691    $html = preg_replace( '/\s*<div/', "\n<div", $html );
     692    $html = preg_replace( '/<\/div>\s*/', "</div>\n", $html );
     693
     694    // Fix line breaks around caption shortcodes.
     695    $html = preg_replace( '/\s*\[caption([^\[]+)\[\/caption\]\s*/i', "\n\n[caption$1[/caption]\n\n", $html );
     696    $html = preg_replace( '/caption\]\n\n+\[caption/', "caption]\n\n[caption", $html );
     697
     698    // Pad block elements tags with a line break.
     699    $html = preg_replace( '/\s*<((?:' . $blocklist2 . ')(?: [^>]*)?)\s*>/', "\n<$1>", $html );
     700    $html = preg_replace( '/\s*<\/(' . $blocklist2 . ')>\s*/', "</$1>\n", $html );
     701
     702    // Indent <li>, <dt> and <dd> tags.
     703    $html = preg_replace( '/<((li|dt|dd)[^>]*)>/', " \t<$1>" , $html);
     704
     705    // Fix line breaks around <select> and <option>.
     706    if ( strpos($html, "<option") !== false ) {
     707        $html = preg_replace( '/\s*<option/', "\n \t<option", $html );
     708        $html = preg_replace( '/\s*<\/select>/', "\n</select>\n", $html );
     709    }
     710
     711    // Pad <hr> with two line breaks.
     712    if ( strpos($html, "<hr") !== false ) {
     713        $html = preg_replace( '/\s*<hr( [^>]*)?>\s*/', "\n\n<hr$1>\n\n", $html );
     714    }
     715
     716    // Remove line breaks in <object> tags.
     717    if ( strpos($html, "<object") !== false ) {
     718        $html = preg_replace_callback( '/<object[\s\S]+?<\/object>/', function( $a ) {
     719            return preg_replace( "/[\r\n]+/", '', $a[0] );
     720        }, $html );
     721    }
     722
     723    // Unmark special paragraph closing tags.
     724    $html = preg_replace( '/<\/p#>/', "</p>\n", $html );
     725
     726    // Pad remaining <p> tags whit a line break.
     727    $html = preg_replace( '/\s*(<'.'p [^>]+>[\s\S]*?<\/p>)/', "\n$1", $html );
     728
     729    // Trim.
     730    $html = preg_replace( "/^\s+/", '' , $html);
     731    $html = preg_replace( "/[\s\n]+$/", '' , $html);
     732
     733    if ( $preserveLinebreaks ) {
     734        $html = preg_replace( '/<wp-line-break>/', "\n", $html );
     735    }
     736
     737    if ( $preserveBr ) {
     738        $html = preg_replace( '/<wp-temp-br([^>]*)>/', "<br$1>", $html );
     739    }
     740
     741    // Restore preserved tags.
     742    if ( count($preserve) > 0 ) {
     743        $html = preg_replace_callback( '/<wp-preserve>/', function() use (&$preserve) {
     744            return array_shift($preserve);
     745        }, $html );
     746    }
     747
     748    return $html;
     749}
     750
    612751/**
    613752 * Separate HTML elements and comments from the text.
    614753 *
  • new file tests/phpunit/tests/formatting/Removep.php

    diff --git a/tests/phpunit/tests/formatting/Removep.php b/tests/phpunit/tests/formatting/Removep.php
    new file mode 100644
    index 0000000000..18a446bdbe
    - +  
     1<?php
     2
     3/**
       * Tests for wpremovep(): each case feeds it an HTML snippet and compares the
       * returned text with a fixed expected string.
       *
     4 * @group formatting
     5 * @ticket 45435
     6 */
     7class Tests_Formatting_Removep extends WP_UnitTestCase {
          /**
           * Plain paragraphs, including an empty <p></p>, are expected to come back as
           * text blocks separated by a single blank line.
           */
     8    public function test_remove_p() {
     9        $test_data  = '<p>Welcome to WordPress!  This post contains important information.  After you read it, you can make it private to hide it from visitors but still have the information handy for future reference.</p>
     10<p></p>
     11<p>First things first:</p>
     12';
     13        $expected = 'Welcome to WordPress!  This post contains important information.  After you read it, you can make it private to hide it from visitors but still have the information handy for future reference.
     14
     15First things first:';
     16
     17        // On windows environments, the EOL-style is \r\n
     18        $test_data = str_replace( "\r\n", "\n", $test_data );
     19
     20        $this->assertEquals( $expected, wpremovep( $test_data ) );
     21    }
     22
          /**
           * Exercises the <br />, <br> and <br > spellings inside paragraphs; they are
           * expected to be replaced by line breaks.
           */
     23    public function test_remove_br() {
     24        $test_data  = '<p>Welcome to WordPress!<br />This post contains important information.<br>After you read it, you can make it private to hide it from visitors but still have the information handy for future reference.</p>
     25<p>Test<br />   <br >   <br>Test</p>
     26<p>First things first:</p>
     27';
     28        $expected = 'Welcome to WordPress!
     29This post contains important information.
     30After you read it, you can make it private to hide it from visitors but still have the information handy for future reference.
     31
     32Test
     33
     34
     35Test
     36
     37First things first:';
     38
     39        // On windows environments, the EOL-style is \r\n
     40        $test_data = str_replace( "\r\n", "\n", $test_data );
     41
     42        $this->assertEquals( $expected, wpremovep( $test_data ) );
     43    }
     44
          /**
           * A <div> placed between two paragraphs is expected to end up on its own line.
           */
     45    public function test_line_break_around_div() {
     46        $test_data  = '<p>Hello</p><div>World</div><p>!!</p>';
     47        $expected = 'Hello
     48<div>World</div>
     49!!';
     50
     51        // On windows environments, the EOL-style is \r\n
     52        $test_data = str_replace( "\r\n", "\n", $test_data );
     53
     54        $this->assertEquals( $expected, wpremovep( $test_data ) );
     55    }
     56
          /**
           * [caption] shortcodes are expected to be separated from their neighbours,
           * and from each other, by one blank line.
           */
     57    public function test_line_break_around_caption() {
     58        $test_data  = '<p>Hello</p>[caption id="1"]whatever 1[/caption]<p>!!</p>[caption id="2"]whatever 2[/caption][caption id="3"]whatever 3[/caption]';
     59        $expected = 'Hello
     60
     61[caption id="1"]whatever 1[/caption]
     62
     63!!
     64
     65[caption id="2"]whatever 2[/caption]
     66
     67[caption id="3"]whatever 3[/caption]';
     68
     69        // On windows environments, the EOL-style is \r\n
     70        $test_data = str_replace( "\r\n", "\n", $test_data );
     71
     72        $this->assertEquals( $expected, wpremovep( $test_data ) );
     73    }
     74
          /**
           * Covers blockquote, headings, nested lists, a definition list, a table and a
           * <pre> block in one document.
           */
     75    public function test_line_break_around_block_elements() {
     76
          // NOTE(review): the three commented lines below look like the JavaScript form of
          // the block lists defined at the top of wpremovep() — presumably kept for reference.
     77        //const blocklist = 'blockquote|ul|ol|li|dl|dt|dd|table|thead|tbody|tfoot|tr|th|td|h[1-6]|fieldset|figure';
     78        //const blocklist1 = blocklist + '|div|p';
     79        //const blocklist2 = blocklist + '|pre';
     80
     81        $test_data  = '<p>Hello</p>
     82
     83<blockquote cite="https://www.huxley.net/bnw/four.html">
     84    <p>Words can be like X-rays, if you use them properly – they\'ll go through anything. You read and you\'re pierced.</p>
     85</blockquote>
     86<h1>This is an UL</h1><ul>
     87    <li>Milk</li><li>Cheese
     88        <ul><li>Blue cheese</li><li>Feta</li>    </ul>
     89    </li>
     90</ul><h2>This is an OL</h2>
     91<ol> <li>Mix flour, baking powder, sugar, and salt.</li><li>In another bowl, mix eggs, milk, and oil.</li>
     92  <li>Stir both mixtures together.</li>
     93  <li>Fill muffin tray 3/4 full.</li><li>Bake for 20 minutes.</li>
     94</ol>
     95
     96<p>Cryptids of Cornwall:</p>
     97<h3>This is an DL</h3>
     98<dl>
     99    <dt>Beast of Bodmin</dt><dd>A large feline inhabiting Bodmin Moor.</dd>
     100    <dt>Morgawr</dt><dd>A sea serpent.</dd>
     101    <dt>Owlman</dt><dd>A giant owl-like creature.</dd>
     102</dl>
     103<h4>This is an TABLE</h4><table>
     104    <thead>
     105        <tr>
     106            <th colspan="2">The table header</th>
     107        </tr>
     108    </thead>
     109    <tbody>
     110        <tr>
     111            <td>The table body</td>
     112            <td>with two columns</td>
     113        </tr>
     114    </tbody>
     115</table><pre>
     116  L          TE
     117    A       A
     118      C    V
     119       R A
     120       DOU
     121       LOU
     122      REUSE
     123      QUE TU
     124      PORTES
     125    ET QUI T\'
     126    ORNE O CI
     127     VILISÉ
     128    OTE-  TU VEUX
     129     LA    BIEN
     130    SI      RESPI
     131            RER       - Apollinaire
     132</pre>
     133
     134';
     135        $expected = 'Hello
     136<blockquote cite="https://www.huxley.net/bnw/four.html">Words can be like X-rays, if you use them properly – they\'ll go through anything. You read and you\'re pierced.</blockquote>
     137<h1>This is an UL</h1>
     138<ul>
     139        <li>Milk</li>
     140        <li>Cheese
     141<ul>
     142        <li>Blue cheese</li>
     143        <li>Feta</li>
     144</ul>
     145</li>
     146</ul>
     147<h2>This is an OL</h2>
     148<ol>
     149        <li>Mix flour, baking powder, sugar, and salt.</li>
     150        <li>In another bowl, mix eggs, milk, and oil.</li>
     151        <li>Stir both mixtures together.</li>
     152        <li>Fill muffin tray 3/4 full.</li>
     153        <li>Bake for 20 minutes.</li>
     154</ol>
     155Cryptids of Cornwall:
     156<h3>This is an DL</h3>
     157<dl>
     158        <dt>Beast of Bodmin</dt>
     159        <dd>A large feline inhabiting Bodmin Moor.</dd>
     160        <dt>Morgawr</dt>
     161        <dd>A sea serpent.</dd>
     162        <dt>Owlman</dt>
     163        <dd>A giant owl-like creature.</dd>
     164</dl>
     165<h4>This is an TABLE</h4>
     166<table>
     167<thead>
     168<tr>
     169<th colspan="2">The table header</th>
     170</tr>
     171</thead>
     172<tbody>
     173<tr>
     174<td>The table body</td>
     175<td>with two columns</td>
     176</tr>
     177</tbody>
     178</table>
     179<pre>
     180  L          TE
     181    A       A
     182      C    V
     183       R A
     184       DOU
     185       LOU
     186      REUSE
     187      QUE TU
     188      PORTES
     189    ET QUI T\'
     190    ORNE O CI
     191     VILISÉ
     192    OTE-  TU VEUX
     193     LA    BIEN
     194    SI      RESPI
     195            RER       - Apollinaire
     196</pre>';
     197
     198        // On windows environments, the EOL-style is \r\n
     199        $test_data = str_replace( "\r\n", "\n", $test_data );
     200
     201        $this->assertEquals( $expected, wpremovep( $test_data ) );
     202    }
     203
          /**
           * <select> markup: each <option> is expected on its own indented line, with
           * </select> on a line of its own.
           */
     204    public function test_line_break_around_select() {
     205        $test_data  = '<p>Hello</p><select><option value="1">Value 1</option>
     206<option value="2">Value 2</option></select><p>World</p>';
     207
     208        $expected = 'Hello
     209
     210<select>
     211        <option value="1">Value 1</option>
     212        <option value="2">Value 2</option>
     213</select>
     214World';
     215
     216        // On windows environments, the EOL-style is \r\n
     217        $test_data = str_replace( "\r\n", "\n", $test_data );
     218
     219        $this->assertEquals( $expected, wpremovep( $test_data ) );
     220    }
     221
          /**
           * An <hr /> between paragraphs is expected to be padded with a blank line on
           * each side.
           */
     222    public function test_line_break_around_hr() {
     223        $test_data  = '<p>Hello</p><hr /><p>World</p>';
     224
     225        $expected = 'Hello
     226
     227<hr />
     228
     229World';
     230
     231        // On windows environments, the EOL-style is \r\n
     232        $test_data = str_replace( "\r\n", "\n", $test_data );
     233
     234        $this->assertEquals( $expected, wpremovep( $test_data ) );
     235    }
     236
          /**
           * A <p> directly inside a <div> is expected to be replaced by blank lines
           * around its text rather than removed without trace.
           */
     237    public function test_first_p_in_div() {
     238        $test_data  = '<p>Hello</p><div><p>World</p></div>';
     239
     240        $expected = 'Hello
     241<div>
     242
     243World
     244
     245</div>';
     246
     247        // On windows environments, the EOL-style is \r\n
     248        $test_data = str_replace( "\r\n", "\n", $test_data );
     249
     250        $this->assertEquals( $expected, wpremovep( $test_data ) );
     251    }
     252
          /**
           * The content of an <object> element is expected to end up on a single line.
           */
     253    public function test_remove_break_in_object() {
     254        $test_data  = '<p>Hello</p><object><p>World</p></object>';
     255
     256        $expected = 'Hello
     257
     258<object>World</object>';
     259
     260        // On windows environments, the EOL-style is \r\n
     261        $test_data = str_replace( "\r\n", "\n", $test_data );
     262
     263        $this->assertEquals( $expected, wpremovep( $test_data ) );
     264    }
     265
          /**
           * Paragraphs carrying attributes (here a style attribute) are expected to keep
           * their <p> tags, each on its own line.
           */
     266    public function test_mark_p_with_attributes() {
     267        $test_data  = '<p>Welcome to WordPress!  This post contains important information.  After you read it, you can make it private to hide it from visitors but still have the information handy for future reference.</p><p>New P here</p>
     268<p></p><p style="color:red">First things first</p><p style="color:blue">Last things last</p>
     269';
     270        $expected = 'Welcome to WordPress!  This post contains important information.  After you read it, you can make it private to hide it from visitors but still have the information handy for future reference.
     271
     272New P here
     273<p style="color:red">First things first</p>
     274<p style="color:blue">Last things last</p>';
     275
     276        // On windows environments, the EOL-style is \r\n
     277        $test_data = str_replace( "\r\n", "\n", $test_data );
     278
     279        $this->assertEquals( $expected, wpremovep( $test_data ) );
     280    }
     281
          /**
           * <script> and <style> elements are expected to come back unchanged, including
           * the <p> tags that appear inside their contents.
           */
     282    public function test_preserve_scripts_and_styles() {
     283        $test_data  = '<p>Welcome to WordPress!</p><p>We test here</p>
     284<script>
     285    const html = "<p>Preserve script</p>"
     286</script>
     287<style type="text/css">p.preserve {
     288    content: "<p>Preserve style</p>"
     289}</style><p>End of test</p>
     290';
     291        $expected = 'Welcome to WordPress!
     292
     293We test here
     294
     295<script>
     296    const html = "<p>Preserve script</p>"
     297</script>
     298<style type="text/css">p.preserve {
     299    content: "<p>Preserve style</p>"
     300}</style>End of test';
     301
     302        // On windows environments, the EOL-style is \r\n
     303        $test_data = str_replace( "\r\n", "\n", $test_data );
     304
     305        $this->assertEquals( $expected, wpremovep( $test_data ) );
     306    }
     307
          /**
           * Inside <pre>, <p> and <br /> tags are expected to be turned into line breaks
           * while the existing line breaks are kept.
           */
     308    public function test_protect_pre() {
     309
     310        $test_data  = '<p>Hello</p>
     311<pre>
     312<p>Protect me</p>
     313<br />
     314<p>Please</p><br />
     315<p>Thank you.</p>
     316</pre>
     317
     318';
     319        $expected = 'Hello
     320<pre>
     321
     322Protect me
     323
     324
     325Please
     326
     327
     328Thank you.
     329</pre>';
     330
     331        // On windows environments, the EOL-style is \r\n
     332        $test_data = str_replace( "\r\n", "\n", $test_data );
     333
     334        $this->assertEquals( $expected, wpremovep( $test_data ) );
     335    }
     336
          /**
           * <br> tags inside a [caption] shortcode are expected to be kept, while the
           * <br /> outside it becomes a line break.
           */
     337    public function test_protect_caption_br() {
     338
     339        $test_data  = 'Hello<br />[caption]this<br>is<br />a caption[/caption]World';
     340        $expected = 'Hello
     341
     342[caption]this<br>is<br />a caption[/caption]
     343
     344World';
     345
     346        // On windows environments, the EOL-style is \r\n
     347        $test_data = str_replace( "\r\n", "\n", $test_data );
     348
     349        $this->assertEquals( $expected, wpremovep( $test_data ) );
     350    }
     351}