Make WordPress Core

Ticket #15549: new-ping-urls-regex.diff

File new-ping-urls-regex.diff, 4.3 KB (added by wonderboymusic, 12 years ago)
  • wp-includes/comment.php

     
    17811781
    17821782        $pung = get_pung($post_ID);
    17831783
    1784         // Variables
    1785         $ltrs = '\w';
    1786         $gunk = '/#~:.?+=&%@!\-';
    1787         $punc = '.:?\-';
    1788         $any = $ltrs . $gunk . $punc;
    1789 
    17901784        // Step 1
    17911785        // Parsing the post, external links (if any) are stored in the $post_links array
    1792         // This regexp comes straight from phpfreaks.com
    1793         // http://www.phpfreaks.com/quickcode/Extract_All_URLs_on_a_Page/15.php
    1794         preg_match_all("{\b http : [$any] +? (?= [$punc] * [^$any] | $)}x", $content, $post_links_temp);
    1795 
     1786        $post_links_temp = _extract_pingable_urls( $content );
     1787       
    17961788        // Step 2.
    17971789        // Walking thru the links array
    17981790        // first we get rid of links pointing to sites, not to specific files
     
    18021794        // http://dummy-weblog.org/post.php
    18031795        // We don't wanna ping first and second types, even if they have a valid <link/>
    18041796
    1805         foreach ( (array) $post_links_temp[0] as $link_test ) :
     1797        foreach ( (array) $post_links_temp as $link_test ) :
    18061798                if ( !in_array($link_test, $pung) && (url_to_postid($link_test) != $post_ID) // If we haven't pung it already and it isn't a link to itself
    18071799                                && !is_local_attachment($link_test) ) : // Also, let's never ping local attachments.
    18081800                        if ( $test = @parse_url($link_test) ) {
  • wp-includes/functions.php

     
    393393}
    394394
    395395/**
     396 * Use RegEx to extract URLs from arbitrary content
     397 *
     398 * (?xi)
     399 * \b
     400 * (                                      # Capture 1: entire matched URL
     401 *   (?:
     402 *     https?://                          # http or https protocol
     403 *     |                                          #   or
     404 *     www\d{0,3}[.]                          # "www.", "www1.", "www2." … "www999."
     405 *     |                                          #   or
     406 *     [a-z0-9.\-]+[.][a-z]{2,4}/         # looks like domain name followed by a slash
     407 *   )
     408 *   (?:                                          # One or more:
     409 *     [^\s()<>]+                         # Run of non-space, non-()<>
     410 *     |                                  #   or
     411 *     \(([^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels
     412 *   )+
     413 *   (?:                                          # End with:
     414 *     \(([^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels
     415 *     |                                          #   or
     416 *     [^\s`!()\[\]{};:'".,<>?«»“”‘’]     # not a space or one of these punct chars
     417 *   )
     418 * )
     419 *
     420 * @since 3.5.0
     421 *
     422 * @param string $content Content to search for URLs.
     423 * @return array URLs to ping, with HTML entities decoded.
     424 */
     425function _extract_pingable_urls( $content ) {
     426        preg_match_all( // NOTE(review): the pattern has no "u" modifier, so the multibyte quote characters in its final character class are compared byte-by-byte - confirm this is intended
     427                "#(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))#",
     428                $content,
     429                $post_links // filled by preg_match_all(); index 0 holds the full-pattern matches
     430        );
     431       
     432        foreach ( $post_links[0] as &$post_link ) // by reference, so each matched URL is rewritten in place; the reference is not unset(), which is harmless only because the function returns right after the loop
     433                $post_link = html_entity_decode( $post_link ); // turn entities such as &amp; back into literal characters
     434       
     435        return $post_links[0]; // only the whole-URL matches are returned; the capture-group arrays are discarded
     436}
     437
     438/**
    396439 * Check content for video and audio links to add as enclosures.
    397440 *
    398441 * Will not add enclosures that have already been added and will
     
    417460
    418461        $pung = get_enclosed( $post_ID );
    419462
    420         $ltrs = '\w';
    421         $gunk = '/#~:.?+=&%@!\-';
    422         $punc = '.:?\-';
    423         $any = $ltrs . $gunk . $punc;
     463        $post_links_temp = _extract_pingable_urls( $content );
    424464
    425         preg_match_all( "{\b http : [$any] +? (?= [$punc] * [^$any] | $)}x", $content, $post_links_temp );
    426 
    427465        foreach ( $pung as $link_test ) {
    428                 if ( !in_array( $link_test, $post_links_temp[0] ) ) { // link no longer in post
     466                if ( !in_array( $link_test, $post_links_temp ) ) { // link no longer in post
    429467                        $mids = $wpdb->get_col( $wpdb->prepare("SELECT meta_id FROM $wpdb->postmeta WHERE post_id = %d AND meta_key = 'enclosure' AND meta_value LIKE (%s)", $post_ID, like_escape( $link_test ) . '%') );
    430468                        foreach ( $mids as $mid )
    431469                                delete_metadata_by_mid( 'post', $mid );
    432470                }
    433471        }
    434472
    435         foreach ( (array) $post_links_temp[0] as $link_test ) {
     473        foreach ( (array) $post_links_temp as $link_test ) {
    436474                if ( !in_array( $link_test, $pung ) ) { // If we haven't pung it already
    437475                        $test = @parse_url( $link_test );
    438476                        if ( false === $test )