Ticket #15549: new-ping-urls-regex.diff
File new-ping-urls-regex.diff, 4.3 KB (added 12 years ago) |
---|
-
wp-includes/comment.php
1781 1781 1782 1782 $pung = get_pung($post_ID); 1783 1783 1784 // Variables1785 $ltrs = '\w';1786 $gunk = '/#~:.?+=&%@!\-';1787 $punc = '.:?\-';1788 $any = $ltrs . $gunk . $punc;1789 1790 1784 // Step 1 1791 1785 // Parsing the post, external links (if any) are stored in the $post_links array 1792 // This regexp comes straight from phpfreaks.com 1793 // http://www.phpfreaks.com/quickcode/Extract_All_URLs_on_a_Page/15.php 1794 preg_match_all("{\b http : [$any] +? (?= [$punc] * [^$any] | $)}x", $content, $post_links_temp); 1795 1786 $post_links_temp = _extract_pingable_urls( $content ); 1787 1796 1788 // Step 2. 1797 1789 // Walking thru the links array 1798 1790 // first we get rid of links pointing to sites, not to specific files … … 1802 1794 // http://dummy-weblog.org/post.php 1803 1795 // We don't wanna ping first and second types, even if they have a valid <link/> 1804 1796 1805 foreach ( (array) $post_links_temp [0]as $link_test ) :1797 foreach ( (array) $post_links_temp as $link_test ) : 1806 1798 if ( !in_array($link_test, $pung) && (url_to_postid($link_test) != $post_ID) // If we haven't pung it already and it isn't a link to itself 1807 1799 && !is_local_attachment($link_test) ) : // Also, let's never ping local attachments. 1808 1800 if ( $test = @parse_url($link_test) ) { -
wp-includes/functions.php
393 393 } 394 394 395 395 /** 396 * Use RegEx to extract URLs from arbitrary content 397 * 398 * (?xi) 399 * \b 400 * ( # Capture 1: entire matched URL 401 * (?: 402 * https?:// # http or https protocol 403 * | # or 404 * www\d{0,3}[.] # "www.", "www1.", "www2." … "www999." 405 * | # or 406 * [a-z0-9.\-]+[.][a-z]{2,4}/ # looks like domain name followed by a slash 407 * ) 408 * (?: # One or more: 409 * [^\s()<>]+ # Run of non-space, non-()<> 410 * | # or 411 * \(([^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels 412 * )+ 413 * (?: # End with: 414 * \(([^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels 415 * | # or 416 * [^\s`!()\[\]{};:'".,<>?«»“”‘’] # not a space or one of these punct chars 417 * ) 418 * ) 419 * 420 * @since 3.5.0 421 * 422 * @param string $content 423 * @return array URLs to ping 424 */ 425 function _extract_pingable_urls( $content ) { 426 preg_match_all( 427 "#(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))#", 428 $content, 429 $post_links 430 ); 431 432 foreach ( $post_links[0] as &$post_link ) 433 $post_link = html_entity_decode( $post_link ); 434 435 return $post_links[0]; 436 } 437 438 /** 396 439 * Check content for video and audio links to add as enclosures. 397 440 * 398 441 * Will not add enclosures that have already been added and will … … 417 460 418 461 $pung = get_enclosed( $post_ID ); 419 462 420 $ltrs = '\w'; 421 $gunk = '/#~:.?+=&%@!\-'; 422 $punc = '.:?\-'; 423 $any = $ltrs . $gunk . $punc; 463 $post_links_temp = _extract_pingable_urls( $content ); 424 464 425 preg_match_all( "{\b http : [$any] +? 
(?= [$punc] * [^$any] | $)}x", $content, $post_links_temp );426 427 465 foreach ( $pung as $link_test ) { 428 if ( !in_array( $link_test, $post_links_temp [0]) ) { // link no longer in post466 if ( !in_array( $link_test, $post_links_temp ) ) { // link no longer in post 429 467 $mids = $wpdb->get_col( $wpdb->prepare("SELECT meta_id FROM $wpdb->postmeta WHERE post_id = %d AND meta_key = 'enclosure' AND meta_value LIKE (%s)", $post_ID, like_escape( $link_test ) . '%') ); 430 468 foreach ( $mids as $mid ) 431 469 delete_metadata_by_mid( 'post', $mid ); 432 470 } 433 471 } 434 472 435 foreach ( (array) $post_links_temp [0]as $link_test ) {473 foreach ( (array) $post_links_temp as $link_test ) { 436 474 if ( !in_array( $link_test, $pung ) ) { // If we haven't pung it already 437 475 $test = @parse_url( $link_test ); 438 476 if ( false === $test )