Changeset 31609 for trunk/src/wp-admin/includes/class-wp-press-this.php
- Timestamp:
- 03/04/2015 07:28:53 PM (10 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-admin/includes/class-wp-press-this.php
r31607 r31609 41 41 // Used to trigger the bookmarklet update notice. 42 42 // Needs to be set here and in get_shortcut_link() in wp-includes/link-template.php. 43 'version' => ' 5',43 'version' => '6', 44 44 45 45 /** … … 279 279 public function fetch_source_html( $url ) { 280 280 // Download source page to tmp file. 281 $source_tmp_file = ( ! empty( $url ) ) ? download_url( $url ) : '';281 $source_tmp_file = ( ! empty( $url ) ) ? download_url( $url, 30 ) : ''; 282 282 $source_content = ''; 283 283 … … 319 319 } 320 320 321 private function _limit_array( $value ) { 322 if ( is_array( $value ) ) { 323 if ( count( $value ) > 50 ) { 324 return array_slice( $value, 0, 50 ); 325 } 326 327 return $value; 328 } 329 330 return array(); 331 } 332 333 private function _limit_string( $value ) { 334 $return = ''; 335 336 if ( is_numeric( $value ) || is_bool( $value ) ) { 337 $return = (string) $value; 338 } else if ( is_string( $value ) ) { 339 if ( mb_strlen( $value ) > 5000 ) { 340 $return = mb_substr( $value, 0, 5000 ); 341 } else { 342 $return = $value; 343 } 344 345 $return = html_entity_decode( $return, ENT_QUOTES, 'UTF-8' ); 346 $return = sanitize_text_field( trim( $return ) ); 347 } 348 349 return $return; 350 } 351 352 private function _limit_url( $url ) { 353 if ( ! is_string( $url ) ) { 354 return ''; 355 } 356 357 $url = $this->_limit_string( $url ); 358 359 // HTTP 1.1 allows 8000 chars but the "de-facto" standard supported in all current browsers is 2048. 360 if ( mb_strlen( $url ) > 2048 ) { 361 return ''; // Return empty rather than a trunacted/invalid URL 362 } 363 364 // Only allow http(s) or protocol relative URLs. 365 if ( ! preg_match( '%^(https?:)?//%i', $url ) ) { 366 return ''; 367 } 368 369 if ( strpos( $url, '"' ) !== false || strpos( $url, ' ' ) !== false ) { 370 return ''; 371 } 372 373 return $url; 374 } 375 376 private function _limit_img( $src ) { 377 $src = $this->_limit_url( $src ); 378 379 if ( preg_match( '/\/ad[sx]{1}?\//', $src ) ) { 380 // Ads 381 return ''; 382 } else if ( preg_match( '/(\/share-?this[^\.]+?\.[a-z0-9]{3,4})(\?.*)?$/', $src ) ) { 383 // Share-this type button 384 return ''; 385 } else if ( preg_match( '/\/(spinner|loading|spacer|blank|rss)\.(gif|jpg|png)/', $src ) ) { 386 // Loaders, spinners, spacers 387 return ''; 388 } else if ( preg_match( '/\/([^\.\/]+[-_]{1})?(spinner|loading|spacer|blank)s?([-_]{1}[^\.\/]+)?\.[a-z0-9]{3,4}/', $src ) ) { 389 // Fancy loaders, spinners, spacers 390 return ''; 391 } else if ( preg_match( '/([^\.\/]+[-_]{1})?thumb[^.]*\.(gif|jpg|png)$/', $src ) ) { 392 // Thumbnails, too small, usually irrelevant to context 393 return ''; 394 } else if ( preg_match( '/\/wp-includes\//', $src ) ) { 395 // Classic WP interface images 396 return ''; 397 } else if ( preg_match( '/[^\d]{1}\d{1,2}x\d+\.(gif|jpg|png)$/', $src ) ) { 398 // Most often tiny buttons/thumbs (< 100px wide) 399 return ''; 400 } else if ( preg_match( '/\/pixel\.(mathtag|quantserve)\.com/', $src ) ) { 401 // See mathtag.com and https://www.quantcast.com/how-we-do-it/iab-standard-measurement/how-we-collect-data/ 402 return ''; 403 } else if ( false !== strpos( $src, '/g.gif' ) ) { 404 // Classic WP stats gif 405 return ''; 406 } 407 408 return $src; 409 } 410 411 private function _limit_embed( $src ) { 412 $src = $this->_limit_url( $src ); 413 414 if ( preg_match( '/\/\/www\.youtube\.com\/(embed|v)\/([^\?]+)\?.+$/', $src, $src_matches ) ) { 415 $src = 'https://www.youtube.com/watch?v=' . $src_matches[2]; 416 } else if ( preg_match( '/\/\/player\.vimeo\.com\/video\/([\d]+)([\?\/]{1}.*)?$/', $src, $src_matches ) ) { 417 $src = 'https://vimeo.com/' . (int) $src_matches[1]; 418 } else if ( preg_match( '/\/\/vimeo\.com\/moogaloop\.swf\?clip_id=([\d]+)$/', $src, $src_matches ) ) { 419 $src = 'https://vimeo.com/' . (int) $src_matches[1]; 420 } else if ( preg_match( '/\/\/vine\.co\/v\/([^\/]+)\/embed/', $src, $src_matches ) ) { 421 $src = 'https://vine.co/v/' . $src_matches[1]; 422 } else if ( ! preg_match( '/\/\/(m\.|www\.)?youtube\.com\/watch\?/', $src ) 423 && ! preg_match( '/\/youtu\.be\/.+$/', $src ) 424 && ! preg_match( '/\/\/vimeo\.com\/[\d]+$/', $src ) 425 && ! preg_match( '/\/\/(www\.)?dailymotion\.com\/video\/.+$/', $src ) 426 && ! preg_match( '/\/\/soundcloud\.com\/.+$/', $src ) 427 && ! preg_match( '/\/\/twitter\.com\/[^\/]+\/status\/[\d]+$/', $src ) 428 && ! preg_match( '/\/\/vine\.co\/v\/[^\/]+/', $src ) ) { 429 $src = ''; 430 } 431 432 return $src; 433 } 434 435 private function _process_meta_entry( $meta_name, $meta_value, $data ) { 436 if ( preg_match( '/:?(title|description|keywords)$/', $meta_name ) ) { 437 $data['_meta'][ $meta_name ] = $meta_value; 438 } else { 439 switch ( $meta_name ) { 440 case 'og:url': 441 case 'og:video': 442 case 'og:video:secure_url': 443 $meta_value = $this->_limit_embed( $meta_value ); 444 445 if ( ! isset( $data['_embed'] ) ) { 446 $data['_embed'] = array(); 447 } 448 449 if ( ! empty( $meta_value ) && ! in_array( $meta_value, $data['_embed'] ) ) { 450 $data['_embed'][] = $meta_value; 451 } 452 453 break; 454 case 'og:image': 455 case 'og:image:secure_url': 456 case 'twitter:image0:src': 457 case 'twitter:image0': 458 case 'twitter:image:src': 459 case 'twitter:image': 460 $meta_value = $this->_limit_img( $meta_value ); 461 462 if ( ! isset( $data['_img'] ) ) { 463 $data['_img'] = array(); 464 } 465 466 if ( ! empty( $meta_value ) && ! in_array( $meta_value, $data['_img'] ) ) { 467 $data['_img'][] = $meta_value; 468 } 469 470 break; 471 } 472 } 473 474 return $data; 475 } 476 321 477 /** 322 478 * Fetches and parses _meta, _img, and _links data from the source. … … 340 496 } 341 497 498 // Fetch and gather <meta> data first, so discovered media is offered 1st to user. 499 if ( empty( $data['_meta'] ) ) { 500 $data['_meta'] = array(); 501 } 502 503 if ( preg_match_all( '/<meta [^>]+>/', $source_content, $matches ) ) { 504 $items = $this->_limit_array( $matches[0] ); 505 506 foreach ( $items as $value ) { 507 if ( preg_match( '/(property|name)="([^"]+)"[^>]+content="([^"]+)"/', $value, $new_matches ) ) { 508 $meta_name = $this->_limit_string( $new_matches[2] ); 509 $meta_value = $this->_limit_string( $new_matches[3] ); 510 511 // Sanity check. $key is usually things like 'title', 'description', 'keywords', etc. 512 if ( strlen( $meta_name ) > 100 ) { 513 continue; 514 } 515 516 $data = $this->_process_meta_entry( $meta_name, $meta_value, $data ); 517 } 518 } 519 } 520 342 521 // Fetch and gather <img> data. 343 522 if ( empty( $data['_img'] ) ) { … … 345 524 } 346 525 347 if ( preg_match_all( '/<img (.+)[\s]?\/>/', $source_content, $matches ) ) { 348 if ( ! empty( $matches[0] ) ) { 349 foreach ( $matches[0] as $value ) { 350 if ( preg_match( '/<img[^>]+src="([^"]+)"[^>]+\/>/', $value, $new_matches ) ) { 351 if ( ! in_array( $new_matches[1], $data['_img'] ) ) { 352 $data['_img'][] = $new_matches[1]; 526 if ( preg_match_all( '/<img [^>]+>/', $source_content, $matches ) ) { 527 $items = $this->_limit_array( $matches[0] ); 528 529 foreach ( $items as $value ) { 530 if ( preg_match( '/src=(\'|")([^\'"]+)\\1/', $value, $new_matches ) ) { 531 $src = $this->_limit_img( $new_matches[2] ); 532 if ( ! empty( $src ) && ! in_array( $src, $data['_img'] ) ) { 533 $data['_img'][] = $src; 534 } 535 } 536 } 537 } 538 539 // Fetch and gather <iframe> data. 540 if ( empty( $data['_embed'] ) ) { 541 $data['_embed'] = array(); 542 } 543 544 if ( preg_match_all( '/<iframe [^>]+>/', $source_content, $matches ) ) { 545 $items = $this->_limit_array( $matches[0] ); 546 547 foreach ( $items as $value ) { 548 if ( preg_match( '/src=(\'|")([^\'"]+)\\1/', $value, $new_matches ) ) { 549 $src = $this->_limit_embed( $new_matches[2] ); 550 551 if ( ! empty( $src ) && ! in_array( $src, $data['_embed'] ) ) { 552 $data['_embed'][] = $src; 553 } 554 } 555 } 556 } 557 558 // Fetch and gather <link> data 559 if ( empty( $data['_links'] ) ) { 560 $data['_links'] = array(); 561 } 562 563 if ( preg_match_all( '/<link [^>]+>/', $source_content, $matches ) ) { 564 $items = $this->_limit_array( $matches[0] ); 565 566 foreach ( $items as $value ) { 567 if ( preg_match( '/(rel|itemprop)="([^"]+)"[^>]+href="([^"]+)"/', $value, $new_matches ) ) { 568 if ( 'alternate' === $new_matches[2] || 'thumbnailUrl' === $new_matches[2] || 'url' === $new_matches[2] ) { 569 $url = $this->_limit_url( $new_matches[3] ); 570 571 if ( ! empty( $url ) && empty( $data['_links'][ $new_matches[2] ] ) ) { 572 $data['_links'][ $new_matches[2] ] = $url; 353 573 } 354 574 } … … 357 577 } 358 578 359 // Fetch and gather <iframe> data.360 if ( empty( $data['_embed'] ) ) {361 $data['_embed'] = array();362 }363 364 if ( preg_match_all( '/<iframe (.+)[\s][^>]*>/', $source_content, $matches ) ) {365 if ( ! empty( $matches[0] ) ) {366 foreach ( $matches[0] as $value ) {367 if ( preg_match( '/<iframe[^>]+src=(\'|")([^"]+)(\'|")/', $value, $new_matches ) ) {368 if ( ! in_array( $new_matches[2], $data['_embed'] ) ) {369 if ( preg_match( '/\/\/www\.youtube\.com\/embed\/([^\?]+)\?.+$/', $new_matches[2], $src_matches ) ) {370 $data['_embed'][] = 'https://www.youtube.com/watch?v=' . $src_matches[1];371 } else if ( preg_match( '/\/\/player\.vimeo\.com\/video\/([\d]+)([\?\/]{1}.*)?$/', $new_matches[2], $src_matches ) ) {372 $data['_embed'][] = 'https://vimeo.com/' . (int) $src_matches[1];373 } else if ( preg_match( '/\/\/vine\.co\/v\/([^\/]+)\/embed/', $new_matches[2], $src_matches ) ) {374 $data['_embed'][] = 'https://vine.co/v/' . $src_matches[1];375 }376 }377 }378 }379 }380 }381 382 // Fetch and gather <meta> data.383 if ( empty( $data['_meta'] ) ) {384 $data['_meta'] = array();385 }386 387 if ( preg_match_all( '/<meta ([^>]+)[\s]?\/?>/', $source_content, $matches ) ) {388 if ( ! empty( $matches[0] ) ) {389 foreach ( $matches[0] as $key => $value ) {390 if ( preg_match( '/<meta[^>]+(property|name)="(.+)"[^>]+content="(.+)"/', $value, $new_matches ) ) {391 if ( empty( $data['_meta'][ $new_matches[2] ] ) ) {392 if ( preg_match( '/:?(title|description|keywords)$/', $new_matches[2] ) ) {393 $data['_meta'][ $new_matches[2] ] = str_replace( ''', "'", str_replace( '"', '', html_entity_decode( $new_matches[3] ) ) );394 } else {395 $data['_meta'][ $new_matches[2] ] = $new_matches[3];396 if ( 'og:url' == $new_matches[2] ) {397 if ( false !== strpos( $new_matches[3], '//www.youtube.com/watch?' )398 || false !== strpos( $new_matches[3], '//www.dailymotion.com/video/' )399 || preg_match( '/\/\/vimeo\.com\/[\d]+$/', $new_matches[3] )400 || preg_match( '/\/\/soundcloud\.com\/.+$/', $new_matches[3] )401 || preg_match( '/\/\/twitter\.com\/[^\/]+\/status\/[\d]+$/', $new_matches[3] )402 || preg_match( '/\/\/vine\.co\/v\/[^\/]+/', $new_matches[3] ) ) {403 if ( ! in_array( $new_matches[3], $data['_embed'] ) ) {404 $data['_embed'][] = $new_matches[3];405 }406 }407 } else if ( 'og:video' == $new_matches[2] || 'og:video:secure_url' == $new_matches[2] ) {408 if ( preg_match( '/\/\/www\.youtube\.com\/v\/([^\?]+)/', $new_matches[3], $src_matches ) ) {409 if ( ! in_array( 'https://www.youtube.com/watch?v=' . $src_matches[1], $data['_embed'] ) ) {410 $data['_embed'][] = 'https://www.youtube.com/watch?v=' . $src_matches[1];411 }412 } else if ( preg_match( '/\/\/vimeo.com\/moogaloop\.swf\?clip_id=([\d]+)$/', $new_matches[3], $src_matches ) ) {413 if ( ! in_array( 'https://vimeo.com/' . $src_matches[1], $data['_embed'] ) ) {414 $data['_embed'][] = 'https://vimeo.com/' . $src_matches[1];415 }416 }417 } else if ( 'og:image' == $new_matches[2] || 'og:image:secure_url' == $new_matches[2] ) {418 if ( ! in_array( $new_matches[3], $data['_img'] ) ) {419 $data['_img'][] = $new_matches[3];420 }421 }422 }423 }424 }425 }426 }427 }428 429 // Fetch and gather <link> data430 if ( empty( $data['_links'] ) ) {431 $data['_links'] = array();432 }433 434 if ( preg_match_all( '/<link ([^>]+)[\s]?\/>/', $source_content, $matches ) ) {435 if ( ! empty( $matches[0] ) ) {436 foreach ( $matches[0] as $key => $value ) {437 if ( preg_match( '/<link[^>]+(rel|itemprop)="([^"]+)"[^>]+href="([^"]+)"[^>]+\/>/', $value, $new_matches ) ) {438 if ( 'alternate' == $new_matches[2] || 'thumbnailUrl' == $new_matches[2] || 'url' == $new_matches[2] ) {439 if ( empty( $data['_links'][ $new_matches[2] ] ) ) {440 $data['_links'][ $new_matches[2] ] = $new_matches[3];441 }442 }443 }444 }445 }446 }447 448 579 return $data; 449 580 } … … 458 589 */ 459 590 public function merge_or_fetch_data() { 460 // Merge $_POST and $_GET, as appropriate ($_POST > $_GET), to remain backward compatible. 461 $data = array_merge_recursive( $_POST, $_GET ); 462 463 // Get the legacy QS params, or equiv POST data 464 $data['u'] = ( ! empty( $data['u'] ) && preg_match( '/^https?:/', $data['u'] ) ) ? $data['u'] : ''; 465 $data['s'] = ( ! empty( $data['s'] ) ) ? $data['s'] : ''; 466 $data['t'] = ( ! empty( $data['t'] ) ) ? $data['t'] : ''; 591 // Get data from $_POST and $_GET, as appropriate ($_POST > $_GET), to remain backward compatible. 592 $data = array(); 593 594 // Only instantiate the keys we want. Sanity check and sanitize each one. 595 foreach ( array( 'u', 's', 't', 'v', '_version' ) as $key ) { 596 if ( ! empty( $_POST[ $key ] ) ) { 597 $value = wp_unslash( $_POST[ $key ] ); 598 } else if ( ! empty( $_GET[ $key ] ) ) { 599 $value = wp_unslash( $_GET[ $key ] ); 600 } else { 601 continue; 602 } 603 604 if ( 'u' === $key ) { 605 $value = $this->_limit_url( $value ); 606 } else { 607 $value = $this->_limit_string( $value ); 608 } 609 610 if ( ! empty( $value ) ) { 611 $data[ $key ] = $value; 612 } 613 } 467 614 468 615 /** … … 475 622 if ( apply_filters( 'enable_press_this_media_discovery', true ) ) { 476 623 /* 477 * If no _meta (a new thing)was passed via $_POST, fetch data from source as fallback,478 * mak es PT fully backward compatible624 * If no title, _img, _embed, and _meta was passed via $_POST, fetch data from source as fallback, 625 * making PT fully backward compatible with the older bookmarklet. 479 626 */ 480 if ( empty( $ data['_meta']) && ! empty( $data['u'] ) ) {627 if ( empty( $_POST ) && ! empty( $data['u'] ) ) { 481 628 $data = $this->source_data_fetch_fallback( $data['u'], $data ); 482 } 483 } else { 484 if ( ! empty( $data['_img'] ) ) { 485 $data['_img'] = array(); 486 } 487 if ( ! empty( $data['_embed'] ) ) { 488 $data['_embed'] = array(); 489 } 490 if ( ! empty( $data['_meta'] ) ) { 491 $data['_meta'] = array(); 629 } else { 630 foreach ( array( '_img', '_embed', '_meta' ) as $type ) { 631 if ( empty( $_POST[ $type ] ) ) { 632 continue; 633 } 634 635 $data[ $type ] = array(); 636 $items = $this->_limit_array( $_POST[ $type ] ); 637 $items = wp_unslash( $items ); 638 639 foreach ( $items as $key => $value ) { 640 $key = $this->_limit_string( wp_unslash( $key ) ); 641 642 // Sanity check. $key is usually things like 'title', 'description', 'keywords', etc. 643 if ( empty( $key ) || strlen( $key ) > 100 ) { 644 continue; 645 } 646 647 if ( $type === '_meta' ) { 648 $value = $this->_limit_string( $value ); 649 650 if ( ! empty( $value ) ) { 651 $data = $this->_process_meta_entry( $key, $value, $data ); 652 } 653 } else if ( $type === '_img' ) { 654 $value = $this->_limit_img( $value ); 655 656 if ( ! empty( $value ) ) { 657 $data[ $type ][] = $value; 658 } 659 } else if ( $type === '_embed' ) { 660 $value = $this->_limit_embed( $value ); 661 662 if ( ! empty( $value ) ) { 663 $data[ $type ][] = $value; 664 } 665 } 666 } 667 } 492 668 } 493 669 }
Note: See TracChangeset
for help on using the changeset viewer.