Changeset 59141 for trunk/src/wp-includes/SimplePie/src/HTTP/Parser.php
- Timestamp:
- 09/30/2024 10:48:16 PM (8 months ago)
- Location:
- trunk/src/wp-includes/SimplePie/src
- Files:
-
- 1 added
- 1 edited
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/wp-includes/SimplePie/src/HTTP/Parser.php
/**
 * SimplePie
 *
 * Takes the hard work out of managing a complete RSS/Atom solution.
 *
 * Copyright (c) 2004-2022, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
 * All rights reserved.
 *
 * [Remaining license text is elided in the changeset view this was taken from.]
 */

namespace SimplePie\HTTP;

/**
 * Parser for a raw HTTP response (status line, headers and body).
 *
 * The parser is a small state machine: each state is the name of a protected
 * method on this class, and parse() keeps calling the method named by the
 * current state until the machine reaches the "emit" state, hits an error, or
 * runs out of input.
 *
 * @subpackage HTTP
 */
class Parser
{
    /**
     * HTTP Version
     *
     * @var float
     */
    public $http_version = 0.0;

    /**
     * Status code
     *
     * @var int
     */
    public $status_code = 0;

    /**
     * Reason phrase
     *
     * @var string
     */
    public $reason = '';

    /**
     * Key/value pairs of the headers (keys are lower-cased header names)
     *
     * @var array
     */
    public $headers = [];

    /**
     * Body of the response
     *
     * @var string
     */
    public $body = '';

    // Every non-error state value is the name of the method that handles it.
    private const STATE_HTTP_VERSION = 'http_version';

    private const STATE_STATUS = 'status';

    private const STATE_REASON = 'reason';

    private const STATE_NEW_LINE = 'new_line';

    private const STATE_BODY = 'body';

    private const STATE_NAME = 'name';

    private const STATE_VALUE = 'value';

    private const STATE_VALUE_CHAR = 'value_char';

    private const STATE_QUOTE = 'quote';

    private const STATE_QUOTE_ESCAPED = 'quote_escaped';

    private const STATE_QUOTE_CHAR = 'quote_char';

    private const STATE_CHUNKED = 'chunked';

    private const STATE_EMIT = 'emit';

    // Falsy on purpose: parse() stops looping as soon as the state is falsy.
    private const STATE_ERROR = false;

    /**
     * Current state of the state machine
     *
     * @var self::STATE_*
     */
    protected $state = self::STATE_HTTP_VERSION;

    /**
     * Input data
     *
     * @var string
     */
    protected $data = '';

    /**
     * Input data length (to avoid calling strlen() every time this is needed)
     *
     * @var int
     */
    protected $data_length = 0;

    /**
     * Current position of the pointer
     *
     * @var int
     */
    protected $position = 0;

    /**
     * Name of the header currently being parsed
     *
     * @var string
     */
    protected $name = '';

    /**
     * Value of the header currently being parsed
     *
     * @var string
     */
    protected $value = '';

    /**
     * Create an instance of the class with the input data
     *
     * @param string $data Input data
     */
    public function __construct($data)
    {
        $this->data = $data;
        $this->data_length = strlen($this->data);
    }

    /**
     * Parse the input data
     *
     * On success the public properties hold the parsed response. On failure
     * they are reset to empty values of their documented types.
     *
     * @return bool true on success, false on failure
     */
    public function parse()
    {
        // Each state handler is only ever entered with at least one unread
        // byte, because has_data() is checked before every dispatch.
        while ($this->state && $this->state !== self::STATE_EMIT && $this->has_data()) {
            $state = $this->state;
            $this->$state();
        }

        // The raw input is no longer needed once parsing has stopped.
        $this->data = '';

        // Stopping in the "body" state means the headers ended exactly at the
        // end of the input, i.e. a valid response with an empty body.
        if ($this->state === self::STATE_EMIT || $this->state === self::STATE_BODY) {
            return true;
        }

        $this->http_version = 0.0;
        $this->status_code = 0;
        $this->reason = '';
        $this->headers = [];
        $this->body = '';
        return false;
    }

    /**
     * Check whether there is data beyond the pointer
     *
     * @return bool true if there is further data, false if not
     */
    protected function has_data()
    {
        return $this->position < $this->data_length;
    }

    /**
     * See if the next character is LWS
     *
     * A tab or a space counts as LWS, and so does a line feed that is
     * immediately followed by a tab or a space (a folded header line).
     *
     * @return bool true if the next character is LWS, false if not
     */
    protected function is_linear_whitespace()
    {
        $current = $this->data[$this->position];
        if ($current === "\x09" || $current === "\x20") {
            return true;
        }

        if ($current !== "\x0A" || !isset($this->data[$this->position + 1])) {
            return false;
        }

        $next = $this->data[$this->position + 1];
        return $next === "\x09" || $next === "\x20";
    }

    /**
     * Parse the HTTP version
     *
     * Requires the input to contain at least one line feed and to start with
     * "HTTP/" (case-insensitive); anything else is an error.
     */
    protected function http_version()
    {
        if (strpos($this->data, "\x0A") === false || strtoupper(substr($this->data, 0, 5)) !== 'HTTP/') {
            $this->state = self::STATE_ERROR;
            return;
        }

        $len = strspn($this->data, '0123456789.', 5);
        $this->http_version = substr($this->data, 5, $len);
        $this->position += 5 + $len;

        // More than one dot (e.g. "1.1.1") is not a valid version number.
        if (substr_count($this->http_version, '.') > 1) {
            $this->state = self::STATE_ERROR;
            return;
        }

        $this->http_version = (float) $this->http_version;
        // Skip the blanks that separate the version from the status code.
        $this->position += strspn($this->data, "\x09\x20", $this->position);
        $this->state = self::STATE_STATUS;
    }

    /**
     * Parse the status code
     */
    protected function status()
    {
        $len = strspn($this->data, '0123456789', $this->position);
        if ($len === 0) {
            $this->state = self::STATE_ERROR;
            return;
        }

        $this->status_code = (int) substr($this->data, $this->position, $len);
        $this->position += $len;
        $this->state = self::STATE_REASON;
    }

    /**
     * Parse the reason phrase
     */
    protected function reason()
    {
        $len = strcspn($this->data, "\x0A", $this->position);
        $this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
        // "+ 1" steps over the line feed that terminates the status line.
        $this->position += $len + 1;
        $this->state = self::STATE_NEW_LINE;
    }

    /**
     * Deal with a new line, shifting data around as needed
     *
     * Stores the header collected so far (if any), then decides whether the
     * next line is another header or the blank line that precedes the body.
     */
    protected function new_line()
    {
        $this->value = trim($this->value, "\x0D\x20");
        if ($this->name !== '' && $this->value !== '') {
            $this->name = strtolower($this->name);
            // We should only use the last Content-Type header. c.f. issue #1
            if (isset($this->headers[$this->name]) && $this->name !== 'content-type') {
                // Repeated headers are folded into one comma-separated value.
                $this->headers[$this->name] .= ', ' . $this->value;
            } else {
                $this->headers[$this->name] = $this->value;
            }
        }
        $this->name = '';
        $this->value = '';

        // Compare two bytes of the input, not a substring of a single
        // character (which could never equal CRLF).
        if (substr($this->data, $this->position, 2) === "\x0D\x0A") {
            $this->position += 2;
            $this->state = self::STATE_BODY;
        } elseif ($this->data[$this->position] === "\x0A") {
            $this->position++;
            $this->state = self::STATE_BODY;
        } else {
            $this->state = self::STATE_NAME;
        }
    }

    /**
     * Parse a header name
     */
    protected function name()
    {
        $len = strcspn($this->data, "\x0A:", $this->position);
        if (!isset($this->data[$this->position + $len])) {
            // Input ended before either a colon or a line feed was found.
            $this->state = self::STATE_ERROR;
            return;
        }

        if ($this->data[$this->position + $len] === "\x0A") {
            // A line without a colon: skip it and let new_line() look at the
            // line feed.
            $this->position += $len;
            $this->state = self::STATE_NEW_LINE;
            return;
        }

        $this->name = substr($this->data, $this->position, $len);
        // "+ 1" steps over the colon.
        $this->position += $len + 1;
        $this->state = self::STATE_VALUE;
    }

    /**
     * Parse LWS, replacing consecutive LWS characters with a single space
     */
    protected function linear_whitespace()
    {
        do {
            if (substr($this->data, $this->position, 2) === "\x0D\x0A") {
                $this->position += 2;
            } elseif ($this->data[$this->position] === "\x0A") {
                $this->position++;
            }
            $this->position += strspn($this->data, "\x09\x20", $this->position);
        } while ($this->has_data() && $this->is_linear_whitespace());
        $this->value .= "\x20";
    }

    /**
     * See what state to move to while within non-quoted header values
     */
    protected function value()
    {
        if ($this->is_linear_whitespace()) {
            $this->linear_whitespace();
            return;
        }

        switch ($this->data[$this->position]) {
            case '"':
                // Workaround for ETags: we have to include the quotes as
                // part of the tag.
                if (strtolower($this->name) === 'etag') {
                    $this->value .= '"';
                    $this->position++;
                    $this->state = self::STATE_VALUE_CHAR;
                    break;
                }
                $this->position++;
                $this->state = self::STATE_QUOTE;
                break;

            case "\x0A":
                $this->position++;
                $this->state = self::STATE_NEW_LINE;
                break;

            default:
                $this->state = self::STATE_VALUE_CHAR;
                break;
        }
    }

    /**
     * Parse a header value while outside quotes
     */
    protected function value_char()
    {
        // Consume everything up to the next blank, line feed or double quote.
        $len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
        $this->value .= substr($this->data, $this->position, $len);
        $this->position += $len;
        $this->state = self::STATE_VALUE;
    }

    /**
     * See what state to move to while within quoted header values
     */
    protected function quote()
    {
        if ($this->is_linear_whitespace()) {
            $this->linear_whitespace();
            return;
        }

        switch ($this->data[$this->position]) {
            case '"':
                // Closing quote: drop it and continue with the unquoted value.
                $this->position++;
                $this->state = self::STATE_VALUE;
                break;

            case "\x0A":
                // Unterminated quoted string: the line ends the header anyway.
                $this->position++;
                $this->state = self::STATE_NEW_LINE;
                break;

            case '\\':
                $this->position++;
                $this->state = self::STATE_QUOTE_ESCAPED;
                break;

            default:
                $this->state = self::STATE_QUOTE_CHAR;
                break;
        }
    }

    /**
     * Parse a header value while within quotes
     */
    protected function quote_char()
    {
        // Consume everything up to the next blank, line feed, double quote or
        // backslash.
        $len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
        $this->value .= substr($this->data, $this->position, $len);
        $this->position += $len;
        // Stay inside the quoted string: only quote() may end it (on the
        // closing quote) or hand a backslash escape to quote_escaped().
        $this->state = self::STATE_QUOTE;
    }

    /**
     * Parse an escaped character within quotes
     */
    protected function quote_escaped()
    {
        // The backslash was already skipped; take the next byte literally.
        $this->value .= $this->data[$this->position];
        $this->position++;
        $this->state = self::STATE_QUOTE;
    }

    /**
     * Parse the body
     */
    protected function body()
    {
        $this->body = substr($this->data, $this->position);
        if (!empty($this->headers['transfer-encoding'])) {
            // The header is removed from the result and the body is handed to
            // chunked(), which leaves it untouched if it is not chunk-encoded.
            unset($this->headers['transfer-encoding']);
            $this->state = self::STATE_CHUNKED;
        } else {
            $this->state = self::STATE_EMIT;
        }
    }

    /**
     * Parse a "Transfer-Encoding: chunked" body
     *
     * If the body does not look chunk-encoded it is left as it is.
     */
    protected function chunked()
    {
        // A chunk starts with a hexadecimal size, optional extensions, CRLF.
        $chunk_header = '/^([0-9a-f]+)[^\r\n]*\r\n/i';

        if (!preg_match($chunk_header, trim($this->body))) {
            $this->state = self::STATE_EMIT;
            return;
        }

        $decoded = '';
        $encoded = $this->body;

        while (true) {
            if (!preg_match($chunk_header, $encoded, $matches)) {
                // Looks like it's not chunked after all
                $this->state = self::STATE_EMIT;
                return;
            }

            $length = hexdec(trim($matches[1]));
            if ($length === 0) {
                // Ignore trailer headers
                $this->state = self::STATE_EMIT;
                $this->body = $decoded;
                return;
            }

            $chunk_length = strlen($matches[0]);
            $decoded .= substr($encoded, $chunk_length, $length);
            // "+ 2" steps over the CRLF that follows the chunk data.
            $encoded = substr($encoded, $chunk_length + $length + 2);

            // BC for PHP < 8.0: substr() can return bool instead of string
            $encoded = ($encoded === false) ? '' : $encoded;

            if ($encoded === '' || trim($encoded) === '0') {
                $this->state = self::STATE_EMIT;
                $this->body = $decoded;
                return;
            }
        }
    }

    /**
     * Prepare headers (take care of proxies headers)
     *
     * Keeps only the part of $headers after the first ($count - 1) blank-line
     * separators, then strips a leading proxy "200 Connection established"
     * preamble (HTTP/1.0 and/or HTTP/1.1) if one is present.
     *
     * @param string $headers Raw headers
     * @param int    $count   Redirection count. Default to 1.
     *
     * @return string
     */
    public static function prepareHeaders($headers, $count = 1)
    {
        // With a limit, the last element returned by explode() holds the
        // whole remainder of the string.
        $data = explode("\r\n\r\n", $headers, $count);
        $data = array_pop($data);

        foreach (['HTTP/1.0', 'HTTP/1.1'] as $version) {
            if (false !== stripos($data, $version . " 200 Connection established\r\n")) {
                $exploded = explode("\r\n\r\n", $data, 2);
                $data = end($exploded);
            }
        }

        return $data;
    }
}

class_alias('SimplePie\HTTP\Parser', 'SimplePie_HTTP_Parser');
Note: See TracChangeset
for help on using the changeset viewer.