You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
500 lines
11 KiB
500 lines
11 KiB
<?php

/**
 * SimplePie
 *
 * A PHP-Based RSS and Atom Feed Framework.
 * Takes the hard work out of managing a complete RSS/Atom solution.
 *
 * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice, this list of
 *     conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice, this list
 *     of conditions and the following disclaimer in the documentation and/or other materials
 *     provided with the distribution.
 *
 *   * Neither the name of the SimplePie Team nor the names of its contributors may be used
 *     to endorse or promote products derived from this software without specific prior
 *     written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * @package SimplePie
 * @version 1.3.1
 * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
 * @author Ryan Parman
 * @author Geoffrey Sneddon
 * @author Ryan McCue
 * @link http://simplepie.org/ SimplePie
 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
 */

/**
 * HTTP Response Parser
 *
 * @package SimplePie
 * @subpackage HTTP
 */
class SimplePie_HTTP_Parser
{
    /**
     * HTTP Version
     *
     * @var float
     */
    public $http_version = 0.0;

    /**
     * Status code
     *
     * @var int
     */
    public $status_code = 0;

    /**
     * Reason phrase
     *
     * @var string
     */
    public $reason = '';

    /**
     * Key/value pairs of the headers (keys are lower-cased header names)
     *
     * @var array
     */
    public $headers = array();

    /**
     * Body of the response
     *
     * @var string
     */
    public $body = '';

    /**
     * Current state of the state machine
     *
     * Holds the name of the method that parse() will call next, 'emit' once
     * parsing is finished, or false when the input was rejected.
     *
     * @var string
     */
    protected $state = 'http_version';

    /**
     * Input data
     *
     * @var string
     */
    protected $data = '';

    /**
     * Input data length (to avoid calling strlen() everytime this is needed)
     *
     * @var int
     */
    protected $data_length = 0;

    /**
     * Current position of the pointer
     *
     * @var int
     */
    protected $position = 0;

    /**
     * Name of the header currently being parsed
     *
     * @var string
     */
    protected $name = '';

    /**
     * Value of the header currently being parsed
     *
     * @var string
     */
    protected $value = '';

    /**
     * Create an instance of the class with the input data
     *
     * @param string $data Input data
     */
    public function __construct($data)
    {
        $this->data = $data;
        $this->data_length = strlen($this->data);
    }

    /**
     * Parse the input data
     *
     * Repeatedly calls the method named by the current state until the state
     * becomes false or 'emit', or the pointer reaches the end of the input.
     * The input buffer is released afterwards.
     *
     * On failure every public result property is reset; note that
     * $http_version and $status_code are reset to empty strings (not 0.0 / 0).
     *
     * @return bool true on success, false on failure
     */
    public function parse()
    {
        while ($this->state && $this->state !== 'emit' && $this->has_data())
        {
            $state = $this->state;
            $this->$state();
        }
        $this->data = '';

        // Ending in 'body' means the headers were complete but no body bytes
        // followed them; that still counts as a successful parse.
        if ($this->state === 'emit' || $this->state === 'body')
        {
            return true;
        }

        $this->http_version = '';
        $this->status_code = '';
        $this->reason = '';
        $this->headers = array();
        $this->body = '';
        return false;
    }

    /**
     * Check whether there is data beyond the pointer
     *
     * @return bool true if there is further data, false if not
     */
    protected function has_data()
    {
        return (bool) ($this->position < $this->data_length);
    }

    /**
     * See if the next character is LWS
     *
     * A tab or space is LWS, as is a line feed that is immediately followed
     * by a tab or space (a folded header line).
     *
     * @return bool true if the next character is LWS, false if not
     */
    protected function is_linear_whitespace()
    {
        return (bool) ($this->data[$this->position] === "\x09"
            || $this->data[$this->position] === "\x20"
            || ($this->data[$this->position] === "\x0A"
                && isset($this->data[$this->position + 1])
                && ($this->data[$this->position + 1] === "\x09" || $this->data[$this->position + 1] === "\x20")));
    }

    /**
     * Parse the HTTP version
     *
     * Requires the input to contain at least one line feed and to start with
     * "HTTP/" (case-insensitive). A version with more than one dot is rejected.
     */
    protected function http_version()
    {
        if (strpos($this->data, "\x0A") !== false && strtoupper(substr($this->data, 0, 5)) === 'HTTP/')
        {
            $len = strspn($this->data, '0123456789.', 5);
            $this->http_version = substr($this->data, 5, $len);
            $this->position += 5 + $len;
            if (substr_count($this->http_version, '.') <= 1)
            {
                $this->http_version = (float) $this->http_version;
                // Skip the whitespace separating the version from the status code.
                $this->position += strspn($this->data, "\x09\x20", $this->position);
                $this->state = 'status';
            }
            else
            {
                $this->state = false;
            }
        }
        else
        {
            $this->state = false;
        }
    }

    /**
     * Parse the status code
     *
     * Fails the parse when no digits are found at the pointer.
     */
    protected function status()
    {
        if ($len = strspn($this->data, '0123456789', $this->position))
        {
            $this->status_code = (int) substr($this->data, $this->position, $len);
            $this->position += $len;
            $this->state = 'reason';
        }
        else
        {
            $this->state = false;
        }
    }

    /**
     * Parse the reason phrase
     *
     * Takes everything up to the next line feed, trimmed of tabs, carriage
     * returns and spaces, then moves the pointer past that line feed.
     */
    protected function reason()
    {
        $len = strcspn($this->data, "\x0A", $this->position);
        $this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
        $this->position += $len + 1;
        $this->state = 'new_line';
    }

    /**
     * Deal with a new line, shifting data around as needed
     *
     * Stores the header collected so far (if both its name and value are
     * non-empty), then decides whether the next line starts another header or
     * is the blank line that introduces the body.
     */
    protected function new_line()
    {
        $this->value = trim($this->value, "\x0D\x20");
        if ($this->name !== '' && $this->value !== '')
        {
            $this->name = strtolower($this->name);
            // We should only use the last Content-Type header. c.f. issue #1
            if (isset($this->headers[$this->name]) && $this->name !== 'content-type')
            {
                $this->headers[$this->name] .= ', ' . $this->value;
            }
            else
            {
                $this->headers[$this->name] = $this->value;
            }
        }
        $this->name = '';
        $this->value = '';

        // Look at the two bytes at the pointer (not a substring of the single
        // byte at the pointer, which could never equal CRLF).
        if (substr($this->data, $this->position, 2) === "\x0D\x0A")
        {
            $this->position += 2;
            $this->state = 'body';
        }
        elseif ($this->data[$this->position] === "\x0A")
        {
            $this->position++;
            $this->state = 'body';
        }
        else
        {
            $this->state = 'name';
        }
    }

    /**
     * Parse a header name
     *
     * Reads up to the next colon or line feed. A colon ends the name and
     * starts the value; a line feed reached first hands the line to
     * new_line() with no name recorded. Running out of input fails the parse.
     */
    protected function name()
    {
        $len = strcspn($this->data, "\x0A:", $this->position);
        if (isset($this->data[$this->position + $len]))
        {
            if ($this->data[$this->position + $len] === "\x0A")
            {
                // Pointer is left on the line feed for new_line() to inspect.
                $this->position += $len;
                $this->state = 'new_line';
            }
            else
            {
                $this->name = substr($this->data, $this->position, $len);
                $this->position += $len + 1;
                $this->state = 'value';
            }
        }
        else
        {
            $this->state = false;
        }
    }

    /**
     * Parse LWS, replacing consecutive LWS characters with a single space
     */
    protected function linear_whitespace()
    {
        do
        {
            if (substr($this->data, $this->position, 2) === "\x0D\x0A")
            {
                $this->position += 2;
            }
            elseif ($this->data[$this->position] === "\x0A")
            {
                $this->position++;
            }
            $this->position += strspn($this->data, "\x09\x20", $this->position);
        } while ($this->has_data() && $this->is_linear_whitespace());
        $this->value .= "\x20";
    }

    /**
     * See what state to move to while within non-quoted header values
     */
    protected function value()
    {
        if ($this->is_linear_whitespace())
        {
            $this->linear_whitespace();
        }
        else
        {
            switch ($this->data[$this->position])
            {
                case '"':
                    // Workaround for ETags: we have to include the quotes as
                    // part of the tag.
                    if (strtolower($this->name) === 'etag')
                    {
                        $this->value .= '"';
                        $this->position++;
                        $this->state = 'value_char';
                        break;
                    }
                    $this->position++;
                    $this->state = 'quote';
                    break;

                case "\x0A":
                    $this->position++;
                    $this->state = 'new_line';
                    break;

                default:
                    $this->state = 'value_char';
                    break;
            }
        }
    }

    /**
     * Parse a header value while outside quotes
     *
     * Appends everything up to the next tab, space, line feed or double quote.
     */
    protected function value_char()
    {
        $len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
        $this->value .= substr($this->data, $this->position, $len);
        $this->position += $len;
        $this->state = 'value';
    }

    /**
     * See what state to move to while within quoted header values
     */
    protected function quote()
    {
        if ($this->is_linear_whitespace())
        {
            $this->linear_whitespace();
        }
        else
        {
            switch ($this->data[$this->position])
            {
                case '"':
                    // Closing quote: drop it and continue outside quotes.
                    $this->position++;
                    $this->state = 'value';
                    break;

                case "\x0A":
                    $this->position++;
                    $this->state = 'new_line';
                    break;

                case '\\':
                    $this->position++;
                    $this->state = 'quote_escaped';
                    break;

                default:
                    $this->state = 'quote_char';
                    break;
            }
        }
    }

    /**
     * Parse a header value while within quotes
     *
     * Appends everything up to the next tab, space, line feed, double quote
     * or backslash, then stays inside the quoted string so that the closing
     * quote and backslash escapes are interpreted by quote().
     */
    protected function quote_char()
    {
        $len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
        $this->value .= substr($this->data, $this->position, $len);
        $this->position += $len;
        $this->state = 'quote';
    }

    /**
     * Parse an escaped character within quotes
     *
     * Appends the character following the backslash verbatim.
     */
    protected function quote_escaped()
    {
        $this->value .= $this->data[$this->position];
        $this->position++;
        $this->state = 'quote';
    }

    /**
     * Parse the body
     *
     * Takes the remainder of the input as the body. When a non-empty
     * Transfer-Encoding header was seen it is removed from the headers and
     * the body is handed to chunked() for decoding.
     */
    protected function body()
    {
        $this->body = substr($this->data, $this->position);
        if (!empty($this->headers['transfer-encoding']))
        {
            unset($this->headers['transfer-encoding']);
            $this->state = 'chunked';
        }
        else
        {
            $this->state = 'emit';
        }
    }

    /**
     * Parse a "Transfer-Encoding: chunked" body
     *
     * If the body does not look chunked it is left untouched. Otherwise the
     * chunks are concatenated into the body; anything after the terminating
     * zero-length chunk is ignored.
     */
    protected function chunked()
    {
        if (!preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', trim($this->body)))
        {
            $this->state = 'emit';
            return;
        }

        $decoded = '';
        $encoded = $this->body;

        while (true)
        {
            $is_chunked = (bool) preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', $encoded, $matches);
            if (!$is_chunked)
            {
                // Looks like it's not chunked after all
                $this->state = 'emit';
                return;
            }

            $length = hexdec($matches[1]);
            if ($length === 0)
            {
                // Ignore trailer headers
                $this->state = 'emit';
                $this->body = $decoded;
                return;
            }

            // $matches[0] is the whole chunk-size line including its CRLF.
            $chunk_length = strlen($matches[0]);
            $decoded .= substr($encoded, $chunk_length, $length);
            // Skip the chunk data and the CRLF that terminates it.
            $encoded = substr($encoded, $chunk_length + $length + 2);

            if (trim($encoded) === '0' || empty($encoded))
            {
                $this->state = 'emit';
                $this->body = $decoded;
                return;
            }
        }
    }
}
|
|
|