You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
102 lines
3.9 KiB
102 lines
3.9 KiB
--- C:\Users\Edward\Webs\htmlpurifier\maintenance\PH5P.php 2008-07-07 09:12:12.000000000 -0400 |
|
+++ C:\Users\Edward\Webs\htmlpurifier\maintenance/PH5P.new.php 2008-12-06 02:29:34.988800000 -0500 |
|
@@ -65,7 +65,7 @@ |
|
|
|
public function __construct($data) { |
|
$data = str_replace("\r\n", "\n", $data); |
|
- $date = str_replace("\r", null, $data); |
|
+ $data = str_replace("\r", null, $data); |
|
|
|
$this->data = $data; |
|
$this->char = -1; |
|
@@ -211,7 +211,10 @@ |
|
// If nothing is returned, emit a U+0026 AMPERSAND character token. |
|
// Otherwise, emit the character token that was returned. |
|
$char = (!$entity) ? '&' : $entity; |
|
- $this->emitToken($char); |
|
+ $this->emitToken(array( |
|
+ 'type' => self::CHARACTR, |
|
+ 'data' => $char |
|
+ )); |
|
|
|
// Finally, switch to the data state. |
|
$this->state = 'data'; |
|
@@ -708,7 +711,7 @@ |
|
} elseif($char === '&') { |
|
/* U+0026 AMPERSAND (&) |
|
Switch to the entity in attribute value state. */ |
|
- $this->entityInAttributeValueState('non'); |
|
+ $this->entityInAttributeValueState(); |
|
|
|
} elseif($char === '>') { |
|
/* U+003E GREATER-THAN SIGN (>) |
|
@@ -738,7 +741,8 @@ |
|
? '&' |
|
: $entity; |
|
|
|
- $this->emitToken($char); |
|
+ $last = count($this->token['attr']) - 1; |
|
+ $this->token['attr'][$last]['value'] .= $char; |
|
} |
|
|
|
private function bogusCommentState() { |
|
@@ -1066,6 +1070,11 @@ |
|
$this->char++; |
|
|
|
if(in_array($id, $this->entities)) { |
|
+ if ($e_name[$c-1] !== ';') { |
|
+ if ($c < $len && $e_name[$c] == ';') { |
|
+ $this->char++; // consume extra semicolon |
|
+ } |
|
+ } |
|
$entity = $id; |
|
break; |
|
} |
|
@@ -2084,7 +2093,7 @@ |
|
/* Reconstruct the active formatting elements, if any. */ |
|
$this->reconstructActiveFormattingElements(); |
|
|
|
- $this->insertElement($token); |
|
+ $this->insertElement($token, true, true); |
|
break; |
|
} |
|
break; |
|
@@ -3465,7 +3474,18 @@ |
|
} |
|
} |
|
|
|
- private function insertElement($token, $append = true) { |
|
+ private function insertElement($token, $append = true, $check = false) { |
|
+ // Proprietary workaround for libxml2's limitations with tag names |
|
+ if ($check) { |
|
+ // Slightly modified HTML5 tag-name modification, |
|
+ // removing anything that's not an ASCII letter, digit, or hyphen |
|
+ $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']); |
|
+ // Remove leading hyphens and digits |
|
+ $token['name'] = ltrim($token['name'], '-0..9'); |
|
+ // In theory, this should never be needed, but just in case |
|
+ if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice |
|
+ } |
|
+ |
|
$el = $this->dom->createElement($token['name']); |
|
|
|
foreach($token['attr'] as $attr) { |
|
@@ -3659,7 +3679,7 @@ |
|
} |
|
} |
|
|
|
- private function generateImpliedEndTags(array $exclude = array()) { |
|
+ private function generateImpliedEndTags($exclude = array()) { |
|
/* When the steps below require the UA to generate implied end tags, |
|
then, if the current node is a dd element, a dt element, an li element, |
|
a p element, a td element, a th element, or a tr element, the UA must |
|
@@ -3673,7 +3693,8 @@ |
|
} |
|
} |
|
|
|
- private function getElementCategory($name) { |
|
+ private function getElementCategory($node) { |
|
+ $name = $node->tagName; |
|
if(in_array($name, $this->special)) |
|
return self::SPECIAL; |
|
|
|
|