You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
314 lines
8.3 KiB
314 lines
8.3 KiB
<?php |
|
/**
 * Class AMP_Blacklist_Sanitizer
 *
 * @package AMP
 */

/**
 * Strips blacklisted tags and attributes from content.
 *
 * See the following for the blacklist:
 * https://github.com/ampproject/amphtml/blob/master/spec/amp-html-format.md#html-tags
 *
 * @since 0.5 This has been replaced by AMP_Tag_And_Attribute_Sanitizer but is kept around for back-compat.
 * @deprecated
 */
|
class AMP_Blacklist_Sanitizer extends AMP_Base_Sanitizer {

	/**
	 * Pattern for the `wp-att-<digits>` token that is stripped from the `rel` attribute of links.
	 *
	 * @var string
	 */
	const PATTERN_REL_WP_ATTACHMENT = '#wp-att-([\d]+)#';

	/**
	 * Default args.
	 *
	 * The keys are the ones looked up in `$this->args` when the blacklists are built; each one
	 * holds a list of extra entries that is merged after the built-in entries.
	 *
	 * @var array
	 */
	protected $DEFAULT_ARGS = [
		'add_blacklisted_protocols'  => [],
		'add_blacklisted_tags'       => [],
		'add_blacklisted_attributes' => [],
	];

	/**
	 * Sanitize.
	 *
	 * Flags the call as deprecated, removes every blacklisted tag found below the root element and
	 * then walks the remaining tree to strip blacklisted attributes and unwrap invalid elements.
	 */
	public function sanitize() {
		_deprecated_function( __METHOD__, '0.7', 'AMP_Tag_And_Attribute_Sanitizer::sanitize' );

		$blacklisted_tags       = $this->get_blacklisted_tags();
		$blacklisted_attributes = $this->get_blacklisted_attributes();
		$blacklisted_protocols  = $this->get_blacklisted_protocols();

		$body = $this->root_element;
		$this->strip_tags( $body, $blacklisted_tags );
		$this->strip_attributes_recursive( $body, $blacklisted_attributes, $blacklisted_protocols );
	}

	/**
	 * Strip attributes recursively.
	 *
	 * Non-element nodes are ignored. `font` elements and `a` elements that fail validate_a_node()
	 * are replaced by their (sanitized) children. For every other element the blacklisted and
	 * `on*` attributes are removed, link-specific attributes are cleaned up, and the children are
	 * processed in turn.
	 *
	 * @param DOMNode $node           DOM Node.
	 * @param array   $bad_attributes Bad attributes (lower-case names).
	 * @param array   $bad_protocols  Bad protocols. Only passed along to nested calls; nothing in
	 *                                this class consults the list.
	 */
	private function strip_attributes_recursive( $node, $bad_attributes, $bad_protocols ) {
		if ( XML_ELEMENT_NODE !== $node->nodeType ) {
			return;
		}

		$node_name = $node->nodeName;

		// Some nodes may contain valid content but are themselves invalid.
		// Remove the node but preserve the children.
		if ( 'font' === $node_name ) {
			$this->replace_node_with_children( $node, $bad_attributes, $bad_protocols );
			return;
		}

		if ( 'a' === $node_name && false === $this->validate_a_node( $node ) ) {
			$this->replace_node_with_children( $node, $bad_attributes, $bad_protocols );
			return;
		}

		if ( $node->hasAttributes() ) {
			// Walk backwards so that removing an attribute does not shift the ones still to be visited.
			$length = $node->attributes->length;
			for ( $i = $length - 1; $i >= 0; $i-- ) {
				$attribute      = $node->attributes->item( $i );
				$attribute_name = strtolower( $attribute->name );
				if ( in_array( $attribute_name, $bad_attributes, true ) ) {
					$this->remove_invalid_attribute( $node, $attribute_name );
					continue;
				}

				// The on* attributes (like onclick) are a special case; a bare `on` attribute is kept.
				if ( 0 === stripos( $attribute_name, 'on' ) && 'on' !== $attribute_name ) {
					$this->remove_invalid_attribute( $node, $attribute_name );
					continue;
				}

				if ( 'a' === $node_name ) {
					$this->sanitize_a_attribute( $node, $attribute );
				}
			}
		}

		// Walk backwards: a child that gets replaced by its own children only changes positions
		// at or after its index, which have already been handled.
		$length = $node->childNodes->length;
		for ( $i = $length - 1; $i >= 0; $i-- ) {
			$child_node = $node->childNodes->item( $i );

			$this->strip_attributes_recursive( $child_node, $bad_attributes, $bad_protocols );
		}
	}

	/**
	 * Strip tags.
	 *
	 * Removes every descendant element with one of the given tag names. When a removal leaves the
	 * parent empty (and the parent is not `body`), the parent is removed as well.
	 *
	 * @param DOMElement $node      Node.
	 * @param string[]   $tag_names Tag names.
	 */
	private function strip_tags( $node, $tag_names ) {
		foreach ( $tag_names as $tag_name ) {
			/*
			 * getElementsByTagName() returns a live list that shrinks while nodes are removed.
			 * Removing an element together with its now-empty parent of the same tag used to leave
			 * the index pointing past the end of the list, yielding a null item. Work on a snapshot
			 * instead.
			 */
			$elements = iterator_to_array( $node->getElementsByTagName( $tag_name ), false );

			// Keep the original last-to-first order so that descendants are handled before ancestors.
			foreach ( array_reverse( $elements ) as $element ) {
				$parent_node = $element->parentNode;

				// Already detached, e.g. removed as the empty parent of a previously removed element.
				if ( ! $parent_node ) {
					continue;
				}

				$this->remove_invalid_child( $element );

				if ( 'body' !== $parent_node->nodeName && AMP_DOM_Utils::is_node_empty( $parent_node ) ) {
					$this->remove_invalid_child( $parent_node );
				}
			}
		}
	}

	/**
	 * Sanitize attribute.
	 *
	 * Handles the attributes of `a` elements that need more than a plain blacklist check:
	 * `rel` loses its `wp-att-<digits>` token, `rev` is always removed and `target` is normalized
	 * to `_blank` or removed.
	 *
	 * @param DOMElement $node      Node.
	 * @param DOMAttr    $attribute Attribute.
	 */
	private function sanitize_a_attribute( $node, $attribute ) {
		$attribute_name = strtolower( $attribute->name );

		if ( 'rel' === $attribute_name ) {
			$old_value = $attribute->value;
			$new_value = trim( preg_replace( self::PATTERN_REL_WP_ATTACHMENT, '', $old_value ) );
			if ( empty( $new_value ) ) {
				// Nothing meaningful is left, so drop the attribute altogether.
				$this->remove_invalid_attribute( $node, $attribute_name );
			} elseif ( $old_value !== $new_value ) {
				$node->setAttribute( $attribute_name, $new_value );
			}
		} elseif ( 'rev' === $attribute_name ) {
			// rev removed from HTML5 spec, which was used by Jetpack Markdown.
			$this->remove_invalid_attribute( $node, $attribute_name );
		} elseif ( 'target' === $attribute_name ) {
			// _blank is the only allowed value and it must be lowercase.
			// replace _new with _blank and others should simply be removed.
			$old_value = strtolower( $attribute->value );
			if ( '_blank' === $old_value || '_new' === $old_value ) {
				// _new is not allowed; swap with _blank.
				$node->setAttribute( $attribute_name, '_blank' );
			} else {
				// Only _blank is allowed.
				$this->remove_invalid_attribute( $node, $attribute_name );
			}
		}
	}

	/**
	 * Validate node.
	 *
	 * Decides whether an `a` element may stay in the document. Elements without an `href` are
	 * accepted only when they carry a `name` or `id`; fragment links are always accepted; any
	 * other `href` must use one of the allowed schemes and, except for `tel` and `sms`, pass
	 * FILTER_VALIDATE_URL.
	 *
	 * @param DOMElement $node Node.
	 * @return bool Whether the link is valid.
	 */
	private function validate_a_node( $node ) {
		// Get the href attribute.
		$href = $node->getAttribute( 'href' );

		if ( empty( $href ) ) {
			/*
			 * If no href, check that a is an anchor or not.
			 * We don't need to validate anchors any further.
			 */
			return $node->hasAttribute( 'name' ) || $node->hasAttribute( 'id' );
		}

		// If this is an anchor link, just return true.
		if ( 0 === strpos( $href, '#' ) ) {
			return true;
		}

		// If the href starts with a '/', append the home_url to it for validation purposes.
		if ( 0 === strpos( $href, '/' ) ) {
			$href = untrailingslashit( get_home_url() ) . $href;
		}

		$valid_protocols   = [ 'http', 'https', 'mailto', 'sms', 'tel', 'viber', 'whatsapp' ];
		$special_protocols = [ 'tel', 'sms' ]; // These ones don't validate with `filter_var+FILTER_VALIDATE_URL`.

		/*
		 * The scheme is whatever precedes the first colon. URI schemes are case-insensitive, so
		 * compare in lower case. Without a colon there is no scheme at all (empty string), which
		 * keeps a bare `tel` or `sms` from being mistaken for a scheme.
		 */
		$protocol = strtolower( (string) strstr( $href, ':', true ) );

		if ( ! in_array( $protocol, $special_protocols, true )
			&& false === filter_var( $href, FILTER_VALIDATE_URL ) ) {
			return false;
		}

		return in_array( $protocol, $valid_protocols, true );
	}

	/**
	 * Replace node with children.
	 *
	 * Inserts a sanitized deep copy of every child directly before the node and then removes the
	 * node itself. Nothing happens when the node has no parent.
	 *
	 * @param DOMElement $node           Node.
	 * @param array      $bad_attributes Bad attributes.
	 * @param array      $bad_protocols  Bad protocols.
	 */
	private function replace_node_with_children( $node, $bad_attributes, $bad_protocols ) {
		$parent_node = $node->parentNode;

		// A detached node has nowhere to hoist its children to and nothing to be removed from.
		if ( ! $parent_node ) {
			return;
		}

		foreach ( $node->childNodes as $child_node ) {
			$new_child = $child_node->cloneNode( true );

			/*
			 * Attach the copy before sanitizing it. If the copy is itself a node that has to be
			 * unwrapped (a nested `font` or invalid `a`), the nested call needs a parent to hoist
			 * the grandchildren into; sanitizing a detached copy would leave it untouched.
			 */
			$parent_node->insertBefore( $new_child, $node );
			$this->strip_attributes_recursive( $new_child, $bad_attributes, $bad_protocols );
		}

		$this->remove_invalid_child( $node );
	}

	/**
	 * Merge defaults with args.
	 *
	 * @param string $key    Key to look up in `$this->args`.
	 * @param array  $values Built-in values.
	 * @return array Merged args: the built-in values followed by the entries stored under the key,
	 *               or just the built-in values when that entry is empty or not an array.
	 */
	private function merge_defaults_with_args( $key, $values ) {
		// Merge default values with user specified args.
		if ( ! empty( $this->args[ $key ] )
			&& is_array( $this->args[ $key ] ) ) {
			$values = array_merge( $values, $this->args[ $key ] );
		}

		return $values;
	}

	/**
	 * Get blacklisted protocols.
	 *
	 * @return array Protocols.
	 */
	private function get_blacklisted_protocols() {
		return $this->merge_defaults_with_args(
			'add_blacklisted_protocols',
			[
				'javascript',
			]
		);
	}

	/**
	 * Get blacklisted tags.
	 *
	 * @return array Tags.
	 */
	private function get_blacklisted_tags() {
		return $this->merge_defaults_with_args(
			'add_blacklisted_tags',
			[
				'script',
				'noscript',
				'style',
				'frame',
				'frameset',
				'object',
				'param',
				'applet',
				'form',
				'label',
				'input',
				'textarea',
				'select',
				'option',
				'link',
				'picture',

				// Sanitizers run after embed handlers, so if anything wasn't matched, it needs to be removed.
				'embed',
				'embedvideo',

				// Other weird ones.
				'comments-count',
			]
		);
	}

	/**
	 * Get blacklisted attributes.
	 *
	 * @return array Attributes.
	 */
	private function get_blacklisted_attributes() {
		return $this->merge_defaults_with_args(
			'add_blacklisted_attributes',
			[
				'style',
				'size',
				'clear',
				'align',
				'valign',
			]
		);
	}
}
|
|
|