You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

324 lines
10 KiB

<?php
/**
* Class AMP_Iframe_Sanitizer
*
* @package AMP
*/
/**
* Class AMP_Iframe_Sanitizer
*
* Converts <iframe> tags to <amp-iframe>
*/
class AMP_Iframe_Sanitizer extends AMP_Base_Sanitizer {

	use AMP_Noscript_Fallback;

	/**
	 * Default values for sandboxing IFrame.
	 *
	 * @since 0.2
	 *
	 * @const string
	 */
	const SANDBOX_DEFAULTS = 'allow-scripts allow-same-origin';

	/**
	 * Tag.
	 *
	 * @var string HTML <iframe> tag to identify and replace with AMP version.
	 *
	 * @since 0.2
	 */
	public static $tag = 'iframe';

	/**
	 * Default args.
	 *
	 * @var array {
	 *     Default args.
	 *
	 *     @type bool   $add_placeholder       Whether to add a placeholder element.
	 *     @type bool   $add_noscript_fallback Whether to add a noscript fallback.
	 *     @type string $current_origin        The current origin serving the page. Normally this will be the $_SERVER['HTTP_HOST'].
	 *     @type string $alias_origin          An alternative origin which can be supplied which is used when encountering same-origin iframes.
	 * }
	 */
	protected $DEFAULT_ARGS = [
		'add_placeholder'       => false,
		'add_noscript_fallback' => true,
		'current_origin'        => null,
		'alias_origin'          => null,
	];

	/**
	 * Get mapping of HTML selectors to the AMP component selectors which they may be converted into.
	 *
	 * @return array Mapping.
	 */
	public function get_selector_conversion_mapping() {
		return [
			'iframe' => [
				'amp-iframe',
			],
		];
	}

	/**
	 * Sanitize the <iframe> elements from the HTML contained in this instance's DOMDocument.
	 *
	 * Each eligible <iframe> is replaced with an <amp-iframe> carrying the normalized
	 * attributes. Iframes without a usable `src` are removed. When the noscript fallback
	 * is enabled, the original <iframe> is handed to append_old_node_noscript() together
	 * with the new element.
	 *
	 * @since 0.2
	 */
	public function sanitize() {
		$nodes     = $this->dom->getElementsByTagName( self::$tag );
		$num_nodes = $nodes->length;
		if ( 0 === $num_nodes ) {
			return;
		}

		if ( $this->args['add_noscript_fallback'] ) {
			$this->initialize_noscript_allowed_attributes( self::$tag );
		}

		// Ensure origins are normalized.
		$this->args['current_origin'] = $this->get_origin_from_url( $this->args['current_origin'] );
		if ( ! empty( $this->args['alias_origin'] ) ) {
			$this->args['alias_origin'] = $this->get_origin_from_url( $this->args['alias_origin'] );
		}

		// Walk the list from the end towards the start, as nodes are removed or replaced along the way.
		for ( $i = $num_nodes - 1; $i >= 0; $i-- ) {
			/**
			 * Iframe element.
			 *
			 * @var DOMElement $node
			 */
			$node = $nodes->item( $i );

			// Skip element if already inside of an AMP element as a noscript fallback, or if it has a dev mode exemption.
			if ( $this->is_inside_amp_noscript( $node ) || $this->has_dev_mode_exemption( $node ) ) {
				continue;
			}

			$normalized_attributes = AMP_DOM_Utils::get_node_attributes_as_assoc_array( $node );
			$normalized_attributes = $this->set_layout( $normalized_attributes );
			$normalized_attributes = $this->normalize_attributes( $normalized_attributes );

			/**
			 * If the src doesn't exist, remove the node. Either it never
			 * existed or was invalidated while filtering attributes above.
			 *
			 * @todo: add an arg to allow for a fallback element in this instance (note that filter cannot be used inside a sanitizer).
			 * @see: https://github.com/ampproject/amphtml/issues/2261
			 */
			if ( empty( $normalized_attributes['src'] ) ) {
				$this->remove_invalid_child( $node );
				continue;
			}

			$this->did_convert_elements = true;

			if ( empty( $normalized_attributes['layout'] ) && ! empty( $normalized_attributes['width'] ) && ! empty( $normalized_attributes['height'] ) ) {
				// Set layout to responsive if the iframe is aligned to full width, otherwise keep its intrinsic size.
				$normalized_attributes['layout'] = $this->has_alignfull_figure_ancestor( $node ) ? 'responsive' : 'intrinsic';

				$this->add_or_append_attribute( $normalized_attributes, 'class', 'amp-wp-enforced-sizes' );
			}

			$new_node = AMP_DOM_Utils::create_node( $this->dom, 'amp-iframe', $normalized_attributes );

			if ( true === $this->args['add_placeholder'] ) {
				$placeholder_node = $this->build_placeholder( $normalized_attributes );
				$new_node->appendChild( $placeholder_node );
			}

			$node->parentNode->replaceChild( $new_node, $node );

			if ( $this->args['add_noscript_fallback'] ) {
				$node->setAttribute( 'src', $normalized_attributes['src'] );

				// AMP is stricter than HTML5 for this attribute, so make sure we use a normalized value.
				if ( $node->hasAttribute( 'frameborder' ) ) {
					$node->setAttribute( 'frameborder', $normalized_attributes['frameborder'] );
				}

				// Preserve original node in noscript for no-JS environments.
				$this->append_old_node_noscript( $new_node, $node, $this->dom );
			}
		}
	}

	/**
	 * Determine whether an iframe sits in a <figure> that carries the `alignfull` class.
	 *
	 * Only the parent and the grandparent of the node are inspected. If both are
	 * <figure> elements, the grandparent is the one whose classes are checked.
	 *
	 * @param DOMElement $node Iframe element.
	 * @return bool Whether the relevant <figure> has the `alignfull` class.
	 */
	private function has_alignfull_figure_ancestor( DOMElement $node ) {
		$figure_node = null;
		$parent      = $node->parentNode;
		$grandparent = $parent ? $parent->parentNode : null;

		if ( $parent instanceof DOMElement && 'figure' === $parent->tagName ) {
			$figure_node = $parent;
		}
		if ( $grandparent instanceof DOMElement && 'figure' === $grandparent->tagName ) {
			$figure_node = $grandparent;
		}

		if ( ! $figure_node || ! $figure_node->hasAttribute( 'class' ) ) {
			return false;
		}

		// Class names may be separated by any run of whitespace (spaces, tabs, newlines), not only a single space.
		$class_names = preg_split( '/\s+/', $figure_node->getAttribute( 'class' ), -1, PREG_SPLIT_NO_EMPTY );

		return in_array( 'alignfull', $class_names, true );
	}

	/**
	 * Normalize HTML attributes for <amp-iframe> elements.
	 *
	 * @param string[] $attributes {
	 *     Attributes.
	 *
	 *     @type string $src               IFrame URL - Empty if HTTPS required per $this->args['require_https_src']
	 *     @type int    $width             <iframe> width attribute - Set to numeric value if px or %
	 *     @type int    $height            <iframe> height attribute - Set to numeric value if px or %
	 *     @type string $sandbox           <iframe> `sandbox` attribute - Pass along if found; default to value of self::SANDBOX_DEFAULTS
	 *     @type string $class             <iframe> `class` attribute - Pass along if found
	 *     @type string $sizes             <iframe> `sizes` attribute - Pass along if found
	 *     @type string $id                <iframe> `id` attribute - Pass along if found
	 *     @type int    $frameborder       <iframe> `frameborder` attribute - Filter to '0' or '1'; default to '0'
	 *     @type bool   $allowfullscreen   <iframe> `allowfullscreen` attribute - Convert 'false' to empty string ''
	 *     @type bool   $allowtransparency <iframe> `allowtransparency` attribute - Convert 'false' to empty string ''
	 * }
	 * @return array Returns HTML attributes; normalizes src, dimensions, frameborder, sandbox, allowtransparency and allowfullscreen
	 */
	private function normalize_attributes( $attributes ) {
		$out                      = [];
		$remove_allow_same_origin = false;

		foreach ( $attributes as $name => $value ) {
			switch ( $name ) {
				case 'src':
					// Make the URL absolute since relative URLs are not allowed in amp-iframe.
					if ( '/' === substr( $value, 0, 1 ) && '/' !== substr( $value, 1, 1 ) ) {
						$value = untrailingslashit( $this->args['current_origin'] ) . $value;
					}

					$value = $this->maybe_enforce_https_src( $value, true );

					// Handle case where iframe source origin is the same as the host page's origin.
					// Note: if neither origin could be determined, both sides are null and this branch is taken as well.
					if ( $this->get_origin_from_url( $value ) === $this->args['current_origin'] ) {
						if ( ! empty( $this->args['alias_origin'] ) ) {
							$alias_origin = $this->args['alias_origin'];

							// Swap the origin part (optional scheme, then authority up to the path, query or fragment).
							// A callback is used so that `$` or `\` in the alias is never treated as a back-reference.
							$value = preg_replace_callback(
								'~^(?:\w+:)?//[^/?#]+~',
								static function () use ( $alias_origin ) {
									return $alias_origin;
								},
								$value
							);
						} else {
							$remove_allow_same_origin = true;
						}
					}

					$out[ $name ] = $value;
					break;

				case 'width':
				case 'height':
					$out[ $name ] = $this->sanitize_dimension( $value, $name );
					break;

				case 'frameborder':
					$out[ $name ] = $this->sanitize_boolean_digit( $value );
					break;

				case 'allowfullscreen':
				case 'allowtransparency':
					if ( 'false' !== $value ) {
						$out[ $name ] = '';
					}
					break;

				case 'mozallowfullscreen':
				case 'webkitallowfullscreen':
					// Omit these since amp-iframe will add them if needed if the `allowfullscreen` attribute is present.
					break;

				default:
					$out[ $name ] = $value;
					break;
			}
		}

		if ( ! isset( $out['sandbox'] ) ) {
			$out['sandbox'] = self::SANDBOX_DEFAULTS;
		}

		// Remove allow-same-origin from sandbox if required.
		if ( $remove_allow_same_origin ) {
			// Sandbox keywords are whitespace-separated and ASCII case-insensitive, so compare token by token;
			// this also drops repeated occurrences of the keyword.
			$sandbox_tokens = preg_split( '/\s+/', $out['sandbox'], -1, PREG_SPLIT_NO_EMPTY );
			$sandbox_tokens = array_filter(
				$sandbox_tokens,
				static function ( $token ) {
					return 'allow-same-origin' !== strtolower( $token );
				}
			);

			$out['sandbox'] = implode( ' ', $sandbox_tokens );
		}

		return $out;
	}

	/**
	 * Obtain the origin part of a given URL (scheme, host, port).
	 *
	 * When the URL has a host but no scheme, the scheme of the `current_origin`
	 * arg is used instead.
	 *
	 * @param string $url URL.
	 * @return string|null Origin URL or null if parse failed.
	 */
	private function get_origin_from_url( $url ) {
		$parsed_url = wp_parse_url( $url );
		if ( ! isset( $parsed_url['host'] ) ) {
			return null;
		}

		if ( ! isset( $parsed_url['scheme'] ) ) {
			$parsed_url['scheme'] = wp_parse_url( $this->args['current_origin'], PHP_URL_SCHEME );
		}

		$origin = $parsed_url['scheme'] . '://' . $parsed_url['host'];
		if ( isset( $parsed_url['port'] ) ) {
			$origin .= ':' . $parsed_url['port'];
		}

		return $origin;
	}

	/**
	 * Builds a DOMElement to use as a placeholder for an <iframe>.
	 *
	 * Important: The element returned must not be block-level (e.g. div) as the PHP DOM parser
	 * will move it out from inside any containing paragraph. So this is why a span is used.
	 *
	 * @since 0.2
	 *
	 * @param string[] $parent_attributes Attributes of the parent <amp-iframe>. Currently not used when
	 *                                    building the placeholder, which always gets an empty `placeholder`
	 *                                    attribute and the class 'amp-wp-iframe-placeholder'.
	 * @return DOMElement|false
	 */
	private function build_placeholder( $parent_attributes ) {
		return AMP_DOM_Utils::create_node(
			$this->dom,
			'span',
			[
				'placeholder' => '',
				'class'       => 'amp-wp-iframe-placeholder',
			]
		);
	}

	/**
	 * Sanitizes a boolean character (or string) into a '0' or '1' character.
	 *
	 * @param string $value A boolean character to sanitize. If a string containing more than a single
	 *                      character is provided, only the first character is taken into account.
	 *
	 * @return string Returns either '0' or '1'.
	 */
	private function sanitize_boolean_digit( $value ) {
		// Default to false if the value was forgotten.
		if ( empty( $value ) ) {
			return '0';
		}

		// Default to false if the value has an unexpected type.
		if ( ! is_string( $value ) && ! is_numeric( $value ) ) {
			return '0';
		}

		// See: https://github.com/ampproject/amp-wp/issues/2335#issuecomment-493209861.
		switch ( substr( (string) $value, 0, 1 ) ) {
			case '1':
			case 'y':
			case 'Y':
				return '1';
		}

		return '0';
	}
}