Last active
October 24, 2024 00:34
-
-
Save westonruter/c1e000c900ee54a9bc9ff23b0c1170e7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Minimal reproduction: bookmark a DIV with the HTML Processor, scan the rest of the document, and then
 * seek back to that bookmark.
 *
 * This file is intended to be executed using WP-CLI's eval-file command.
 */

$div_bookmark = 'the-bookmark';

// The bookmark name doubles as the id attribute of the DIV that gets bookmarked.
$document = <<<HTML
<html lang="en">
<head>
<meta charset="utf-8">
<title>...</title>
</head>
<body>
<div id="{$div_bookmark}"></div>
</body>
</html>
HTML;

$parser = WP_HTML_Processor::create_full_parser( $document );

// Visit every tag until next_tag() reports there are no more, bookmarking each DIV on the way.
while ( $parser->next_tag() ) {
	if ( 'DIV' !== $parser->get_tag() ) {
		continue;
	}
	if ( $parser->set_bookmark( $div_bookmark ) ) {
		continue;
	}
	throw new Exception( 'Failed to set bookmark' );
}

// The bookmark must still be known to the processor once scanning has finished...
$bookmark_exists = $parser->has_bookmark( $div_bookmark );
if ( ! $bookmark_exists ) {
	throw new Exception( 'Unexpectedly has_bookmark returned false.' );
}

// ...and seeking back to it is the operation under test.
$seek_succeeded = $parser->seek( $div_bookmark );
if ( ! $seek_succeeded ) {
	throw new Exception( 'Failed to seek to bookmark.' );
}

echo 'Success!';
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* This code extracts optimization logic from the Embed Optimizer plugin from the WordPress Core Performance Team. | |
* It demonstrates an issue where seeking fails when using WP_HTML_Processor when it succeeds with WP_HTML_Tag_Processor. | |
* | |
* This file is intended to be executed using WP-CLI's eval-file command. | |
* | |
* Assuming this Gist is cloned into the plugins/optimization-detective directory of the WordPress/performance project, | |
* the following will re-run the script whenever the file is changed: | |
* | |
* while true; do inotifywait -e modify repro-html-processor-seek-issue.php 2> /dev/null; clear; npm run wp-env run cli wp eval-file /var/www/html/wp-content/plugins/optimization-detective/repro-html-processor-seek-issue/repro-html-processor-seek-issue.php; done | |
*/ | |
namespace ReproHtmlProcessorSeekIssue; | |
use WP_HTML_Processor; | |
use WP_HTML_Tag_Processor; | |
use Exception; | |
/*
 * Sample document containing a single VideoPress embed block.
 *
 * The attribute quotes are deliberately NOT backslash-escaped: heredoc/nowdoc bodies do not treat \' as an
 * escape sequence, so any backslash written here would end up verbatim in the markup and the attribute values
 * would no longer be quoted. A nowdoc is used because the markup requires no variable interpolation.
 */
$html = <<<'HTML'
<html lang="en">
<head>
<meta charset="utf-8">
<title>...</title>
</head>
<body>
<figure class="wp-block-embed is-type-video is-provider-wordpress-tv wp-block-embed-wordpress-tv wp-embed-aspect-16-9 wp-has-aspect-ratio">
<div class="wp-block-embed__wrapper">
<iframe title="VideoPress Video Player" aria-label='VideoPress Video Player' width='750' height='422' src='https://video.wordpress.com/embed/vaWm9zO6?hd=1&cover=1' frameborder='0' allowfullscreen allow='clipboard-write'></iframe>
<script src='https://v0.wordpress.com/js/next/videopress-iframe.js?m=1674852142'></script>
</div>
</figure>
</body>
</html>
HTML;
/**
 * Rewrites the embed at the processor's current position so that it loads lazily.
 *
 * Starting from the current tag, the processor is advanced while non-lazy IFRAME tags and SCRIPT tags with a
 * src attribute are counted and bookmarked. Afterwards a lone external script (when no inline script was seen)
 * has its type replaced with a placeholder type, and a lone iframe receives loading="lazy".
 *
 * NOTE(review): the scan is meant to stop at the closing </figure> tag, but next_tag() is called without a
 * query here; presumably that skips closing tags, in which case the scan runs to the end of the document.
 * Confirm against the processor in use.
 *
 * NOTE(review): true is returned as soon as a lone external script is found, even if seeking back to it
 * fails and its type is therefore left untouched. This mirrors the linked upstream code.
 *
 * @since 0.2.0
 * @link https://github.com/WordPress/performance/blob/356fa9b8b7ff86633578b9ccdf92323eb04ed24d/plugins/embed-optimizer/hooks.php#L168-L307
 *
 * phpcs:disable Squiz.Commenting.FunctionCommentThrowTag.Missing -- The exception is caught.
 *
 * @param WP_HTML_Tag_Processor $html_processor HTML Processor.
 * @return bool Whether the lazy-loading script is required.
 */
function embed_optimizer_update_markup( WP_HTML_Tag_Processor $html_processor ): bool {
	$bookmarks = array(
		'script' => 'embed_optimizer_script',
		'iframe' => 'embed_optimizer_iframe',
	);

	// Reports a problem as a user-level PHP warning.
	$warn = static function ( string $message ): void {
		trigger_error( $message, E_USER_WARNING );
	};

	// Bookmarks the current tag under the given name, throwing when the bookmark cannot be set.
	$bookmark_current_tag = static function ( string $name ) use ( $html_processor ): void {
		if ( $html_processor->set_bookmark( $name ) ) {
			return;
		}
		throw new Exception(
			/* translators: %s is bookmark name */
			sprintf( __( 'Embed Optimizer unable to set %s bookmark.', 'embed-optimizer' ), $name )
		);
	};

	// Warns that seeking to the named bookmark failed.
	$warn_seek_failed = static function ( string $name ) use ( $warn ): void {
		$warn(
			/* translators: %s is bookmark name */
			sprintf( __( 'Embed Optimizer unable to seek to %s bookmark.', 'embed-optimizer' ), $name )
		);
	};

	$lazy_script_needed = false;

	try {
		/*
		 * Lazy-loading strategy:
		 *
		 * - A single iframe gets loading="lazy".
		 * - Scripts are never made lazy when an inline script is present.
		 * - A script is only made lazy when it is the sole external script, since the relative load order of
		 *   several scripts could otherwise change.
		 * - When an embed has both an iframe and a script, both are made lazy.
		 */
		$eager_iframes     = 0;
		$external_scripts  = 0;
		$saw_inline_script = false;
		$depth             = 0;

		// First pass: find and bookmark the iframes and scripts.
		do {
			$tag = $html_processor->get_tag();

			// Track FIGURE nesting so that scanning can stop when the embed's own </figure> is reached.
			if ( 'FIGURE' === $tag ) {
				if ( ! $html_processor->is_tag_closer() ) {
					++$depth;
					// Nothing more to inspect on the opening tag itself.
					continue;
				}
				--$depth;
				if ( $depth <= 0 ) {
					// End of the embed.
					break;
				}
			}

			// Ignore anything encountered outside of a FIGURE.
			if ( 0 === $depth ) {
				continue;
			}

			if ( 'IFRAME' === $tag ) {
				// Per the HTML spec: "The attribute's missing value default and invalid value default are both
				// the Eager state", so everything other than an explicit "lazy" counts as eager.
				if ( 'lazy' !== $html_processor->get_attribute( 'loading' ) ) {
					++$eager_iframes;
					$bookmark_current_tag( $bookmarks['iframe'] );
				}
			} elseif ( 'SCRIPT' === $tag ) {
				if ( null !== $html_processor->get_attribute( 'src' ) ) {
					++$external_scripts;
					$bookmark_current_tag( $bookmarks['script'] );
				} else {
					$saw_inline_script = true;
				}
			}
		} while ( $html_processor->next_tag() );

		// Second pass, scripts: a lone external script is switched to a placeholder type.
		if (
			1 === $external_scripts
			&&
			! $saw_inline_script
			&&
			$html_processor->has_bookmark( $bookmarks['script'] )
		) {
			$lazy_script_needed = true;
			if ( ! $html_processor->seek( $bookmarks['script'] ) ) {
				$warn_seek_failed( $bookmarks['script'] );
			} else {
				// Remember any explicit type in a data attribute before overwriting it.
				$original_type = $html_processor->get_attribute( 'type' );
				if ( is_string( $original_type ) ) {
					$html_processor->set_attribute( 'data-original-type', $original_type );
				}
				$html_processor->set_attribute( 'type', 'application/vnd.embed-optimizer.javascript' );
			}
		}

		// Second pass, iframes: a lone iframe is made lazy.
		if ( 1 === $eager_iframes && $html_processor->has_bookmark( $bookmarks['iframe'] ) ) {
			if ( ! $html_processor->seek( $bookmarks['iframe'] ) ) {
				$warn_seek_failed( $bookmarks['iframe'] );
			} else {
				$html_processor->set_attribute( 'loading', 'lazy' );

				// For post embeds, use visibility:hidden instead of clip: browsers consistently load the
				// lazy-loaded iframe that way (Chromium is unreliable with clip), and it also improves
				// accessibility by preventing links in the hidden iframe from receiving focus.
				if ( true === $html_processor->has_class( 'wp-embedded-content' ) ) {
					$inline_style = $html_processor->get_attribute( 'style' );
					if ( is_string( $inline_style ) ) {
						// WordPress core injects this clip CSS property:
						// <https://github.com/WordPress/wordpress-develop/blob/6974b994de5/src/wp-includes/embed.php#L968>.
						// Note: wp-embed.js removes the style attribute entirely when the iframe is loaded:
						// <https://github.com/WordPress/wordpress-develop/blob/6974b994d/src/js/_enqueues/wp/embed.js#L60>.
						$html_processor->set_attribute(
							'style',
							str_replace( 'clip: rect(1px, 1px, 1px, 1px);', 'visibility: hidden;', $inline_style )
						);
					}
				}
			}
		}
	} catch ( Exception $e ) {
		// A bookmark could not be set: report it and signal that no lazy-load script is needed.
		$warn( $e->getMessage() );
		$lazy_script_needed = false;
	}

	// Since there is a limit to the number of bookmarks we can add, make sure any new ones we add get removed.
	foreach ( $bookmarks as $name ) {
		$html_processor->release_bookmark( $name );
	}

	return $lazy_script_needed;
}
/**
 * Runs the embed optimization over every embed block FIGURE in a document.
 *
 * Each opening FIGURE tag with the wp-block-embed class is handed to embed_optimizer_update_markup(). When at
 * least one of those calls reports that the lazy-load script is needed, a placeholder comment is appended to
 * the serialized document.
 *
 * @param WP_HTML_Tag_Processor|WP_HTML_Processor $processor Processor loaded with the document.
 * @return string The updated HTML.
 */
function process_document( $processor ): string {
	$lazy_script_required = false;

	while ( $processor->next_tag() ) {
		// Only opening FIGURE tags of embed blocks are of interest.
		if ( $processor->is_tag_closer() ) {
			continue;
		}
		if ( 'FIGURE' !== $processor->get_tag() ) {
			continue;
		}
		if ( true !== $processor->has_class( 'wp-block-embed' ) ) {
			continue;
		}

		if ( embed_optimizer_update_markup( $processor ) ) {
			$lazy_script_required = true;
		}
	}

	$output = $processor->get_updated_html();

	return $lazy_script_required
		? $output . "\n<!--TODO: Also inject the lazy-load script at the end of the BODY.-->"
		: $output;
}
// Run the same document through both processor implementations.
echo "Parsing with HTML Tag Processor...\n";
$tag_processor_output = process_document( new WP_HTML_Tag_Processor( $html ) );
echo "Done.\n\n";

echo "Parsing with HTML Processor...\n";
$full_processor_output = process_document( WP_HTML_Processor::create_full_parser( $html ) );
echo "Done.\n\n";

// Identical output means the two processors agree; the exit code reflects the outcome.
if ( $tag_processor_output === $full_processor_output ) {
	echo "PASS. The HTML Processor and HTML Tag Processor results are the same:\n";
	echo $full_processor_output;
	exit( 0 );
}

echo "FAIL. The HTML Processor and HTML Tag Processor results are different.\n\n";
echo "### HTML Tag Processor ###\n";
echo $tag_processor_output . "\n\n\n";
echo "### HTML Processor Result ###\n";
echo $full_processor_output . "\n";
exit( 1 );
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Current output as of WordPress 6.8-alpha-59285: