Skip to content

Instantly share code, notes, and snippets.

@kasparsd
Last active November 4, 2024 17:59
Show Gist options
  • Save kasparsd/e9c0779fc6821a3722103ec4d1b7a735 to your computer and use it in GitHub Desktop.
Save kasparsd/e9c0779fc6821a3722103ec4d1b7a735 to your computer and use it in GitHub Desktop.

Sitemap HTML Tree

Generate a human-readable sitemap tree from an XML sitemap:

php sitemap-to-html.php https://xwp.co/sitemap_index.xml

Then open the generated sitemap-*.html file in your web browser.

Sitemap URL CSV

Export every URL found in an XML sitemap (including its child sitemaps) to a CSV file:

php sitemap-to-csv.php https://xwp.co/sitemap_index.xml
<?php
/**
 * Collect every URL listed in a sitemap, following child sitemaps recursively.
 *
 * Prints a "Found N URLs in ..." progress line for each sitemap that loads.
 *
 * @param string $sitemap_file Local file path or URL of the sitemap XML.
 *
 * @return array[] List of `[ page URL, sitemap it was found in ]` pairs; empty
 *                 when the sitemap cannot be loaded.
 */
function urls_from_sitemap( $sitemap_file ) {
	$sitemap = simplexml_load_file( $sitemap_file );

	// simplexml_load_file() returns false when the source is unreadable or not
	// well-formed XML; report it and carry on with the remaining sitemaps.
	if ( false === $sitemap ) {
		trigger_error( sprintf( 'Unable to load sitemap: %s', $sitemap_file ), E_USER_WARNING );

		return [];
	}

	$urls = [];

	foreach ( $sitemap->url as $url ) {
		$urls[] = [ (string) $url->loc, $sitemap_file ];
	}

	printf( "Found %d URLs in %s\n", count( $urls ), $sitemap_file );

	// Resolve child sitemaps, if any. A dedicated loop variable keeps the
	// parent document in $sitemap intact.
	foreach ( $sitemap->sitemap as $child_sitemap ) {
		$urls = array_merge( $urls, urls_from_sitemap( (string) $child_sitemap->loc ) );
	}

	return $urls;
}
// Bail out with a non-zero exit status when no sitemap source was given, so
// shell callers can detect the failure.
if ( empty( $argv[1] ) ) {
	fwrite( STDERR, 'Please specify the sitemap URL!' . PHP_EOL );
	exit( 1 );
}

$sitemap_url = $argv[1]; // Accept local file path or URL as sitemap source.

// Name the output after the sitemap host when there is one; otherwise (local
// file paths, or a "//" that does not parse to a host) use a timestamp.
$sitemap_csv_file = sprintf( '%s/sitemap-%d.csv', getcwd(), time() );
$sitemap_host     = ( false !== strpos( $sitemap_url, '//' ) ) ? parse_url( $sitemap_url, PHP_URL_HOST ) : null;

if ( ! empty( $sitemap_host ) ) {
	$sitemap_csv_file = sprintf( '%s/sitemap-%s.csv', getcwd(), $sitemap_host );
}

// Header row followed by one `[ URL, Sitemap URL ]` row per discovered page.
$rows = array_merge(
	[ [ 'URL', 'Sitemap URL' ] ],
	urls_from_sitemap( $sitemap_url )
);

$sitemap_fp = fopen( $sitemap_csv_file, 'w' );

// fopen() returns false when the target is not writable; fputcsv() must not
// be handed that value.
if ( false === $sitemap_fp ) {
	fwrite( STDERR, sprintf( 'Unable to open %s for writing.', $sitemap_csv_file ) . PHP_EOL );
	exit( 1 );
}

foreach ( $rows as $row ) {
	fputcsv( $sitemap_fp, $row );
}

fclose( $sitemap_fp );

printf( 'Generated CSV sitemap: %s', $sitemap_csv_file );
<?php
/**
 * Turn a flat list of path segments into a single-branch nested array.
 *
 * Example: `[ 'a', 'b' ]` becomes `[ 'a' => [ 'b' => [] ] ]`.
 *
 * @param array $parts Path segments, outermost first.
 *
 * @return array Nested array with one key per level; empty for no segments.
 */
function parts_to_tree( $parts ) {
	$tree = [];

	// Build from the deepest segment outwards, wrapping the tree built so far
	// under each preceding segment.
	while ( ! empty( $parts ) ) {
		$tree = [ array_pop( $parts ) => $tree ];
	}

	return $tree;
}
/**
 * Render a nested path tree as nested HTML lists of collapsible <details> items.
 *
 * Each item shows the branch name followed by the recursive count of its
 * descendants.
 *
 * @param array $tree Nested array keyed by path segment.
 *
 * @return string|null `<ul>` markup, or null when the tree has no branches.
 */
function tree_to_list( $tree ) {
	$branches = [];

	foreach ( $tree as $branch => $children ) {
		$branches[] = sprintf(
			'<li><details><summary>%s (%d)</summary>%s</details></li>',
			// Segments come from remote sitemap URLs, so escape them for HTML.
			// Numeric segments arrive as integer keys, hence the cast.
			htmlspecialchars( (string) $branch, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' ),
			count( $children, COUNT_RECURSIVE ),
			tree_to_list( $children )
		);
	}

	if ( ! empty( $branches ) ) {
		return sprintf( '<ul>%s</ul>', implode( '', $branches ) );
	}

	return null;
}
/**
 * Collect every URL listed in a sitemap, following child sitemaps recursively.
 *
 * URLs from child sitemaps are placed before the URLs listed directly in the
 * given sitemap.
 *
 * @param string $sitemap_file Local file path or URL of the sitemap XML.
 *
 * @return string[] Page URLs; empty when the sitemap cannot be loaded.
 */
function urls_from_sitemap( $sitemap_file ) {
	$sitemap = simplexml_load_file( $sitemap_file );

	// simplexml_load_file() returns false when the source is unreadable or not
	// well-formed XML; report it and carry on with the remaining sitemaps.
	if ( false === $sitemap ) {
		trigger_error( sprintf( 'Unable to load sitemap: %s', $sitemap_file ), E_USER_WARNING );

		return [];
	}

	$urls = [];

	// Resolve child sitemaps, if any. A dedicated loop variable keeps the
	// parent document in $sitemap intact, so its own <url> entries are still
	// read below.
	foreach ( $sitemap->sitemap as $child_sitemap ) {
		$urls = array_merge( urls_from_sitemap( (string) $child_sitemap->loc ), $urls );
	}

	foreach ( $sitemap->url as $url ) {
		$urls[] = (string) $url->loc;
	}

	return $urls;
}
// Bail out with a non-zero exit status when no sitemap source was given, so
// shell callers can detect the failure.
if ( empty( $argv[1] ) ) {
	fwrite( STDERR, 'Please specify the sitemap URL!' . PHP_EOL );
	exit( 1 );
}

$sitemap_url = $argv[1]; // Accept local file path or URL as sitemap source.

$tree = [];

foreach ( urls_from_sitemap( $sitemap_url ) as $url ) {
	// parse_url() yields null for a URL without a path (and false for a
	// malformed one); cast so trim() always receives a string.
	$parts = explode( '/', trim( (string) parse_url( $url, PHP_URL_PATH ), '/\\' ) );

	// array_replace_recursive() keeps numeric segments such as "2024" as keys
	// and merges beneath them; array_merge_recursive() would renumber them
	// to 0, 1, 2, ... and never merge their children.
	$tree = array_replace_recursive( $tree, parts_to_tree( $parts ) );
}

// Name the output after the sitemap host when there is one; otherwise (local
// file paths, or a "//" that does not parse to a host) use a timestamp.
$sitemap_html_file = sprintf( '%s/sitemap-%d.html', getcwd(), time() );
$sitemap_host      = ( false !== strpos( $sitemap_url, '//' ) ) ? parse_url( $sitemap_url, PHP_URL_HOST ) : null;

if ( ! empty( $sitemap_host ) ) {
	$sitemap_html_file = sprintf( '%s/sitemap-%s.html', getcwd(), $sitemap_host );
}

// tree_to_list() returns null for an empty tree; write an empty file then.
if ( false === file_put_contents( $sitemap_html_file, (string) tree_to_list( $tree ) ) ) {
	fwrite( STDERR, sprintf( 'Unable to write %s.', $sitemap_html_file ) . PHP_EOL );
	exit( 1 );
}

printf( 'Generated HTML sitemap: %s', $sitemap_html_file );
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment