Generate a human-readable sitemap tree from an XML sitemap:
php sitemap-to-html.php https://xwp.co/sitemap_index.xml
Then open the generated sitemap-*.html file (written to the current working directory) in your web browser.
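Export every URL in the sitemap, together with the sitemap it was found in, to a sitemap-*.csv file:
php sitemap-to-csv.php https://xwp.co/sitemap_index.xml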
<?php | |
/**
 * Collect every URL listed in a sitemap, following child sitemaps recursively.
 *
 * Prints a "Found N URLs in <sitemap>" progress line for each sitemap that loads.
 * A sitemap that cannot be fetched or parsed is reported on STDERR and contributes
 * no rows, so one broken child does not abort the whole export.
 *
 * @param string $sitemap_file Local path or URL of a sitemap or sitemap index.
 * @return array[] List of [ page URL, sitemap it was found in ] pairs.
 */
function urls_from_sitemap( $sitemap_file ) {
	$sitemap = simplexml_load_file( $sitemap_file );

	// simplexml_load_file() returns false on failure; bail out instead of
	// reading properties off a boolean and printing a misleading "Found 0".
	if ( false === $sitemap ) {
		fwrite( STDERR, sprintf( "Failed to load sitemap %s\n", $sitemap_file ) );

		return [];
	}

	$urls = [];

	foreach ( $sitemap->url as $url ) {
		$urls[] = [ (string) $url->loc, $sitemap_file ];
	}

	printf( "Found %d URLs in %s\n", count( $urls ), $sitemap_file );

	// Resolve child sitemaps, if any. A dedicated loop variable keeps
	// $sitemap pointing at the document being processed.
	foreach ( $sitemap->sitemap as $child_sitemap ) {
		$urls = array_merge( $urls, urls_from_sitemap( (string) $child_sitemap->loc ) );
	}

	return $urls;
}
// Command-line entry point: export every URL found in the sitemap to a CSV file.
if ( empty( $argv[1] ) ) {
	// Report usage errors on STDERR with a non-zero status so callers can detect them.
	fwrite( STDERR, 'Please specify the sitemap URL!' . PHP_EOL );
	exit( 1 );
}

$sitemap_url = $argv[1]; // Accept local file path or URL as sitemap source.

// Default to a timestamped file name; prefer the sitemap host when a URL was given.
$sitemap_csv_file = sprintf( '%s/sitemap-%d.csv', getcwd(), time() );

if ( false !== strpos( $sitemap_url, '//' ) ) {
	$sitemap_host = parse_url( $sitemap_url, PHP_URL_HOST );

	// parse_url() returns null/false when no host can be extracted; keep the
	// timestamped name in that case rather than producing "sitemap-.csv".
	if ( ! empty( $sitemap_host ) ) {
		$sitemap_csv_file = sprintf( '%s/sitemap-%s.csv', getcwd(), $sitemap_host );
	}
}

// Header row followed by one [ URL, source sitemap ] row per discovered URL.
$rows = array_merge(
	[ [ 'URL', 'Sitemap URL' ] ],
	urls_from_sitemap( $sitemap_url )
);

$sitemap_fp = fopen( $sitemap_csv_file, 'w' );

if ( false === $sitemap_fp ) {
	fwrite( STDERR, sprintf( 'Unable to open %s for writing.', $sitemap_csv_file ) . PHP_EOL );
	exit( 1 );
}

// A separate loop variable avoids overwriting the array that is being iterated.
foreach ( $rows as $row ) {
	fputcsv( $sitemap_fp, $row );
}

fclose( $sitemap_fp );

printf( 'Generated CSV sitemap: %s' . PHP_EOL, $sitemap_csv_file );
<?php | |
/**
 * Turn a flat list of path segments into a single-branch nested array.
 *
 * Each segment becomes the only key of the array nested inside the previous
 * segment; the final segment maps to an empty array.
 *
 * @param array $parts Path segments, outermost first.
 * @return array Nested array with one key per level, or [] for no segments.
 */
function parts_to_tree( $parts ) {
	if ( empty( $parts ) ) {
		return [];
	}

	// Build from the innermost segment outwards, wrapping the branch at each step.
	$branch = [];

	foreach ( array_reverse( $parts ) as $segment ) {
		$branch = [ $segment => $branch ];
	}

	return $branch;
}
/**
 * Render a nested array as nested HTML lists of collapsible <details> elements.
 *
 * Each key becomes a <summary> label followed by the recursive count of its
 * descendants; its children are rendered as a nested <ul>.
 *
 * @param array $tree Nested array keyed by branch label.
 * @return string|null HTML <ul> markup, or null when the tree has no branches.
 */
function tree_to_list( $tree ) {
	$branches = [];

	foreach ( $tree as $branch => $children ) {
		$branches[] = sprintf(
			'<li><details><summary>%s (%d)</summary>%s</details></li>',
			// Labels come from sitemap URLs, so escape them before writing HTML.
			// Keys may be integers, hence the explicit string cast.
			htmlspecialchars( (string) $branch, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' ),
			count( $children, COUNT_RECURSIVE ),
			// A leaf returns null; cast so it renders as an empty string.
			(string) tree_to_list( $children )
		);
	}

	if ( ! empty( $branches ) ) {
		return sprintf( '<ul>%s</ul>', implode( '', $branches ) );
	}

	return null;
}
/**
 * Collect every URL listed in a sitemap, following child sitemaps recursively.
 *
 * URLs from child sitemaps are placed before the URLs listed directly in the
 * given document. A sitemap that cannot be fetched or parsed is reported on
 * STDERR and contributes no URLs.
 *
 * @param string $sitemap_file Local path or URL of a sitemap or sitemap index.
 * @return string[] List of page URLs.
 */
function urls_from_sitemap( $sitemap_file ) {
	$sitemap = simplexml_load_file( $sitemap_file );

	// simplexml_load_file() returns false on failure; skip the document
	// instead of reading properties off a boolean.
	if ( false === $sitemap ) {
		fwrite( STDERR, sprintf( "Failed to load sitemap %s\n", $sitemap_file ) );

		return [];
	}

	$urls = [];

	// Resolve child sitemaps, if any. The loop variable must not reuse
	// $sitemap, otherwise the URL loop below would read from the last child
	// element and drop this document's own URLs.
	foreach ( $sitemap->sitemap as $child_sitemap ) {
		$urls = array_merge( urls_from_sitemap( (string) $child_sitemap->loc ), $urls );
	}

	foreach ( $sitemap->url as $url ) {
		$urls[] = (string) $url->loc;
	}

	return $urls;
}
// Command-line entry point: render the sitemap's URL paths as a collapsible HTML tree.
if ( empty( $argv[1] ) ) {
	// Report usage errors on STDERR with a non-zero status so callers can detect them.
	fwrite( STDERR, 'Please specify the sitemap URL!' . PHP_EOL );
	exit( 1 );
}

$sitemap_url = $argv[1]; // Accept local file path or URL as sitemap source.

$tree = [];

foreach ( urls_from_sitemap( $sitemap_url ) as $url ) {
	// parse_url() yields null/false when the URL has no path; cast so trim()
	// always receives a string.
	$parts = explode( '/', trim( (string) parse_url( $url, PHP_URL_PATH ), '/\\' ) );

	// Extend the tree by walking it by reference. array_merge_recursive()
	// renumbers integer-like keys, so segments such as "2020" would be
	// relabelled 0, 1, ... and never merged with their siblings.
	$node = &$tree;

	foreach ( $parts as $part ) {
		if ( ! isset( $node[ $part ] ) ) {
			$node[ $part ] = [];
		}

		$node = &$node[ $part ];
	}

	unset( $node ); // Break the reference before the next URL is processed.
}

// Default to a timestamped file name; prefer the sitemap host when a URL was given.
$sitemap_html_file = sprintf( '%s/sitemap-%d.html', getcwd(), time() );

if ( false !== strpos( $sitemap_url, '//' ) ) {
	$sitemap_host = parse_url( $sitemap_url, PHP_URL_HOST );

	// Keep the timestamped name when no host can be extracted.
	if ( ! empty( $sitemap_host ) ) {
		$sitemap_html_file = sprintf( '%s/sitemap-%s.html', getcwd(), $sitemap_host );
	}
}

// tree_to_list() returns null for an empty tree; write an empty file in that case.
if ( false === file_put_contents( $sitemap_html_file, (string) tree_to_list( $tree ) ) ) {
	fwrite( STDERR, sprintf( 'Unable to write %s.', $sitemap_html_file ) . PHP_EOL );
	exit( 1 );
}

printf( 'Generated HTML sitemap: %s' . PHP_EOL, $sitemap_html_file );