<?php
// /sitemap.xml.php
// Generates an XML sitemap for search engines

// Declare the response body as UTF-8 encoded XML.
header('Content-Type: application/xml; charset=UTF-8');

// Helper: escape a value for use as XML text content.
// A null argument is treated as an empty string; both quote styles are escaped
// and invalid UTF-8 sequences are substituted rather than rejected.
function x($s)
{
  $text  = $s ?? '';
  $flags = ENT_QUOTES | ENT_SUBSTITUTE;

  return htmlspecialchars($text, $flags, 'UTF-8');
}

// Site origin: scheme and host only, without a trailing slash.
// Set this to your own domain.
$site = 'https://theimpactlab.co.uk';

// 1) Core pages you want indexed.
//    Key   = URL path relative to the site origin.
//    Value = change-frequency hint for that page.
$core = [
  '/' => 'weekly',
  '/about/' => 'monthly',
  '/services/' => 'monthly',
  '/products/' => 'weekly',
  '/press/' => 'daily',
  '/book/' => 'monthly',
  '/contact/' => 'monthly',
];

// 2) Collect blog posts under /press/blogs/
// Recursively scans the blog directory and records one sitemap entry per
// .php/.html/.htm file. Each entry holds: loc (absolute URL), lastmod (file
// mtime formatted as ISO 8601 in UTC), changefreq and priority.
$blogs = [];
// Drop any trailing slash so the root can be removed as an exact path prefix.
$docRoot = rtrim($_SERVER['DOCUMENT_ROOT'] ?? '', '/\\');
$blogDir = $docRoot . '/press/blogs';
if (is_dir($blogDir)) {
  // CATCH_GET_CHILD: skip sub-directories that cannot be opened instead of
  // aborting the whole sitemap with an uncaught exception.
  $rii = new RecursiveIteratorIterator(
    new RecursiveDirectoryIterator($blogDir, FilesystemIterator::SKIP_DOTS),
    RecursiveIteratorIterator::LEAVES_ONLY,
    RecursiveIteratorIterator::CATCH_GET_CHILD
  );
  foreach ($rii as $file) {
    if (!$file->isFile()) continue;
    $ext = strtolower($file->getExtension());
    if (!in_array($ext, ['php','html','htm'], true)) continue;

    // Build public URL.
    // Every pathname starts with $blogDir (and therefore $docRoot), so cut the
    // root off as a prefix rather than replacing it wherever it occurs.
    $relPath = substr($file->getPathname(), strlen($docRoot));
    $relPath = str_replace(DIRECTORY_SEPARATOR, '/', $relPath);
    // Percent-encode each segment so names with spaces or non-ASCII characters
    // still yield a valid URL; the '/' separators are left untouched.
    $relPath = implode('/', array_map('rawurlencode', explode('/', $relPath)));
    $url = $site . $relPath;

    $blogs[] = [
      'loc'      => $url,
      'lastmod'  => gmdate('c', $file->getMTime()),
      'changefreq' => 'monthly',
      'priority' => '0.6',
    ];
  }
}

// 3) Optionally, include internal press article pages if you add them later
// If your /press page is driven by media.json with external links, skip those here.
// This block looks for .php/.html/.htm files directly under /press, excluding
// the index files and bsky_feed.php.
$pressPages = [];
// Drop any trailing slash so the directory path is built consistently.
$docRoot = rtrim($_SERVER['DOCUMENT_ROOT'] ?? '', '/\\');
$pressDir = $docRoot . '/press';
if (is_dir($pressDir)) {
  $pressExtensions = ['php', 'html', 'htm'];
  $pressExcluded   = ['index.php', 'index.html', 'index.htm', 'bsky_feed.php'];

  // scandir() is used instead of glob() with GLOB_BRACE because that flag is
  // not defined on every platform. It returns false on failure, hence the
  // fallback to an empty list.
  $names = scandir($pressDir);
  if ($names === false) $names = [];

  foreach ($names as $base) {
    // Same matching rules as the former "*.{php,html,htm}" pattern:
    // no dotfiles, case-sensitive extension.
    if ($base === '' || $base[0] === '.') continue;
    if (!in_array(pathinfo($base, PATHINFO_EXTENSION), $pressExtensions, true)) continue;
    if (in_array($base, $pressExcluded, true)) continue;

    $f = $pressDir . '/' . $base;
    if (!is_file($f)) continue;

    // Skip the entry if the mtime cannot be read rather than emitting a
    // bogus 1970 date.
    $mtime = filemtime($f);
    if ($mtime === false) continue;

    $pressPages[] = [
      // Percent-encode the file name so unusual characters keep the URL valid.
      'loc'      => $site . '/press/' . rawurlencode($base),
      'lastmod'  => gmdate('c', $mtime),
      'changefreq' => 'monthly',
      'priority' => '0.5',
    ];
  }
}

// Build the XML
// Core pages are first converted to the same entry shape used by $blogs and
// $pressPages (loc, lastmod, changefreq, priority) so that every url element
// is rendered by a single loop below.
$entries = [];
foreach ($core as $path => $freq) {
  $pageDir = $_SERVER['DOCUMENT_ROOT'] . rtrim($path, '/');

  // Use the mtime of the page's index file (index.php preferred, then
  // index.html). If neither exists, lastmod stays null and the element is
  // omitted: reporting the current time would claim a change on every request.
  $lastmod = null;
  foreach (['/index.php', '/index.html'] as $indexName) {
    $candidate = $pageDir . $indexName;
    if (is_file($candidate)) {
      $ts = filemtime($candidate);
      if ($ts !== false) $lastmod = gmdate('c', $ts);
      break;
    }
  }

  $entries[] = [
    'loc'        => $site . $path,
    'lastmod'    => $lastmod,
    'changefreq' => $freq,
    // The home page gets the highest priority.
    'priority'   => ($path === '/') ? '1.0' : '0.8',
  ];
}

// Output order: core pages, then blogs, then internal press pages.
$entries = array_merge($entries, $blogs, $pressPages);

echo '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<?php foreach ($entries as $entry) { ?>
  <url>
    <loc><?= x($entry['loc']) ?></loc>
<?php if (isset($entry['lastmod'])) { ?>
    <lastmod><?= x($entry['lastmod']) ?></lastmod>
<?php } ?>
    <changefreq><?= x($entry['changefreq']) ?></changefreq>
    <priority><?= x($entry['priority']) ?></priority>
  </url>
<?php } ?>
</urlset>