Skip to content
4 changes: 4 additions & 0 deletions apps/trustlab/next.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ const nextConfig = {
source: "/robots.txt",
destination: "/api/v1/robots",
},
{
source: "/sitemap.xml",
destination: "/api/v1/sitemap",
},
];
},
};
Expand Down
134 changes: 134 additions & 0 deletions apps/trustlab/src/lib/data/common/sitemap.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import { formatPagePath } from "@commons-ui/payload";

import { site } from "@/trustlab/utils";

/**
 * Normalizes a pathname so it has exactly one leading slash and no trailing
 * slashes.
 *
 * @param {unknown} pathname - Candidate pathname, e.g. "about/" or "/a/b//".
 * @returns {string|null} The normalized pathname ("/" for the site root), or
 *   null when the input is not a string, is empty, or is only whitespace.
 */
function normalizePathname(pathname) {
  if (!pathname || typeof pathname !== "string") {
    return null;
  }

  const trimmed = pathname.trim();
  if (!trimmed) {
    return null;
  }

  const withLeadingSlash = trimmed.startsWith("/") ? trimmed : `/${trimmed}`;
  const withoutTrailingSlashes = withLeadingSlash.replace(/\/+$/, "");

  // A path made only of slashes (e.g. " / " or "//") strips down to "";
  // that is still the site root, so report it as "/" rather than an empty
  // string that callers would treat as "no path".
  return withoutTrailingSlashes || "/";
}

/**
 * Resolves a pathname against the configured site URL.
 *
 * @param {unknown} pathname - Pathname to resolve; normalized first.
 * @returns {string|null} `site.url` (without trailing slashes) followed by the
 *   normalized pathname, or null when the pathname does not normalize to a
 *   non-empty value.
 */
function getAbsoluteUrl(pathname) {
  const path = normalizePathname(pathname);

  // Strip trailing slashes from the base so joining never yields "//".
  return path ? `${site.url.replace(/\/+$/, "")}${path}` : null;
}

/**
 * Derives the last-modified timestamp for a document.
 *
 * Uses `updatedAt` when it is truthy, otherwise `createdAt`.
 *
 * @param {{ updatedAt?: unknown, createdAt?: unknown }|null|undefined} doc
 * @returns {string|null} ISO-8601 timestamp, or null when no date is present
 *   or the chosen value cannot be parsed as a date.
 */
function getLastModified(doc) {
  const timestamp = doc?.updatedAt || doc?.createdAt;

  if (timestamp) {
    const date = new Date(timestamp);
    // An unparseable value yields an Invalid Date whose time is NaN.
    if (!Number.isNaN(date.getTime())) {
      return date.toISOString();
    }
  }

  return null;
}

/**
 * Builds a sitemap entry for a document at the given pathname.
 *
 * @param {object|null|undefined} doc - Source document, used for the
 *   last-modified date.
 * @param {unknown} pathname - Pathname of the document on the site.
 * @returns {{ url: string, lastModified: string|null }|null} The entry, or
 *   null when no absolute URL can be derived from the pathname.
 */
function toSitemapEntry(doc, pathname) {
  const absoluteUrl = getAbsoluteUrl(pathname);

  return absoluteUrl
    ? { url: absoluteUrl, lastModified: getLastModified(doc) }
    : null;
}

/**
 * Computes the normalized site pathname for a "pages" document.
 *
 * @param {object|null|undefined} doc - Page document.
 * @returns {string|null} Normalized pathname, or null when the document is
 *   missing or its formatted path does not normalize to a value.
 */
function getPagePathname(doc) {
  if (!doc) {
    return null;
  }

  const pathname = formatPagePath("pages", doc);
  return normalizePathname(pathname);
}

/**
 * Loads every document in the "pages" collection and converts each one into a
 * sitemap entry.
 *
 * @param {{ getCollection: Function }} api - Client exposing
 *   `getCollection(collection, options)`.
 * @returns {Promise<Array<{ url: string, lastModified: string|null }>>}
 *   Entries for all pages that resolve to a URL.
 */
async function getPagesEntries(api) {
  const { docs } = await api.getCollection("pages", {
    pagination: false,
    // Request only the fields read when building paths and dates.
    select: {
      slug: true,
      parent: true,
      breadcrumbs: true,
      updatedAt: true,
      createdAt: true,
    },
    // Error pages are excluded from the sitemap.
    where: {
      slug: {
        not_in: ["404", "500"],
      },
    },
  });

  return docs
    .map((doc) => toSitemapEntry(doc, getPagePathname(doc)))
    .filter(Boolean);
}

/**
 * Removes entries whose `url` has already been seen, keeping the first
 * occurrence of each URL and preserving the original order.
 *
 * @param {Array<{ url: string }>} entries - Sitemap entries.
 * @returns {Array<{ url: string }>} A new array with unique URLs.
 */
function dedupeEntries(entries) {
  const seen = new Set();

  return entries.filter((entry) => {
    if (seen.has(entry.url)) {
      return false;
    }

    seen.add(entry.url);
    return true;
  });
}

/**
 * Escapes the five XML special characters so a value can be embedded safely
 * in element content or attribute values.
 *
 * @param {unknown} value - Value to escape; coerced with `String()`.
 * @returns {string} The escaped text.
 */
function escapeXml(value) {
  return (
    String(value)
      // Ampersands must be replaced first, otherwise the "&" introduced by
      // the other entities would be escaped a second time.
      .replaceAll("&", "&amp;")
      .replaceAll("<", "&lt;")
      .replaceAll(">", "&gt;")
      .replaceAll('"', "&quot;")
      .replaceAll("'", "&apos;")
  );
}

/**
 * Collects all sitemap entries, deduplicated by URL and sorted by URL.
 *
 * @param {{ getCollection: Function }} api - Client passed through to the
 *   collection loaders.
 * @returns {Promise<Array<{ url: string, lastModified: string|null }>>}
 *   Unique entries ordered by `url`.
 */
async function getSitemapEntries(api) {
  const pages = await getPagesEntries(api);

  // Deduplicate by URL before sorting/serializing: two documents can resolve
  // to the same pathname, and repeated <url> nodes with conflicting lastmod
  // values weaken crawl signals.
  return dedupeEntries(pages).sort((left, right) =>
    left.url.localeCompare(right.url),
  );
}

/**
 * Renders the sitemap as an XML document following the sitemaps.org 0.9
 * `urlset` schema.
 *
 * @param {{ getCollection: Function }} api - Client used to load the entries.
 * @returns {Promise<string>} The complete XML document, newline-terminated.
 */
async function buildSitemapXml(api) {
  const entries = await getSitemapEntries(api);
  const xmlEntries = entries
    .map(({ url, lastModified }) => {
      // <lastmod> is optional; omit the node when no date is known.
      const lastModifiedNode = lastModified
        ? `\n <lastmod>${lastModified}</lastmod>`
        : "";

      return ` <url>\n <loc>${escapeXml(url)}</loc>${lastModifiedNode}\n </url>`;
    })
    .join("\n");

  return `<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n${xmlEntries}\n</urlset>\n`;
}

export default buildSitemapXml;
7 changes: 6 additions & 1 deletion apps/trustlab/src/lib/data/index.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
export { getPageStaticPaths, getPageStaticProps, getRobotsTxt } from "./local";
export {
getPageStaticPaths,
getPageStaticProps,
getRobotsTxt,
getSitemapXml,
} from "./local";

export default undefined;
5 changes: 5 additions & 0 deletions apps/trustlab/src/lib/data/local/index.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { getPageProps, getPagePaths } from "@/trustlab/lib/data/common";
import buildSitemapXml from "@/trustlab/lib/data/common/sitemap";
import api from "@/trustlab/lib/payload";

export async function getPageStaticPaths() {
Expand All @@ -21,4 +22,8 @@ export async function getRobotsTxt() {
return siteSettings?.robotsTxt;
}

/**
 * Produces the sitemap XML by delegating to `buildSitemapXml` with the
 * module's imported `api` client.
 *
 * @returns {Promise<string>} Resolves with whatever `buildSitemapXml` yields
 *   (the serialized sitemap document).
 */
export async function getSitemapXml() {
  return buildSitemapXml(api);
}

export default undefined;
24 changes: 24 additions & 0 deletions apps/trustlab/src/pages/api/v1/sitemap.page.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import * as Sentry from "@sentry/nextjs";

import { getSitemapXml } from "@/trustlab/lib/data";

/**
 * API route that serves the sitemap XML.
 *
 * Only GET is accepted; any other method receives a 405 with an `Allow`
 * header. On success the XML is sent with cache and content-type headers. Any
 * error while building or sending the sitemap is reported to Sentry and
 * answered with a bare 500.
 *
 * @param {{ method: string }} req - Incoming request.
 * @param {object} res - Response object exposing `setHeader`, `status`,
 *   `send` and `end`.
 * @returns {Promise<void>}
 */
export default async function handler(req, res) {
  if (req.method === "GET") {
    try {
      const body = await getSitemapXml();
      res.setHeader(
        "Cache-Control",
        "public, max-age=3600, stale-while-revalidate=86400",
      );
      res.setHeader("Content-Type", "application/xml; charset=utf-8");
      res.send(body);
    } catch (err) {
      Sentry.captureException(err);
      res.status(500).end();
    }
    return;
  }

  // Every non-GET method is rejected, advertising the one allowed method.
  res.setHeader("Allow", "GET");
  res.status(405).end();
}
Loading