Skip to content
156 changes: 156 additions & 0 deletions apps/trustlab/src/lib/data/common/sitemap.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import * as Sentry from "@sentry/nextjs";

import { site } from "@/trustlab/utils";

/**
 * Normalizes a pathname for use in a sitemap URL.
 *
 * The result always starts with a single leading slash added when missing and
 * has no trailing slashes, except for the root path which is returned as "/".
 *
 * @param {unknown} pathname - Candidate pathname (e.g. "about/", " /blog ").
 * @returns {string|null} The normalized pathname, or `null` when the input is
 *   not a string or is empty/whitespace-only.
 */
function normalizePathname(pathname) {
  if (typeof pathname !== "string") {
    return null;
  }

  const trimmed = pathname.trim();
  if (!trimmed) {
    return null;
  }

  const withLeadingSlash = trimmed.startsWith("/") ? trimmed : `/${trimmed}`;
  // Stripping trailing slashes from a slash-only path (" / ", "//") leaves an
  // empty string; treat that as the root path instead of dropping it.
  return withLeadingSlash.replace(/\/+$/, "") || "/";
}

/**
 * Joins the configured site URL with a normalized pathname.
 *
 * @param {unknown} pathname - Pathname to resolve against `site.url`.
 * @returns {string|null} The absolute URL, or `null` when the pathname cannot
 *   be normalized.
 */
function getAbsoluteUrl(pathname) {
  const path = normalizePathname(pathname);
  if (path) {
    // Drop trailing slashes from the base so the join never yields "//".
    const baseUrl = site.url.replace(/\/+$/, "");
    return `${baseUrl}${path}`;
  }

  return null;
}

/**
 * Derives the last-modified timestamp of a document.
 *
 * Uses `updatedAt` when truthy, otherwise `createdAt`.
 *
 * @param {object|null|undefined} doc - Document carrying the date fields.
 * @returns {string|null} ISO 8601 timestamp, or `null` when no date is present
 *   or the chosen value does not parse to a valid date.
 */
function getLastModified(doc) {
  const timestamp = doc?.updatedAt || doc?.createdAt;
  const date = timestamp ? new Date(timestamp) : null;
  const isValidDate = date !== null && !Number.isNaN(date.getTime());

  return isValidDate ? date.toISOString() : null;
}

/**
 * Converts a document and its pathname into a sitemap entry.
 *
 * @param {object} doc - Document used to derive the last-modified timestamp.
 * @param {unknown} pathname - Pathname the entry should point at.
 * @returns {{ url: string, lastModified: string|null }|null} The entry, or
 *   `null` when no absolute URL can be built from the pathname.
 */
function toSitemapEntry(doc, pathname) {
  const absoluteUrl = getAbsoluteUrl(pathname);

  return absoluteUrl
    ? { url: absoluteUrl, lastModified: getLastModified(doc) }
    : null;
}

/**
 * Logs a Sentry warning for a collection item that has no `pathname`.
 *
 * @param {string} collection - Name of the collection the item belongs to.
 * @param {object|null|undefined} doc - Offending document; only its `slug` is
 *   reported.
 */
function warnOnMissingPathname(collection, doc) {
  const context = { collection, slug: doc?.slug };

  Sentry.logger.warn("Collection item without `pathname` in sitemap", context);
}

/**
 * Collects sitemap entries for the "pages" collection.
 *
 * Only documents whose `_status` equals "published" are requested, and the
 * "404" and "500" slugs are excluded. Documents without a `pathname` are
 * reported via `warnOnMissingPathname` and skipped.
 *
 * @param {object} api - Client exposing `getCollection(name, options)`.
 * @returns {Promise<Array<{ url: string, lastModified: string|null }>>}
 */
async function getPagesEntries(api) {
  const query = {
    pagination: false,
    select: {
      pathname: true,
      slug: true,
      parent: true,
      breadcrumbs: true,
      updatedAt: true,
      createdAt: true,
    },
    where: {
      and: [
        { _status: { equals: "published" } },
        { slug: { not_in: ["404", "500"] } },
      ],
    },
  };
  const { docs } = await api.getCollection("pages", query);

  const entries = [];
  for (const doc of docs) {
    if (doc?.pathname) {
      const entry = toSitemapEntry(doc, doc.pathname);
      if (entry) {
        entries.push(entry);
      }
    } else {
      warnOnMissingPathname("pages", doc);
    }
  }

  return entries;
}

/**
 * Collects sitemap entries for the "opportunities" collection.
 *
 * Documents without a `pathname` are reported via `warnOnMissingPathname`
 * and skipped.
 *
 * @param {object} api - Client exposing `getCollection(name, options)`.
 * @returns {Promise<Array<{ url: string, lastModified: string|null }>>}
 */
async function getOpportunitiesEntries(api) {
  const query = {
    pagination: false,
    select: {
      pathname: true,
      slug: true,
      type: true,
      updatedAt: true,
      createdAt: true,
      date: true,
    },
  };
  const { docs } = await api.getCollection("opportunities", query);

  const entries = [];
  for (const doc of docs) {
    if (doc?.pathname) {
      const entry = toSitemapEntry(doc, doc.pathname);
      if (entry) {
        entries.push(entry);
      }
    } else {
      warnOnMissingPathname("opportunities", doc);
    }
  }

  return entries;
}

/**
 * Gathers sitemap entries from all collections, deduplicated by URL and
 * sorted by URL.
 *
 * Entries from different collections can resolve to the same URL (review
 * feedback on this change notes that slug uniqueness is enforced per
 * collection, not across collections — TODO confirm). Emitting the same
 * location twice, possibly with conflicting `lastModified` values, weakens
 * the sitemap, so only one entry per URL is kept: the one with the most
 * recent `lastModified`, or the first one seen when neither is newer.
 *
 * @param {object} api - Client exposing `getCollection(name, options)`.
 * @returns {Promise<Array<{ url: string, lastModified: string|null }>>}
 */
async function getSitemapEntries(api) {
  const [pages, opportunities] = await Promise.all([
    getPagesEntries(api),
    getOpportunitiesEntries(api),
  ]);

  const entriesByUrl = new Map();
  for (const entry of [...pages, ...opportunities]) {
    const existing = entriesByUrl.get(entry.url);
    // `lastModified` values are `toISOString()` output (fixed-width UTC), so
    // plain string comparison orders them chronologically.
    const isNewer =
      Boolean(entry.lastModified) &&
      (!existing?.lastModified || entry.lastModified > existing.lastModified);

    if (!existing || isNewer) {
      entriesByUrl.set(entry.url, entry);
    }
  }

  return [...entriesByUrl.values()].sort((left, right) =>
    left.url.localeCompare(right.url),
  );
}

/**
 * Escapes the five XML special characters so a value can be embedded safely
 * as element text.
 *
 * @param {unknown} value - Value to escape; coerced to a string.
 * @returns {string} The entity-escaped string.
 */
function escapeXml(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
  };

  return String(value).replace(/[&<>"']/g, (char) => entities[char]);
}

/**
 * Builds the sitemap XML document for the site.
 *
 * Each entry becomes a `<url>` node with a `<loc>` and, when a last-modified
 * timestamp is available, a `<lastmod>`.
 *
 * @param {object} api - Client exposing `getCollection(name, options)`.
 * @returns {Promise<string>} The serialized `<urlset>` document.
 */
async function buildSitemapXml(api) {
  const entries = await getSitemapEntries(api);
  const xmlEntries = entries
    .map(({ url, lastModified }) => {
      const lastModifiedNode = lastModified
        ? `\n <lastmod>${escapeXml(lastModified)}</lastmod>`
        : "";

      // URLs may contain characters such as "&" that must be entity-escaped,
      // otherwise the whole document becomes invalid XML.
      return ` <url>\n <loc>${escapeXml(url)}</loc>${lastModifiedNode}\n </url>`;
    })
    .join("\n");

  return `<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n${xmlEntries}\n</urlset>\n`;
}

export default buildSitemapXml;
7 changes: 6 additions & 1 deletion apps/trustlab/src/lib/data/index.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
export { getPageStaticPaths, getPageStaticProps, getRobotsTxt } from "./local";
export {
getPageStaticPaths,
getPageStaticProps,
getRobotsTxt,
getSitemapXml,
} from "./local";

export default undefined;
5 changes: 5 additions & 0 deletions apps/trustlab/src/lib/data/local/index.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { getPageProps, getPagePaths } from "@/trustlab/lib/data/common";
import buildSitemapXml from "@/trustlab/lib/data/common/sitemap";
import api from "@/trustlab/lib/payload";

export async function getPageStaticPaths() {
Expand All @@ -21,4 +22,8 @@ export async function getRobotsTxt() {
return siteSettings?.robotsTxt;
}

/**
 * Builds the sitemap XML using the imported `api` client.
 *
 * @returns {Promise<string>} Resolves with whatever `buildSitemapXml(api)`
 *   produces (the serialized sitemap document).
 */
export async function getSitemapXml() {
return buildSitemapXml(api);
}

export default undefined;
30 changes: 30 additions & 0 deletions apps/trustlab/src/pages/sitemap.xml.page.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import * as Sentry from "@sentry/nextjs";

import { getSitemapXml } from "@/trustlab/lib/data";

/**
 * Writes the sitemap XML straight to the response.
 *
 * On success the body is sent with XML content type and cache headers. On any
 * failure the error is reported to Sentry and an empty 500 response is sent.
 * Empty props are returned in both cases, since the response has already been
 * ended here.
 *
 * @param {{ res: object }} context - Server-side context; only `res` is used.
 * @returns {Promise<{ props: {} }>}
 */
export async function getServerSideProps({ res }) {
  try {
    const xml = await getSitemapXml();
    const headers = [
      ["Cache-Control", "public, max-age=3600, stale-while-revalidate=86400"],
      ["Content-Type", "application/xml; charset=utf-8"],
    ];
    for (const [name, value] of headers) {
      res.setHeader(name, value);
    }
    res.write(xml);
    res.end();
  } catch (error) {
    Sentry.captureException(error);
    res.statusCode = 500;
    res.end();
  }

  return { props: {} };
}

/**
 * Page component for the sitemap route. It renders nothing: the XML body is
 * written directly to the response in `getServerSideProps`.
 */
export default function SitemapXml() {
return null;
}
Loading