From 7d2ae43163b7acffe91383a3742a2707de18d998 Mon Sep 17 00:00:00 2001 From: fpasquet Date: Thu, 11 Apr 2024 09:46:26 +0200 Subject: [PATCH] fix: disabling bot crawling on staging and refactor sitemap to add dates and images --- public/robots.txt | 3 - src/constants.ts | 1 + src/helpers/assetHelper.ts | 2 +- .../prerenderHelper/generateRobotsTxt.ts | 17 +++++ .../prerenderHelper/generateSitemap.ts | 22 ++++--- .../prerenderHelper/getSitemapEntries.test.ts | 43 ++++++++----- .../prerenderHelper/getSitemapEntries.ts | 63 ++++++++++++------- src/helpers/prerenderHelper/index.ts | 4 ++ .../imageHandler.ts} | 4 +- src/server.ts | 35 ++++++----- 10 files changed, 128 insertions(+), 66 deletions(-) delete mode 100644 public/robots.txt create mode 100644 src/helpers/prerenderHelper/generateRobotsTxt.ts rename src/{middlewares/imageMiddleware.ts => requestHandlers/imageHandler.ts} (95%) diff --git a/public/robots.txt b/public/robots.txt deleted file mode 100644 index e8628b1e7..000000000 --- a/public/robots.txt +++ /dev/null @@ -1,3 +0,0 @@ -User-agent: * -Allow: / -Sitemap: https://blog.eleven-labs.com/sitemap.xml diff --git a/src/constants.ts b/src/constants.ts index 71f312ad9..59afc33f1 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -4,6 +4,7 @@ import { DeviceType, ImageExtensionType, ImageFormatType } from '@/types'; export const IS_SSR = import.meta.env?.SSR ?? false; export const IS_PRERENDER = import.meta.env?.MODE === 'prerender'; export const HOST_URL = getEnv('VITE_HOST_URL') || 'https://blog.eleven-labs.com'; +export const IS_ENV_PRODUCTION = HOST_URL === 'https://blog.eleven-labs.com'; export const BASE_URL = import.meta.env?.BASE_URL || '/'; export const IS_DEBUG = getEnv('VITE_IS_DEBUG') === 'true'; diff --git a/src/helpers/assetHelper.ts b/src/helpers/assetHelper.ts index 8b6283705..1d70b007a 100644 --- a/src/helpers/assetHelper.ts +++ b/src/helpers/assetHelper.ts @@ -44,7 +44,7 @@ export const getCoverPath = ({ extension?: ImageExtensionType; position?: ImagePositionType; }): string => { - const isProd: boolean = process.env.NODE_ENV === 'production'; + const isProd = process.env.NODE_ENV === 'production'; const directoryPath = dirname(path); const filename = basename(path, extname(path)); const imageFormat = SIZES_BY_IMAGE_FORMAT[device][format]; diff --git a/src/helpers/prerenderHelper/generateRobotsTxt.ts b/src/helpers/prerenderHelper/generateRobotsTxt.ts new file mode 100644 index 000000000..e5ee5c92b --- /dev/null +++ b/src/helpers/prerenderHelper/generateRobotsTxt.ts @@ -0,0 +1,17 @@ +import { writeFileSync } from 'node:fs'; +import { resolve } from 'node:path'; + +import { HOST_URL, IS_ENV_PRODUCTION } from '@/constants'; + +export const getRobotsTxt = (): string => { + return ( + IS_ENV_PRODUCTION + ? ['User-agent: *', 'Allow: /', `Sitemap: ${HOST_URL}/sitemap.xml`] + : ['User-agent: *', 'Disallow: /'] + ).join('\n'); +}; + +export const generateRobotsTxt = async (options: { rootDir: string }): Promise => { + const robotsTxt = getRobotsTxt(); + writeFileSync(resolve(options.rootDir, 'robots.txt'), robotsTxt, 'utf8'); +}; diff --git a/src/helpers/prerenderHelper/generateSitemap.ts b/src/helpers/prerenderHelper/generateSitemap.ts index 3732bc949..aeb2e1b2c 100644 --- a/src/helpers/prerenderHelper/generateSitemap.ts +++ b/src/helpers/prerenderHelper/generateSitemap.ts @@ -4,24 +4,32 @@ import * as xml2js from 'xml2js'; import { DEFAULT_LANGUAGE } from '@/constants'; import { generateUrl } from '@/helpers/assetHelper'; +import type { SitemapEntry } from '@/helpers/prerenderHelper/getSitemapEntries'; -export const getSitemap = ( - sitemapEntries: { links: { lang: string; url: string }[]; changefreq?: string; priority?: number }[] -): string => { +export const getSitemap = (sitemapEntries: SitemapEntry[]): string => { const builder = new xml2js.Builder(); return builder.buildObject({ urlset: { $: { xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9', 'xmlns:xhtml': 'http://www.w3.org/1999/xhtml', - 'xmlns:news': 'http://www.google.com/schemas/sitemap-news/0.9', + 'xmlns:image': 'http://www.google.com/schemas/sitemap-image/1.1', }, - url: sitemapEntries.map(({ links, priority, changefreq }) => { + url: sitemapEntries.map(({ links, priority, changeFrequency, lastModified, image }) => { const defaultLink = links.find((link) => link.lang === DEFAULT_LANGUAGE) ?? links[0]; return { loc: generateUrl(defaultLink.url), - ...(changefreq ? { changefreq } : {}), - priority: priority?.toFixed(1) ?? 0.3, + ...(lastModified ? { lastmod: lastModified } : {}), + ...(changeFrequency ? { changefreq: changeFrequency } : {}), + ...(priority ? { priority } : {}), + ...(image + ? { + 'image:image': { + 'image:loc': `${blogUrl}${image.url}`, + ...(image.description ? { 'image:caption': image.description } : {}), + }, + } + : {}), ...(links.length > 1 ? { 'xhtml:link': links.map((link) => ({ diff --git a/src/helpers/prerenderHelper/getSitemapEntries.test.ts b/src/helpers/prerenderHelper/getSitemapEntries.test.ts index e303007e8..9f77137dc 100644 --- a/src/helpers/prerenderHelper/getSitemapEntries.test.ts +++ b/src/helpers/prerenderHelper/getSitemapEntries.test.ts @@ -1,4 +1,4 @@ -import { getSitemapEntries } from './getSitemapEntries'; +import { getSitemapEntries, SitemapEntry } from './getSitemapEntries'; describe('getSitemapEntries', () => { test('should generate sitemap entries correctly', () => { @@ -11,20 +11,37 @@ describe('getSitemapEntries', () => { }; }); vi.mock('@/helpers/markdownContentManagerHelper', () => ({ - getPosts: (): { lang: string; slug: string; categories: string[]; authors: string[] }[] => [ - { lang: 'fr', slug: 'post-1', categories: ['architecture'], authors: ['author-1'] }, - { lang: 'en', slug: 'post-2', categories: ['php'], authors: ['author-1'] }, + getPosts: (): { + lang: string; + slug: string; + categories: string[]; + authors: string[]; + date: string; + cover?: { path: string }; + }[] => [ + { + lang: 'fr', + slug: 'post-1', + categories: ['architecture'], + authors: ['author-1'], + date: '2024-01-01T00:00:00', + cover: { path: '/imgs/post-1/cover.png' }, + }, + { lang: 'en', slug: 'post-2', categories: ['php'], authors: ['author-1'], date: '2024-01-01T00:00:00' }, ], getAuthors: (): { username: string }[] => [{ username: 'author-1' }], })); // Expected result - const expectedSitemapEntries = [ - { priority: 1, links: [{ lang: 'fr', url: '/fr/post-1/' }] }, - { priority: 1, links: [{ lang: 'en', url: '/en/post-2/' }] }, + const expectedSitemapEntries: SitemapEntry[] = [ { - priority: 0.8, - changefreq: 'weekly', + links: [{ lang: 'fr', url: '/fr/post-1/' }], + lastModified: '2024-01-01T00:00:00', + image: { url: '/imgs/post-1/cover-w400-h245-x2.avif' }, + }, + { links: [{ lang: 'en', url: '/en/post-2/' }], lastModified: '2024-01-01T00:00:00' }, + { + changeFrequency: 'weekly', links: [ { lang: 'fr', url: '/' }, { lang: 'fr', url: '/fr/' }, @@ -32,17 +49,15 @@ describe('getSitemapEntries', () => { ], }, { - priority: 0.7, - changefreq: 'weekly', + changeFrequency: 'weekly', links: [ { lang: 'fr', url: '/fr/categories/all/' }, { lang: 'en', url: '/en/categories/all/' }, ], }, - { priority: 0.7, changefreq: 'weekly', links: [{ lang: 'en', url: '/en/categories/php/' }] }, - { priority: 0.7, changefreq: 'weekly', links: [{ lang: 'fr', url: '/fr/categories/architecture/' }] }, + { changeFrequency: 'weekly', links: [{ lang: 'en', url: '/en/categories/php/' }] }, + { changeFrequency: 'weekly', links: [{ lang: 'fr', url: '/fr/categories/architecture/' }] }, { - priority: 0.5, links: [ { lang: 'fr', url: '/fr/authors/author-1/' }, { lang: 'en', url: '/en/authors/author-1/' }, diff --git a/src/helpers/prerenderHelper/getSitemapEntries.ts b/src/helpers/prerenderHelper/getSitemapEntries.ts index e07130057..fd271fb2f 100644 --- a/src/helpers/prerenderHelper/getSitemapEntries.ts +++ b/src/helpers/prerenderHelper/getSitemapEntries.ts @@ -1,59 +1,74 @@ +import { DEVICES, IMAGE_FORMATS, PATHS } from '@/constants'; +import { getCoverPath } from '@/helpers/assetHelper'; import { getAuthors, getPosts } from '@/helpers/markdownContentManagerHelper'; import { getAuthorPageUrls, getCategoryPageUrls, getHomePageUrls, - getPostPageUrls, getTutorialStepPageUrls, } from '@/helpers/prerenderHelper/getUrls'; +import { generatePath } from '@/helpers/routerHelper'; -type Link = { - lang: string; - url: string; -}; - -type SitemapEntry = { - links: Link[]; - changefreq?: string; - priority: number; -}; +export interface SitemapEntry { + links: { + lang: string; + url: string; + }[]; + image?: { + url: string; + description?: string; + }; + lastModified?: string; + changeFrequency?: 'always' | 'hourly' | 'daily' | 'weekly' | 'monthly' | 'yearly' | 'never'; + priority?: number; +} export const getSitemapEntries = (): SitemapEntry[] => { const posts = getPosts(); const authors = getAuthors(); const rootEntry: SitemapEntry = { - priority: 0.8, links: getHomePageUrls(), - changefreq: 'weekly', + changeFrequency: 'weekly', }; const categoryPageUrls = getCategoryPageUrls(posts); const categoryEntries: SitemapEntry[] = categoryPageUrls.map((urls) => ({ - priority: 0.7, links: urls, - changefreq: 'weekly', + changeFrequency: 'weekly', })); const authorPageUrls = getAuthorPageUrls(posts, authors); const authorEntries: SitemapEntry[] = authorPageUrls.map((urls) => ({ - priority: 0.5, links: urls, })); - const postPageUrls = getPostPageUrls(posts); - const postEntries: SitemapEntry[] = postPageUrls.map((urls) => ({ - priority: 1, - links: urls, + const postEntries: SitemapEntry[] = posts.map((post) => ({ + links: [ + { + lang: post.lang, + url: generatePath(PATHS.POST, { lang: post.lang, slug: post.slug }), + }, + ], + lastModified: post.date, + image: post.cover?.path + ? { + url: getCoverPath({ + path: post.cover?.path, + format: IMAGE_FORMATS.HIGHLIGHTED_TUTORIAL_POST_CARD_COVER, + pixelRatio: 2, + device: DEVICES.DESKTOP, + position: post.cover.position, + }), + description: post.cover?.alt, + } + : undefined, })); const tutorialStepUrls = getTutorialStepPageUrls(posts); const tutorialStepEntries: SitemapEntry[] = tutorialStepUrls.map((urls) => ({ - priority: 0.9, links: urls, })); - return [rootEntry, ...categoryEntries, ...authorEntries, ...postEntries, ...tutorialStepEntries].sort( - (a, b) => b?.priority - a?.priority - ); + return [...postEntries, rootEntry, ...categoryEntries, ...authorEntries, ...tutorialStepEntries]; }; diff --git a/src/helpers/prerenderHelper/index.ts b/src/helpers/prerenderHelper/index.ts index 4bf7051db..700d32f61 100644 --- a/src/helpers/prerenderHelper/index.ts +++ b/src/helpers/prerenderHelper/index.ts @@ -1,6 +1,7 @@ import { resolve } from 'node:path'; import { DEFAULT_LANGUAGE, LANGUAGES_AVAILABLE, PATHS } from '@/constants'; +import { generateRobotsTxt } from '@/helpers/prerenderHelper/generateRobotsTxt'; import { generatePath } from '@/helpers/routerHelper'; import { generateFeedFile } from './generateFeedFile'; @@ -41,6 +42,9 @@ export const generateFiles = async (options: { rootDir: string; baseUrl: string rootDir: __dirname, sitemapEntries, }), + generateRobotsTxt({ + rootDir: __dirname, + }), ]); generateFeedFile({ rootDir: __dirname }); diff --git a/src/middlewares/imageMiddleware.ts b/src/requestHandlers/imageHandler.ts similarity index 95% rename from src/middlewares/imageMiddleware.ts rename to src/requestHandlers/imageHandler.ts index b512cfdee..de5f27509 100644 --- a/src/middlewares/imageMiddleware.ts +++ b/src/requestHandlers/imageHandler.ts @@ -1,4 +1,4 @@ -import type { Request, Response } from 'express'; +import type { RequestHandler } from 'express'; import mime from 'mime'; import { existsSync, readFileSync } from 'node:fs'; import { resolve } from 'node:path'; @@ -7,7 +7,7 @@ import Sharp from 'sharp'; import { DEFAULT_EXTENSION_FOR_IMAGES, IMAGE_CONTENT_TYPES } from '@/constants'; import { ImageExtensionType, ImagePositionType } from '@/types'; -export const imageMiddleware = async (req: Request, res: Response): Promise => { +export const imageHandler: RequestHandler = async (req, res) => { try { const imagePath = resolve(process.cwd(), 'public', req.path.slice(1) as string); if (!existsSync(imagePath)) { diff --git a/src/server.ts b/src/server.ts index 2d85debf2..7b9840871 100644 --- a/src/server.ts +++ b/src/server.ts @@ -1,5 +1,5 @@ import chokidar from 'chokidar'; -import express from 'express'; +import express, { RequestHandler } from 'express'; import i18next from 'i18next'; import i18nextHttpMiddleware from 'i18next-http-middleware'; import { cpSync, statSync } from 'node:fs'; @@ -12,13 +12,25 @@ import { i18nResources } from '@/config/i18n/i18nResources'; import { BASE_URL } from '@/constants'; import { writeJsonDataFiles } from '@/helpers/contentHelper'; import { loadDataByMarkdownFilePath } from '@/helpers/markdownContentManagerHelper'; +import { getRobotsTxt } from '@/helpers/prerenderHelper/generateRobotsTxt'; import { getSitemap } from '@/helpers/prerenderHelper/generateSitemap'; import { getSitemapEntries } from '@/helpers/prerenderHelper/getSitemapEntries'; import { createRequestByExpressRequest } from '@/helpers/requestHelper'; -import { imageMiddleware } from '@/middlewares/imageMiddleware'; +import { imageHandler } from '@/requestHandlers/imageHandler'; const isProd: boolean = process.env.NODE_ENV === 'production'; +const robotsTxtHandler: RequestHandler = (_, res) => { + const robotsTxt = getRobotsTxt(); + res.status(200).set({ 'Content-Type': 'text/plain' }).end(robotsTxt); +}; + +const sitemapHandler: RequestHandler = (_, res) => { + const sitemapEntries = getSitemapEntries(); + const sitemap = getSitemap(sitemapEntries); + res.status(200).set({ 'Content-Type': 'text/xml' }).end(sitemap); +}; + const createServer = async (): Promise => { i18next.use(i18nextHttpMiddleware.LanguageDetector).init({ ...i18nConfig, @@ -41,13 +53,10 @@ const createServer = async (): Promise => { dirname: __dirname, }); + app.get(/\/imgs\//, imageHandler); app.use(BASE_URL, serveStatic(__dirname, { index: false })); - - app.get('/sitemap.xml', (_, res) => { - const sitemapEntries = getSitemapEntries(); - const sitemap = getSitemap(sitemapEntries); - res.status(200).set({ 'Content-Type': 'text/xml' }).end(sitemap); - }); + app.get('/robots.txt', robotsTxtHandler); + app.get('/sitemap.xml', sitemapHandler); app.use('*', async (req, res, next) => { try { @@ -93,14 +102,10 @@ const createServer = async (): Promise => { }); }); - app.get(/\/imgs\//, imageMiddleware); + app.get(/\/imgs\//, imageHandler); app.use(vite.middlewares); - - app.get('/sitemap.xml', (_, res) => { - const sitemapEntries = getSitemapEntries(); - const sitemap = getSitemap(sitemapEntries); - res.status(200).set({ 'Content-Type': 'text/xml' }).end(sitemap); - }); + app.get('/robots.txt', robotsTxtHandler); + app.get('/sitemap.xml', sitemapHandler); app.use('*', async (req, res, next) => { const url = req.originalUrl;