A WordPress migration is a graph problem before it is a code problem. Every URL the old site served is a contract with a crawler, a backlink, an RSS subscriber or an email sent five years ago. The migration succeeds when every URL on the inventory still resolves to the right destination after cutover. It fails when somebody forgets the attachment pages.
The five files below compose a migration that keeps rankings: the REST exporter, the HTML-to-MDX transformer, the R2 media uploader, the Edge redirect map, and the sitemap that announces continuity to Googlebot.
1. The REST API exporter
WordPress exposes content at /wp-json/wp/v2/* with pagination via the page and per_page query parameters. The exporter walks every endpoint and writes one JSON file per content type (posts.json, pages.json, and so on); because each run overwrites those files, it is idempotent on re-run. We export raw HTML for the transformer to consume and metadata fields directly for the frontmatter.
// scripts/migration/export-wordpress.ts
import { writeFile, mkdir } from 'node:fs/promises'
import { join } from 'node:path'
// Base URL of the WordPress REST API, read from the environment.
// NOTE(review): there is no fallback or validation on this line — when
// WP_API_URL is unset the constant is undefined.
const WP_API = process.env.WP_API_URL // e.g. https://old.example.com/wp-json/wp/v2
// Directory the exported JSON files are written into.
const OUT_DIR = './data/wp-export'
// REST collection names the exporter walks, in this order.
const TYPES = ['posts', 'pages', 'categories', 'tags', 'media'] as const
/**
 * Fetches every item of one WordPress REST collection, following pagination.
 *
 * Requests pages of 100 items (with `_embed=1`) until the page number reaches
 * the count reported in the `X-WP-TotalPages` response header; a missing
 * header is treated as a single page.
 *
 * @param type REST collection name appended to the API base URL, e.g. `posts`.
 * @returns All items of the collection, in the order the API returned them.
 * @throws Error when `WP_API_URL` is not configured, when a response is not
 *   2xx, or when a response body is not a JSON array.
 */
async function fetchPaginated(type: string): Promise<unknown[]> {
  // Without this guard the template below would request "undefined/posts?…".
  if (!WP_API) throw new Error('WP_API_URL is not set')

  const items: unknown[] = []
  let page = 1
  // The REST API caps per_page at 100; the X-WP-TotalPages header drives the loop.
  while (true) {
    const res = await fetch(
      `${WP_API}/${type}?per_page=100&page=${page}&_embed=1`,
      { headers: { 'User-Agent': 'adamarant-migration/1.0' } },
    )
    if (!res.ok) throw new Error(`${type} page ${page}: ${res.status}`)

    // A collection endpoint answers with an array; anything else (for example
    // an error object served with a 200) must not be spread into the export.
    const batch: unknown = await res.json()
    if (!Array.isArray(batch)) {
      throw new Error(`${type} page ${page}: expected a JSON array`)
    }
    items.push(...batch)

    const totalPages = Number(res.headers.get('X-WP-TotalPages') ?? '1')
    if (page >= totalPages) break
    page += 1
  }
  return items
}
/**
 * Exports each collection listed in TYPES to `<OUT_DIR>/<type>.json`.
 * Collections are processed one after another; every run overwrites the
 * previous file for that collection.
 */
async function main(): Promise<void> {
  await mkdir(OUT_DIR, { recursive: true })

  for (const collection of TYPES) {
    console.log(`exporting ${collection}…`)

    const records = await fetchPaginated(collection)
    const target = join(OUT_DIR, `${collection}.json`)
    const payload = JSON.stringify(records, null, 2)
    await writeFile(target, payload, 'utf8')

    console.log(` ${records.length} ${collection} written`)
  }
}

// Fire-and-forget entry point; `void` marks the promise as intentionally unawaited.
void main()
2. The HTML-to-MDX transformer
Each post in the export becomes an MDX file with typed frontmatter and a body that is HTML stripped of WordPress noise. Inline styles disappear, Gutenberg block wrappers unwrap, shortcodes become React components (that step is not shown in the listing below), image URLs swap to the R2 path. The transformation is deterministic; running it twice on the same export produces the same output.
// scripts/migration/transform.ts
import { readFile, writeFile, mkdir } from 'node:fs/promises'
import { join } from 'node:path'
import { JSDOM } from 'jsdom'
import { MEDIA_MAP } from './media-map.json' assert { type: 'json' }
/**
 * The fields of an exported WordPress post that the transformer declares.
 * NOTE(review): the names presumably mirror the REST API post object — confirm
 * against the exported JSON.
 */
interface WPPost {
  id: number
  // Used as the frontmatter slug and as the output file name (`<slug>.mdx`).
  slug: string
  // Timestamps are written to frontmatter with a literal "Z" appended.
  date_gmt: string
  modified_gmt: string
  // Rendered title, JSON-quoted into the frontmatter.
  title: { rendered: string }
  // Rendered excerpt; tags are stripped before it becomes the description.
  excerpt: { rendered: string }
  // Rendered HTML body handed to rewriteImages.
  content: { rendered: string }
  // NOTE(review): presumably term ids; not read by the code in this listing.
  categories: number[]
  tags: number[]
  // NOTE(review): presumably a media id; not read by the code in this listing.
  featured_media: number
  // Optional SEO metadata; only `canonical` is read by toFrontmatter.
  yoast_head_json?: { canonical?: string; og_description?: string }
}
/**
 * Cleans one post's rendered HTML for MDX output.
 *
 * - Points each `<img src>` at its new URL from MEDIA_MAP. When the exact URL
 *   is not in the map, the lookup is retried with WordPress' resized-rendition
 *   suffix (`-<width>x<height>` before the extension) removed, so thumbnails
 *   in post bodies resolve to the uploaded original instead of staying on the
 *   old host.
 * - Drops `srcset` / `sizes` from images and every inline `style` attribute.
 * - Unwraps `.wp-block-image` and `.wp-block-paragraph` containers, keeping
 *   their children in place.
 *
 * @param html Rendered post HTML (a fragment, not a full document).
 * @returns The cleaned fragment serialised back to an HTML string.
 */
function rewriteImages(html: string): string {
  // NOTE(review): MEDIA_MAP is a named import from media-map.json — confirm the
  // JSON file really exposes the old-URL → new-URL map under that name.
  const mediaMap = MEDIA_MAP as Record<string, string>
  // Matches e.g. the "-300x200" in ".../photo-300x200.jpg".
  const sizeSuffix = /-\d+x\d+(?=\.[a-z0-9]+$)/i

  const dom = new JSDOM(`<body>${html}</body>`)
  const doc = dom.window.document

  doc.querySelectorAll('img').forEach((img) => {
    const src = img.getAttribute('src') ?? ''
    const newSrc = mediaMap[src] ?? mediaMap[src.replace(sizeSuffix, '')]
    if (newSrc) img.setAttribute('src', newSrc)
    img.removeAttribute('srcset') // R2 + next/image handles this
    img.removeAttribute('sizes')
  })

  doc.querySelectorAll('[style]').forEach((el) => el.removeAttribute('style'))

  doc.querySelectorAll('.wp-block-image, .wp-block-paragraph').forEach((el) => {
    // replaceWith moves the children into the wrapper's position and removes
    // the wrapper in one step; it is a no-op for a detached node, so it cannot
    // spin the way a manual "while (firstChild)" loop would.
    el.replaceWith(...Array.from(el.childNodes))
  })

  return doc.body.innerHTML
}
/**
 * Renders the YAML frontmatter block for one post.
 *
 * The result always ends with the closing `---` followed by a newline, so the
 * caller can append the body directly. (Filtering the line list with `Boolean`
 * would also drop a trailing empty string and glue the closing delimiter to
 * the first line of the body.)
 *
 * @param post Exported WordPress post.
 * @param categorySlug Slug written to the `category` field.
 * @returns Frontmatter text: opening `---`, one field per line, closing
 *   `---`, trailing newline. `canonical` is only emitted when the post's
 *   `yoast_head_json` carries one.
 */
function toFrontmatter(post: WPPost, categorySlug: string): string {
  // The excerpt arrives as HTML; keep only its text for the description.
  const description = post.excerpt.rendered.replace(/<[^>]+>/g, '').trim()
  const canonical = post.yoast_head_json?.canonical

  const lines = [
    '---',
    `title: ${JSON.stringify(post.title.rendered)}`,
    `slug: ${post.slug}`,
    `published_at: ${post.date_gmt}Z`,
    `updated_at: ${post.modified_gmt}Z`,
    `description: ${JSON.stringify(description)}`,
    `category: ${categorySlug}`,
    ...(canonical ? [`canonical: ${canonical}`] : []),
    '---',
  ]

  return `${lines.join('\n')}\n`
}
/**
 * Converts one exported post into `<outDir>/<slug>.mdx`.
 *
 * The file content is the frontmatter from toFrontmatter immediately followed
 * by the post HTML after rewriteImages has cleaned it. The output directory
 * is created on demand.
 *
 * @param post Exported WordPress post.
 * @param categorySlug Slug passed through to the frontmatter.
 * @param outDir Directory that receives the MDX file.
 */
export async function transformPost(
  post: WPPost,
  categorySlug: string,
  outDir: string,
): Promise<void> {
  const cleanedHtml = rewriteImages(post.content.rendered)
  const document = `${toFrontmatter(post, categorySlug)}${cleanedHtml}`

  await mkdir(outDir, { recursive: true })
  const target = join(outDir, `${post.slug}.mdx`)
  await writeFile(target, document, 'utf8')
}
3. The R2 media uploader
Every image in the WordPress upload directory moves to a single R2 bucket under a content-addressed path. The hash makes the URL stable forever; cache invalidation stops being a problem because the URL changes when the bytes change. The uploader emits a map from old URL to new URL that the transformer reads.
// scripts/migration/upload-media.ts
import { createHash } from 'node:crypto'
import { readFile, writeFile } from 'node:fs/promises'
import { S3Client, PutObjectCommand } from '@aws-sdk/client-s3'
// S3-compatible client pointed at this account's r2.cloudflarestorage.com
// endpoint. The account id and both credentials come from the environment;
// the `!` assertions only silence the type checker, so a missing variable is
// not detected on these lines.
const r2 = new S3Client({
  region: 'auto',
  endpoint: `https://${process.env.R2_ACCOUNT_ID}.r2.cloudflarestorage.com`,
  credentials: {
    accessKeyId: process.env.R2_ACCESS_KEY_ID!,
    secretAccessKey: process.env.R2_SECRET_ACCESS_KEY!,
  },
})
/** The two fields of a media record that the uploader reads. */
interface MediaItem {
  // URL the file is downloaded from; also the key of the emitted URL map.
  source_url: string
  // Sent to the bucket as the object's Content-Type.
  mime_type: string
}
/**
 * Downloads one media file and stores it in the bucket under a
 * content-addressed key.
 *
 * The key is `media/<first 16 hex chars of the SHA-256>.<ext>`. The extension
 * is taken from the last path segment of the URL only, so a query string
 * (`photo.jpg?ver=2`) or a dot elsewhere in the URL cannot leak into the key;
 * a file name without an extension yields a key without one.
 *
 * @param item Media record with the source URL and MIME type.
 * @returns Tuple of `[old URL, new CDN URL]`.
 * @throws Error when the download does not return a 2xx status; errors from
 *   the upload itself propagate unchanged.
 */
async function uploadOne(item: MediaItem): Promise<[string, string]> {
  const res = await fetch(item.source_url)
  if (!res.ok) throw new Error(`fetch ${item.source_url}: ${res.status}`)

  const buf = Buffer.from(await res.arrayBuffer())
  const hash = createHash('sha256').update(buf).digest('hex').slice(0, 16)

  // fetch() above already accepted the URL, so parsing it here cannot throw.
  const fileName = new URL(item.source_url).pathname.split('/').pop() ?? ''
  const dotAt = fileName.lastIndexOf('.')
  // dotAt > 0 skips both "no dot" and dot-files such as ".htaccess".
  const ext = dotAt > 0 ? fileName.slice(dotAt + 1).toLowerCase() : ''
  const key = ext ? `media/${hash}.${ext}` : `media/${hash}`

  await r2.send(
    new PutObjectCommand({
      Bucket: process.env.R2_BUCKET!,
      Key: key,
      Body: buf,
      ContentType: item.mime_type,
      // Safe to cache forever: the key changes whenever the bytes change.
      CacheControl: 'public, max-age=31536000, immutable',
    }),
  )

  return [item.source_url, `https://cdn.adamarant.com/${key}`]
}
/**
 * Uploads every media item and writes the old-URL → new-URL map to
 * `./scripts/migration/media-map.json`.
 *
 * Eight workers pull from one shared queue, so at most eight uploads are in
 * flight at a time. The map file is written only after every worker has
 * finished; if any upload rejects, the returned promise rejects and the file
 * is not written.
 *
 * @param items Media records to upload.
 */
export async function uploadAll(items: MediaItem[]): Promise<void> {
  const urlMap: Record<string, string> = {}
  const pending = [...items]

  // Each worker keeps taking the next queued item until the queue is empty.
  const drain = async (): Promise<void> => {
    for (let next = pending.shift(); next !== undefined; next = pending.shift()) {
      const [from, to] = await uploadOne(next)
      urlMap[from] = to
    }
  }

  // Eight concurrent workers: enough to saturate the link without flooding R2.
  await Promise.all(Array.from({ length: 8 }, () => drain()))

  const serialised = JSON.stringify(urlMap, null, 2)
  await writeFile('./scripts/migration/media-map.json', serialised, 'utf8')
}
4. The Edge redirect map
Every URL on the inventory CSV becomes an entry in redirects.json. Next.js consumes it at build time and Vercel serves the redirects from the Edge, before the Node runtime is involved. The redirect latency is in the single-digit milliseconds. Note that Next.js answers a `permanent: true` rule with a 308 rather than a 301; search engines treat both as permanent, so the rankings carry over because Googlebot honours permanent redirects within the same domain reliably.
// next.config.ts
import type { NextConfig } from 'next'
import redirectsFromInventory from './data/redirects.json' assert { type: 'json' }
/** Extra match condition on a redirect, in the shape Next.js accepts under `has`. */
type RedirectCondition =
  | { type: 'header' | 'cookie' | 'query'; key: string; value?: string }
  | { type: 'host'; key?: undefined; value: string }

/** One redirect rule as stored in data/redirects.json. */
interface RedirectEntry {
  source: string
  destination: string
  permanent: true
  // Optional conditions; inventory entries may carry them already.
  has?: RedirectCondition[]
}

// Start of a real query string ("?key="). Path-pattern modifiers such as
// "/blog/:slug?" or "(?:…)" do not match because no "key=" follows the "?".
const QUERY_START = /\?[\w\-.%[\]]+=/

/**
 * Converts one inventory entry into a rule Next.js can match.
 *
 * Next.js matches `source` against the pathname only, so an old URL such as
 * `/?p=1247` would never fire. The query part is therefore moved into `has`
 * conditions of type `query` and `source` is reduced to the path.
 */
function toNextRedirect(entry: RedirectEntry): RedirectEntry {
  const queryAt = entry.source.search(QUERY_START)

  if (queryAt === -1) {
    return {
      source: entry.source,
      destination: entry.destination,
      permanent: true,
      ...(entry.has ? { has: entry.has } : {}),
    }
  }

  const queryConditions: RedirectCondition[] = Array.from(
    new URLSearchParams(entry.source.slice(queryAt + 1)),
    ([key, value]) => ({
      type: 'query' as const,
      key,
      // `value` is interpreted as a pattern, so escape regex metacharacters
      // to keep the match literal.
      value: value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'),
    }),
  )

  return {
    // "/?p=1" has an explicit "/" path; a bare "?p=1" falls back to the root.
    source: entry.source.slice(0, queryAt) || '/',
    destination: entry.destination,
    permanent: true,
    has: [...(entry.has ?? []), ...queryConditions],
  }
}

const config: NextConfig = {
  async redirects(): Promise<RedirectEntry[]> {
    // The inventory CSV is processed offline into redirects.json; CI fails if
    // the count drops below the previous build (rankings are downstream of
    // this contract).
    return (redirectsFromInventory as RedirectEntry[]).map(toNextRedirect)
  },
}
export default config
// data/redirects.json (excerpt)
[
{ "source": "/2023/04/how-we-think-about-design-systems", "destination": "/blog/how-we-think-about-design-systems", "permanent": true },
{ "source": "/category/engineering", "destination": "/blog/category/engineering", "permanent": true },
{ "source": "/?p=1247", "destination": "/blog/multi-tenant-saas-architecture", "permanent": true },
{ "source": "/wp-content/uploads/:path*", "destination": "https://cdn.adamarant.com/media/:path*", "permanent": true },
{ "source": "/author/ricardo", "destination": "/authors/ricardo", "permanent": true },
{ "source": "/feed/", "destination": "/feed.xml", "permanent": true }
]
5. The sitemap, the RSS feed and the cutover monitor
The sitemap is generated rather than hand-maintained; it reads the MDX directory at build time and emits one entry per post with lastmod derived from frontmatter. The RSS feed preserves the GUID per post so existing subscribers do not get a wall of "new" items on cutover. A Playwright monitor hits the top 200 URLs every five minutes for 24 hours after the DNS switch.
// app/sitemap.ts
import type { MetadataRoute } from 'next'
import { getAllPosts } from '@/lib/blog'
/**
 * Builds the sitemap: the home page, the blog index, then one entry per post.
 *
 * The two fixed entries are stamped with the current time; each post entry
 * uses the post's `updated_at` as its last-modified date.
 */
export default async function sitemap(): Promise<MetadataRoute.Sitemap> {
  const posts = await getAllPosts()
  const origin = 'https://adamarant.com'

  const entries: MetadataRoute.Sitemap = [
    { url: origin, lastModified: new Date(), changeFrequency: 'weekly', priority: 1 },
    { url: `${origin}/blog`, lastModified: new Date(), changeFrequency: 'weekly', priority: 0.9 },
  ]

  for (const post of posts) {
    entries.push({
      url: `${origin}/blog/${post.slug}`,
      lastModified: new Date(post.updated_at),
      changeFrequency: 'monthly',
      priority: 0.7,
    })
  }

  return entries
}
// app/feed.xml/route.ts
import { getAllPosts } from '@/lib/blog'
/**
 * Wraps text in a CDATA section. A literal `]]>` inside the text would end
 * the section early and break the document, so it is split across two
 * adjacent sections.
 */
function cdata(text: unknown): string {
  return `<![CDATA[${String(text).split(']]>').join(']]]]><![CDATA[>')}]]>`
}

/**
 * Serves the RSS 2.0 feed built from all posts.
 *
 * Each item carries the post's `wp_guid` as a non-permalink GUID. The GUID is
 * emitted exactly as stored.
 * NOTE(review): it is presumably already XML-safe (WordPress entity-encodes
 * GUIDs) — confirm before adding any escaping, because changing the GUID text
 * would make readers treat old items as new.
 *
 * @returns An XML response with `Content-Type: application/xml; charset=utf-8`.
 */
export async function GET(): Promise<Response> {
  const posts = await getAllPosts()

  const items = posts
    .map((p) =>
      [
        '',
        '<item>',
        `<title>${cdata(p.title)}</title>`,
        `<link>https://adamarant.com/blog/${p.slug}</link>`,
        '<!-- GUID preserved from WordPress export so subscribers see no duplicates -->',
        `<guid isPermaLink="false">${p.wp_guid}</guid>`,
        `<pubDate>${new Date(p.published_at).toUTCString()}</pubDate>`,
        `<description>${cdata(p.description)}</description>`,
        '</item>',
      ].join('\n'),
    )
    .join('\n')

  const xml = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<rss version="2.0">',
    '<channel>',
    '<title>Adamarant</title>',
    '<link>https://adamarant.com</link>',
    '<description>Design and engineering studio</description>',
    '<language>en</language>',
    items,
    '</channel>',
    '</rss>',
  ].join('\n')

  return new Response(xml, {
    headers: { 'Content-Type': 'application/xml; charset=utf-8' },
  })
}
// scripts/monitor/cutover.ts
import { chromium } from 'playwright'
import topUrls from './top-200.json' assert { type: 'json' }
/**
 * Loads one URL in a fresh Chromium instance and reports what came back.
 *
 * The browser is always closed, even when navigation throws. A failed
 * navigation (timeout, DNS error, …) is logged and reported as
 * `{ status: 0, title: '' }` instead of rejecting, so one dead URL cannot
 * abort the whole monitoring run.
 *
 * @param url Absolute URL to open.
 * @returns The URL, the HTTP status of the main response (0 when there was no
 *   response) and the document title.
 */
async function check(url: string): Promise<{ url: string; status: number; title: string }> {
  const browser = await chromium.launch()
  try {
    const page = await browser.newPage()
    const res = await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 15000 })
    const title = await page.title()
    return { url, status: res?.status() ?? 0, title }
  } catch (err: unknown) {
    console.error(`check ${url} failed:`, err)
    return { url, status: 0, title: '' }
  } finally {
    // Without this, every failed navigation would leave a Chromium process behind.
    await browser.close()
  }
}
/**
 * Checks every URL in the top-URL list and exits non-zero when any is unhealthy.
 *
 * URLs are checked by a small pool of workers rather than all at once, since
 * every check launches its own browser. A URL counts as bad when there was no
 * HTTP response (status 0), the status is 400 or above, or the page has no
 * title.
 */
async function main(): Promise<void> {
  const urls = topUrls as string[]
  const maxParallel = 8

  const results: { url: string; status: number; title: string }[] = []
  const pending = [...urls.entries()]

  // Each worker takes the next queued URL until the queue is empty; results
  // are stored by index so the report keeps the input order.
  const workers = Array.from({ length: Math.min(maxParallel, pending.length) }, async () => {
    for (let next = pending.shift(); next !== undefined; next = pending.shift()) {
      const [index, url] = next
      results[index] = await check(url)
    }
  })
  await Promise.all(workers)

  const bad = results.filter((r) => r.status === 0 || r.status >= 400 || !r.title)
  if (bad.length > 0) {
    console.error(`FAIL: ${bad.length} bad URLs`, bad)
    process.exit(1)
  }
  console.log(`OK: ${results.length} URLs healthy`)
}

void main()
6. What this composes
The exporter pulls the WordPress content. The transformer converts it to typed MDX. The uploader rehouses media on R2 under stable URLs. The redirect map serves every old URL at the Edge with a permanent redirect. The sitemap and RSS announce continuity to crawlers and subscribers. The cutover monitor catches the bugs nobody saw in staging.
WordPress stops being the production system. Next.js renders the marketing site in under 200 ms because the database is gone from the request path. The build pipeline replaces the WordPress admin: editorial works in MDX (or a CMS layer on top of MDX); developers see real diffs in pull requests. Search Console stays flat through the cutover because the URL graph survived intact. The migration was a graph problem first; the rendering speed was a side effect of doing it right.