|
import { createHash } from 'node:crypto'
import { readFileSync } from 'node:fs'
import { readFile } from 'node:fs/promises'
import { TID } from '@atproto/common'
import { Client } from '@atproto/lex'
import { defineNuxtModule, useNuxt, createResolver } from 'nuxt/kit'
import { safeParse } from 'valibot'
import { BlogPostSchema } from '../shared/schemas/blog'
import * as site from '../shared/types/lexicons/site'
import { NPMX_SITE } from '../shared/utils/constants'
import { parseBasicFrontmatter } from '../shared/utils/parse-basic-frontmatter'
| 10 | +import { Client } from '@atproto/lex' |
| 11 | + |
// In-memory cache of document path -> content hash. Lives only for the
// duration of this build/dev process, so every file is re-synced once after
// a restart; afterwards unchanged files are skipped.
const syncedDocuments = new Map<string, string>()
// Fixed TID clock id so the same publish timestamp always yields the same
// record key (rkey) — see the comment above syncFile below.
const CLOCK_ID_THREE = 3
// Multiplier applied to Date#getTime() results.
// NOTE(review): the name suggests "date to microseconds", but getTime()
// returns milliseconds, so this is really a ms -> µs conversion — confirm.
const DATE_TO_MICROSECONDS = 1000
| 15 | + |
| 16 | +// TODO: Currently logging quite a lot, can remove some later if we want |
| 17 | +export default defineNuxtModule({ |
| 18 | + meta: { name: 'standard-site-sync' }, |
| 19 | + async setup() { |
| 20 | + const nuxt = useNuxt() |
| 21 | + const { resolve } = createResolver(import.meta.url) |
| 22 | + const contentDir = resolve('../app/pages/blog') |
| 23 | + |
| 24 | + // Authentication with PDS using an app password |
| 25 | + const pdsUrl = process.env.NPMX_PDS_URL |
| 26 | + if (!pdsUrl) { |
| 27 | + console.warn('[standard-site-sync] NPMX_PDS_URL not set, skipping sync') |
| 28 | + return |
| 29 | + } |
| 30 | + // Instantiate a single new client instance that is reused for every file |
| 31 | + const client = new Client(pdsUrl) |
| 32 | + |
| 33 | + if (nuxt.options._prepare) return |
| 34 | + |
| 35 | + nuxt.hook('build:before', async () => { |
| 36 | + const { glob } = await import('tinyglobby') |
| 37 | + const files: string[] = await glob(`${contentDir}/**/*.md`) |
| 38 | + |
| 39 | + // INFO: Arbitrarily chosen concurrency limit, can be changed if needed |
| 40 | + const concurrencyLimit = 5 |
| 41 | + for (let i = 0; i < files.length; i += concurrencyLimit) { |
| 42 | + const batch = files.slice(i, i + concurrencyLimit) |
| 43 | + // Process files in parallel |
| 44 | + await Promise.all( |
| 45 | + batch.map(file => |
| 46 | + syncFile(file, NPMX_SITE, client).catch(error => |
| 47 | + console.error(`[standard-site-sync] Error in ${file}:` + error), |
| 48 | + ), |
| 49 | + ), |
| 50 | + ) |
| 51 | + } |
| 52 | + }) |
| 53 | + |
| 54 | + nuxt.hook('builder:watch', async (event, path) => { |
| 55 | + if (!path.endsWith('.md')) return |
| 56 | + |
| 57 | + // Ignore deleted files |
| 58 | + if (event === 'unlink') { |
| 59 | + console.log(`[standard-site-sync] File deleted: ${path}`) |
| 60 | + return |
| 61 | + } |
| 62 | + |
| 63 | + // Process add/change events only |
| 64 | + await syncFile(resolve(nuxt.options.rootDir, path), NPMX_SITE, client).catch(err => |
| 65 | + console.error(`[standard-site-sync] Failed ${path}:`, err), |
| 66 | + ) |
| 67 | + }) |
| 68 | + }, |
| 69 | +}) |
| 70 | + |
| 71 | +/* |
| 72 | + * INFO: Loads record to atproto and ensures uniqueness by checking the date the article is published |
| 73 | + * publishedAt is an id that does not change |
| 74 | + * Atomicity is enforced with upsert using publishedAt so we always update existing records instead of creating new ones |
| 75 | + * Clock id(3) provides a deterministic ID |
| 76 | + * WARN: DOES NOT CATCH ERRORS, THIS MUST BE HANDLED |
| 77 | + */ |
| 78 | +const syncFile = async (filePath: string, siteUrl: string, client: Client) => { |
| 79 | + const fileContent = readFileSync(filePath, 'utf-8') |
| 80 | + const frontmatter = parseBasicFrontmatter(fileContent) |
| 81 | + |
| 82 | + // Schema expects 'path' & frontmatter provides 'slug' |
| 83 | + const normalizedFrontmatter = { |
| 84 | + ...frontmatter, |
| 85 | + path: typeof frontmatter.slug === 'string' ? `/blog/${frontmatter.slug}` : frontmatter.path, |
| 86 | + } |
| 87 | + |
| 88 | + const result = safeParse(BlogPostSchema, normalizedFrontmatter) |
| 89 | + if (!result.success) { |
| 90 | + console.warn(`[standard-site-sync] Validation failed for ${filePath}`, result.issues) |
| 91 | + return |
| 92 | + } |
| 93 | + |
| 94 | + const data = result.output |
| 95 | + |
| 96 | + // filter drafts |
| 97 | + if (data.draft) { |
| 98 | + if (process.env.DEBUG === 'true') { |
| 99 | + console.debug(`[standard-site-sync] Skipping draft: ${data.path}`) |
| 100 | + } |
| 101 | + return |
| 102 | + } |
| 103 | + |
| 104 | + // Keys are sorted to provide a more stable hash |
| 105 | + const hash = createHash('sha256') |
| 106 | + .update(JSON.stringify(data, Object.keys(data).sort())) |
| 107 | + .digest('hex') |
| 108 | + |
| 109 | + if (syncedDocuments.get(data.path) === hash) { |
| 110 | + return |
| 111 | + } |
| 112 | + |
| 113 | + const document = site.standard.document.$build({ |
| 114 | + site: siteUrl as `${string}:${string}`, |
| 115 | + path: data.path, |
| 116 | + title: data.title, |
| 117 | + description: data.description ?? data.excerpt, |
| 118 | + tags: data.tags, |
| 119 | + // This can be extended to update the site.standard.document .updatedAt if it is changed and use the posts date here |
| 120 | + publishedAt: new Date(data.date).toISOString(), |
| 121 | + }) |
| 122 | + |
| 123 | + const dateInMicroSeconds = new Date(result.output.date).getTime() * DATE_TO_MICROSECONDS |
| 124 | + |
| 125 | + // Clock id(3) needs to be the same everytime to get the same TID from a timestamp |
| 126 | + const tid = TID.fromTime(dateInMicroSeconds, CLOCK_ID_THREE) |
| 127 | + |
| 128 | + // client.put is async and needs to be awaited |
| 129 | + await client.put(site.standard.document, document, { |
| 130 | + rkey: tid.str, |
| 131 | + }) |
| 132 | + |
| 133 | + syncedDocuments.set(data.path, hash) |
| 134 | +} |
0 commit comments