@@ -5,124 +5,133 @@ import { safeParse } from 'valibot'
55import * as site from '../shared/types/lexicons/site'
66import { BlogPostSchema } from '../shared/schemas/blog'
77import { NPMX_SITE } from '../shared/utils/constants'
8+ import { parseBasicFrontmatter } from '../shared/utils/parse-basic-frontmatter'
9+ import { TID } from '@atproto/common'
10+ import { Client } from '@atproto/lex'
811
/**
 * In-memory record of what has already been pushed by this process: maps a
 * document path to the hash of the validated frontmatter that was last synced.
 * `syncFile` consults it to skip files whose content has not changed.
 */
const syncedDocuments = new Map<string, string>()

/**
 * Fixed clock id handed to `TID.fromTime`. It must never vary, otherwise the
 * same publish timestamp would stop mapping to the same record key.
 */
const CLOCK_ID_THREE = 3

/**
 * Multiplier that turns the milliseconds returned by `Date#getTime()` into the
 * microsecond value passed to `TID.fromTime`.
 */
const DATE_TO_MICROSECONDS = 1000
1015
/**
 * Nuxt module that pushes the frontmatter of every blog markdown file to the
 * PDS configured through `NPMX_PDS_URL`.
 *
 * - On `build:before` every `*.md` file below the blog pages directory is synced,
 *   a few files at a time.
 * - On `builder:watch` a single added/changed markdown file is re-synced;
 *   deletions are only logged.
 *
 * Failures for one file are logged and never abort the build or the watcher.
 */
// TODO: Currently logging quite a lot, can remove some later if we want
export default defineNuxtModule({
  meta: { name: 'standard-site-sync' },
  async setup() {
    const nuxt = useNuxt()
    const { resolve } = createResolver(import.meta.url)
    const contentDir = resolve('../app/pages/blog')

    // Nothing is synced while Nuxt is only preparing, so leave before warning
    // about missing configuration or creating a client.
    if (nuxt.options._prepare) return

    const pdsUrl = process.env.NPMX_PDS_URL
    if (!pdsUrl) {
      console.warn('[standard-site-sync] NPMX_PDS_URL not set, skipping sync')
      return
    }

    // Instantiate a single new client instance that is reused for every file
    // NOTE(review): the client is built from the PDS URL alone and no credentials
    // are supplied here - confirm that writes are authorised.
    const client = new Client(pdsUrl)

    nuxt.hook('build:before', async () => {
      const { globby } = await import('globby')
      const files: string[] = await globby(`${contentDir}/**/*.md`)

      // INFO: Arbitrarily chosen concurrency limit, can be changed if needed
      const concurrencyLimit = 5
      for (let i = 0; i < files.length; i += concurrencyLimit) {
        const batch = files.slice(i, i + concurrencyLimit)
        // Process the files of one batch in parallel. Each promise handles its own
        // rejection so a single bad file cannot reject the whole batch.
        await Promise.all(
          batch.map(file =>
            syncFile(file, NPMX_SITE, client).catch(error =>
              // Pass the error as its own argument so the stack trace is kept
              console.error(`[standard-site-sync] Error in ${file}:`, error),
            ),
          ),
        )
      }
    })

    nuxt.hook('builder:watch', async (event, path) => {
      if (!path.endsWith('.md')) return

      // Ignore deleted files
      if (event === 'unlink') {
        console.log(`[standard-site-sync] File deleted: ${path}`)
        return
      }

      // Process add/change events only
      await syncFile(resolve(nuxt.options.rootDir, path), NPMX_SITE, client).catch(err =>
        console.error(`[standard-site-sync] Failed ${path}:`, err),
      )
    })
  },
})
3670
/**
 * Validates the frontmatter of one blog markdown file and upserts it to the PDS
 * as a `site.standard.document` record.
 *
 * INFO: The record key is a TID derived from the post's date with a fixed clock id,
 * so syncing the same post again targets the same record instead of creating a new one.
 * NOTE(review): the key depends on the date alone, so two posts carrying the exact
 * same date value would resolve to the same record key - confirm this cannot happen.
 *
 * Files that fail schema validation, are drafts, or whose validated frontmatter is
 * unchanged since the last successful sync in this process are skipped.
 *
 * WARN: DOES NOT CATCH ERRORS, THIS MUST BE HANDLED
 *
 * @param filePath - Path of the markdown file to read
 * @param siteUrl - Value stored in the document's `site` field
 * @param client - Client used to write the record
 * @throws RangeError when the validated date cannot be parsed into a timestamp.
 *   Errors from reading the file or writing the record propagate unchanged.
 */
const syncFile = async (filePath: string, siteUrl: string, client: Client): Promise<void> => {
  const fileContent = readFileSync(filePath, 'utf-8')
  const frontmatter = parseBasicFrontmatter(fileContent)

  // Schema expects 'path' & frontmatter provides 'slug'
  const normalizedFrontmatter = {
    ...frontmatter,
    path: typeof frontmatter.slug === 'string' ? `/blog/${frontmatter.slug}` : frontmatter.path,
  }

  const result = safeParse(BlogPostSchema, normalizedFrontmatter)
  if (!result.success) {
    console.warn(`[standard-site-sync] Validation failed for ${filePath}`, result.issues)
    return
  }

  const data = result.output

  // filter drafts
  if (data.draft) {
    if (process.env.DEBUG === 'true') {
      console.debug(`[standard-site-sync] Skipping draft: ${data.path}`)
    }
    return
  }

  // Top-level keys are sorted so the hash does not depend on property order.
  // An array replacer is deliberately avoided: JSON.stringify applies it as a
  // property allowlist at every depth, which would drop nested fields from the hash.
  const sortedEntries = Object.entries(data).sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
  const hash = createHash('sha256')
    .update(JSON.stringify(Object.fromEntries(sortedEntries)))
    .digest('hex')

  if (syncedDocuments.get(data.path) === hash) {
    return
  }

  // Parse the date once; it feeds both `publishedAt` and the record key.
  const publishedDate = new Date(data.date)
  const publishedTime = publishedDate.getTime()
  if (Number.isNaN(publishedTime)) {
    throw new RangeError(`[standard-site-sync] Invalid date in ${filePath}: ${String(data.date)}`)
  }

  const document = site.standard.document.$build({
    site: siteUrl as `${string}:${string}`,
    path: data.path,
    title: data.title,
    description: data.description ?? data.excerpt,
    tags: data.tags,
    // This can be extended to update the site.standard.document .updatedAt if it is changed and use the posts date here
    publishedAt: publishedDate.toISOString(),
  })

  // Clock id(3) needs to be the same every time to get the same TID from a timestamp
  const tid = TID.fromTime(publishedTime * DATE_TO_MICROSECONDS, CLOCK_ID_THREE)

  console.log('[standard-site-sync] Pushing:', JSON.stringify(document, null, 2))

  // client.put is async and needs to be awaited
  await client.put(site.standard.document, document, {
    rkey: tid.str,
  })

  // Remember the hash only after the write succeeded, so a failed push is retried
  syncedDocuments.set(data.path, hash)
}
0 commit comments