7676*/
7777#define DISKANN_BLOCK_SIZE_SHIFT 9
7878
/*
** The two macros below are marked '-' in this diff: the fixed node/edge
** metadata sizes are removed as compile-time constants. Later hunks replace
** every use with nodeMetadataSize() / edgeMetadataSize(), which take the
** index format version as an argument.
*/
79- #define VECTOR_NODE_METADATA_SIZE (sizeof(u64) + sizeof(u16))
80- #define VECTOR_EDGE_METADATA_SIZE (sizeof(u64) + sizeof(u64))
8179
/* Forward typedefs so the struct names can be used before their definitions. */
8280typedef struct VectorPair VectorPair ;
8381typedef struct DiskAnnSearchCtx DiskAnnSearchCtx ;
@@ -300,46 +298,54 @@ void blobSpotFree(BlobSpot *pBlobSpot) {
300298** Layout specific utilities
301299**************************************************************************/
302300
/*
** Diff hunk: the old one-argument nodeEdgeOverhead()/nodeOverhead() (lines
** marked '-') are replaced by four helpers that all take nFormatVersion
** (lines marked '+'):
**
**   nodeMetadataSize(v)     - bytes of per-node metadata:
**                             sizeof(u64) + sizeof(u16)
**   edgeMetadataSize(v)     - bytes of per-edge metadata:
**                             sizeof(u64) + sizeof(u64)
**   nodeEdgeOverhead(v, n)  - bytes one edge occupies: edge vector size n
**                             plus edgeMetadataSize(v)
**   nodeOverhead(v, n)      - bytes the node itself occupies: node vector
**                             size n plus nodeMetadataSize(v)
**
** In the code shown, nodeMetadataSize() and edgeMetadataSize() do not consult
** nFormatVersion; they return the same values the removed
** VECTOR_NODE_METADATA_SIZE / VECTOR_EDGE_METADATA_SIZE macros had.
** NOTE(review): presumably the parameter exists so future format versions can
** use different metadata sizes - confirm with the author.
*/
303- int nodeEdgeOverhead (int nEdgeVectorSize ){
304- return nEdgeVectorSize + VECTOR_EDGE_METADATA_SIZE ;
301+ int nodeMetadataSize (int nFormatVersion ){
302+ return ( sizeof ( u64 ) + sizeof ( u16 )) ;
305303}
306304
307- int nodeOverhead (int nNodeVectorSize ){
308- return nNodeVectorSize + VECTOR_NODE_METADATA_SIZE ;
305+ int edgeMetadataSize (int nFormatVersion ){
306+ return (sizeof (u64 ) + sizeof (u64 ));
307+ }
308+
309+ int nodeEdgeOverhead (int nFormatVersion , int nEdgeVectorSize ){
310+ return nEdgeVectorSize + edgeMetadataSize (nFormatVersion );
311+ }
312+
313+ int nodeOverhead (int nFormatVersion , int nNodeVectorSize ){
314+ return nNodeVectorSize + nodeMetadataSize (nFormatVersion );
309315}
310316
/*
** Return how many edges fit in one node block of pIndex: the bytes left in
** nBlockSize after the node's own overhead (metadata + node vector), divided
** by the per-edge overhead (edge vector + edge metadata).
**
** The change in this hunk only threads pIndex->nFormatVersion through to
** nodeOverhead() and nodeEdgeOverhead().
**
** NOTE(review): the assert only checks nMaxEdges > 0. If nBlockSize were
** smaller than the node overhead, the subtraction would go negative or wrap
** (depending on nBlockSize's type, which is not visible here) and the result
** stored in the unsigned nMaxEdges could still be non-zero. Confirm that
** callers guarantee nBlockSize >= nodeOverhead(...). The assert is also
** compiled out when NDEBUG is defined.
*/
311317int nodeEdgesMaxCount (const DiskAnnIndex * pIndex ){
312- unsigned int nMaxEdges = (pIndex -> nBlockSize - nodeOverhead (pIndex -> nNodeVectorSize )) / nodeEdgeOverhead (pIndex -> nEdgeVectorSize );
318+ unsigned int nMaxEdges = (pIndex -> nBlockSize - nodeOverhead (pIndex -> nFormatVersion , pIndex -> nNodeVectorSize )) / nodeEdgeOverhead (pIndex -> nFormatVersion , pIndex -> nEdgeVectorSize );
313319 assert ( nMaxEdges > 0 );
314320 return nMaxEdges ;
315321}
316322
/*
** Return the byte offset, from the start of a node block, at which the
** per-edge metadata array begins. The offset is computed as
**
**   node metadata + node vector + nodeEdgesMaxCount() * edge vector size
**
** i.e. the metadata area starts after space for the maximum number of edge
** vectors, regardless of how many edges are currently in use.
**
** The change in this hunk replaces VECTOR_NODE_METADATA_SIZE with
** nodeMetadataSize(pIndex->nFormatVersion). The assert checks that the offset
** does not exceed the block size.
*/
317323int nodeEdgesMetadataOffset (const DiskAnnIndex * pIndex ){
318324 unsigned int offset ;
319325 unsigned int nMaxEdges = nodeEdgesMaxCount (pIndex );
320- offset = VECTOR_NODE_METADATA_SIZE + pIndex -> nNodeVectorSize + nMaxEdges * pIndex -> nEdgeVectorSize ;
326+ offset = nodeMetadataSize ( pIndex -> nFormatVersion ) + pIndex -> nNodeVectorSize + nMaxEdges * pIndex -> nEdgeVectorSize ;
321327 assert ( offset <= pIndex -> nBlockSize );
322328 return offset ;
323329}
324330
/*
** Initialize the buffer of pBlobSpot as a fresh node block for row nRowid:
**
**   1. zero the whole buffer (nBufferSize bytes);
**   2. write nRowid as a little-endian u64 at offset 0;
**   3. serialize pVector right after the node metadata, passing
**      pIndex->nNodeVectorSize as the size argument.
**
** The assert checks that the node metadata plus the node vector fit in the
** buffer. The change in this hunk replaces VECTOR_NODE_METADATA_SIZE with
** nodeMetadataSize(pIndex->nFormatVersion) in the assert and in the vector
** offset.
*/
325331void nodeBinInit (const DiskAnnIndex * pIndex , BlobSpot * pBlobSpot , u64 nRowid , Vector * pVector ){
326- assert ( VECTOR_NODE_METADATA_SIZE + pIndex -> nNodeVectorSize <= pBlobSpot -> nBufferSize );
332+ assert ( nodeMetadataSize ( pIndex -> nFormatVersion ) + pIndex -> nNodeVectorSize <= pBlobSpot -> nBufferSize );
327333
328334 memset (pBlobSpot -> pBuffer , 0 , pBlobSpot -> nBufferSize );
329335 writeLE64 (pBlobSpot -> pBuffer , nRowid );
330336 // neighbours count already zero after memset - no need to set it explicitly
331337
332- vectorSerializeToBlob (pVector , pBlobSpot -> pBuffer + VECTOR_NODE_METADATA_SIZE , pIndex -> nNodeVectorSize );
338+ vectorSerializeToBlob (pVector , pBlobSpot -> pBuffer + nodeMetadataSize ( pIndex -> nFormatVersion ) , pIndex -> nNodeVectorSize );
333339}
334340
/*
** Set up pVector to refer to the node vector stored in pBlobSpot's buffer.
** vectorInitStatic() is called with the index's node vector type, its
** dimension count, and a pointer to the bytes that follow the node metadata.
** NOTE(review): presumably vectorInitStatic() creates a non-owning view over
** the buffer rather than copying - confirm against its definition.
**
** The assert checks that the node metadata plus the node vector fit in the
** buffer. The change in this hunk replaces VECTOR_NODE_METADATA_SIZE with
** nodeMetadataSize(pIndex->nFormatVersion).
*/
335341void nodeBinVector (const DiskAnnIndex * pIndex , const BlobSpot * pBlobSpot , Vector * pVector ) {
336- assert ( VECTOR_NODE_METADATA_SIZE + pIndex -> nNodeVectorSize <= pBlobSpot -> nBufferSize );
342+ assert ( nodeMetadataSize ( pIndex -> nFormatVersion ) + pIndex -> nNodeVectorSize <= pBlobSpot -> nBufferSize );
337343
338- vectorInitStatic (pVector , pIndex -> nNodeVectorType , pIndex -> nVectorDims , pBlobSpot -> pBuffer + VECTOR_NODE_METADATA_SIZE );
344+ vectorInitStatic (pVector , pIndex -> nNodeVectorType , pIndex -> nVectorDims , pBlobSpot -> pBuffer + nodeMetadataSize ( pIndex -> nFormatVersion ) );
339345}
340346
/*
** Return the edge (neighbour) count stored in the node block: a little-endian
** u16 read at offset sizeof(u64), i.e. immediately after the rowid that
** nodeBinInit() writes at offset 0.
**
** The assert checks that the buffer is at least as large as the node
** metadata. The change in this hunk replaces VECTOR_NODE_METADATA_SIZE with
** nodeMetadataSize(pIndex->nFormatVersion).
*/
341347u16 nodeBinEdges (const DiskAnnIndex * pIndex , const BlobSpot * pBlobSpot ) {
342- assert ( VECTOR_NODE_METADATA_SIZE <= pBlobSpot -> nBufferSize );
348+ assert ( nodeMetadataSize ( pIndex -> nFormatVersion ) <= pBlobSpot -> nBufferSize );
343349
344350 return readLE16 (pBlobSpot -> pBuffer + sizeof (u64 ));
345351}
@@ -349,20 +355,20 @@ void nodeBinEdge(const DiskAnnIndex *pIndex, const BlobSpot *pBlobSpot, int iEdg
349355 int offset = nodeEdgesMetadataOffset (pIndex );
350356
351357 if ( pRowid != NULL ){
352- assert ( offset + (iEdge + 1 ) * VECTOR_EDGE_METADATA_SIZE <= pBlobSpot -> nBufferSize );
353- * pRowid = readLE64 (pBlobSpot -> pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof (u64 ));
358+ assert ( offset + (iEdge + 1 ) * edgeMetadataSize ( pIndex -> nFormatVersion ) <= pBlobSpot -> nBufferSize );
359+ * pRowid = readLE64 (pBlobSpot -> pBuffer + offset + iEdge * edgeMetadataSize ( pIndex -> nFormatVersion ) + sizeof (u64 ));
354360 }
355361 if ( pIndex -> nFormatVersion != VECTOR_FORMAT_V1 && pDistance != NULL ){
356- distance = readLE32 (pBlobSpot -> pBuffer + offset + iEdge * VECTOR_EDGE_METADATA_SIZE + sizeof (u32 ));
362+ distance = readLE32 (pBlobSpot -> pBuffer + offset + iEdge * edgeMetadataSize ( pIndex -> nFormatVersion ) + sizeof (u32 ));
357363 * pDistance = * ((float * )& distance );
358364 }
359365 if ( pVector != NULL ){
360- assert ( VECTOR_NODE_METADATA_SIZE + pIndex -> nNodeVectorSize + iEdge * pIndex -> nEdgeVectorSize < offset );
366+ assert ( nodeMetadataSize ( pIndex -> nFormatVersion ) + pIndex -> nNodeVectorSize + iEdge * pIndex -> nEdgeVectorSize < offset );
361367 vectorInitStatic (
362368 pVector ,
363369 pIndex -> nEdgeVectorType ,
364370 pIndex -> nVectorDims ,
365- pBlobSpot -> pBuffer + VECTOR_NODE_METADATA_SIZE + pIndex -> nNodeVectorSize + iEdge * pIndex -> nEdgeVectorSize
371+ pBlobSpot -> pBuffer + nodeMetadataSize ( pIndex -> nFormatVersion ) + pIndex -> nNodeVectorSize + iEdge * pIndex -> nEdgeVectorSize
366372 );
367373 }
368374}
@@ -399,11 +405,11 @@ void nodeBinReplaceEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iRe
399405 nEdges ++ ;
400406 }
401407
402- edgeVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex -> nNodeVectorSize + iReplace * pIndex -> nEdgeVectorSize ;
403- edgeMetaOffset = nodeEdgesMetadataOffset (pIndex ) + iReplace * VECTOR_EDGE_METADATA_SIZE ;
408+ edgeVectorOffset = nodeMetadataSize ( pIndex -> nFormatVersion ) + pIndex -> nNodeVectorSize + iReplace * pIndex -> nEdgeVectorSize ;
409+ edgeMetaOffset = nodeEdgesMetadataOffset (pIndex ) + iReplace * edgeMetadataSize ( pIndex -> nFormatVersion ) ;
404410
405411 assert ( edgeVectorOffset + pIndex -> nEdgeVectorSize <= pBlobSpot -> nBufferSize );
406- assert ( edgeMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot -> nBufferSize );
412+ assert ( edgeMetaOffset + edgeMetadataSize ( pIndex -> nFormatVersion ) <= pBlobSpot -> nBufferSize );
407413
408414 vectorSerializeToBlob (pVector , pBlobSpot -> pBuffer + edgeVectorOffset , pIndex -> nEdgeVectorSize );
409415 writeLE32 (pBlobSpot -> pBuffer + edgeMetaOffset + sizeof (u32 ), * ((u32 * )& distance ));
@@ -419,19 +425,19 @@ void nodeBinDeleteEdge(const DiskAnnIndex *pIndex, BlobSpot *pBlobSpot, int iDel
419425
420426 assert ( 0 <= iDelete && iDelete < nEdges );
421427
422- edgeVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex -> nNodeVectorSize + iDelete * pIndex -> nEdgeVectorSize ;
423- lastVectorOffset = VECTOR_NODE_METADATA_SIZE + pIndex -> nNodeVectorSize + (nEdges - 1 ) * pIndex -> nEdgeVectorSize ;
424- edgeMetaOffset = nodeEdgesMetadataOffset (pIndex ) + iDelete * VECTOR_EDGE_METADATA_SIZE ;
425- lastMetaOffset = nodeEdgesMetadataOffset (pIndex ) + (nEdges - 1 ) * VECTOR_EDGE_METADATA_SIZE ;
428+ edgeVectorOffset = nodeMetadataSize ( pIndex -> nFormatVersion ) + pIndex -> nNodeVectorSize + iDelete * pIndex -> nEdgeVectorSize ;
429+ lastVectorOffset = nodeMetadataSize ( pIndex -> nFormatVersion ) + pIndex -> nNodeVectorSize + (nEdges - 1 ) * pIndex -> nEdgeVectorSize ;
430+ edgeMetaOffset = nodeEdgesMetadataOffset (pIndex ) + iDelete * edgeMetadataSize ( pIndex -> nFormatVersion ) ;
431+ lastMetaOffset = nodeEdgesMetadataOffset (pIndex ) + (nEdges - 1 ) * edgeMetadataSize ( pIndex -> nFormatVersion ) ;
426432
427433 assert ( edgeVectorOffset + pIndex -> nEdgeVectorSize <= pBlobSpot -> nBufferSize );
428434 assert ( lastVectorOffset + pIndex -> nEdgeVectorSize <= pBlobSpot -> nBufferSize );
429- assert ( edgeMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot -> nBufferSize );
430- assert ( lastMetaOffset + VECTOR_EDGE_METADATA_SIZE <= pBlobSpot -> nBufferSize );
435+ assert ( edgeMetaOffset + edgeMetadataSize ( pIndex -> nFormatVersion ) <= pBlobSpot -> nBufferSize );
436+ assert ( lastMetaOffset + edgeMetadataSize ( pIndex -> nFormatVersion ) <= pBlobSpot -> nBufferSize );
431437
432438 if ( edgeVectorOffset < lastVectorOffset ){
433439 memmove (pBlobSpot -> pBuffer + edgeVectorOffset , pBlobSpot -> pBuffer + lastVectorOffset , pIndex -> nEdgeVectorSize );
434- memmove (pBlobSpot -> pBuffer + edgeMetaOffset , pBlobSpot -> pBuffer + lastMetaOffset , VECTOR_EDGE_METADATA_SIZE );
440+ memmove (pBlobSpot -> pBuffer + edgeMetaOffset , pBlobSpot -> pBuffer + lastMetaOffset , edgeMetadataSize ( pIndex -> nFormatVersion ) );
435441 }
436442
437443 writeLE16 (pBlobSpot -> pBuffer + sizeof (u64 ), nEdges - 1 );
@@ -517,9 +523,9 @@ int diskAnnCreateIndex(
517523 if ( maxNeighborsParam == 0 ){
518524 // 3 D**(1/2) gives good recall values (90%+)
519525 // we also want to keep disk overhead at moderate level - 50x of the disk size increase is the current upper bound
520- maxNeighborsParam = MIN (3 * ((int )(sqrt (dims )) + 1 ), (50 * nodeOverhead (vectorDataSize (type , dims ))) / nodeEdgeOverhead (vectorDataSize (neighbours , dims )) + 1 );
526+ maxNeighborsParam = MIN (3 * ((int )(sqrt (dims )) + 1 ), (50 * nodeOverhead (VECTOR_FORMAT_DEFAULT , vectorDataSize (type , dims ))) / nodeEdgeOverhead (VECTOR_FORMAT_DEFAULT , vectorDataSize (neighbours , dims )) + 1 );
521527 }
522- blockSizeBytes = nodeOverhead (vectorDataSize (type , dims )) + maxNeighborsParam * (u64 )nodeEdgeOverhead (vectorDataSize (neighbours , dims ));
528+ blockSizeBytes = nodeOverhead (VECTOR_FORMAT_DEFAULT , vectorDataSize (type , dims )) + maxNeighborsParam * (u64 )nodeEdgeOverhead (VECTOR_FORMAT_DEFAULT , vectorDataSize (neighbours , dims ));
523529 if ( blockSizeBytes > DISKANN_MAX_BLOCK_SZ ){
524530 return SQLITE_ERROR ;
525531 }
0 commit comments