Skip to content

Commit 83d029d

Browse files
committed
Clean up the code a bit to simplify working with vectors of different types
1 parent 2c40df2 commit 83d029d

5 files changed

Lines changed: 76 additions & 132 deletions

File tree

libsql-sqlite3/src/vector.c

Lines changed: 51 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -252,11 +252,29 @@ int vectorParseSqliteBlob(
252252
Vector *pVector,
253253
char **pzErrMsg
254254
){
255+
const unsigned char *pBlob;
256+
size_t nBlobSize;
257+
258+
assert( sqlite3_value_type(arg) == SQLITE_BLOB );
259+
260+
pBlob = sqlite3_value_blob(arg);
261+
nBlobSize = sqlite3_value_bytes(arg);
262+
if( nBlobSize % 2 == 1 ){
263+
nBlobSize--;
264+
}
265+
266+
if( nBlobSize < vectorDataSize(pVector->type, pVector->dims) ){
267+
*pzErrMsg = sqlite3_mprintf("invalid vector: not enough bytes: type=%d, dims=%d, size=%ull", pVector->type, pVector->dims, nBlobSize);
268+
return SQLITE_ERROR;
269+
}
270+
255271
switch (pVector->type) {
256272
case VECTOR_TYPE_FLOAT32:
257-
return vectorF32ParseSqliteBlob(arg, pVector, pzErrMsg);
273+
vectorF32DeserializeFromBlob(pVector, pBlob, nBlobSize);
274+
return 0;
258275
case VECTOR_TYPE_FLOAT64:
259-
return vectorF64ParseSqliteBlob(arg, pVector, pzErrMsg);
276+
vectorF64DeserializeFromBlob(pVector, pBlob, nBlobSize);
277+
return 0;
260278
default:
261279
assert(0);
262280
}
@@ -384,20 +402,47 @@ void vectorMarshalToText(
384402
}
385403
}
386404

387-
void vectorSerialize(
405+
void vectorSerializeWithType(
388406
sqlite3_context *context,
389407
const Vector *pVector
390408
){
409+
unsigned char *pBlob;
410+
size_t nBlobSize, nDataSize;
411+
412+
assert( pVector->dims <= MAX_VECTOR_SZ );
413+
414+
nDataSize = vectorDataSize(pVector->type, pVector->dims);
415+
nBlobSize = nDataSize;
416+
if( pVector->type != VECTOR_TYPE_FLOAT32 ){
417+
nBlobSize += (nBlobSize % 2 == 0 ? 1 : 2);
418+
}
419+
420+
if( nBlobSize == 0 ){
421+
sqlite3_result_zeroblob(context, 0);
422+
return;
423+
}
424+
425+
pBlob = sqlite3_malloc64(nBlobSize);
426+
if( pBlob == NULL ){
427+
sqlite3_result_error_nomem(context);
428+
return;
429+
}
430+
431+
if( pVector->type != VECTOR_TYPE_FLOAT32 ){
432+
pBlob[nBlobSize - 1] = pVector->type;
433+
}
434+
391435
switch (pVector->type) {
392436
case VECTOR_TYPE_FLOAT32:
393-
vectorF32Serialize(context, pVector);
437+
vectorF32SerializeToBlob(pVector, pBlob, nDataSize);
394438
break;
395439
case VECTOR_TYPE_FLOAT64:
396-
vectorF64Serialize(context, pVector);
440+
vectorF64SerializeToBlob(pVector, pBlob, nDataSize);
397441
break;
398442
default:
399443
assert(0);
400444
}
445+
sqlite3_result_blob(context, (char*)pBlob, nBlobSize, sqlite3_free);
401446
}
402447

403448
size_t vectorSerializeToBlob(const Vector *pVector, unsigned char *pBlob, size_t nBlobSize){
@@ -412,18 +457,6 @@ size_t vectorSerializeToBlob(const Vector *pVector, unsigned char *pBlob, size_t
412457
return 0;
413458
}
414459

415-
size_t vectorDeserializeFromBlob(Vector *pVector, const unsigned char *pBlob, size_t nBlobSize){
416-
switch (pVector->type) {
417-
case VECTOR_TYPE_FLOAT32:
418-
return vectorF32DeserializeFromBlob(pVector, pBlob, nBlobSize);
419-
case VECTOR_TYPE_FLOAT64:
420-
return vectorF64DeserializeFromBlob(pVector, pBlob, nBlobSize);
421-
default:
422-
assert(0);
423-
}
424-
return 0;
425-
}
426-
427460
void vectorInitFromBlob(Vector *pVector, const unsigned char *pBlob, size_t nBlobSize){
428461
switch (pVector->type) {
429462
case VECTOR_TYPE_FLOAT32:
@@ -470,7 +503,7 @@ static void vectorFuncHintedType(
470503
sqlite3_free(pzErrMsg);
471504
goto out_free_vec;
472505
}
473-
vectorSerialize(context, pVector);
506+
vectorSerializeWithType(context, pVector);
474507
out_free_vec:
475508
vectorFree(pVector);
476509
}

libsql-sqlite3/src/vectorInt.h

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,6 @@ size_t vectorSerializeToBlob (const Vector *, unsigned char *, size_t);
6565
size_t vectorF32SerializeToBlob(const Vector *, unsigned char *, size_t);
6666
size_t vectorF64SerializeToBlob(const Vector *, unsigned char *, size_t);
6767

68-
/*
69-
* Deserializes vector from the blob in little-endian format according to the IEEE-754 standard
70-
*/
71-
size_t vectorDeserializeFromBlob (Vector *, const unsigned char *, size_t);
72-
size_t vectorF32DeserializeFromBlob(Vector *, const unsigned char *, size_t);
73-
size_t vectorF64DeserializeFromBlob(Vector *, const unsigned char *, size_t);
74-
7568
/*
7669
* Calculates cosine distance between two vectors (the vectors must have the same type and the same dimensions)
7770
*/
@@ -91,16 +84,15 @@ double vectorF64DistanceL2(const Vector *, const Vector *);
9184
* LibSQL can append one trailing byte at the end of the final blob. This byte is later used to determine the type of the blob
9285
* By default, a blob with even length is treated as an f32 blob
9386
*/
94-
void vectorSerialize (sqlite3_context *, const Vector *);
95-
void vectorF32Serialize(sqlite3_context *, const Vector *);
96-
void vectorF64Serialize(sqlite3_context *, const Vector *);
87+
void vectorSerializeWithType(sqlite3_context *, const Vector *);
9788

9889
/*
9990
* Parses Vector content from the blob; vector type and dimensions must be filled already
10091
*/
10192
int vectorParseSqliteBlob (sqlite3_value *, Vector *, char **);
102-
int vectorF32ParseSqliteBlob(sqlite3_value *, Vector *, char **);
103-
int vectorF64ParseSqliteBlob(sqlite3_value *, Vector *, char **);
93+
94+
void vectorF32DeserializeFromBlob(Vector *, const unsigned char *, size_t);
95+
void vectorF64DeserializeFromBlob(Vector *, const unsigned char *, size_t);
10496

10597
void vectorInitStatic(Vector *, VectorType, const unsigned char *, size_t);
10698
void vectorInitFromBlob(Vector *, const unsigned char *, size_t);

libsql-sqlite3/src/vectordiskann.c

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
** diskAnnInsert() Insert single new(!) vector in an opened index
4848
** diskAnnDelete() Delete row by key from an opened index
4949
*/
50+
#include "vectorInt.h"
5051
#ifndef SQLITE_OMIT_VECTOR
5152

5253
#include "math.h"
@@ -1490,6 +1491,7 @@ int diskAnnOpenIndex(
14901491
){
14911492
DiskAnnIndex *pIndex;
14921493
u64 nBlockSize;
1494+
int compressNeighbours;
14931495
pIndex = sqlite3DbMallocRaw(db, sizeof(DiskAnnIndex));
14941496
if( pIndex == NULL ){
14951497
return SQLITE_NOMEM;
@@ -1536,9 +1538,17 @@ int diskAnnOpenIndex(
15361538
pIndex->searchL = VECTOR_SEARCH_L_DEFAULT;
15371539
}
15381540
pIndex->nNodeVectorSize = vectorDataSize(pIndex->nNodeVectorType, pIndex->nVectorDims);
1539-
// will change in future when we will support compression of edges vectors
1540-
pIndex->nEdgeVectorType = pIndex->nNodeVectorType;
1541-
pIndex->nEdgeVectorSize = pIndex->nNodeVectorSize;
1541+
1542+
compressNeighbours = vectorIdxParamsGetU64(pParams, VECTOR_COMPRESS_NEIGHBORS_PARAM_ID);
1543+
if( compressNeighbours == 0 ){
1544+
pIndex->nEdgeVectorType = pIndex->nNodeVectorType;
1545+
pIndex->nEdgeVectorSize = pIndex->nNodeVectorSize;
1546+
}else if( compressNeighbours == VECTOR_TYPE_1BIT ){
1547+
pIndex->nEdgeVectorType = VECTOR_TYPE_1BIT;
1548+
pIndex->nEdgeVectorSize = vectorDataSize(VECTOR_TYPE_1BIT, pIndex->nVectorDims);
1549+
}else{
1550+
return SQLITE_ERROR;
1551+
}
15421552

15431553
*ppIndex = pIndex;
15441554
return SQLITE_OK;

libsql-sqlite3/src/vectorfloat32.c

Lines changed: 4 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -94,26 +94,6 @@ size_t vectorF32SerializeToBlob(
9494
return sizeof(float) * pVector->dims;
9595
}
9696

97-
size_t vectorF32DeserializeFromBlob(
98-
Vector *pVector,
99-
const unsigned char *pBlob,
100-
size_t nBlobSize
101-
){
102-
float *elems = pVector->data;
103-
unsigned i;
104-
pVector->type = VECTOR_TYPE_FLOAT32;
105-
pVector->dims = nBlobSize / sizeof(float);
106-
107-
assert( pVector->dims <= MAX_VECTOR_SZ );
108-
assert( nBlobSize % 2 == 0 || pBlob[nBlobSize - 1] == VECTOR_TYPE_FLOAT32 );
109-
110-
for(i = 0; i < pVector->dims; i++){
111-
elems[i] = deserializeF32(pBlob);
112-
pBlob += sizeof(float);
113-
}
114-
return vectorDataSize(pVector->type, pVector->dims);
115-
}
116-
11797
void vectorF32Serialize(
11898
sqlite3_context *context,
11999
const Vector *pVector
@@ -220,32 +200,22 @@ void vectorF32InitFromBlob(Vector *pVector, const unsigned char *pBlob, size_t n
220200
pVector->data = (void*)pBlob;
221201
}
222202

223-
int vectorF32ParseSqliteBlob(
224-
sqlite3_value *arg,
203+
void vectorF32DeserializeFromBlob(
225204
Vector *pVector,
226-
char **pzErr
205+
const unsigned char *pBlob,
206+
size_t nBlobSize
227207
){
228-
const unsigned char *pBlob;
229208
float *elems = pVector->data;
230209
unsigned i;
231210

232211
assert( pVector->type == VECTOR_TYPE_FLOAT32 );
233212
assert( 0 <= pVector->dims && pVector->dims <= MAX_VECTOR_SZ );
234-
assert( sqlite3_value_type(arg) == SQLITE_BLOB );
235-
236-
pBlob = sqlite3_value_blob(arg);
237-
if( sqlite3_value_bytes(arg) < sizeof(float) * pVector->dims ){
238-
*pzErr = sqlite3_mprintf("invalid f32 vector: not enough bytes for all dimensions");
239-
goto error;
240-
}
213+
assert( nBlobSize >= pVector->dims * sizeof(float) );
241214

242215
for(i = 0; i < pVector->dims; i++){
243216
elems[i] = deserializeF32(pBlob);
244217
pBlob += sizeof(float);
245218
}
246-
return 0;
247-
error:
248-
return -1;
249219
}
250220

251221
#endif /* !defined(SQLITE_OMIT_VECTOR) */

libsql-sqlite3/src/vectorfloat64.c

Lines changed: 4 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -98,57 +98,6 @@ size_t vectorF64SerializeToBlob(
9898
return sizeof(double) * pVector->dims;
9999
}
100100

101-
size_t vectorF64DeserializeFromBlob(
102-
Vector *pVector,
103-
const unsigned char *pBlob,
104-
size_t nBlobSize
105-
){
106-
double *elems = pVector->data;
107-
unsigned i;
108-
pVector->type = VECTOR_TYPE_FLOAT64;
109-
pVector->dims = nBlobSize / sizeof(double);
110-
111-
assert( pVector->dims <= MAX_VECTOR_SZ );
112-
assert( nBlobSize % 2 == 1 && pBlob[nBlobSize - 1] == VECTOR_TYPE_FLOAT64 );
113-
114-
for(i = 0; i < pVector->dims; i++){
115-
elems[i] = deserializeF64(pBlob);
116-
pBlob += sizeof(double);
117-
}
118-
return vectorDataSize(pVector->type, pVector->dims);
119-
}
120-
121-
void vectorF64Serialize(
122-
sqlite3_context *context,
123-
const Vector *pVector
124-
){
125-
double *elems = pVector->data;
126-
unsigned char *pBlob;
127-
size_t nBlobSize;
128-
129-
assert( pVector->type == VECTOR_TYPE_FLOAT64 );
130-
assert( pVector->dims <= MAX_VECTOR_SZ );
131-
132-
// allocate one extra trailing byte with vector blob type metadata
133-
nBlobSize = vectorDataSize(pVector->type, pVector->dims) + 1;
134-
135-
if( nBlobSize == 0 ){
136-
sqlite3_result_zeroblob(context, 0);
137-
return;
138-
}
139-
140-
pBlob = sqlite3_malloc64(nBlobSize);
141-
if( pBlob == NULL ){
142-
sqlite3_result_error_nomem(context);
143-
return;
144-
}
145-
146-
vectorF64SerializeToBlob(pVector, pBlob, nBlobSize - 1);
147-
pBlob[nBlobSize - 1] = VECTOR_TYPE_FLOAT64;
148-
149-
sqlite3_result_blob(context, (char*)pBlob, nBlobSize, sqlite3_free);
150-
}
151-
152101
#define SINGLE_DOUBLE_CHAR_LIMIT 32
153102
void vectorF64MarshalToText(
154103
sqlite3_context *context,
@@ -227,32 +176,22 @@ void vectorF64InitFromBlob(Vector *pVector, const unsigned char *pBlob, size_t n
227176
pVector->data = (void*)pBlob;
228177
}
229178

230-
int vectorF64ParseSqliteBlob(
231-
sqlite3_value *arg,
179+
void vectorF64DeserializeFromBlob(
232180
Vector *pVector,
233-
char **pzErr
181+
const unsigned char *pBlob,
182+
size_t nBlobSize
234183
){
235-
const unsigned char *pBlob;
236184
double *elems = pVector->data;
237185
unsigned i;
238186

239187
assert( pVector->type == VECTOR_TYPE_FLOAT64 );
240188
assert( 0 <= pVector->dims && pVector->dims <= MAX_VECTOR_SZ );
241-
assert( sqlite3_value_type(arg) == SQLITE_BLOB );
242-
243-
pBlob = sqlite3_value_blob(arg);
244-
if( sqlite3_value_bytes(arg) < sizeof(double) * pVector->dims ){
245-
*pzErr = sqlite3_mprintf("invalid f64 vector: not enough bytes for all dimensions");
246-
goto error;
247-
}
189+
assert( nBlobSize >= pVector->dims * sizeof(double) );
248190

249191
for(i = 0; i < pVector->dims; i++){
250192
elems[i] = deserializeF64(pBlob);
251193
pBlob += sizeof(double);
252194
}
253-
return 0;
254-
error:
255-
return -1;
256195
}
257196

258197
#endif /* !defined(SQLITE_OMIT_VECTOR) */

0 commit comments

Comments
 (0)