@@ -167,6 +167,43 @@ impl ManagedConnectionWalWrapper {
167167 extended_code : 517 , // stale read
168168 } ) ;
169169 }
170+ // If another connection currently holds the slot for a checkpoint, it is better to return immediately.
171+ //
172+ // The reason is that write transactions are upgraded from read transactions in SQLite.
173+ // Because of this, every write transaction needs to hold a SHARED-WAL lock, and if we
174+ // block the write transaction here, we will prevent the checkpoint process from restarting the WAL
175+ // (because it needs to acquire the EXCLUSIVE-WAL lock).
176+ //
177+ // So, the scenario is as follows:
178+ // T0: we have a bunch of SELECT queries which will execute till time T2
179+ // T1: CHECKPOINT process is starting: it holds the CKPT and WRITE locks and attempts to acquire
180+ // EXCLUSIVE-WAL locks one by one in order to check the positions of readers. CHECKPOINT will
181+ // use the busy handler and may not acquire a lock on the first attempt.
182+ // T2: CHECKPOINT process was able to check all WAL reader positions (by acquiring the lock or atomically checking the reader position)
183+ // and started to transfer the WAL to the DB file
184+ // T3: INSERT query starts executing: it started as a read transaction holding a SHARED-WAL lock, but then it needs to
185+ // upgrade to a write transaction through the begin_write_txn call
186+ // T4: CHECKPOINT has transferred all pages from the WAL to the DB file and needs to check whether it can restart the WAL. In order to
187+ // do that, it needs to hold all EXCLUSIVE-WAL locks to make sure that all readers use only the DB file
188+ //
189+ // In the scenario above, if we park the INSERT at time T3, CHECKPOINT will be unable to acquire the EXCLUSIVE-WAL
190+ // locks and so the WAL will not be truncated.
191+ // When the DB is under continuous load with overlapping reads and writes, this problem becomes very noticeable
192+ // as it can defer WAL truncation a lot.
193+ //
194+ // Also, this implementation is more aligned with LibSQL/SQLite behaviour, where sqlite3WalBeginWriteTransaction
195+ // immediately aborts with an SQLITE_BUSY error if it can't acquire the WRITE lock (which CHECKPOINT also takes before starting its work),
196+ // and the busy handler (e.g. retries) for writes is invoked by SQLite at an upper layer of request processing.
197+ match * current {
198+ Some ( Slot {
199+ id,
200+ state : SlotState :: Acquired ( SlotType :: Checkpoint ) ,
201+ ..
202+ } ) if id != self . id => {
203+ return Err ( rusqlite:: ffi:: Error :: new ( rusqlite:: ffi:: SQLITE_BUSY ) ) ;
204+ }
205+ _ => { }
206+ }
170207 if current. as_mut ( ) . map_or ( true , |slot| slot. id != self . id ) && !enqueued {
171208 self . manager
172209 . write_queue
@@ -196,7 +233,7 @@ impl ManagedConnectionWalWrapper {
196233 let since_started = slot. started_at . elapsed ( ) ;
197234 let deadline = slot. started_at + self . manager . txn_timeout_duration ;
198235 match slot. state {
199- SlotState :: Acquired => {
236+ SlotState :: Acquired ( .. ) => {
200237 if since_started >= self . manager . txn_timeout_duration {
201238 let id = slot. id ;
202239 drop ( current) ;
@@ -354,11 +391,17 @@ impl ManagedConnectionWalWrapper {
354391 }
355392}
356393
394+ #[ derive( Copy , Clone , Debug , PartialEq ) ]
395+ enum SlotType { // records why the slot was acquired; carried as the payload of SlotState::Acquired
396+ WriteTxn , // set once the inner WAL's write-transaction begin has succeeded ("transaction acquired")
397+ Checkpoint , // set by the checkpoint path right after acquire() returns; writers from other connections get SQLITE_BUSY while this is held
398+ }
399+
357400#[ derive( Copy , Clone , Debug ) ]
358401enum SlotState {
359402 Notified ,
360403 Acquiring ,
361- Acquired ,
404+ Acquired ( SlotType ) , // slot is held; the payload says whether by a write transaction or by a checkpoint
362405 Failure ,
363406}
364407
@@ -389,7 +432,7 @@ impl WrapWal<InnerWal> for ManagedConnectionWalWrapper {
389432 Ok ( _) => {
390433 tracing:: debug!( "transaction acquired" ) ;
391434 let mut lock = self . manager . current . lock ( ) ;
392- lock. as_mut ( ) . unwrap ( ) . state = SlotState :: Acquired ;
435+ lock. as_mut ( ) . unwrap ( ) . state = SlotState :: Acquired ( SlotType :: WriteTxn ) ;
393436
394437 Ok ( ( ) )
395438 }
@@ -424,7 +467,8 @@ impl WrapWal<InnerWal> for ManagedConnectionWalWrapper {
424467 ) -> libsql_sys:: wal:: Result < ( ) > {
425468 let before = Instant :: now ( ) ;
426469 self . acquire ( ) ?;
427- self . manager . current . lock ( ) . as_mut ( ) . unwrap ( ) . state = SlotState :: Acquired ;
470+ self . manager . current . lock ( ) . as_mut ( ) . unwrap ( ) . state =
471+ SlotState :: Acquired ( SlotType :: Checkpoint ) ;
428472
429473 let mode = if rand:: random :: < f32 > ( ) < 0.1 {
430474 CheckpointMode :: Truncate
@@ -476,7 +520,7 @@ impl WrapWal<InnerWal> for ManagedConnectionWalWrapper {
476520 // if the slot acquire the transaction lock
477521 if let Some ( Slot {
478522 id,
479- state : SlotState :: Acquired ,
523+ state : SlotState :: Acquired ( .. ) ,
480524 ..
481525 } ) = * current
482526 {
0 commit comments